gcc/config/i386/i386.c

   1 /* Subroutines used for code generation on IA-32.
   2    Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   3    2002, 2003, 2004, 2005, 2006, 2007, 2008
   4    Free Software Foundation, Inc.
   5
   6 This file is part of GCC.
   7
   8 GCC is free software; you can redistribute it and/or modify
   9 it under the terms of the GNU General Public License as published by
  10 the Free Software Foundation; either version 3, or (at your option)
  11 any later version.
  12
  13 GCC is distributed in the hope that it will be useful,
  14 but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 GNU General Public License for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GCC; see the file COPYING3.  If not see
  20 <http://www.gnu.org/licenses/>.  */
  21
  22 #include "config.h"
  23 #include "system.h"
  24 #include "coretypes.h"
  25 #include "tm.h"
  26 #include "rtl.h"
  27 #include "tree.h"
  28 #include "tm_p.h"
  29 #include "regs.h"
  30 #include "hard-reg-set.h"
  31 #include "real.h"
  32 #include "insn-config.h"
  33 #include "conditions.h"
  34 #include "output.h"
  35 #include "insn-codes.h"
  36 #include "insn-attr.h"
  37 #include "flags.h"
  38 #include "except.h"
  39 #include "function.h"
  40 #include "recog.h"
  41 #include "expr.h"
  42 #include "optabs.h"
  43 #include "toplev.h"
  44 #include "basic-block.h"
  45 #include "ggc.h"
  46 #include "target.h"
  47 #include "target-def.h"
  48 #include "langhooks.h"
  49 #include "cgraph.h"
  50 #include "tree-gimple.h"
  51 #include "dwarf2.h"
  52 #include "df.h"
  53 #include "tm-constrs.h"
  54 #include "params.h"
  55
  56 static int x86_builtin_vectorization_cost (bool);   /* Forward declaration of a file-local function taking a bool and returning int.  */
  57
  58 #ifndef CHECK_STACK_LIMIT   /* Only supply a fallback if nothing earlier defined it.  */
  59 #define CHECK_STACK_LIMIT (-1)   /* NOTE(review): -1 presumably means "no limit" -- confirm at the use sites.  */
  60 #endif /* CHECK_STACK_LIMIT */
  61
  62 /* Return index of given mode in mult and division cost tables:
        QImode -> 0, HImode -> 1, SImode -> 2, DImode -> 3, and any other
        mode -> 4 (the "other" slot of the five-entry cost arrays).  The
        argument may be evaluated up to four times, so it must not have
        side effects.  */
  63 #define MODE_INDEX(mode)                                        \
  64   ((mode) == QImode ? 0                                         \
  65    : (mode) == HImode ? 1                                       \
  66    : (mode) == SImode ? 2                                       \
  67    : (mode) == DImode ? 3                                       \
  68    : 4)
  69
  70 /* Processor costs (relative to an add) */
  71 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.
        Under that assumption COSTS_N_BYTES (2), the size of one add, equals
        COSTS_N_INSNS (1), so byte-based and insn-based costs share a scale.  */
  72 #define COSTS_N_BYTES(N) ((N) * 2)
  73
  74 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}   /* Placeholder string-op entry that names only libcall; NOTE(review): -1 presumably marks the final, unbounded size entry -- confirm.  */
  75
  76 static const
  77 struct processor_costs size_cost = {    /* Costs for tuning for size.  The
                                            instruction costs in this table are
                                            expressed with COSTS_N_BYTES, i.e. as
                                            instruction sizes in bytes.  */
  78   COSTS_N_BYTES (2),                    /* cost of an add instruction */
  79   COSTS_N_BYTES (3),                    /* cost of a lea instruction */
  80   COSTS_N_BYTES (2),                    /* variable shift costs */
  81   COSTS_N_BYTES (3),                    /* constant shift costs */
  82   {COSTS_N_BYTES (3),                   /* cost of starting multiply for QI */
  83    COSTS_N_BYTES (3),                   /*                               HI */
  84    COSTS_N_BYTES (3),                   /*                               SI */
  85    COSTS_N_BYTES (3),                   /*                               DI */
  86    COSTS_N_BYTES (5)},                  /*                            other */
  87   0,                                    /* cost of multiply per each bit set */
  88   {COSTS_N_BYTES (3),                   /* cost of a divide/mod for QI */
  89    COSTS_N_BYTES (3),                   /*                          HI */
  90    COSTS_N_BYTES (3),                   /*                          SI */
  91    COSTS_N_BYTES (3),                   /*                          DI */
  92    COSTS_N_BYTES (5)},                  /*                       other */
  93   COSTS_N_BYTES (3),                    /* cost of movsx */
  94   COSTS_N_BYTES (3),                    /* cost of movzx */
  95   0,                                    /* "large" insn */
  96   2,                                    /* MOVE_RATIO */
  97   2,                                    /* cost for loading QImode using movzbl */
  98   {2, 2, 2},                            /* cost of loading integer registers
  99                                            in QImode, HImode and SImode.
 100                                            Relative to reg-reg move (2).  */
 101   {2, 2, 2},                            /* cost of storing integer registers */
 102   2,                                    /* cost of reg,reg fld/fst */
 103   {2, 2, 2},                            /* cost of loading fp registers
 104                                            in SFmode, DFmode and XFmode */
 105   {2, 2, 2},                            /* cost of storing fp registers
 106                                            in SFmode, DFmode and XFmode */
 107   3,                                    /* cost of moving MMX register */
 108   {3, 3},                               /* cost of loading MMX registers
 109                                            in SImode and DImode */
 110   {3, 3},                               /* cost of storing MMX registers
 111                                            in SImode and DImode */
 112   3,                                    /* cost of moving SSE register */
 113   {3, 3, 3},                            /* cost of loading SSE registers
 114                                            in SImode, DImode and TImode */
 115   {3, 3, 3},                            /* cost of storing SSE registers
 116                                            in SImode, DImode and TImode */
 117   3,                                    /* MMX or SSE register to integer */
 118   0,                                    /* size of l1 cache  */
 119   0,                                    /* size of l2 cache  */
 120   0,                                    /* size of prefetch block */
 121   0,                                    /* number of parallel prefetches */
 122   2,                                    /* Branch cost */
 123   COSTS_N_BYTES (2),                    /* cost of FADD and FSUB insns.  */
 124   COSTS_N_BYTES (2),                    /* cost of FMUL instruction.  */
 125   COSTS_N_BYTES (2),                    /* cost of FDIV instruction.  */
 126   COSTS_N_BYTES (2),                    /* cost of FABS instruction.  */
 127   COSTS_N_BYTES (2),                    /* cost of FCHS instruction.  */
 128   COSTS_N_BYTES (2),                    /* cost of FSQRT instruction.  */
 129   {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},  /* 1st string-op pair */
 130    {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}}, /* NOTE(review): presumably
                                            memcpy -- confirm field order in
                                            struct processor_costs.  */
 131   {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},  /* 2nd string-op pair */
 132    {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}}, /* NOTE(review): presumably
                                            memset -- confirm as above.  */
 133   1,                                    /* scalar_stmt_cost.  */
 134   1,                                    /* scalar_load_cost.  */
 135   1,                                    /* scalar_store_cost.  */
 136   1,                                    /* vec_stmt_cost.  */
 137   1,                                    /* vec_to_scalar_cost.  */
 138   1,                                    /* scalar_to_vec_cost.  */
 139   1,                                    /* vec_align_load_cost.  */
 140   1,                                    /* vec_unalign_load_cost.  */
 141   1,                                    /* vec_store_cost.  */
 142   1,                                    /* cond_taken_branch_cost.  */
 143   1,                                    /* cond_not_taken_branch_cost.  */
 144 };
 145
 146 /* Processor costs (relative to an add).  Instruction costs in this table
        are expressed with COSTS_N_INSNS; the remaining entries are plain
        integers.  */
 147 static const
 148 struct processor_costs i386_cost = {    /* 386 specific costs */
 149   COSTS_N_INSNS (1),                    /* cost of an add instruction */
 150   COSTS_N_INSNS (1),                    /* cost of a lea instruction */
 151   COSTS_N_INSNS (3),                    /* variable shift costs */
 152   COSTS_N_INSNS (2),                    /* constant shift costs */
 153   {COSTS_N_INSNS (6),                   /* cost of starting multiply for QI */
 154    COSTS_N_INSNS (6),                   /*                               HI */
 155    COSTS_N_INSNS (6),                   /*                               SI */
 156    COSTS_N_INSNS (6),                   /*                               DI */
 157    COSTS_N_INSNS (6)},                  /*                               other */
 158   COSTS_N_INSNS (1),                    /* cost of multiply per each bit set */
 159   {COSTS_N_INSNS (23),                  /* cost of a divide/mod for QI */
 160    COSTS_N_INSNS (23),                  /*                          HI */
 161    COSTS_N_INSNS (23),                  /*                          SI */
 162    COSTS_N_INSNS (23),                  /*                          DI */
 163    COSTS_N_INSNS (23)},                 /*                          other */
 164   COSTS_N_INSNS (3),                    /* cost of movsx */
 165   COSTS_N_INSNS (2),                    /* cost of movzx */
 166   15,                                   /* "large" insn */
 167   3,                                    /* MOVE_RATIO */
 168   4,                                    /* cost for loading QImode using movzbl */
 169   {2, 4, 2},                            /* cost of loading integer registers
 170                                            in QImode, HImode and SImode.
 171                                            Relative to reg-reg move (2).  */
 172   {2, 4, 2},                            /* cost of storing integer registers */
 173   2,                                    /* cost of reg,reg fld/fst */
 174   {8, 8, 8},                            /* cost of loading fp registers
 175                                            in SFmode, DFmode and XFmode */
 176   {8, 8, 8},                            /* cost of storing fp registers
 177                                            in SFmode, DFmode and XFmode */
 178   2,                                    /* cost of moving MMX register */
 179   {4, 8},                               /* cost of loading MMX registers
 180                                            in SImode and DImode */
 181   {4, 8},                               /* cost of storing MMX registers
 182                                            in SImode and DImode */
 183   2,                                    /* cost of moving SSE register */
 184   {4, 8, 16},                           /* cost of loading SSE registers
 185                                            in SImode, DImode and TImode */
 186   {4, 8, 16},                           /* cost of storing SSE registers
 187                                            in SImode, DImode and TImode */
 188   3,                                    /* MMX or SSE register to integer */
 189   0,                                    /* size of l1 cache  */
 190   0,                                    /* size of l2 cache  */
 191   0,                                    /* size of prefetch block */
 192   0,                                    /* number of parallel prefetches */
 193   1,                                    /* Branch cost */
 194   COSTS_N_INSNS (23),                   /* cost of FADD and FSUB insns.  */
 195   COSTS_N_INSNS (27),                   /* cost of FMUL instruction.  */
 196   COSTS_N_INSNS (88),                   /* cost of FDIV instruction.  */
 197   COSTS_N_INSNS (22),                   /* cost of FABS instruction.  */
 198   COSTS_N_INSNS (24),                   /* cost of FCHS instruction.  */
 199   COSTS_N_INSNS (122),                  /* cost of FSQRT instruction.  */
 200   {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},  /* 1st string-op pair */
 201    DUMMY_STRINGOP_ALGS},                /* NOTE(review): presumably memcpy --
                                            confirm field order in struct
                                            processor_costs.  */
 202   {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},  /* 2nd string-op pair */
 203    DUMMY_STRINGOP_ALGS},                /* NOTE(review): presumably memset --
                                            confirm as above.  */
 204   1,                                    /* scalar_stmt_cost.  */
 205   1,                                    /* scalar_load_cost.  */
 206   1,                                    /* scalar_store_cost.  */
 207   1,                                    /* vec_stmt_cost.  */
 208   1,                                    /* vec_to_scalar_cost.  */
 209   1,                                    /* scalar_to_vec_cost.  */
 210   1,                                    /* vec_align_load_cost.  */
 211   2,                                    /* vec_unalign_load_cost.  */
 212   1,                                    /* vec_store_cost.  */
 213   3,                                    /* cond_taken_branch_cost.  */
 214   1,                                    /* cond_not_taken_branch_cost.  */
 215 };
 216
 217 static const
 218 struct processor_costs i486_cost = {    /* 486 specific costs.  Instruction
                                            costs are expressed with
                                            COSTS_N_INSNS unless noted.  */
 219   COSTS_N_INSNS (1),                    /* cost of an add instruction */
 220   COSTS_N_INSNS (1),                    /* cost of a lea instruction */
 221   COSTS_N_INSNS (3),                    /* variable shift costs */
 222   COSTS_N_INSNS (2),                    /* constant shift costs */
 223   {COSTS_N_INSNS (12),                  /* cost of starting multiply for QI */
 224    COSTS_N_INSNS (12),                  /*                               HI */
 225    COSTS_N_INSNS (12),                  /*                               SI */
 226    COSTS_N_INSNS (12),                  /*                               DI */
 227    COSTS_N_INSNS (12)},                 /*                               other */
 228   1,                                    /* cost of multiply per each bit set.
                                            NOTE(review): a raw 1 here, whereas
                                            i386_cost wraps this field in
                                            COSTS_N_INSNS -- confirm intended.  */
 229   {COSTS_N_INSNS (40),                  /* cost of a divide/mod for QI */
 230    COSTS_N_INSNS (40),                  /*                          HI */
 231    COSTS_N_INSNS (40),                  /*                          SI */
 232    COSTS_N_INSNS (40),                  /*                          DI */
 233    COSTS_N_INSNS (40)},                 /*                          other */
 234   COSTS_N_INSNS (3),                    /* cost of movsx */
 235   COSTS_N_INSNS (2),                    /* cost of movzx */
 236   15,                                   /* "large" insn */
 237   3,                                    /* MOVE_RATIO */
 238   4,                                    /* cost for loading QImode using movzbl */
 239   {2, 4, 2},                            /* cost of loading integer registers
 240                                            in QImode, HImode and SImode.
 241                                            Relative to reg-reg move (2).  */
 242   {2, 4, 2},                            /* cost of storing integer registers */
 243   2,                                    /* cost of reg,reg fld/fst */
 244   {8, 8, 8},                            /* cost of loading fp registers
 245                                            in SFmode, DFmode and XFmode */
 246   {8, 8, 8},                            /* cost of storing fp registers
 247                                            in SFmode, DFmode and XFmode */
 248   2,                                    /* cost of moving MMX register */
 249   {4, 8},                               /* cost of loading MMX registers
 250                                            in SImode and DImode */
 251   {4, 8},                               /* cost of storing MMX registers
 252                                            in SImode and DImode */
 253   2,                                    /* cost of moving SSE register */
 254   {4, 8, 16},                           /* cost of loading SSE registers
 255                                            in SImode, DImode and TImode */
 256   {4, 8, 16},                           /* cost of storing SSE registers
 257                                            in SImode, DImode and TImode */
 258   3,                                    /* MMX or SSE register to integer */
 259   4,                                    /* size of l1 cache.  486 has 8kB cache
 260                                            shared for code and data, so 4kB is
 261                                            not really precise.  */
 262   4,                                    /* size of l2 cache  */
 263   0,                                    /* size of prefetch block */
 264   0,                                    /* number of parallel prefetches */
 265   1,                                    /* Branch cost */
 266   COSTS_N_INSNS (8),                    /* cost of FADD and FSUB insns.  */
 267   COSTS_N_INSNS (16),                   /* cost of FMUL instruction.  */
 268   COSTS_N_INSNS (73),                   /* cost of FDIV instruction.  */
 269   COSTS_N_INSNS (3),                    /* cost of FABS instruction.  */
 270   COSTS_N_INSNS (3),                    /* cost of FCHS instruction.  */
 271   COSTS_N_INSNS (83),                   /* cost of FSQRT instruction.  */
 272   {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},  /* 1st string-op pair */
 273    DUMMY_STRINGOP_ALGS},                /* NOTE(review): presumably memcpy --
                                            confirm field order in struct
                                            processor_costs.  */
 274   {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},  /* 2nd string-op pair */
 275    DUMMY_STRINGOP_ALGS},                /* NOTE(review): presumably memset --
                                            confirm as above.  */
 276   1,                                    /* scalar_stmt_cost.  */
 277   1,                                    /* scalar_load_cost.  */
 278   1,                                    /* scalar_store_cost.  */
 279   1,                                    /* vec_stmt_cost.  */
 280   1,                                    /* vec_to_scalar_cost.  */
 281   1,                                    /* scalar_to_vec_cost.  */
 282   1,                                    /* vec_align_load_cost.  */
 283   2,                                    /* vec_unalign_load_cost.  */
 284   1,                                    /* vec_store_cost.  */
 285   3,                                    /* cond_taken_branch_cost.  */
 286   1,                                    /* cond_not_taken_branch_cost.  */
 287 };
 288
 289 static const
 290 struct processor_costs pentium_cost = { /* Pentium specific costs.  Instruction
                                            costs are expressed with
                                            COSTS_N_INSNS.  */
 291   COSTS_N_INSNS (1),                    /* cost of an add instruction */
 292   COSTS_N_INSNS (1),                    /* cost of a lea instruction */
 293   COSTS_N_INSNS (4),                    /* variable shift costs */
 294   COSTS_N_INSNS (1),                    /* constant shift costs */
 295   {COSTS_N_INSNS (11),                  /* cost of starting multiply for QI */
 296    COSTS_N_INSNS (11),                  /*                               HI */
 297    COSTS_N_INSNS (11),                  /*                               SI */
 298    COSTS_N_INSNS (11),                  /*                               DI */
 299    COSTS_N_INSNS (11)},                 /*                               other */
 300   0,                                    /* cost of multiply per each bit set */
 301   {COSTS_N_INSNS (25),                  /* cost of a divide/mod for QI */
 302    COSTS_N_INSNS (25),                  /*                          HI */
 303    COSTS_N_INSNS (25),                  /*                          SI */
 304    COSTS_N_INSNS (25),                  /*                          DI */
 305    COSTS_N_INSNS (25)},                 /*                          other */
 306   COSTS_N_INSNS (3),                    /* cost of movsx */
 307   COSTS_N_INSNS (2),                    /* cost of movzx */
 308   8,                                    /* "large" insn */
 309   6,                                    /* MOVE_RATIO */
 310   6,                                    /* cost for loading QImode using movzbl */
 311   {2, 4, 2},                            /* cost of loading integer registers
 312                                            in QImode, HImode and SImode.
 313                                            Relative to reg-reg move (2).  */
 314   {2, 4, 2},                            /* cost of storing integer registers */
 315   2,                                    /* cost of reg,reg fld/fst */
 316   {2, 2, 6},                            /* cost of loading fp registers
 317                                            in SFmode, DFmode and XFmode */
 318   {4, 4, 6},                            /* cost of storing fp registers
 319                                            in SFmode, DFmode and XFmode */
 320   8,                                    /* cost of moving MMX register */
 321   {8, 8},                               /* cost of loading MMX registers
 322                                            in SImode and DImode */
 323   {8, 8},                               /* cost of storing MMX registers
 324                                            in SImode and DImode */
 325   2,                                    /* cost of moving SSE register */
 326   {4, 8, 16},                           /* cost of loading SSE registers
 327                                            in SImode, DImode and TImode */
 328   {4, 8, 16},                           /* cost of storing SSE registers
 329                                            in SImode, DImode and TImode */
 330   3,                                    /* MMX or SSE register to integer */
 331   8,                                    /* size of l1 cache.  */
 332   8,                                    /* size of l2 cache  */
 333   0,                                    /* size of prefetch block */
 334   0,                                    /* number of parallel prefetches */
 335   2,                                    /* Branch cost */
 336   COSTS_N_INSNS (3),                    /* cost of FADD and FSUB insns.  */
 337   COSTS_N_INSNS (3),                    /* cost of FMUL instruction.  */
 338   COSTS_N_INSNS (39),                   /* cost of FDIV instruction.  */
 339   COSTS_N_INSNS (1),                    /* cost of FABS instruction.  */
 340   COSTS_N_INSNS (1),                    /* cost of FCHS instruction.  */
 341   COSTS_N_INSNS (70),                   /* cost of FSQRT instruction.  */
 342   {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},  /* 1st string-op pair */
 343    DUMMY_STRINGOP_ALGS},                /* NOTE(review): presumably memcpy --
                                            confirm field order in struct
                                            processor_costs.  */
 344   {{libcall, {{-1, rep_prefix_4_byte}}},  /* 2nd string-op pair */
 345    DUMMY_STRINGOP_ALGS},                /* NOTE(review): presumably memset --
                                            confirm as above.  */
 346   1,                                    /* scalar_stmt_cost.  */
 347   1,                                    /* scalar_load_cost.  */
 348   1,                                    /* scalar_store_cost.  */
 349   1,                                    /* vec_stmt_cost.  */
 350   1,                                    /* vec_to_scalar_cost.  */
 351   1,                                    /* scalar_to_vec_cost.  */
 352   1,                                    /* vec_align_load_cost.  */
 353   2,                                    /* vec_unalign_load_cost.  */
 354   1,                                    /* vec_store_cost.  */
 355   3,                                    /* cond_taken_branch_cost.  */
 356   1,                                    /* cond_not_taken_branch_cost.  */
 357 };
 358
 359 static const
 360 struct processor_costs pentiumpro_cost = { /* PentiumPro specific costs.
                                            Instruction costs are expressed with
                                            COSTS_N_INSNS.  */
 361   COSTS_N_INSNS (1),                    /* cost of an add instruction */
 362   COSTS_N_INSNS (1),                    /* cost of a lea instruction */
 363   COSTS_N_INSNS (1),                    /* variable shift costs */
 364   COSTS_N_INSNS (1),                    /* constant shift costs */
 365   {COSTS_N_INSNS (4),                   /* cost of starting multiply for QI */
 366    COSTS_N_INSNS (4),                   /*                               HI */
 367    COSTS_N_INSNS (4),                   /*                               SI */
 368    COSTS_N_INSNS (4),                   /*                               DI */
 369    COSTS_N_INSNS (4)},                  /*                               other */
 370   0,                                    /* cost of multiply per each bit set */
 371   {COSTS_N_INSNS (17),                  /* cost of a divide/mod for QI */
 372    COSTS_N_INSNS (17),                  /*                          HI */
 373    COSTS_N_INSNS (17),                  /*                          SI */
 374    COSTS_N_INSNS (17),                  /*                          DI */
 375    COSTS_N_INSNS (17)},                 /*                          other */
 376   COSTS_N_INSNS (1),                    /* cost of movsx */
 377   COSTS_N_INSNS (1),                    /* cost of movzx */
 378   8,                                    /* "large" insn */
 379   6,                                    /* MOVE_RATIO */
 380   2,                                    /* cost for loading QImode using movzbl */
 381   {4, 4, 4},                            /* cost of loading integer registers
 382                                            in QImode, HImode and SImode.
 383                                            Relative to reg-reg move (2).  */
 384   {2, 2, 2},                            /* cost of storing integer registers */
 385   2,                                    /* cost of reg,reg fld/fst */
 386   {2, 2, 6},                            /* cost of loading fp registers
 387                                            in SFmode, DFmode and XFmode */
 388   {4, 4, 6},                            /* cost of storing fp registers
 389                                            in SFmode, DFmode and XFmode */
 390   2,                                    /* cost of moving MMX register */
 391   {2, 2},                               /* cost of loading MMX registers
 392                                            in SImode and DImode */
 393   {2, 2},                               /* cost of storing MMX registers
 394                                            in SImode and DImode */
 395   2,                                    /* cost of moving SSE register */
 396   {2, 2, 8},                            /* cost of loading SSE registers
 397                                            in SImode, DImode and TImode */
 398   {2, 2, 8},                            /* cost of storing SSE registers
 399                                            in SImode, DImode and TImode */
 400   3,                                    /* MMX or SSE register to integer */
 401   8,                                    /* size of l1 cache.  */
 402   256,                                  /* size of l2 cache  */
 403   32,                                   /* size of prefetch block */
 404   6,                                    /* number of parallel prefetches */
 405   2,                                    /* Branch cost */
 406   COSTS_N_INSNS (3),                    /* cost of FADD and FSUB insns.  */
 407   COSTS_N_INSNS (5),                    /* cost of FMUL instruction.  */
 408   COSTS_N_INSNS (56),                   /* cost of FDIV instruction.  */
 409   COSTS_N_INSNS (2),                    /* cost of FABS instruction.  */
 410   COSTS_N_INSNS (2),                    /* cost of FCHS instruction.  */
 411   COSTS_N_INSNS (56),                   /* cost of FSQRT instruction.  */
 412   /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes (we ensure
 413      the alignment).  For small blocks an inline loop is still a noticeable win; for bigger
 414      blocks either rep movsl or rep movsb is the way to go.  Rep movsb apparently has a
 415      more expensive startup time in the CPU, but after 4K the difference is down in the noise.
 416    */
 417   {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
 418                         {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
 419    DUMMY_STRINGOP_ALGS},                /* 1st string-op pair; NOTE(review):
                                            presumably memcpy -- confirm field
                                            order in struct processor_costs.  */
 420   {{rep_prefix_4_byte, {{1024, unrolled_loop},
 421                         {8192, rep_prefix_4_byte}, {-1, libcall}}},
 422    DUMMY_STRINGOP_ALGS},                /* 2nd string-op pair; NOTE(review):
                                            presumably memset -- confirm as
                                            above.  */
 423   1,                                    /* scalar_stmt_cost.  */
 424   1,                                    /* scalar_load_cost.  */
 425   1,                                    /* scalar_store_cost.  */
 426   1,                                    /* vec_stmt_cost.  */
 427   1,                                    /* vec_to_scalar_cost.  */
 428   1,                                    /* scalar_to_vec_cost.  */
 429   1,                                    /* vec_align_load_cost.  */
 430   2,                                    /* vec_unalign_load_cost.  */
 431   1,                                    /* vec_store_cost.  */
 432   3,                                    /* cond_taken_branch_cost.  */
 433   1,                                    /* cond_not_taken_branch_cost.  */
 434 };
 435
 436 static const
 437 struct processor_costs geode_cost = {   /* Geode specific costs.  Instruction
                                            costs are expressed with
                                            COSTS_N_INSNS.  */
 438   COSTS_N_INSNS (1),                    /* cost of an add instruction */
 439   COSTS_N_INSNS (1),                    /* cost of a lea instruction */
 440   COSTS_N_INSNS (2),                    /* variable shift costs */
 441   COSTS_N_INSNS (1),                    /* constant shift costs */
 442   {COSTS_N_INSNS (3),                   /* cost of starting multiply for QI */
 443    COSTS_N_INSNS (4),                   /*                               HI */
 444    COSTS_N_INSNS (7),                   /*                               SI */
 445    COSTS_N_INSNS (7),                   /*                               DI */
 446    COSTS_N_INSNS (7)},                  /*                               other */
 447   0,                                    /* cost of multiply per each bit set */
 448   {COSTS_N_INSNS (15),                  /* cost of a divide/mod for QI */
 449    COSTS_N_INSNS (23),                  /*                          HI */
 450    COSTS_N_INSNS (39),                  /*                          SI */
 451    COSTS_N_INSNS (39),                  /*                          DI */
 452    COSTS_N_INSNS (39)},                 /*                          other */
 453   COSTS_N_INSNS (1),                    /* cost of movsx */
 454   COSTS_N_INSNS (1),                    /* cost of movzx */
 455   8,                                    /* "large" insn */
 456   4,                                    /* MOVE_RATIO */
 457   1,                                    /* cost for loading QImode using movzbl */
 458   {1, 1, 1},                            /* cost of loading integer registers
 459                                            in QImode, HImode and SImode.
 460                                            Relative to reg-reg move (2).  */
 461   {1, 1, 1},                            /* cost of storing integer registers */
 462   1,                                    /* cost of reg,reg fld/fst */
 463   {1, 1, 1},                            /* cost of loading fp registers
 464                                            in SFmode, DFmode and XFmode */
 465   {4, 6, 6},                            /* cost of storing fp registers
 466                                            in SFmode, DFmode and XFmode */
 467
 468   1,                                    /* cost of moving MMX register */
 469   {1, 1},                               /* cost of loading MMX registers
 470                                            in SImode and DImode */
 471   {1, 1},                               /* cost of storing MMX registers
 472                                            in SImode and DImode */
 473   1,                                    /* cost of moving SSE register */
 474   {1, 1, 1},                            /* cost of loading SSE registers
 475                                            in SImode, DImode and TImode */
 476   {1, 1, 1},                            /* cost of storing SSE registers
 477                                            in SImode, DImode and TImode */
 478   1,                                    /* MMX or SSE register to integer */
 479   64,                                   /* size of l1 cache.  */
 480   128,                                  /* size of l2 cache.  */
 481   32,                                   /* size of prefetch block */
 482   1,                                    /* number of parallel prefetches */
 483   1,                                    /* Branch cost */
 484   COSTS_N_INSNS (6),                    /* cost of FADD and FSUB insns.  */
 485   COSTS_N_INSNS (11),                   /* cost of FMUL instruction.  */
 486   COSTS_N_INSNS (47),                   /* cost of FDIV instruction.  */
 487   COSTS_N_INSNS (1),                    /* cost of FABS instruction.  */
 488   COSTS_N_INSNS (1),                    /* cost of FCHS instruction.  */
 489   COSTS_N_INSNS (54),                   /* cost of FSQRT instruction.  */
 490   {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},  /* 1st string-op pair */
 491    DUMMY_STRINGOP_ALGS},                /* NOTE(review): presumably memcpy --
                                            confirm field order in struct
                                            processor_costs.  */
 492   {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},  /* 2nd string-op pair */
 493    DUMMY_STRINGOP_ALGS},                /* NOTE(review): presumably memset --
                                            confirm as above.  */
 494   1,                                    /* scalar_stmt_cost.  */
 495   1,                                    /* scalar_load_cost.  */
 496   1,                                    /* scalar_store_cost.  */
 497   1,                                    /* vec_stmt_cost.  */
 498   1,                                    /* vec_to_scalar_cost.  */
 499   1,                                    /* scalar_to_vec_cost.  */
 500   1,                                    /* vec_align_load_cost.  */
 501   2,                                    /* vec_unalign_load_cost.  */
 502   1,                                    /* vec_store_cost.  */
 503   3,                                    /* cond_taken_branch_cost.  */
 504   1,                                    /* cond_not_taken_branch_cost.  */
 505 };
 506
 /* Cost table for the K6.  The initializer is positional: every value must
    stay in the member order of struct processor_costs, which is declared
    outside this part of the file.  The comment trailing each entry names
    the quantity that entry sets.  */
 507 static const
 508 struct processor_costs k6_cost = {
 509   COSTS_N_INSNS (1),                    /* cost of an add instruction */
 510   COSTS_N_INSNS (2),                    /* cost of a lea instruction */
 511   COSTS_N_INSNS (1),                    /* variable shift costs */
 512   COSTS_N_INSNS (1),                    /* constant shift costs */
 513   {COSTS_N_INSNS (3),                   /* cost of starting multiply for QI */
 514    COSTS_N_INSNS (3),                   /*                               HI */
 515    COSTS_N_INSNS (3),                   /*                               SI */
 516    COSTS_N_INSNS (3),                   /*                               DI */
 517    COSTS_N_INSNS (3)},                  /*                               other */
 518   0,                                    /* cost of multiply per each bit set */
 519   {COSTS_N_INSNS (18),                  /* cost of a divide/mod for QI */
 520    COSTS_N_INSNS (18),                  /*                          HI */
 521    COSTS_N_INSNS (18),                  /*                          SI */
 522    COSTS_N_INSNS (18),                  /*                          DI */
 523    COSTS_N_INSNS (18)},                 /*                          other */
 524   COSTS_N_INSNS (2),                    /* cost of movsx */
 525   COSTS_N_INSNS (2),                    /* cost of movzx */
 526   8,                                    /* "large" insn */
 527   4,                                    /* MOVE_RATIO */
 528   3,                                    /* cost for loading QImode using movzbl */
 529   {4, 5, 4},                            /* cost of loading integer registers
 530                                            in QImode, HImode and SImode.
 531                                            Relative to reg-reg move (2).  */
 532   {2, 3, 2},                            /* cost of storing integer registers */
 533   4,                                    /* cost of reg,reg fld/fst */
 534   {6, 6, 6},                            /* cost of loading fp registers
 535                                            in SFmode, DFmode and XFmode */
 536   {4, 4, 4},                            /* cost of storing fp registers
 537                                            in SFmode, DFmode and XFmode */
 538   2,                                    /* cost of moving MMX register */
 539   {2, 2},                               /* cost of loading MMX registers
 540                                            in SImode and DImode */
 541   {2, 2},                               /* cost of storing MMX registers
 542                                            in SImode and DImode */
 543   2,                                    /* cost of moving SSE register */
 544   {2, 2, 8},                            /* cost of loading SSE registers
 545                                            in SImode, DImode and TImode */
 546   {2, 2, 8},                            /* cost of storing SSE registers
 547                                            in SImode, DImode and TImode */
 548   6,                                    /* MMX or SSE register to integer */
 549   32,                                   /* size of l1 cache.  */
 550   32,                                   /* size of l2 cache.  Some models
 551                                            have integrated l2 cache, but
 552                                            optimizing for k6 is not important
 553                                            enough to worry about that.  */
 554   32,                                   /* size of prefetch block */
 555   1,                                    /* number of parallel prefetches */
 556   1,                                    /* Branch cost */
 557   COSTS_N_INSNS (2),                    /* cost of FADD and FSUB insns.  */
 558   COSTS_N_INSNS (2),                    /* cost of FMUL instruction.  */
 559   COSTS_N_INSNS (56),                   /* cost of FDIV instruction.  */
 560   COSTS_N_INSNS (2),                    /* cost of FABS instruction.  */
 561   COSTS_N_INSNS (2),                    /* cost of FCHS instruction.  */
 562   COSTS_N_INSNS (56),                   /* cost of FSQRT instruction.  */
       /* NOTE(review): the next two entries are identical block-operation
          strategy tables, presumably for memcpy and then memset; each pairs
          a real table with DUMMY_STRINGOP_ALGS as its second slot.  Confirm
          the member order against struct processor_costs.  */
 563   {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
 564    DUMMY_STRINGOP_ALGS},
 565   {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
 566    DUMMY_STRINGOP_ALGS},
 567   1,                                    /* scalar_stmt_cost.  */
 568   1,                                    /* scalar_load_cost.  */
 569   1,                                    /* scalar_store_cost.  */
 570   1,                                    /* vec_stmt_cost.  */
 571   1,                                    /* vec_to_scalar_cost.  */
 572   1,                                    /* scalar_to_vec_cost.  */
 573   1,                                    /* vec_align_load_cost.  */
 574   2,                                    /* vec_unalign_load_cost.  */
 575   1,                                    /* vec_store_cost.  */
 576   3,                                    /* cond_taken_branch_cost.  */
 577   1,                                    /* cond_not_taken_branch_cost.  */
 578 };
 579
 /* Cost table for the Athlon.  The initializer is positional: every value
    must stay in the member order of struct processor_costs, which is
    declared outside this part of the file.  The comment trailing each entry
    names the quantity that entry sets.  */
 580 static const
 581 struct processor_costs athlon_cost = {
 582   COSTS_N_INSNS (1),                    /* cost of an add instruction */
 583   COSTS_N_INSNS (2),                    /* cost of a lea instruction */
 584   COSTS_N_INSNS (1),                    /* variable shift costs */
 585   COSTS_N_INSNS (1),                    /* constant shift costs */
 586   {COSTS_N_INSNS (5),                   /* cost of starting multiply for QI */
 587    COSTS_N_INSNS (5),                   /*                               HI */
 588    COSTS_N_INSNS (5),                   /*                               SI */
 589    COSTS_N_INSNS (5),                   /*                               DI */
 590    COSTS_N_INSNS (5)},                  /*                               other */
 591   0,                                    /* cost of multiply per each bit set */
 592   {COSTS_N_INSNS (18),                  /* cost of a divide/mod for QI */
 593    COSTS_N_INSNS (26),                  /*                          HI */
 594    COSTS_N_INSNS (42),                  /*                          SI */
 595    COSTS_N_INSNS (74),                  /*                          DI */
 596    COSTS_N_INSNS (74)},                 /*                          other */
 597   COSTS_N_INSNS (1),                    /* cost of movsx */
 598   COSTS_N_INSNS (1),                    /* cost of movzx */
 599   8,                                    /* "large" insn */
 600   9,                                    /* MOVE_RATIO */
 601   4,                                    /* cost for loading QImode using movzbl */
 602   {3, 4, 3},                            /* cost of loading integer registers
 603                                            in QImode, HImode and SImode.
 604                                            Relative to reg-reg move (2).  */
 605   {3, 4, 3},                            /* cost of storing integer registers */
 606   4,                                    /* cost of reg,reg fld/fst */
 607   {4, 4, 12},                           /* cost of loading fp registers
 608                                            in SFmode, DFmode and XFmode */
 609   {6, 6, 8},                            /* cost of storing fp registers
 610                                            in SFmode, DFmode and XFmode */
 611   2,                                    /* cost of moving MMX register */
 612   {4, 4},                               /* cost of loading MMX registers
 613                                            in SImode and DImode */
 614   {4, 4},                               /* cost of storing MMX registers
 615                                            in SImode and DImode */
 616   2,                                    /* cost of moving SSE register */
 617   {4, 4, 6},                            /* cost of loading SSE registers
 618                                            in SImode, DImode and TImode */
 619   {4, 4, 5},                            /* cost of storing SSE registers
 620                                            in SImode, DImode and TImode */
 621   5,                                    /* MMX or SSE register to integer */
 622   64,                                   /* size of l1 cache.  */
 623   256,                                  /* size of l2 cache.  */
 624   64,                                   /* size of prefetch block */
 625   6,                                    /* number of parallel prefetches */
 626   5,                                    /* Branch cost */
 627   COSTS_N_INSNS (4),                    /* cost of FADD and FSUB insns.  */
 628   COSTS_N_INSNS (4),                    /* cost of FMUL instruction.  */
 629   COSTS_N_INSNS (24),                   /* cost of FDIV instruction.  */
 630   COSTS_N_INSNS (2),                    /* cost of FABS instruction.  */
 631   COSTS_N_INSNS (2),                    /* cost of FCHS instruction.  */
 632   COSTS_N_INSNS (35),                   /* cost of FSQRT instruction.  */
 633   /* For some reason, Athlon deals better with the REP prefix (relative to
 634      loops) than K8 does.  Alignment becomes important after 8 bytes for
 635      memcpy and 128 bytes for memset.  */
       /* NOTE(review): the two identical entries below are presumably the
          memcpy table followed by the memset table, each with
          DUMMY_STRINGOP_ALGS as its second slot.  Confirm the member order
          against struct processor_costs.  */
 636   {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
 637    DUMMY_STRINGOP_ALGS},
 638   {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
 639    DUMMY_STRINGOP_ALGS},
 640   1,                                    /* scalar_stmt_cost.  */
 641   1,                                    /* scalar_load_cost.  */
 642   1,                                    /* scalar_store_cost.  */
 643   1,                                    /* vec_stmt_cost.  */
 644   1,                                    /* vec_to_scalar_cost.  */
 645   1,                                    /* scalar_to_vec_cost.  */
 646   1,                                    /* vec_align_load_cost.  */
 647   2,                                    /* vec_unalign_load_cost.  */
 648   1,                                    /* vec_store_cost.  */
 649   3,                                    /* cond_taken_branch_cost.  */
 650   1,                                    /* cond_not_taken_branch_cost.  */
 651 };
 652
 /* Cost table for the K8.  The initializer is positional: every value must
    stay in the member order of struct processor_costs, which is declared
    outside this part of the file.  The comment trailing each entry names
    the quantity that entry sets.  */
 653 static const
 654 struct processor_costs k8_cost = {
 655   COSTS_N_INSNS (1),                    /* cost of an add instruction */
 656   COSTS_N_INSNS (2),                    /* cost of a lea instruction */
 657   COSTS_N_INSNS (1),                    /* variable shift costs */
 658   COSTS_N_INSNS (1),                    /* constant shift costs */
 659   {COSTS_N_INSNS (3),                   /* cost of starting multiply for QI */
 660    COSTS_N_INSNS (4),                   /*                               HI */
 661    COSTS_N_INSNS (3),                   /*                               SI */
 662    COSTS_N_INSNS (4),                   /*                               DI */
 663    COSTS_N_INSNS (5)},                  /*                               other */
 664   0,                                    /* cost of multiply per each bit set */
 665   {COSTS_N_INSNS (18),                  /* cost of a divide/mod for QI */
 666    COSTS_N_INSNS (26),                  /*                          HI */
 667    COSTS_N_INSNS (42),                  /*                          SI */
 668    COSTS_N_INSNS (74),                  /*                          DI */
 669    COSTS_N_INSNS (74)},                 /*                          other */
 670   COSTS_N_INSNS (1),                    /* cost of movsx */
 671   COSTS_N_INSNS (1),                    /* cost of movzx */
 672   8,                                    /* "large" insn */
 673   9,                                    /* MOVE_RATIO */
 674   4,                                    /* cost for loading QImode using movzbl */
 675   {3, 4, 3},                            /* cost of loading integer registers
 676                                            in QImode, HImode and SImode.
 677                                            Relative to reg-reg move (2).  */
 678   {3, 4, 3},                            /* cost of storing integer registers */
 679   4,                                    /* cost of reg,reg fld/fst */
 680   {4, 4, 12},                           /* cost of loading fp registers
 681                                            in SFmode, DFmode and XFmode */
 682   {6, 6, 8},                            /* cost of storing fp registers
 683                                            in SFmode, DFmode and XFmode */
 684   2,                                    /* cost of moving MMX register */
 685   {3, 3},                               /* cost of loading MMX registers
 686                                            in SImode and DImode */
 687   {4, 4},                               /* cost of storing MMX registers
 688                                            in SImode and DImode */
 689   2,                                    /* cost of moving SSE register */
 690   {4, 3, 6},                            /* cost of loading SSE registers
 691                                            in SImode, DImode and TImode */
 692   {4, 4, 5},                            /* cost of storing SSE registers
 693                                            in SImode, DImode and TImode */
 694   5,                                    /* MMX or SSE register to integer */
 695   64,                                   /* size of l1 cache.  */
 696   512,                                  /* size of l2 cache.  */
 697   64,                                   /* size of prefetch block */
 698   /* New AMD processors never drop prefetches; if they cannot be performed
 699      immediately, they are queued.  We set the number of simultaneous
 700      prefetches to a large constant to reflect this (it is probably not a
 701      good idea to leave the number of prefetches unlimited, as their
 702      execution also takes some time).  */
 703   100,                                  /* number of parallel prefetches */
 704   3,                                    /* Branch cost */
 705   COSTS_N_INSNS (4),                    /* cost of FADD and FSUB insns.  */
 706   COSTS_N_INSNS (4),                    /* cost of FMUL instruction.  */
 707   COSTS_N_INSNS (19),                   /* cost of FDIV instruction.  */
 708   COSTS_N_INSNS (2),                    /* cost of FABS instruction.  */
 709   COSTS_N_INSNS (2),                    /* cost of FCHS instruction.  */
 710   COSTS_N_INSNS (35),                   /* cost of FSQRT instruction.  */
 711   /* K8 has an optimized REP instruction for medium sized blocks, but for
 712      very small blocks it is better to use a loop.  For large blocks, a
 713      libcall can do non-temporal accesses and beat inline considerably.  */
       /* NOTE(review): each of the two entries below holds two tables; the
          second of each uses rep_prefix_8_byte, so it is presumably the
          64-bit variant.  The entries themselves are presumably memcpy and
          then memset.  Confirm against struct processor_costs.  */
 714   {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
 715    {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
 716   {{libcall, {{8, loop}, {24, unrolled_loop},
 717               {2048, rep_prefix_4_byte}, {-1, libcall}}},
 718    {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
 719   4,                                    /* scalar_stmt_cost.  */
 720   2,                                    /* scalar_load_cost.  */
 721   2,                                    /* scalar_store_cost.  */
 722   5,                                    /* vec_stmt_cost.  */
 723   0,                                    /* vec_to_scalar_cost.  */
 724   2,                                    /* scalar_to_vec_cost.  */
 725   2,                                    /* vec_align_load_cost.  */
 726   3,                                    /* vec_unalign_load_cost.  */
 727   3,                                    /* vec_store_cost.  */
 728   3,                                    /* cond_taken_branch_cost.  */
 729   2,                                    /* cond_not_taken_branch_cost.  */
 730 };
 731
 /* Cost table for AMDFAM10.  The initializer is positional: every value
    must stay in the member order of struct processor_costs, which is
    declared outside this part of the file.  The comment trailing each entry
    names the quantity that entry sets.

    Declared static const like the other cost tables in this file, since it
    is read-only tuning data.  NOTE(review): this assumes no other
    translation unit refers to amdfam10_cost by name and that all users
    take it through a pointer to const -- confirm.  */
 732 static const struct processor_costs amdfam10_cost = {
 733   COSTS_N_INSNS (1),                    /* cost of an add instruction */
 734   COSTS_N_INSNS (2),                    /* cost of a lea instruction */
 735   COSTS_N_INSNS (1),                    /* variable shift costs */
 736   COSTS_N_INSNS (1),                    /* constant shift costs */
 737   {COSTS_N_INSNS (3),                   /* cost of starting multiply for QI */
 738    COSTS_N_INSNS (4),                   /*                               HI */
 739    COSTS_N_INSNS (3),                   /*                               SI */
 740    COSTS_N_INSNS (4),                   /*                               DI */
 741    COSTS_N_INSNS (5)},                  /*                               other */
 742   0,                                    /* cost of multiply per each bit set */
 743   {COSTS_N_INSNS (19),                  /* cost of a divide/mod for QI */
 744    COSTS_N_INSNS (35),                  /*                          HI */
 745    COSTS_N_INSNS (51),                  /*                          SI */
 746    COSTS_N_INSNS (83),                  /*                          DI */
 747    COSTS_N_INSNS (83)},                 /*                          other */
 748   COSTS_N_INSNS (1),                    /* cost of movsx */
 749   COSTS_N_INSNS (1),                    /* cost of movzx */
 750   8,                                    /* "large" insn */
 751   9,                                    /* MOVE_RATIO */
 752   4,                                    /* cost for loading QImode using movzbl */
 753   {3, 4, 3},                            /* cost of loading integer registers
 754                                            in QImode, HImode and SImode.
 755                                            Relative to reg-reg move (2).  */
 756   {3, 4, 3},                            /* cost of storing integer registers */
 757   4,                                    /* cost of reg,reg fld/fst */
 758   {4, 4, 12},                           /* cost of loading fp registers
 759                                            in SFmode, DFmode and XFmode */
 760   {6, 6, 8},                            /* cost of storing fp registers
 761                                            in SFmode, DFmode and XFmode */
 762   2,                                    /* cost of moving MMX register */
 763   {3, 3},                               /* cost of loading MMX registers
 764                                            in SImode and DImode */
 765   {4, 4},                               /* cost of storing MMX registers
 766                                            in SImode and DImode */
 767   2,                                    /* cost of moving SSE register */
 768   {4, 4, 3},                            /* cost of loading SSE registers
 769                                            in SImode, DImode and TImode */
 770   {4, 4, 5},                            /* cost of storing SSE registers
 771                                            in SImode, DImode and TImode */
 772   3,                                    /* MMX or SSE register to integer */
 773                                         /* On K8
 774                                             MOVD reg64, xmmreg  Double  FSTORE 4
 775                                             MOVD reg32, xmmreg  Double  FSTORE 4
 776                                            On AMDFAM10
 777                                             MOVD reg64, xmmreg  Double  FADD 3
 778                                                                 1/1  1/1
 779                                             MOVD reg32, xmmreg  Double  FADD 3
 780                                                                 1/1  1/1 */
 781   64,                                   /* size of l1 cache.  */
 782   512,                                  /* size of l2 cache.  */
 783   64,                                   /* size of prefetch block */
 784   /* New AMD processors never drop prefetches; if they cannot be performed
 785      immediately, they are queued.  We set the number of simultaneous
 786      prefetches to a large constant to reflect this (it is probably not a
 787      good idea to leave the number of prefetches unlimited, as their
 788      execution also takes some time).  */
 789   100,                                  /* number of parallel prefetches */
 790   2,                                    /* Branch cost */
 791   COSTS_N_INSNS (4),                    /* cost of FADD and FSUB insns.  */
 792   COSTS_N_INSNS (4),                    /* cost of FMUL instruction.  */
 793   COSTS_N_INSNS (19),                   /* cost of FDIV instruction.  */
 794   COSTS_N_INSNS (2),                    /* cost of FABS instruction.  */
 795   COSTS_N_INSNS (2),                    /* cost of FCHS instruction.  */
 796   COSTS_N_INSNS (35),                   /* cost of FSQRT instruction.  */
 797
 798   /* AMDFAM10 has an optimized REP instruction for medium sized blocks, but
 799      for very small blocks it is better to use a loop.  For large blocks, a
 800      libcall can do non-temporal accesses and beat inline considerably.  */
       /* NOTE(review): each of the two entries below holds two tables; the
          second of each uses rep_prefix_8_byte, so it is presumably the
          64-bit variant.  The entries themselves are presumably memcpy and
          then memset.  Confirm against struct processor_costs.  */
 801   {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
 802    {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
 803   {{libcall, {{8, loop}, {24, unrolled_loop},
 804               {2048, rep_prefix_4_byte}, {-1, libcall}}},
 805    {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
 806   4,                                    /* scalar_stmt_cost.  */
 807   2,                                    /* scalar_load_cost.  */
 808   2,                                    /* scalar_store_cost.  */
 809   6,                                    /* vec_stmt_cost.  */
 810   0,                                    /* vec_to_scalar_cost.  */
 811   2,                                    /* scalar_to_vec_cost.  */
 812   2,                                    /* vec_align_load_cost.  */
 813   2,                                    /* vec_unalign_load_cost.  */
 814   2,                                    /* vec_store_cost.  */
 815   2,                                    /* cond_taken_branch_cost.  */
 816   1,                                    /* cond_not_taken_branch_cost.  */
 817 };
 818
 /* Cost table for the Pentium 4.  The initializer is positional: every
    value must stay in the member order of struct processor_costs, which is
    declared outside this part of the file.  The comment trailing each entry
    names the quantity that entry sets.  */
 819 static const
 820 struct processor_costs pentium4_cost = {
 821   COSTS_N_INSNS (1),                    /* cost of an add instruction */
 822   COSTS_N_INSNS (3),                    /* cost of a lea instruction */
 823   COSTS_N_INSNS (4),                    /* variable shift costs */
 824   COSTS_N_INSNS (4),                    /* constant shift costs */
 825   {COSTS_N_INSNS (15),                  /* cost of starting multiply for QI */
 826    COSTS_N_INSNS (15),                  /*                               HI */
 827    COSTS_N_INSNS (15),                  /*                               SI */
 828    COSTS_N_INSNS (15),                  /*                               DI */
 829    COSTS_N_INSNS (15)},                 /*                               other */
 830   0,                                    /* cost of multiply per each bit set */
 831   {COSTS_N_INSNS (56),                  /* cost of a divide/mod for QI */
 832    COSTS_N_INSNS (56),                  /*                          HI */
 833    COSTS_N_INSNS (56),                  /*                          SI */
 834    COSTS_N_INSNS (56),                  /*                          DI */
 835    COSTS_N_INSNS (56)},                 /*                          other */
 836   COSTS_N_INSNS (1),                    /* cost of movsx */
 837   COSTS_N_INSNS (1),                    /* cost of movzx */
 838   16,                                   /* "large" insn */
 839   6,                                    /* MOVE_RATIO */
 840   2,                                    /* cost for loading QImode using movzbl */
 841   {4, 5, 4},                            /* cost of loading integer registers
 842                                            in QImode, HImode and SImode.
 843                                            Relative to reg-reg move (2).  */
 844   {2, 3, 2},                            /* cost of storing integer registers */
 845   2,                                    /* cost of reg,reg fld/fst */
 846   {2, 2, 6},                            /* cost of loading fp registers
 847                                            in SFmode, DFmode and XFmode */
 848   {4, 4, 6},                            /* cost of storing fp registers
 849                                            in SFmode, DFmode and XFmode */
 850   2,                                    /* cost of moving MMX register */
 851   {2, 2},                               /* cost of loading MMX registers
 852                                            in SImode and DImode */
 853   {2, 2},                               /* cost of storing MMX registers
 854                                            in SImode and DImode */
 855   12,                                   /* cost of moving SSE register */
 856   {12, 12, 12},                         /* cost of loading SSE registers
 857                                            in SImode, DImode and TImode */
 858   {2, 2, 8},                            /* cost of storing SSE registers
 859                                            in SImode, DImode and TImode */
 860   10,                                   /* MMX or SSE register to integer */
 861   8,                                    /* size of l1 cache.  */
 862   256,                                  /* size of l2 cache.  */
 863   64,                                   /* size of prefetch block */
 864   6,                                    /* number of parallel prefetches */
 865   2,                                    /* Branch cost */
 866   COSTS_N_INSNS (5),                    /* cost of FADD and FSUB insns.  */
 867   COSTS_N_INSNS (7),                    /* cost of FMUL instruction.  */
 868   COSTS_N_INSNS (43),                   /* cost of FDIV instruction.  */
 869   COSTS_N_INSNS (2),                    /* cost of FABS instruction.  */
 870   COSTS_N_INSNS (2),                    /* cost of FCHS instruction.  */
 871   COSTS_N_INSNS (43),                   /* cost of FSQRT instruction.  */
       /* NOTE(review): the next two entries are block-operation strategy
          tables, presumably for memcpy and then memset; each pairs a real
          table with DUMMY_STRINGOP_ALGS as its second slot.  Confirm the
          member order against struct processor_costs.  */
 872   {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
 873    DUMMY_STRINGOP_ALGS},
 874   {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
 875    {-1, libcall}}},
 876    DUMMY_STRINGOP_ALGS},
 877   1,                                    /* scalar_stmt_cost.  */
 878   1,                                    /* scalar_load_cost.  */
 879   1,                                    /* scalar_store_cost.  */
 880   1,                                    /* vec_stmt_cost.  */
 881   1,                                    /* vec_to_scalar_cost.  */
 882   1,                                    /* scalar_to_vec_cost.  */
 883   1,                                    /* vec_align_load_cost.  */
 884   2,                                    /* vec_unalign_load_cost.  */
 885   1,                                    /* vec_store_cost.  */
 886   3,                                    /* cond_taken_branch_cost.  */
 887   1,                                    /* cond_not_taken_branch_cost.  */
 888 };
 889
 890 static const
 891 struct processor_costs nocona_cost = {
 892   COSTS_N_INSNS (1),                    /* cost of an add instruction */
 893   COSTS_N_INSNS (1),                    /* cost of a lea instruction */
 894   COSTS_N_INSNS (1),                    /* variable shift costs */
 895   COSTS_N_INSNS (1),                    /* constant shift costs */
 896   {COSTS_N_INSNS (10),                  /* cost of starting multiply for QI */
 897    COSTS_N_INSNS (10),                  /*                               HI */
 898    COSTS_N_INSNS (10),                  /*                               SI */
 899    COSTS_N_INSNS (10),                  /*                               DI */
 900    COSTS_N_INSNS (10)},                 /*                               other */
 901   0,                                    /* cost of multiply per each bit set */
 902   {COSTS_N_INSNS (66),                  /* cost of a divide/mod for QI */
 903    COSTS_N_INSNS (66),                  /*                          HI */
 904    COSTS_N_INSNS (66),                  /*                          SI */
 905    COSTS_N_INSNS (66),                  /*                          DI */
 906    COSTS_N_INSNS (66)},                 /*                          other */
 907   COSTS_N_INSNS (1),                    /* cost of movsx */
 908   COSTS_N_INSNS (1),                    /* cost of movzx */
 909   16,                                   /* "large" insn */
 910   17,                                   /* MOVE_RATIO */
 911   4,                                    /* cost for loading QImode using movzbl */
 912   {4, 4, 4},                            /* cost of loading integer registers
 913                                            in QImode, HImode and SImode.
 914                                            Relative to reg-reg move (2).  */
 915   {4, 4, 4},                            /* cost of storing integer registers */
 916   3,                                    /* cost of reg,reg fld/fst */
 917   {12, 12, 12},                         /* cost of loading fp registers
 918                                            in SFmode, DFmode and XFmode */
 919   {4, 4, 4},                            /* cost of storing fp registers
 920                                            in SFmode, DFmode and XFmode */
 921   6,                                    /* cost of moving MMX register */
 922   {12, 12},                             /* cost of loading MMX registers
 923                                            in SImode and DImode */
 924   {12, 12},                             /* cost of storing MMX registers
 925                                            in SImode and DImode */
 926   6,                                    /* cost of moving SSE register */
 927   {12, 12, 12},                         /* cost of loading SSE registers
 928                                            in SImode, DImode and TImode */
 929   {12, 12, 12},                         /* cost of storing SSE registers
 930                                            in SImode, DImode and TImode */
 931   8,                                    /* MMX or SSE register to integer */
 932   8,                                    /* size of l1 cache.  */
 933   1024,                                 /* size of l2 cache.  */
 934   128,                                  /* size of prefetch block */
 935   8,                                    /* number of parallel prefetches */
 936   1,                                    /* Branch cost */
 937   COSTS_N_INSNS (6),                    /* cost of FADD and FSUB insns.  */
 938   COSTS_N_INSNS (8),                    /* cost of FMUL instruction.  */
 939   COSTS_N_INSNS (40),                   /* cost of FDIV instruction.  */
 940   COSTS_N_INSNS (3),                    /* cost of FABS instruction.  */
 941   COSTS_N_INSNS (3),                    /* cost of FCHS instruction.  */
 942   COSTS_N_INSNS (44),                   /* cost of FSQRT instruction.  */
 943   {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
 944    {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
 945               {100000, unrolled_loop}, {-1, libcall}}}},
 946   {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
 947    {-1, libcall}}},
 948    {libcall, {{24, loop}, {64, unrolled_loop},
 949               {8192, rep_prefix_8_byte}, {-1, libcall}}}},
 950   1,                                    /* scalar_stmt_cost.  */
 951   1,                                    /* scalar load_cost.  */
 952   1,                                    /* scalar_store_cost.  */
 953   1,                                    /* vec_stmt_cost.  */
 954   1,                                    /* vec_to_scalar_cost.  */
 955   1,                                    /* scalar_to_vec_cost.  */
 956   1,                                    /* vec_align_load_cost.  */
 957   2,                                    /* vec_unalign_load_cost.  */
 958   1,                                    /* vec_store_cost.  */
 959   3,                                    /* cond_taken_branch_cost.  */
 960   1,                                    /* cond_not_taken_branch_cost.  */
 961 };
 962
 963 static const
 964 struct processor_costs core2_cost = {   /* Cost table for Core 2.  */
 965   COSTS_N_INSNS (1),                    /* cost of an add instruction */
 966   COSTS_N_INSNS (1) + 1,                /* cost of a lea instruction */
 967   COSTS_N_INSNS (1),                    /* variable shift costs */
 968   COSTS_N_INSNS (1),                    /* constant shift costs */
 969   {COSTS_N_INSNS (3),                   /* cost of starting multiply for QI */
 970    COSTS_N_INSNS (3),                   /*                               HI */
 971    COSTS_N_INSNS (3),                   /*                               SI */
 972    COSTS_N_INSNS (3),                   /*                               DI */
 973    COSTS_N_INSNS (3)},                  /*                               other */
 974   0,                                    /* cost of multiply per each bit set */
 975   {COSTS_N_INSNS (22),                  /* cost of a divide/mod for QI */
 976    COSTS_N_INSNS (22),                  /*                          HI */
 977    COSTS_N_INSNS (22),                  /*                          SI */
 978    COSTS_N_INSNS (22),                  /*                          DI */
 979    COSTS_N_INSNS (22)},                 /*                          other */
 980   COSTS_N_INSNS (1),                    /* cost of movsx */
 981   COSTS_N_INSNS (1),                    /* cost of movzx */
 982   8,                                    /* "large" insn */
 983   16,                                   /* MOVE_RATIO */
 984   2,                                    /* cost for loading QImode using movzbl */
 985   {6, 6, 6},                            /* cost of loading integer registers
 986                                            in QImode, HImode and SImode.
 987                                            Relative to reg-reg move (2).  */
 988   {4, 4, 4},                            /* cost of storing integer registers */
 989   2,                                    /* cost of reg,reg fld/fst */
 990   {6, 6, 6},                            /* cost of loading fp registers
 991                                            in SFmode, DFmode and XFmode */
 992   {4, 4, 4},                            /* cost of storing fp registers in SFmode, DFmode and XFmode */
 993   2,                                    /* cost of moving MMX register */
 994   {6, 6},                               /* cost of loading MMX registers
 995                                            in SImode and DImode */
 996   {4, 4},                               /* cost of storing MMX registers
 997                                            in SImode and DImode */
 998   2,                                    /* cost of moving SSE register */
 999   {6, 6, 6},                            /* cost of loading SSE registers
1000                                            in SImode, DImode and TImode */
1001   {4, 4, 4},                            /* cost of storing SSE registers
1002                                            in SImode, DImode and TImode */
1003   2,                                    /* MMX or SSE register to integer */
1004   32,                                   /* size of l1 cache.  */
1005   2048,                                 /* size of l2 cache.  */
1006   128,                                  /* size of prefetch block */
1007   8,                                    /* number of parallel prefetches */
1008   3,                                    /* Branch cost */
1009   COSTS_N_INSNS (3),                    /* cost of FADD and FSUB insns.  */
1010   COSTS_N_INSNS (5),                    /* cost of FMUL instruction.  */
1011   COSTS_N_INSNS (32),                   /* cost of FDIV instruction.  */
1012   COSTS_N_INSNS (1),                    /* cost of FABS instruction.  */
1013   COSTS_N_INSNS (1),                    /* cost of FCHS instruction.  */
1014   COSTS_N_INSNS (58),                   /* cost of FSQRT instruction.  */
       /* String-operation algorithm tables.  NOTE(review): presumably the
          memcpy strategies come first and the memset strategies second, each
          as a {32-bit, 64-bit} pair -- confirm against struct processor_costs.  */
1015   {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
1016    {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1017               {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1018   {{libcall, {{8, loop}, {15, unrolled_loop},
1019               {2048, rep_prefix_4_byte}, {-1, libcall}}},
1020    {libcall, {{24, loop}, {32, unrolled_loop},
1021               {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1022   1,                                    /* scalar_stmt_cost.  */
1023   1,                                    /* scalar_load_cost.  */
1024   1,                                    /* scalar_store_cost.  */
1025   1,                                    /* vec_stmt_cost.  */
1026   1,                                    /* vec_to_scalar_cost.  */
1027   1,                                    /* scalar_to_vec_cost.  */
1028   1,                                    /* vec_align_load_cost.  */
1029   2,                                    /* vec_unalign_load_cost.  */
1030   1,                                    /* vec_store_cost.  */
1031   3,                                    /* cond_taken_branch_cost.  */
1032   1,                                    /* cond_not_taken_branch_cost.  */
1033 };
1034
1035 /* Generic64 should produce code tuned for Nocona and K8.  */
1036 static const
1037 struct processor_costs generic64_cost = {
1038   COSTS_N_INSNS (1),                    /* cost of an add instruction */
1039   /* On all chips taken into consideration lea takes 2 cycles or more.  With
1040      that cost, however, our current implementation of synth_mult results in
1041      use of unnecessary temporary registers, causing regressions on several
1042      SPECfp benchmarks.  */
1043   COSTS_N_INSNS (1) + 1,                /* cost of a lea instruction */
1044   COSTS_N_INSNS (1),                    /* variable shift costs */
1045   COSTS_N_INSNS (1),                    /* constant shift costs */
1046   {COSTS_N_INSNS (3),                   /* cost of starting multiply for QI */
1047    COSTS_N_INSNS (4),                   /*                               HI */
1048    COSTS_N_INSNS (3),                   /*                               SI */
1049    COSTS_N_INSNS (4),                   /*                               DI */
1050    COSTS_N_INSNS (2)},                  /*                               other */
1051   0,                                    /* cost of multiply per each bit set */
1052   {COSTS_N_INSNS (18),                  /* cost of a divide/mod for QI */
1053    COSTS_N_INSNS (26),                  /*                          HI */
1054    COSTS_N_INSNS (42),                  /*                          SI */
1055    COSTS_N_INSNS (74),                  /*                          DI */
1056    COSTS_N_INSNS (74)},                 /*                          other */
1057   COSTS_N_INSNS (1),                    /* cost of movsx */
1058   COSTS_N_INSNS (1),                    /* cost of movzx */
1059   8,                                    /* "large" insn */
1060   17,                                   /* MOVE_RATIO */
1061   4,                                    /* cost for loading QImode using movzbl */
1062   {4, 4, 4},                            /* cost of loading integer registers
1063                                            in QImode, HImode and SImode.
1064                                            Relative to reg-reg move (2).  */
1065   {4, 4, 4},                            /* cost of storing integer registers */
1066   4,                                    /* cost of reg,reg fld/fst */
1067   {12, 12, 12},                         /* cost of loading fp registers
1068                                            in SFmode, DFmode and XFmode */
1069   {6, 6, 8},                            /* cost of storing fp registers
1070                                            in SFmode, DFmode and XFmode */
1071   2,                                    /* cost of moving MMX register */
1072   {8, 8},                               /* cost of loading MMX registers
1073                                            in SImode and DImode */
1074   {8, 8},                               /* cost of storing MMX registers
1075                                            in SImode and DImode */
1076   2,                                    /* cost of moving SSE register */
1077   {8, 8, 8},                            /* cost of loading SSE registers
1078                                            in SImode, DImode and TImode */
1079   {8, 8, 8},                            /* cost of storing SSE registers
1080                                            in SImode, DImode and TImode */
1081   5,                                    /* MMX or SSE register to integer */
1082   32,                                   /* size of l1 cache.  */
1083   512,                                  /* size of l2 cache.  */
1084   64,                                   /* size of prefetch block */
1085   6,                                    /* number of parallel prefetches */
1086   /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
1087      value is increased to the perhaps more appropriate value of 5.  */
1088   3,                                    /* Branch cost */
1089   COSTS_N_INSNS (8),                    /* cost of FADD and FSUB insns.  */
1090   COSTS_N_INSNS (8),                    /* cost of FMUL instruction.  */
1091   COSTS_N_INSNS (20),                   /* cost of FDIV instruction.  */
1092   COSTS_N_INSNS (8),                    /* cost of FABS instruction.  */
1093   COSTS_N_INSNS (8),                    /* cost of FCHS instruction.  */
1094   COSTS_N_INSNS (40),                   /* cost of FSQRT instruction.  */
       /* String-operation algorithm tables.  NOTE(review): presumably the
          memcpy strategies come first and the memset strategies second, each
          as a {32-bit, 64-bit} pair, with the 32-bit slot left as a dummy
          here -- confirm against struct processor_costs.  */
1095   {DUMMY_STRINGOP_ALGS,
1096    {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1097   {DUMMY_STRINGOP_ALGS,
1098    {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1099   1,                                    /* scalar_stmt_cost.  */
1100   1,                                    /* scalar_load_cost.  */
1101   1,                                    /* scalar_store_cost.  */
1102   1,                                    /* vec_stmt_cost.  */
1103   1,                                    /* vec_to_scalar_cost.  */
1104   1,                                    /* scalar_to_vec_cost.  */
1105   1,                                    /* vec_align_load_cost.  */
1106   2,                                    /* vec_unalign_load_cost.  */
1107   1,                                    /* vec_store_cost.  */
1108   3,                                    /* cond_taken_branch_cost.  */
1109   1,                                    /* cond_not_taken_branch_cost.  */
1110 };
1111
1112 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona
     and K8.  */
1113 static const
1114 struct processor_costs generic32_cost = {
1115   COSTS_N_INSNS (1),                    /* cost of an add instruction */
1116   COSTS_N_INSNS (1) + 1,                /* cost of a lea instruction */
1117   COSTS_N_INSNS (1),                    /* variable shift costs */
1118   COSTS_N_INSNS (1),                    /* constant shift costs */
1119   {COSTS_N_INSNS (3),                   /* cost of starting multiply for QI */
1120    COSTS_N_INSNS (4),                   /*                               HI */
1121    COSTS_N_INSNS (3),                   /*                               SI */
1122    COSTS_N_INSNS (4),                   /*                               DI */
1123    COSTS_N_INSNS (2)},                  /*                               other */
1124   0,                                    /* cost of multiply per each bit set */
1125   {COSTS_N_INSNS (18),                  /* cost of a divide/mod for QI */
1126    COSTS_N_INSNS (26),                  /*                          HI */
1127    COSTS_N_INSNS (42),                  /*                          SI */
1128    COSTS_N_INSNS (74),                  /*                          DI */
1129    COSTS_N_INSNS (74)},                 /*                          other */
1130   COSTS_N_INSNS (1),                    /* cost of movsx */
1131   COSTS_N_INSNS (1),                    /* cost of movzx */
1132   8,                                    /* "large" insn */
1133   17,                                   /* MOVE_RATIO */
1134   4,                                    /* cost for loading QImode using movzbl */
1135   {4, 4, 4},                            /* cost of loading integer registers
1136                                            in QImode, HImode and SImode.
1137                                            Relative to reg-reg move (2).  */
1138   {4, 4, 4},                            /* cost of storing integer registers */
1139   4,                                    /* cost of reg,reg fld/fst */
1140   {12, 12, 12},                         /* cost of loading fp registers
1141                                            in SFmode, DFmode and XFmode */
1142   {6, 6, 8},                            /* cost of storing fp registers
1143                                            in SFmode, DFmode and XFmode */
1144   2,                                    /* cost of moving MMX register */
1145   {8, 8},                               /* cost of loading MMX registers
1146                                            in SImode and DImode */
1147   {8, 8},                               /* cost of storing MMX registers
1148                                            in SImode and DImode */
1149   2,                                    /* cost of moving SSE register */
1150   {8, 8, 8},                            /* cost of loading SSE registers
1151                                            in SImode, DImode and TImode */
1152   {8, 8, 8},                            /* cost of storing SSE registers
1153                                            in SImode, DImode and TImode */
1154   5,                                    /* MMX or SSE register to integer */
1155   32,                                   /* size of l1 cache.  */
1156   256,                                  /* size of l2 cache.  */
1157   64,                                   /* size of prefetch block */
1158   6,                                    /* number of parallel prefetches */
1159   3,                                    /* Branch cost */
1160   COSTS_N_INSNS (8),                    /* cost of FADD and FSUB insns.  */
1161   COSTS_N_INSNS (8),                    /* cost of FMUL instruction.  */
1162   COSTS_N_INSNS (20),                   /* cost of FDIV instruction.  */
1163   COSTS_N_INSNS (8),                    /* cost of FABS instruction.  */
1164   COSTS_N_INSNS (8),                    /* cost of FCHS instruction.  */
1165   COSTS_N_INSNS (40),                   /* cost of FSQRT instruction.  */
       /* String-operation algorithm tables.  NOTE(review): presumably the
          memcpy strategies come first and the memset strategies second, each
          as a {32-bit, 64-bit} pair, with the 64-bit slot left as a dummy
          here -- confirm against struct processor_costs.  */
1166   {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1167    DUMMY_STRINGOP_ALGS},
1168   {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1169    DUMMY_STRINGOP_ALGS},
1170   1,                                    /* scalar_stmt_cost.  */
1171   1,                                    /* scalar_load_cost.  */
1172   1,                                    /* scalar_store_cost.  */
1173   1,                                    /* vec_stmt_cost.  */
1174   1,                                    /* vec_to_scalar_cost.  */
1175   1,                                    /* scalar_to_vec_cost.  */
1176   1,                                    /* vec_align_load_cost.  */
1177   2,                                    /* vec_unalign_load_cost.  */
1178   1,                                    /* vec_store_cost.  */
1179   3,                                    /* cond_taken_branch_cost.  */
1180   1,                                    /* cond_not_taken_branch_cost.  */
1181 };
1182
1183 const struct processor_costs *ix86_cost = &pentium_cost;
     /* ix86_cost points at a processor cost table and starts out at pentium_cost.
        NOTE(review): presumably repointed once the tuning CPU is known --
        confirm at the assignment site.  */
1184
1185 /* Processor feature/optimization bitmasks: each basic m_* macro is a single
     bit at the position of its PROCESSOR_* value; the composite macros OR
     several of those bits together.  */
1186 #define m_386 (1<<PROCESSOR_I386)
1187 #define m_486 (1<<PROCESSOR_I486)
1188 #define m_PENT (1<<PROCESSOR_PENTIUM)
1189 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1190 #define m_PENT4  (1<<PROCESSOR_PENTIUM4)
1191 #define m_NOCONA  (1<<PROCESSOR_NOCONA)
1192 #define m_CORE2  (1<<PROCESSOR_CORE2)
1193
1194 #define m_GEODE  (1<<PROCESSOR_GEODE)
1195 #define m_K6  (1<<PROCESSOR_K6)
1196 #define m_K6_GEODE  (m_K6 | m_GEODE)
1197 #define m_K8  (1<<PROCESSOR_K8)
1198 #define m_ATHLON  (1<<PROCESSOR_ATHLON)
1199 #define m_ATHLON_K8  (m_K8 | m_ATHLON)
1200 #define m_AMDFAM10  (1<<PROCESSOR_AMDFAM10)
1201 #define m_AMD_MULTIPLE  (m_K8 | m_ATHLON | m_AMDFAM10)
1202
1203 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1204 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1205
1206 /* Generic instruction choice should be the common subset of supported CPUs
1207    (PPro/PENT4/NOCONA/CORE2/Athlon/K8).  */
1208 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1209
1210 /* Feature tests against the various tunings.  One entry per X86_TUNE_*
     value, in order; each entry is a mask of m_* processor bits.  */
1211 unsigned int ix86_tune_features[X86_TUNE_LAST] = {
1212   /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1213      negatively, so enabling for Generic64 seems like a good code size
1214      tradeoff.  We can't enable it for 32bit generic because it does not
1215      work well with PPro based chips.  */
1216   m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2 | m_GENERIC64,
1217
1218   /* X86_TUNE_PUSH_MEMORY */
1219   m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
1220   | m_NOCONA | m_CORE2 | m_GENERIC,
1221
1222   /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1223   m_486 | m_PENT,
1224
1225   /* X86_TUNE_USE_BIT_TEST */
1226   m_386,
1227
1228   /* X86_TUNE_UNROLL_STRLEN */
1229   m_486 | m_PENT | m_PPRO | m_AMD_MULTIPLE | m_K6 | m_CORE2 | m_GENERIC,
1230
1231   /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1232   m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,
1233
1234   /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1235      on simulation results.  But after P4 was made, no performance benefit
1236      was observed with branch hints.  They also increase the code size.
1237      As a result, icc never generates branch hints.  */
1238   0,
1239
1240   /* X86_TUNE_DOUBLE_WITH_ADD */
1241   ~m_386,
1242
1243   /* X86_TUNE_USE_SAHF */
1244   m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
1245   | m_NOCONA | m_CORE2 | m_GENERIC,
1246
1247   /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1248      partial dependencies.  */
1249   m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA
1250   | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1251
1252   /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1253      register stalls on Generic32 compilation setting as well.  However
1254      in the current implementation the partial register stalls are not eliminated
1255      very well - they can be introduced via subregs synthesized by combine
1256      and can happen in caller/callee saving sequences.  Because this option
1257      pays back little on PPro based chips and is in conflict with partial reg
1258      dependencies used by Athlon/P4 based chips, it is better to leave it off
1259      for generic32 for now.  */
1260   m_PPRO,
1261
1262   /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1263   m_CORE2 | m_GENERIC,
1264
1265   /* X86_TUNE_USE_HIMODE_FIOP */
1266   m_386 | m_486 | m_K6_GEODE,
1267
1268   /* X86_TUNE_USE_SIMODE_FIOP */
1269   ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_CORE2 | m_GENERIC),
1270
1271   /* X86_TUNE_USE_MOV0 */
1272   m_K6,
1273
1274   /* X86_TUNE_USE_CLTD */
1275   ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC),
1276
1277   /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx.  */
1278   m_PENT4,
1279
1280   /* X86_TUNE_SPLIT_LONG_MOVES */
1281   m_PPRO,
1282
1283   /* X86_TUNE_READ_MODIFY_WRITE */
1284   ~m_PENT,
1285
1286   /* X86_TUNE_READ_MODIFY */
1287   ~(m_PENT | m_PPRO),
1288
1289   /* X86_TUNE_PROMOTE_QIMODE */
1290   m_K6_GEODE | m_PENT | m_386 | m_486 | m_AMD_MULTIPLE | m_CORE2
1291   | m_GENERIC /* | m_PENT4 ? */,
1292
1293   /* X86_TUNE_FAST_PREFIX */
1294   ~(m_PENT | m_486 | m_386),
1295
1296   /* X86_TUNE_SINGLE_STRINGOP */
1297   m_386 | m_PENT4 | m_NOCONA,
1298
1299   /* X86_TUNE_QIMODE_MATH */
1300   ~0,
1301
1302   /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1303      register stalls.  Just like X86_TUNE_PARTIAL_REG_STALL this option
1304      might be considered for Generic32 if our scheme for avoiding partial
1305      stalls was more effective.  */
1306   ~m_PPRO,
1307
1308   /* X86_TUNE_PROMOTE_QI_REGS */
1309   0,
1310
1311   /* X86_TUNE_PROMOTE_HI_REGS */
1312   m_PPRO,
1313
1314   /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop.  */
1315   m_AMD_MULTIPLE | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1316
1317   /* X86_TUNE_ADD_ESP_8 */
1318   m_AMD_MULTIPLE | m_PPRO | m_K6_GEODE | m_386
1319   | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1320
1321   /* X86_TUNE_SUB_ESP_4 */
1322   m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1323
1324   /* X86_TUNE_SUB_ESP_8 */
1325   m_AMD_MULTIPLE | m_PPRO | m_386 | m_486
1326   | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1327
1328   /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1329      for DFmode copies.  */
1330   ~(m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1331     | m_GENERIC | m_GEODE),
1332
1333   /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1334   m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1335
1336   /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1337      conflict here between PPro/Pentium4 based chips that treat 128bit
1338      SSE registers as single units versus K8 based chips that divide SSE
1339      registers to two 64bit halves.  This knob promotes all store destinations
1340      to be 128bit to allow register renaming on 128bit SSE units, but usually
1341      results in one extra microop on 64bit SSE units.  Experimental results
1342      show that disabling this option on P4 brings over 20% SPECfp regression,
1343      while enabling it on K8 brings roughly 2.4% regression that can be partly
1344      masked by careful scheduling of moves.  */
1345   m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_AMDFAM10,
1346
1347   /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
1348   m_AMDFAM10,
1349
1350   /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1351      are resolved on SSE register parts instead of whole registers, so we may
1352      maintain just the lower part of scalar values in proper format leaving the
1353      upper part undefined.  */
1354   m_ATHLON_K8,
1355
1356   /* X86_TUNE_SSE_TYPELESS_STORES */
1357   m_AMD_MULTIPLE,
1358
1359   /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1360   m_PPRO | m_PENT4 | m_NOCONA,
1361
1362   /* X86_TUNE_MEMORY_MISMATCH_STALL */
1363   m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1364
1365   /* X86_TUNE_PROLOGUE_USING_MOVE */
1366   m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1367
1368   /* X86_TUNE_EPILOGUE_USING_MOVE */
1369   m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1370
1371   /* X86_TUNE_SHIFT1 */
1372   ~m_486,
1373
1374   /* X86_TUNE_USE_FFREEP */
1375   m_AMD_MULTIPLE,
1376
1377   /* X86_TUNE_INTER_UNIT_MOVES */
1378   ~(m_AMD_MULTIPLE | m_GENERIC),
1379
1380   /* X86_TUNE_INTER_UNIT_CONVERSIONS */
1381   ~(m_AMDFAM10),
1382
1383   /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1384      than 4 branch instructions in the 16 byte window.  */
1385   m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1386
1387   /* X86_TUNE_SCHEDULE */
1388   m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC,
1389
1390   /* X86_TUNE_USE_BT */
1391   m_AMD_MULTIPLE,
1392
1393   /* X86_TUNE_USE_INCDEC */
1394   ~(m_PENT4 | m_NOCONA | m_GENERIC),
1395
1396   /* X86_TUNE_PAD_RETURNS */
1397   m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1398
1399   /* X86_TUNE_EXT_80387_CONSTANTS */
1400   m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC,
1401
1402   /* X86_TUNE_SHORTEN_X87_SSE */
1403   ~m_K8,
1404
1405   /* X86_TUNE_AVOID_VECTOR_DECODE */
1406   m_K8 | m_GENERIC64,
1407
1408   /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have the same latency for HImode
1409      and SImode multiply, but 386 and 486 do HImode multiply faster.  */
1410   ~(m_386 | m_486),
1411
1412   /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
1413      vector path on AMD machines.  */
1414   m_K8 | m_GENERIC64 | m_AMDFAM10,
1415
1416   /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
1417      machines.  */
1418   m_K8 | m_GENERIC64 | m_AMDFAM10,
1419
1420   /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
1421      than a MOV.  */
1422   m_PENT,
1423
1424   /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1425      but one byte longer.  */
1426   m_PENT,
1427
1428   /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
1429      operand that cannot be represented using a modRM byte.  The XOR
1430      replacement is long decoded, so this split helps here as well.  */
1431   m_K6,
1432
1433   /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
1434      from integer to FP.  */
1435   m_AMDFAM10,
1436 };
1437
1438 /* Feature tests against the various architecture variations.  One entry
     per X86_ARCH_* value, in order; each entry is written as the complement
     of the m_* processors that predate the instruction.  */
1439 unsigned int ix86_arch_features[X86_ARCH_LAST] = {
1440   /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro.  */
1441   ~(m_386 | m_486 | m_PENT | m_K6),
1442
1443   /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486.  */
1444   ~m_386,
1445
1446   /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium.  */
1447   ~(m_386 | m_486),
1448
1449   /* X86_ARCH_XADD: Exchange and add was added for 80486.  */
1450   ~m_386,
1451
1452   /* X86_ARCH_BSWAP: Byteswap was added for 80486.  */
1453   ~m_386,
1454 };
1455
1456 static const unsigned int x86_accumulate_outgoing_args  /* Mask of m_* processor bits.
     NOTE(review): presumably the CPUs for which accumulating outgoing
     arguments is the default -- confirm at the use site.  */
1457   = m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
1458
1459 static const unsigned int x86_arch_always_fancy_math_387  /* Mask of m_* processor bits.
     NOTE(review): presumably the CPUs that always provide the "fancy math"
     387 instructions -- confirm at the use site.  */
1460   = m_PENT | m_PPRO | m_AMD_MULTIPLE | m_PENT4
1461     | m_NOCONA | m_CORE2 | m_GENERIC;
1462
1463 static enum stringop_alg stringop_alg = no_stringop;  /* File-local string-operation algorithm selection; starts as no_stringop.  */
1464
1465 /* If the average insn count for a single function invocation is
1466    lower than this constant, emit fast (but longer) prologue and
1467    epilogue code.  */
1468 #define FAST_PROLOGUE_INSN_COUNT 20
1469
1470 /* Names for 8-bit (low), 8-bit (high), and 16-bit registers, respectively.
     Each table is initialized from the matching *_REGISTER_NAMES macro.  */
1471 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1472 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1473 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1474
1475 /* Array of the smallest class containing reg number REGNO, indexed by
1476    REGNO.  Used by REGNO_REG_CLASS in i386.h.  */
1477
1478 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1479 {
1480   /* ax, dx, cx, bx */
1481   AREG, DREG, CREG, BREG,
1482   /* si, di, bp, sp */
1483   SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1484   /* FP registers (top of stack, second, then the remaining six) */
1485   FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1486   FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1487   /* arg pointer */
1488   NON_Q_REGS,
1489   /* flags, fpsr, fpcr, frame */
1490   NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1491   /* SSE registers (the first one has its own class) */
1492   SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1493   SSE_REGS, SSE_REGS,
1494   /* MMX registers */
1495   MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1496   MMX_REGS, MMX_REGS,
1497   /* REX integer registers */
1498   NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1499   NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1500   /* SSE REX registers */
1501   SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1502   SSE_REGS, SSE_REGS,
1503 };
1504
1505 /* The "default" register map used in 32bit mode, laid out in gcc register
     number order (same grouping as regclass_map).  NOTE(review): -1
     presumably marks registers that have no debugger number -- confirm.  */
1506
1507 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1508 {
1509   0, 2, 1, 3, 6, 7, 4, 5,               /* general regs */
1510   12, 13, 14, 15, 16, 17, 18, 19,       /* fp regs */
1511   -1, -1, -1, -1, -1,                   /* arg, flags, fpsr, fpcr, frame */
1512   21, 22, 23, 24, 25, 26, 27, 28,       /* SSE */
1513   29, 30, 31, 32, 33, 34, 35, 36,       /* MMX */
1514   -1, -1, -1, -1, -1, -1, -1, -1,       /* extended integer registers */
1515   -1, -1, -1, -1, -1, -1, -1, -1,       /* extended SSE registers */
1516 };
1517
1518 static int const x86_64_int_parameter_registers[6] =  /* gcc register numbers of the six x86-64 integer parameter registers.  */
1519 {
1520   5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
1521   FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1522 };
1523
1524 static int const x86_64_ms_abi_int_parameter_registers[4] =  /* gcc register numbers of the four integer parameter registers for the MS ABI.  */
1525 {
1526   2 /*RCX*/, 1 /*RDX*/,
1527   FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1528 };
1529
1530 static int const x86_64_int_return_registers[4] =  /* gcc register numbers of the four x86-64 integer return registers.  */
1531 {
1532   0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
1533 };
1534
1535 /* The "default" register map used in 64bit mode, laid out in gcc register
     number order (same grouping as regclass_map).  NOTE(review): -1
     presumably marks registers that have no debugger number -- confirm.  */
1536 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1537 {
1538   0, 1, 2, 3, 4, 5, 6, 7,               /* general regs */
1539   33, 34, 35, 36, 37, 38, 39, 40,       /* fp regs */
1540   -1, -1, -1, -1, -1,                   /* arg, flags, fpsr, fpcr, frame */
1541   17, 18, 19, 20, 21, 22, 23, 24,       /* SSE */
1542   41, 42, 43, 44, 45, 46, 47, 48,       /* MMX */
1543   8,9,10,11,12,13,14,15,                /* extended integer registers */
1544   25, 26, 27, 28, 29, 30, 31, 32,       /* extended SSE registers */
1545 };
1546
1547 /* Define the register numbers to be used in Dwarf debugging information.
1548    The SVR4 reference port C compiler uses the following register numbers
1549    in its Dwarf output code:
1550         0 for %eax (gcc regno = 0)
1551         1 for %ecx (gcc regno = 2)
1552         2 for %edx (gcc regno = 1)
1553         3 for %ebx (gcc regno = 3)
1554         4 for %esp (gcc regno = 7)
1555         5 for %ebp (gcc regno = 6)
1556         6 for %esi (gcc regno = 4)
1557         7 for %edi (gcc regno = 5)
1558    The following three DWARF register numbers are never generated by
1559    the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1560    believes these numbers have these meanings.
1561         8  for %eip    (no gcc equivalent)
1562         9  for %eflags (gcc regno = 17)
1563         10 for %trapno (no gcc equivalent)
1564    It is not at all clear how we should number the FP stack registers
1565    for the x86 architecture.  If the version of SDB on x86/svr4 were
1566    a bit less brain dead with respect to floating-point then we would
1567    have a precedent to follow with respect to DWARF register numbers
1568    for x86 FP registers, but the SDB on x86/svr4 is so completely
1569    broken with respect to FP registers that it is hardly worth thinking
1570    of it as something to strive for compatibility with.
1571    The version of x86/svr4 SDB I have at the moment does (partially)
1572    seem to believe that DWARF register number 11 is associated with
1573    the x86 register %st(0), but that's about all.  Higher DWARF
1574    register numbers don't seem to be associated with anything in
1575    particular, and even for DWARF regno 11, SDB only seems to under-
1576    stand that it should say that a variable lives in %st(0) (when
1577    asked via an `=' command) if we said it was in DWARF regno 11,
1578    but SDB still prints garbage when asked for the value of the
1579    variable in question (via a `/' command).
1580    (Also note that the labels SDB prints for various FP stack regs
1581    when doing an `x' command are all wrong.)
1582    Note that these problems generally don't affect the native SVR4
1583    C compiler because it doesn't allow the use of -O with -g and
1584    because when it is *not* optimizing, it allocates a memory
1585    location for each floating-point variable, and the memory
1586    location is what gets described in the DWARF AT_location
1587    attribute for the variable in question.
1588    Regardless of the severe mental illness of the x86/svr4 SDB, we
1589    do something sensible here and we use the following DWARF
1590    register numbers.  Note that these are all stack-top-relative
1591    numbers.
1592         11 for %st(0) (gcc regno = 8)
1593         12 for %st(1) (gcc regno = 9)
1594         13 for %st(2) (gcc regno = 10)
1595         14 for %st(3) (gcc regno = 11)
1596         15 for %st(4) (gcc regno = 12)
1597         16 for %st(5) (gcc regno = 13)
1598         17 for %st(6) (gcc regno = 14)
1599         18 for %st(7) (gcc regno = 15)
1600 */
1601 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1602 {
1603   0, 2, 1, 3, 6, 7, 5, 4,               /* general regs */
1604   11, 12, 13, 14, 15, 16, 17, 18,       /* fp regs */
1605   -1, 9, -1, -1, -1,                    /* arg, flags, fpsr, fpcr, frame */
1606   21, 22, 23, 24, 25, 26, 27, 28,       /* SSE registers */
1607   29, 30, 31, 32, 33, 34, 35, 36,       /* MMX registers */
1608   -1, -1, -1, -1, -1, -1, -1, -1,       /* extended integer registers */
1609   -1, -1, -1, -1, -1, -1, -1, -1,       /* extended SSE registers */
1610 };
1611
1612 /* Test and compare insns in i386.md store the information needed to
1613    generate branch and scc insns here.  */
1614
1615 rtx ix86_compare_op0 = NULL_RTX;
1616 rtx ix86_compare_op1 = NULL_RTX;
1617 rtx ix86_compare_emitted = NULL_RTX;
1618
/* Size of the register save area: one word for each of the REGPARM_MAX
   integer registers plus 16 bytes for each of the SSE_REGPARM_MAX SSE
   registers.  */
#define X86_64_VARARGS_SIZE \
  ((REGPARM_MAX * UNITS_PER_WORD) + (SSE_REGPARM_MAX * 16))
1621
1622 /* Define the structure for the machine field in struct function.  */
1623
1624 struct stack_local_entry GTY(())
1625 {
1626   unsigned short mode;
1627   unsigned short n;
1628   rtx rtl;
1629   struct stack_local_entry *next;
1630 };
1631
1632 /* Structure describing stack frame layout.
1633    Stack grows downward:
1634
1635    [arguments]
1636                                               <- ARG_POINTER
1637    saved pc
1638
1639    saved frame pointer if frame_pointer_needed
1640                                               <- HARD_FRAME_POINTER
1641    [saved regs]
1642
1643    [padding1]          \
1644                         )
1645    [va_arg registers]  (
1646                         > to_allocate         <- FRAME_POINTER
1647    [frame]             (
1648                         )
1649    [padding2]          /
1650   */
1651 struct ix86_frame
1652 {
1653   int nregs;
1654   int padding1;
1655   int va_arg_size;
1656   HOST_WIDE_INT frame;
1657   int padding2;
1658   int outgoing_arguments_size;
1659   int red_zone_size;
1660
1661   HOST_WIDE_INT to_allocate;
1662   /* The offsets relative to ARG_POINTER.  */
1663   HOST_WIDE_INT frame_pointer_offset;
1664   HOST_WIDE_INT hard_frame_pointer_offset;
1665   HOST_WIDE_INT stack_pointer_offset;
1666
1667   /* When save_regs_using_mov is set, emit prologue using
1668      move instead of push instructions.  */
1669   bool save_regs_using_mov;
1670 };
1671
1672 /* Code model option.  */
1673 enum cmodel ix86_cmodel;
1674 /* Asm dialect.  */
1675 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1676 /* TLS dialects.  */
1677 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1678
1679 /* Which unit we are generating floating point math for.  */
1680 enum fpmath_unit ix86_fpmath;
1681
1682 /* Which cpu are we scheduling for.  */
1683 enum processor_type ix86_tune;
1684
1685 /* Which instruction set architecture to use.  */
1686 enum processor_type ix86_arch;
1687
/* Nonzero if the SSE prefetch instruction is not a NOOP.  */
int x86_prefetch_sse;

/* Numeric form of ix86_regparm_string.  */
static int ix86_regparm;

/* State for the -mstackrealign option, and the spelling of the
   matching attribute name.  */
extern int ix86_force_align_arg_pointer;
static const char ix86_force_align_arg_pointer_string[]
  = "force_align_arg_pointer";

/* Preferred stack boundary alignment, in bits.  */
unsigned int ix86_preferred_stack_boundary;

/* Branch cost; takes values 1-5, see jump.c.  */
int ix86_branch_cost;
1703
1704 /* Variables which are this size or smaller are put in the data/bss
1705    or ldata/lbss sections.  */
1706
1707 int ix86_section_threshold = 65536;
1708
1709 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
1710 char internal_label_prefix[16];
1711 int internal_label_prefix_len;
1712
1713 /* Fence to use after loop using movnt.  */
1714 tree x86_mfence;
1715
/* Register class used for passing a given 64bit part of an argument.
   The classes are the ones documented by the psABI, with two
   gcc-specific refinements:

   - SSESF and SSEDF are really the SSE class; they only tell gcc to use
     an SFmode or DFmode move instead of a DImode one, which avoids
     reformatting penalties.

   - INTEGERSI is the INTEGER class for the case where the upper half
     holds only padding, so that a cheaper SImode move can be used.

   The enumerators are numbered consecutively from zero and serve as
   indices into x86_64_reg_class_name, so the two must be kept in the
   same order.  */
enum x86_64_reg_class
{
  X86_64_NO_CLASS,
  X86_64_INTEGER_CLASS,
  X86_64_INTEGERSI_CLASS,	/* INTEGER, moved in SImode.  */
  X86_64_SSE_CLASS,
  X86_64_SSESF_CLASS,		/* SSE, moved in SFmode.  */
  X86_64_SSEDF_CLASS,		/* SSE, moved in DFmode.  */
  X86_64_SSEUP_CLASS,
  X86_64_X87_CLASS,
  X86_64_X87UP_CLASS,
  X86_64_COMPLEX_X87_CLASS,
  X86_64_MEMORY_CLASS
};
/* Printable names of the argument classes, indexed by
   enum x86_64_reg_class; there is exactly one entry per enumerator, in
   enumerator order.  The last entry names X86_64_MEMORY_CLASS; it used
   to read "no", which made it indistinguishable from X86_64_NO_CLASS
   in any dump.  */
static const char * const x86_64_reg_class_name[] =
{
  "no",		/* X86_64_NO_CLASS */
  "integer",	/* X86_64_INTEGER_CLASS */
  "integerSI",	/* X86_64_INTEGERSI_CLASS */
  "sse",	/* X86_64_SSE_CLASS */
  "sseSF",	/* X86_64_SSESF_CLASS */
  "sseDF",	/* X86_64_SSEDF_CLASS */
  "sseup",	/* X86_64_SSEUP_CLASS */
  "x87",	/* X86_64_X87_CLASS */
  "x87up",	/* X86_64_X87UP_CLASS */
  "cplx87",	/* X86_64_COMPLEX_X87_CLASS */
  "memory"	/* X86_64_MEMORY_CLASS */
};
1742
1743 #define MAX_CLASSES 4
1744
1745 /* Table of constants used by fldpi, fldln2, etc....  */
1746 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1747 static bool ext_80387_constants_init = 0;
1748
1749 \f
1750 static struct machine_function * ix86_init_machine_status (void);
1751 static rtx ix86_function_value (const_tree, const_tree, bool);
1752 static int ix86_function_regparm (const_tree, const_tree);
1753 static void ix86_compute_frame_layout (struct ix86_frame *);
1754 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1755                                                  rtx, rtx, int);
1756
1757 \f
/* The svr4 ABI for the i386 says that records and unions are returned
   in memory, so that is the default unless a definition has already
   been supplied.  */
#if !defined (DEFAULT_PCC_STRUCT_RETURN)
# define DEFAULT_PCC_STRUCT_RETURN 1
#endif
1763
1764 /* Bit flags that specify the ISA we are compiling for.  */
1765 int ix86_isa_flags = TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_ISA_DEFAULT;
1766
1767 /* A mask of ix86_isa_flags that includes bit X if X
1768    was set or cleared on the command line.  */
1769 static int ix86_isa_flags_explicit;
1770
/* For each ISA option FOO, OPTION_MASK_ISA_FOO_UNSET is the set of
   other ISAs that become unavailable when FOO is turned off; it is the
   mask ix86_handle_option clears for the negated option.  The MMX and
   SSE families are handled separately.

   The definitions are listed innermost first, so each chain can be
   read bottom-up; as these are macros the order has no effect on
   their expansion.  */

/* MMX family: MMX -> 3DNow! -> 3DNow!A.  */
#define OPTION_MASK_ISA_3DNOW_UNSET OPTION_MASK_ISA_3DNOW_A
#define OPTION_MASK_ISA_MMX_UNSET \
  (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_UNSET)

/* SSE family: SSE -> SSE2 -> SSE3 -> SSSE3 -> SSE4.1 -> SSE4.2 -> SSE4A.  */
#define OPTION_MASK_ISA_SSE4_2_UNSET OPTION_MASK_ISA_SSE4A
#define OPTION_MASK_ISA_SSE4_1_UNSET \
  (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_2_UNSET)
#define OPTION_MASK_ISA_SSSE3_UNSET \
  (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_1_UNSET)
#define OPTION_MASK_ISA_SSE3_UNSET \
  (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSSE3_UNSET)
#define OPTION_MASK_ISA_SSE2_UNSET \
  (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE3_UNSET)
#define OPTION_MASK_ISA_SSE_UNSET \
  (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE2_UNSET)

/* SSE4 includes both SSE4.1 and SSE4.2.  -msse4 should be the same
   as -msse4.1 -msse4.2.  -mno-sse4 should be the same as
   -mno-sse4.1.  */
#define OPTION_MASK_ISA_SSE4 \
  (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2)
#define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET

/* NOTE(review): turning SSE4A off clears SSE4.1 and SSE4.2, while
   turning SSE4.2 off clears SSE4A.  Confirm that this mutual
   dependency is intended.  */
#define OPTION_MASK_ISA_SSE4A_UNSET OPTION_MASK_ISA_SSE4

/* NOTE(review): this expands to exactly the same mask as
   OPTION_MASK_ISA_MMX_UNSET, i.e. turning SSE5 off clears the 3DNow!
   bits.  That looks like a copy of the MMX definition -- confirm the
   intended mask before relying on it.  */
#define OPTION_MASK_ISA_SSE5_UNSET \
  (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_UNSET)

1800
1801 /* Vectorization library interface and handlers.  */
1802 tree (*ix86_veclib_handler)(enum built_in_function, tree, tree) = NULL;
1803 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
1804
1805 /* Implement TARGET_HANDLE_OPTION.  */
1806
1807 static bool
1808 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1809 {
1810   switch (code)
1811     {
1812     case OPT_mmmx:
1813       ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX;
1814       if (!value)
1815         {
1816           ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
1817           ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
1818         }
1819       return true;
1820
1821     case OPT_m3dnow:
1822       ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW;
1823       if (!value)
1824         {
1825           ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
1826           ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
1827         }
1828       return true;
1829
1830     case OPT_m3dnowa:
1831       return false;
1832
1833     case OPT_msse:
1834       ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE;
1835       if (!value)
1836         {
1837           ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
1838           ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
1839         }
1840       return true;
1841
1842     case OPT_msse2:
1843       ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2;
1844       if (!value)
1845         {
1846           ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
1847           ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
1848         }
1849       return true;
1850
1851     case OPT_msse3:
1852       ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3;
1853       if (!value)
1854         {
1855           ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
1856           ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
1857         }
1858       return true;
1859
1860     case OPT_mssse3:
1861       ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3;
1862       if (!value)
1863         {
1864           ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
1865           ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
1866         }
1867       return true;
1868
1869     case OPT_msse4_1:
1870       ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1;
1871       if (!value)
1872         {
1873           ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
1874           ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
1875         }
1876       return true;
1877
1878     case OPT_msse4_2:
1879       ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2;
1880       if (!value)
1881         {
1882           ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
1883           ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
1884         }
1885       return true;
1886
1887     case OPT_msse4:
1888       ix86_isa_flags |= OPTION_MASK_ISA_SSE4;
1889       ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4;
1890       return true;
1891
1892     case OPT_mno_sse4:
1893       ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
1894       ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
1895       return true;
1896
1897     case OPT_msse4a:
1898       ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A;
1899       if (!value)
1900         {
1901           ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
1902           ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
1903         }
1904       return true;
1905
1906     case OPT_msse5:
1907       ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5;
1908       if (!value)
1909         {
1910           ix86_isa_flags &= ~OPTION_MASK_ISA_SSE5_UNSET;
1911           ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_UNSET;
1912         }
1913       return true;
1914
1915     default:
1916       return true;
1917     }
1918 }
1919
1920 /* Sometimes certain combinations of command options do not make
1921    sense on a particular target machine.  You can define a macro
1922    `OVERRIDE_OPTIONS' to take account of this.  This macro, if
1923    defined, is executed once just after all the command options have
1924    been parsed.
1925
1926    Don't use this macro to turn on various extra optimizations for
1927    `-O'.  That is what `OPTIMIZATION_OPTIONS' is for.  */
1928
1929 void
1930 override_options (void)
1931 {
1932   int i;
1933   int ix86_tune_defaulted = 0;
1934   int ix86_arch_specified = 0;
1935   unsigned int ix86_arch_mask, ix86_tune_mask;
1936
1937   /* Comes from final.c -- no real reason to change it.  */
1938 #define MAX_CODE_ALIGN 16
1939
1940   static struct ptt
1941     {
1942       const struct processor_costs *cost;       /* Processor costs */
1943       const int align_loop;                     /* Default alignments.  */
1944       const int align_loop_max_skip;
1945       const int align_jump;
1946       const int align_jump_max_skip;
1947       const int align_func;
1948     }
1949   const processor_target_table[PROCESSOR_max] =
1950     {
1951       {&i386_cost, 4, 3, 4, 3, 4},
1952       {&i486_cost, 16, 15, 16, 15, 16},
1953       {&pentium_cost, 16, 7, 16, 7, 16},
1954       {&pentiumpro_cost, 16, 15, 16, 10, 16},
1955       {&geode_cost, 0, 0, 0, 0, 0},
1956       {&k6_cost, 32, 7, 32, 7, 32},
1957       {&athlon_cost, 16, 7, 16, 7, 16},
1958       {&pentium4_cost, 0, 0, 0, 0, 0},
1959       {&k8_cost, 16, 7, 16, 7, 16},
1960       {&nocona_cost, 0, 0, 0, 0, 0},
1961       {&core2_cost, 16, 10, 16, 10, 16},
1962       {&generic32_cost, 16, 7, 16, 7, 16},
1963       {&generic64_cost, 16, 10, 16, 10, 16},
1964       {&amdfam10_cost, 32, 24, 32, 7, 32}
1965     };
1966
1967   static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
1968     {
1969       "generic",
1970       "i386",
1971       "i486",
1972       "pentium",
1973       "pentium-mmx",
1974       "pentiumpro",
1975       "pentium2",
1976       "pentium3",
1977       "pentium4",
1978       "pentium-m",
1979       "prescott",
1980       "nocona",
1981       "core2",
1982       "geode",
1983       "k6",
1984       "k6-2",
1985       "k6-3",
1986       "athlon",
1987       "athlon-4",
1988       "k8",
1989       "amdfam10"
1990     };
1991
1992   enum pta_flags
1993     {
1994       PTA_SSE = 1 << 0,
1995       PTA_SSE2 = 1 << 1,
1996       PTA_SSE3 = 1 << 2,
1997       PTA_MMX = 1 << 3,
1998       PTA_PREFETCH_SSE = 1 << 4,
1999       PTA_3DNOW = 1 << 5,
2000       PTA_3DNOW_A = 1 << 6,
2001       PTA_64BIT = 1 << 7,
2002       PTA_SSSE3 = 1 << 8,
2003       PTA_CX16 = 1 << 9,
2004       PTA_POPCNT = 1 << 10,
2005       PTA_ABM = 1 << 11,
2006       PTA_SSE4A = 1 << 12,
2007       PTA_NO_SAHF = 1 << 13,
2008       PTA_SSE4_1 = 1 << 14,
2009       PTA_SSE4_2 = 1 << 15,
2010       PTA_SSE5 = 1 << 16
2011     };
2012
2013   static struct pta
2014     {
2015       const char *const name;           /* processor name or nickname.  */
2016       const enum processor_type processor;
2017       const unsigned /*enum pta_flags*/ flags;
2018     }
2019   const processor_alias_table[] =
2020     {
2021       {"i386", PROCESSOR_I386, 0},
2022       {"i486", PROCESSOR_I486, 0},
2023       {"i586", PROCESSOR_PENTIUM, 0},
2024       {"pentium", PROCESSOR_PENTIUM, 0},
2025       {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
2026       {"winchip-c6", PROCESSOR_I486, PTA_MMX},
2027       {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
2028       {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
2029       {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
2030       {"i686", PROCESSOR_PENTIUMPRO, 0},
2031       {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
2032       {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
2033       {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
2034       {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
2035       {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_SSE2},
2036       {"pentium4", PROCESSOR_PENTIUM4, PTA_MMX |PTA_SSE | PTA_SSE2},
2037       {"pentium4m", PROCESSOR_PENTIUM4, PTA_MMX | PTA_SSE | PTA_SSE2},
2038       {"prescott", PROCESSOR_NOCONA, PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
2039       {"nocona", PROCESSOR_NOCONA, (PTA_64BIT
2040                                     | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2041                                     | PTA_CX16 | PTA_NO_SAHF)},
2042       {"core2", PROCESSOR_CORE2, (PTA_64BIT
2043                                   | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2044                                   | PTA_SSSE3
2045                                   | PTA_CX16)},
2046       {"geode", PROCESSOR_GEODE, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2047                                   |PTA_PREFETCH_SSE)},
2048       {"k6", PROCESSOR_K6, PTA_MMX},
2049       {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
2050       {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
2051       {"athlon", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2052                                     | PTA_PREFETCH_SSE)},
2053       {"athlon-tbird", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2054                                           | PTA_PREFETCH_SSE)},
2055       {"athlon-4", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2056                                       | PTA_SSE)},
2057       {"athlon-xp", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2058                                        | PTA_SSE)},
2059       {"athlon-mp", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2060                                        | PTA_SSE)},
2061       {"x86-64", PROCESSOR_K8, (PTA_64BIT
2062                                 | PTA_MMX | PTA_SSE | PTA_SSE2
2063                                 | PTA_NO_SAHF)},
2064       {"k8", PROCESSOR_K8, (PTA_64BIT
2065                             | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2066                             | PTA_SSE | PTA_SSE2
2067                             | PTA_NO_SAHF)},
2068       {"k8-sse3", PROCESSOR_K8, (PTA_64BIT
2069                                  | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2070                                  | PTA_SSE | PTA_SSE2 | PTA_SSE3
2071                                  | PTA_NO_SAHF)},
2072       {"opteron", PROCESSOR_K8, (PTA_64BIT
2073                                  | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2074                                  | PTA_SSE | PTA_SSE2
2075                                  | PTA_NO_SAHF)},
2076       {"opteron-sse3", PROCESSOR_K8, (PTA_64BIT
2077                                       | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2078                                       | PTA_SSE | PTA_SSE2 | PTA_SSE3
2079                                       | PTA_NO_SAHF)},
2080       {"athlon64", PROCESSOR_K8, (PTA_64BIT
2081                                   | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2082                                   | PTA_SSE | PTA_SSE2
2083                                   | PTA_NO_SAHF)},
2084       {"athlon64-sse3", PROCESSOR_K8, (PTA_64BIT
2085                                        | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2086                                        | PTA_SSE | PTA_SSE2 | PTA_SSE3
2087                                        | PTA_NO_SAHF)},
2088       {"athlon-fx", PROCESSOR_K8, (PTA_64BIT
2089                                    | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2090                                    | PTA_SSE | PTA_SSE2
2091                                    | PTA_NO_SAHF)},
2092       {"amdfam10", PROCESSOR_AMDFAM10, (PTA_64BIT
2093                                         | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2094                                         | PTA_SSE | PTA_SSE2 | PTA_SSE3
2095                                         | PTA_SSE4A
2096                                         | PTA_CX16 | PTA_ABM)},
2097       {"barcelona", PROCESSOR_AMDFAM10, (PTA_64BIT
2098                                          | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2099                                          | PTA_SSE | PTA_SSE2 | PTA_SSE3
2100                                          | PTA_SSE4A
2101                                          | PTA_CX16 | PTA_ABM)},
2102       {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch.  */ },
2103       {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch.  */ },
2104     };
2105
2106   int const pta_size = ARRAY_SIZE (processor_alias_table);
2107
2108 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2109   SUBTARGET_OVERRIDE_OPTIONS;
2110 #endif
2111
2112 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
2113   SUBSUBTARGET_OVERRIDE_OPTIONS;
2114 #endif
2115
2116   /* -fPIC is the default for x86_64.  */
2117   if (TARGET_MACHO && TARGET_64BIT)
2118     flag_pic = 2;
2119
2120   /* Set the default values for switches whose default depends on TARGET_64BIT
2121      in case they weren't overwritten by command line options.  */
2122   if (TARGET_64BIT)
2123     {
2124       /* Mach-O doesn't support omitting the frame pointer for now.  */
2125       if (flag_omit_frame_pointer == 2)
2126         flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
2127       if (flag_asynchronous_unwind_tables == 2)
2128         flag_asynchronous_unwind_tables = 1;
2129       if (flag_pcc_struct_return == 2)
2130         flag_pcc_struct_return = 0;
2131     }
2132   else
2133     {
2134       if (flag_omit_frame_pointer == 2)
2135         flag_omit_frame_pointer = 0;
2136       if (flag_asynchronous_unwind_tables == 2)
2137         flag_asynchronous_unwind_tables = 0;
2138       if (flag_pcc_struct_return == 2)
2139         flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
2140     }
2141
2142   /* Need to check -mtune=generic first.  */
2143   if (ix86_tune_string)
2144     {
2145       if (!strcmp (ix86_tune_string, "generic")
2146           || !strcmp (ix86_tune_string, "i686")
2147           /* As special support for cross compilers we read -mtune=native
2148              as -mtune=generic.  With native compilers we won't see the
2149              -mtune=native, as it was changed by the driver.  */
2150           || !strcmp (ix86_tune_string, "native"))
2151         {
2152           if (TARGET_64BIT)
2153             ix86_tune_string = "generic64";
2154           else
2155             ix86_tune_string = "generic32";
2156         }
2157       else if (!strncmp (ix86_tune_string, "generic", 7))
2158         error ("bad value (%s) for -mtune= switch", ix86_tune_string);
2159     }
2160   else
2161     {
2162       if (ix86_arch_string)
2163         ix86_tune_string = ix86_arch_string;
2164       if (!ix86_tune_string)
2165         {
2166           ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
2167           ix86_tune_defaulted = 1;
2168         }
2169
2170       /* ix86_tune_string is set to ix86_arch_string or defaulted.  We
2171          need to use a sensible tune option.  */
2172       if (!strcmp (ix86_tune_string, "generic")
2173           || !strcmp (ix86_tune_string, "x86-64")
2174           || !strcmp (ix86_tune_string, "i686"))
2175         {
2176           if (TARGET_64BIT)
2177             ix86_tune_string = "generic64";
2178           else
2179             ix86_tune_string = "generic32";
2180         }
2181     }
2182   if (ix86_stringop_string)
2183     {
2184       if (!strcmp (ix86_stringop_string, "rep_byte"))
2185         stringop_alg = rep_prefix_1_byte;
2186       else if (!strcmp (ix86_stringop_string, "libcall"))
2187         stringop_alg = libcall;
2188       else if (!strcmp (ix86_stringop_string, "rep_4byte"))
2189         stringop_alg = rep_prefix_4_byte;
2190       else if (!strcmp (ix86_stringop_string, "rep_8byte"))
2191         stringop_alg = rep_prefix_8_byte;
2192       else if (!strcmp (ix86_stringop_string, "byte_loop"))
2193         stringop_alg = loop_1_byte;
2194       else if (!strcmp (ix86_stringop_string, "loop"))
2195         stringop_alg = loop;
2196       else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
2197         stringop_alg = unrolled_loop;
2198       else
2199         error ("bad value (%s) for -mstringop-strategy= switch", ix86_stringop_string);
2200     }
2201   if (!strcmp (ix86_tune_string, "x86-64"))
2202     warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated.  Use -mtune=k8 or "
2203              "-mtune=generic instead as appropriate.");
2204
2205   if (!ix86_arch_string)
2206     ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
2207   else
2208     ix86_arch_specified = 1;
2209
2210   if (!strcmp (ix86_arch_string, "generic"))
2211     error ("generic CPU can be used only for -mtune= switch");
2212   if (!strncmp (ix86_arch_string, "generic", 7))
2213     error ("bad value (%s) for -march= switch", ix86_arch_string);
2214
2215   if (ix86_cmodel_string != 0)
2216     {
2217       if (!strcmp (ix86_cmodel_string, "small"))
2218         ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2219       else if (!strcmp (ix86_cmodel_string, "medium"))
2220         ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
2221       else if (!strcmp (ix86_cmodel_string, "large"))
2222         ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
2223       else if (flag_pic)
2224         error ("code model %s does not support PIC mode", ix86_cmodel_string);
2225       else if (!strcmp (ix86_cmodel_string, "32"))
2226         ix86_cmodel = CM_32;
2227       else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
2228         ix86_cmodel = CM_KERNEL;
2229       else
2230         error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
2231     }
2232   else
2233     {
2234       /* For TARGET_64BIT_MS_ABI, force pic on, in order to enable the
2235          use of rip-relative addressing.  This eliminates fixups that
2236          would otherwise be needed if this object is to be placed in a
2237          DLL, and is essentially just as efficient as direct addressing.  */
2238       if (TARGET_64BIT_MS_ABI)
2239         ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
2240       else if (TARGET_64BIT)
2241         ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2242       else
2243         ix86_cmodel = CM_32;
2244     }
2245   if (ix86_asm_string != 0)
2246     {
2247       if (! TARGET_MACHO
2248           && !strcmp (ix86_asm_string, "intel"))
2249         ix86_asm_dialect = ASM_INTEL;
2250       else if (!strcmp (ix86_asm_string, "att"))
2251         ix86_asm_dialect = ASM_ATT;
2252       else
2253         error ("bad value (%s) for -masm= switch", ix86_asm_string);
2254     }
2255   if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
2256     error ("code model %qs not supported in the %s bit mode",
2257            ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
2258   if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
2259     sorry ("%i-bit mode not compiled in",
2260            (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
2261
2262   for (i = 0; i < pta_size; i++)
2263     if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
2264       {
2265         ix86_arch = processor_alias_table[i].processor;
2266         /* Default cpu tuning to the architecture.  */
2267         ix86_tune = ix86_arch;
2268
2269         if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2270           error ("CPU you selected does not support x86-64 "
2271                  "instruction set");
2272
2273         if (processor_alias_table[i].flags & PTA_MMX
2274             && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
2275           ix86_isa_flags |= OPTION_MASK_ISA_MMX;
2276         if (processor_alias_table[i].flags & PTA_3DNOW
2277             && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
2278           ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
2279         if (processor_alias_table[i].flags & PTA_3DNOW_A
2280             && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
2281           ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
2282         if (processor_alias_table[i].flags & PTA_SSE
2283             && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
2284           ix86_isa_flags |= OPTION_MASK_ISA_SSE;
2285         if (processor_alias_table[i].flags & PTA_SSE2
2286             && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
2287           ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
2288         if (processor_alias_table[i].flags & PTA_SSE3
2289             && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
2290           ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2291         if (processor_alias_table[i].flags & PTA_SSSE3
2292             && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
2293           ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
2294         if (processor_alias_table[i].flags & PTA_SSE4_1
2295             && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
2296           ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
2297         if (processor_alias_table[i].flags & PTA_SSE4_2
2298             && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
2299           ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
2300         if (processor_alias_table[i].flags & PTA_SSE4A
2301             && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
2302           ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
2303         if (processor_alias_table[i].flags & PTA_SSE5
2304             && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE5))
2305           ix86_isa_flags |= OPTION_MASK_ISA_SSE5;
2306
2307         if (processor_alias_table[i].flags & PTA_ABM)
2308           x86_abm = true;
2309         if (processor_alias_table[i].flags & PTA_CX16)
2310           x86_cmpxchg16b = true;
2311         if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM))
2312           x86_popcnt = true;
2313         if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
2314           x86_prefetch_sse = true;
2315         if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF)))
2316           x86_sahf = true;
2317
2318         break;
2319       }
2320
2321   if (i == pta_size)
2322     error ("bad value (%s) for -march= switch", ix86_arch_string);
2323
2324   ix86_arch_mask = 1u << ix86_arch;
2325   for (i = 0; i < X86_ARCH_LAST; ++i)
2326     ix86_arch_features[i] &= ix86_arch_mask;
2327
2328   for (i = 0; i < pta_size; i++)
2329     if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
2330       {
2331         ix86_tune = processor_alias_table[i].processor;
2332         if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2333           {
2334             if (ix86_tune_defaulted)
2335               {
2336                 ix86_tune_string = "x86-64";
2337                 for (i = 0; i < pta_size; i++)
2338                   if (! strcmp (ix86_tune_string,
2339                                 processor_alias_table[i].name))
2340                     break;
2341                 ix86_tune = processor_alias_table[i].processor;
2342               }
2343             else
2344               error ("CPU you selected does not support x86-64 "
2345                      "instruction set");
2346           }
2347         /* Intel CPUs have always interpreted SSE prefetch instructions as
2348            NOPs; so, we can enable SSE prefetch instructions even when
2349            -mtune (rather than -march) points us to a processor that has them.
2350            However, the VIA C3 gives a SIGILL, so we only do that for i686 and
2351            higher processors.  */
2352         if (TARGET_CMOVE
2353             && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
2354           x86_prefetch_sse = true;
2355         break;
2356       }
2357   if (i == pta_size)
2358     error ("bad value (%s) for -mtune= switch", ix86_tune_string);
2359
2360   ix86_tune_mask = 1u << ix86_tune;
2361   for (i = 0; i < X86_TUNE_LAST; ++i)
2362     ix86_tune_features[i] &= ix86_tune_mask;
2363
2364   if (optimize_size)
2365     ix86_cost = &size_cost;
2366   else
2367     ix86_cost = processor_target_table[ix86_tune].cost;
2368
2369   /* Arrange to set up i386_stack_locals for all functions.  */
2370   init_machine_status = ix86_init_machine_status;
2371
2372   /* Validate -mregparm= value.  */
2373   if (ix86_regparm_string)
2374     {
2375       if (TARGET_64BIT)
2376         warning (0, "-mregparm is ignored in 64-bit mode");
2377       i = atoi (ix86_regparm_string);
2378       if (i < 0 || i > REGPARM_MAX)
2379         error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
2380       else
2381         ix86_regparm = i;
2382     }
2383   if (TARGET_64BIT)
2384     ix86_regparm = REGPARM_MAX;
2385
2386   /* If the user has provided any of the -malign-* options,
2387      warn and use that value only if -falign-* is not set.
2388      Remove this code in GCC 3.2 or later.  */
2389   if (ix86_align_loops_string)
2390     {
2391       warning (0, "-malign-loops is obsolete, use -falign-loops");
2392       if (align_loops == 0)
2393         {
2394           i = atoi (ix86_align_loops_string);
2395           if (i < 0 || i > MAX_CODE_ALIGN)
2396             error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2397           else
2398             align_loops = 1 << i;
2399         }
2400     }
2401
2402   if (ix86_align_jumps_string)
2403     {
2404       warning (0, "-malign-jumps is obsolete, use -falign-jumps");
2405       if (align_jumps == 0)
2406         {
2407           i = atoi (ix86_align_jumps_string);
2408           if (i < 0 || i > MAX_CODE_ALIGN)
2409             error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2410           else
2411             align_jumps = 1 << i;
2412         }
2413     }
2414
2415   if (ix86_align_funcs_string)
2416     {
2417       warning (0, "-malign-functions is obsolete, use -falign-functions");
2418       if (align_functions == 0)
2419         {
2420           i = atoi (ix86_align_funcs_string);
2421           if (i < 0 || i > MAX_CODE_ALIGN)
2422             error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2423           else
2424             align_functions = 1 << i;
2425         }
2426     }
2427
2428   /* Default align_* from the processor table.  */
2429   if (align_loops == 0)
2430     {
2431       align_loops = processor_target_table[ix86_tune].align_loop;
2432       align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
2433     }
2434   if (align_jumps == 0)
2435     {
2436       align_jumps = processor_target_table[ix86_tune].align_jump;
2437       align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
2438     }
2439   if (align_functions == 0)
2440     {
2441       align_functions = processor_target_table[ix86_tune].align_func;
2442     }
2443
2444   /* Validate -mbranch-cost= value, or provide default.  */
2445   ix86_branch_cost = ix86_cost->branch_cost;
2446   if (ix86_branch_cost_string)
2447     {
2448       i = atoi (ix86_branch_cost_string);
2449       if (i < 0 || i > 5)
2450         error ("-mbranch-cost=%d is not between 0 and 5", i);
2451       else
2452         ix86_branch_cost = i;
2453     }
2454   if (ix86_section_threshold_string)
2455     {
2456       i = atoi (ix86_section_threshold_string);
2457       if (i < 0)
2458         error ("-mlarge-data-threshold=%d is negative", i);
2459       else
2460         ix86_section_threshold = i;
2461     }
2462
2463   if (ix86_tls_dialect_string)
2464     {
2465       if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
2466         ix86_tls_dialect = TLS_DIALECT_GNU;
2467       else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
2468         ix86_tls_dialect = TLS_DIALECT_GNU2;
2469       else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
2470         ix86_tls_dialect = TLS_DIALECT_SUN;
2471       else
2472         error ("bad value (%s) for -mtls-dialect= switch",
2473                ix86_tls_dialect_string);
2474     }
2475
2476   if (ix87_precision_string)
2477     {
2478       i = atoi (ix87_precision_string);
2479       if (i != 32 && i != 64 && i != 80)
2480         error ("pc%d is not valid precision setting (32, 64 or 80)", i);
2481     }
2482
2483   if (TARGET_64BIT)
2484     {
2485       target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
2486
2487       /* Enable by default the SSE and MMX builtins.  Do allow the user to
2488          explicitly disable any of these.  In particular, disabling SSE and
2489          MMX for kernel code is extremely useful.  */
2490       if (!ix86_arch_specified)
2491       ix86_isa_flags
2492         |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
2493              | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
2494
2495       if (TARGET_RTD)
2496         warning (0, "-mrtd is ignored in 64bit mode");
2497     }
2498   else
2499     {
2500       target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
2501
2502       if (!ix86_arch_specified)
2503       ix86_isa_flags
2504         |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
2505
2506       /* The i386 ABI does not specify a red zone.  It still makes sense to
2507          use one when the programmer takes care to keep the stack from being
2507          destroyed.  */
2508       if (!(target_flags_explicit & MASK_NO_RED_ZONE))
2509         target_flags |= MASK_NO_RED_ZONE;
2510     }
2511
2512   /* Keep nonleaf frame pointers.  */
2513   if (flag_omit_frame_pointer)
2514     target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
2515   else if (TARGET_OMIT_LEAF_FRAME_POINTER)
2516     flag_omit_frame_pointer = 1;
2517
2518   /* If we're doing fast math, we don't care about comparison order
2519      wrt NaNs.  This lets us use a shorter comparison sequence.  */
2520   if (flag_finite_math_only)
2521     target_flags &= ~MASK_IEEE_FP;
2522
2523   /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
2524      since the insns won't need emulation.  */
2525   if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
2526     target_flags &= ~MASK_NO_FANCY_MATH_387;
2527
2528   /* Likewise, if the target doesn't have a 387, or we've specified
2529      software floating point, don't use 387 inline intrinsics.  */
2530   if (!TARGET_80387)
2531     target_flags |= MASK_NO_FANCY_MATH_387;
2532
2533   /* Turn on SSE4A builtins for -msse5.  */
2534   if (TARGET_SSE5)
2535     ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
2536
2537   /* Turn on SSE4.1 builtins for -msse4.2.  */
2538   if (TARGET_SSE4_2)
2539     ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
2540
2541   /* Turn on SSSE3 builtins for -msse4.1.  */
2542   if (TARGET_SSE4_1)
2543     ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
2544
2545   /* Turn on SSE3 builtins for -mssse3.  */
2546   if (TARGET_SSSE3)
2547     ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2548
2549   /* Turn on SSE3 builtins for -msse4a.  */
2550   if (TARGET_SSE4A)
2551     ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2552
2553   /* Turn on SSE2 builtins for -msse3.  */
2554   if (TARGET_SSE3)
2555     ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
2556
2557   /* Turn on SSE builtins for -msse2.  */
2558   if (TARGET_SSE2)
2559     ix86_isa_flags |= OPTION_MASK_ISA_SSE;
2560
2561   /* Turn on MMX builtins for -msse.  */
2562   if (TARGET_SSE)
2563     {
2564       ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
2565       x86_prefetch_sse = true;
2566     }
2567
2568   /* Turn on MMX builtins for 3Dnow.  */
2569   if (TARGET_3DNOW)
2570     ix86_isa_flags |= OPTION_MASK_ISA_MMX;
2571
2572   /* Turn on popcnt instruction for -msse4.2 or -mabm.  */
2573   if (TARGET_SSE4_2 || TARGET_ABM)
2574     x86_popcnt = true;
2575
2576   /* Validate -mpreferred-stack-boundary= value, or provide default.
2577      The default of 128 bits is for Pentium III's SSE __m128.  We can't
2578      change it because of optimize_size.  Otherwise, we can't mix object
2579      files compiled with -Os and -On.  */
2580   ix86_preferred_stack_boundary = 128;
2581   if (ix86_preferred_stack_boundary_string)
2582     {
2583       i = atoi (ix86_preferred_stack_boundary_string);
2584       if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
2585         error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
2586                TARGET_64BIT ? 4 : 2);
2587       else
2588         ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
2589     }
2590
2591   /* Accept -msseregparm only if at least SSE support is enabled.  */
2592   if (TARGET_SSEREGPARM
2593       && ! TARGET_SSE)
2594     error ("-msseregparm used without SSE enabled");
2595
2596   ix86_fpmath = TARGET_FPMATH_DEFAULT;
2597   if (ix86_fpmath_string != 0)
2598     {
2599       if (! strcmp (ix86_fpmath_string, "387"))
2600         ix86_fpmath = FPMATH_387;
2601       else if (! strcmp (ix86_fpmath_string, "sse"))
2602         {
2603           if (!TARGET_SSE)
2604             {
2605               warning (0, "SSE instruction set disabled, using 387 arithmetics");
2606               ix86_fpmath = FPMATH_387;
2607             }
2608           else
2609             ix86_fpmath = FPMATH_SSE;
2610         }
2611       else if (! strcmp (ix86_fpmath_string, "387,sse")
2612                || ! strcmp (ix86_fpmath_string, "sse,387"))
2613         {
2614           if (!TARGET_SSE)
2615             {
2616               warning (0, "SSE instruction set disabled, using 387 arithmetics");
2617               ix86_fpmath = FPMATH_387;
2618             }
2619           else if (!TARGET_80387)
2620             {
2621               warning (0, "387 instruction set disabled, using SSE arithmetics");
2622               ix86_fpmath = FPMATH_SSE;
2623             }
2624           else
2625             ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
2626         }
2627       else
2628         error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
2629     }
2630
2631   /* If the i387 is disabled, then do not return values in it. */
2632   if (!TARGET_80387)
2633     target_flags &= ~MASK_FLOAT_RETURNS;
2634
2635   /* Use external vectorized library in vectorizing intrinsics.  */
2636   if (ix86_veclibabi_string)
2637     {
2638       if (strcmp (ix86_veclibabi_string, "acml") == 0)
2639         ix86_veclib_handler = ix86_veclibabi_acml;
2640       else
2641         error ("unknown vectorization library ABI type (%s) for "
2642                "-mveclibabi= switch", ix86_veclibabi_string);
2643     }
2644
2645   if ((x86_accumulate_outgoing_args & ix86_tune_mask)
2646       && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2647       && !optimize_size)
2648     target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2649
2650   /* ??? Unwind info is not correct around the CFG unless either a frame
2651      pointer is present or M_A_O_A is set.  Fixing this requires rewriting
2652      unwind info generation to be aware of the CFG and propagating states
2653      around edges.  */
2654   if ((flag_unwind_tables || flag_asynchronous_unwind_tables
2655        || flag_exceptions || flag_non_call_exceptions)
2656       && flag_omit_frame_pointer
2657       && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
2658     {
2659       if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2660         warning (0, "unwind tables currently require either a frame pointer "
2661                  "or -maccumulate-outgoing-args for correctness");
2662       target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2663     }
2664
2665   /* For sane SSE instruction set generation we need fcomi instruction.
2666      It is safe to enable all CMOVE instructions.  */
2667   if (TARGET_SSE)
2668     TARGET_CMOVE = 1;
2669
2670   /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix.  */
2671   {
2672     char *p;
2673     ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
2674     p = strchr (internal_label_prefix, 'X');
2675     internal_label_prefix_len = p - internal_label_prefix;
2676     *p = '\0';
2677   }
2678
2679   /* When scheduling description is not available, disable scheduler pass
2680      so it won't slow down the compilation and make x87 code slower.  */
2681   if (!TARGET_SCHEDULE)
2682     flag_schedule_insns_after_reload = flag_schedule_insns = 0;
2683
2684   if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
2685     set_param_value ("simultaneous-prefetches",
2686                      ix86_cost->simultaneous_prefetches);
2687   if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
2688     set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
2689   if (!PARAM_SET_P (PARAM_L1_CACHE_SIZE))
2690     set_param_value ("l1-cache-size", ix86_cost->l1_cache_size);
2691   if (!PARAM_SET_P (PARAM_L2_CACHE_SIZE))
2692     set_param_value ("l2-cache-size", ix86_cost->l2_cache_size);
2693
2694   /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
2695      can be optimized to ap = __builtin_next_arg (0).  */
2696   if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
2697     targetm.expand_builtin_va_start = NULL;
2698 }
2699 \f
2700 /* Return true if this goes in large data/bss.  */
2701
2702 static bool
2703 ix86_in_large_data_p (tree exp)
2704 {
2705   if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
2706     return false;
2707
2708   /* Functions are never large data.  */
2709   if (TREE_CODE (exp) == FUNCTION_DECL)
2710     return false;
2711
2712   if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
2713     {
2714       const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
2715       if (strcmp (section, ".ldata") == 0
2716           || strcmp (section, ".lbss") == 0)
2717         return true;
2718       return false;
2719     }
2720   else
2721     {
2722       HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
2723
2724       /* If this is an incomplete type with size 0, then we can't put it
2725          in data because it might be too big when completed.  */
2726       if (!size || size > ix86_section_threshold)
2727         return true;
2728     }
2729
2730   return false;
2731 }
2732
2733 /* Switch to the appropriate section for output of DECL.
2734    DECL is either a `VAR_DECL' node or a constant of some sort.
2735    RELOC indicates whether forming the initial value of DECL requires
2736    link-time relocations.  */
2737
2738 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
2739         ATTRIBUTE_UNUSED;
2740
2741 static section *
2742 x86_64_elf_select_section (tree decl, int reloc,
2743                            unsigned HOST_WIDE_INT align)
2744 {
2745   if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2746       && ix86_in_large_data_p (decl))
2747     {
2748       const char *sname = NULL;
2749       unsigned int flags = SECTION_WRITE;
2750       switch (categorize_decl_for_section (decl, reloc))
2751         {
2752         case SECCAT_DATA:
2753           sname = ".ldata";
2754           break;
2755         case SECCAT_DATA_REL:
2756           sname = ".ldata.rel";
2757           break;
2758         case SECCAT_DATA_REL_LOCAL:
2759           sname = ".ldata.rel.local";
2760           break;
2761         case SECCAT_DATA_REL_RO:
2762           sname = ".ldata.rel.ro";
2763           break;
2764         case SECCAT_DATA_REL_RO_LOCAL:
2765           sname = ".ldata.rel.ro.local";
2766           break;
2767         case SECCAT_BSS:
2768           sname = ".lbss";
2769           flags |= SECTION_BSS;
2770           break;
2771         case SECCAT_RODATA:
2772         case SECCAT_RODATA_MERGE_STR:
2773         case SECCAT_RODATA_MERGE_STR_INIT:
2774         case SECCAT_RODATA_MERGE_CONST:
2775           sname = ".lrodata";
2776           flags = 0;
2777           break;
2778         case SECCAT_SRODATA:
2779         case SECCAT_SDATA:
2780         case SECCAT_SBSS:
2781           gcc_unreachable ();
2782         case SECCAT_TEXT:
2783         case SECCAT_TDATA:
2784         case SECCAT_TBSS:
2785           /* We don't split these for medium model.  Place them into
2786              default sections and hope for best.  */
2787           break;
2788         }
2789       if (sname)
2790         {
2791           /* We might get called with string constants, but get_named_section
2792              doesn't like them as they are not DECLs.  Also, we need to set
2793              flags in that case.  */
2794           if (!DECL_P (decl))
2795             return get_section (sname, flags, NULL);
2796           return get_named_section (decl, sname, reloc);
2797         }
2798     }
2799   return default_elf_select_section (decl, reloc, align);
2800 }
2801
2802 /* Build up a unique section name, expressed as a
2803    STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2804    RELOC indicates whether the initial value of EXP requires
2805    link-time relocations.  */
2806
2807 static void ATTRIBUTE_UNUSED
2808 x86_64_elf_unique_section (tree decl, int reloc)
2809 {
2810   if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2811       && ix86_in_large_data_p (decl))
2812     {
2813       const char *prefix = NULL;
2814       /* We only need to use .gnu.linkonce if we don't have COMDAT groups.  */
2815       bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
2816
2817       switch (categorize_decl_for_section (decl, reloc))
2818         {
2819         case SECCAT_DATA:
2820         case SECCAT_DATA_REL:
2821         case SECCAT_DATA_REL_LOCAL:
2822         case SECCAT_DATA_REL_RO:
2823         case SECCAT_DATA_REL_RO_LOCAL:
2824           prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
2825           break;
2826         case SECCAT_BSS:
2827           prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
2828           break;
2829         case SECCAT_RODATA:
2830         case SECCAT_RODATA_MERGE_STR:
2831         case SECCAT_RODATA_MERGE_STR_INIT:
2832         case SECCAT_RODATA_MERGE_CONST:
2833           prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
2834           break;
2835         case SECCAT_SRODATA:
2836         case SECCAT_SDATA:
2837         case SECCAT_SBSS:
2838           gcc_unreachable ();
2839         case SECCAT_TEXT:
2840         case SECCAT_TDATA:
2841         case SECCAT_TBSS:
2842           /* We don't split these for medium model.  Place them into
2843              default sections and hope for best.  */
2844           break;
2845         }
2846       if (prefix)
2847         {
2848           const char *name;
2849           size_t nlen, plen;
2850           char *string;
2851           plen = strlen (prefix);
2852
2853           name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
2854           name = targetm.strip_name_encoding (name);
2855           nlen = strlen (name);
2856
2857           string = (char *) alloca (nlen + plen + 1);
2858           memcpy (string, prefix, plen);
2859           memcpy (string + plen, name, nlen + 1);
2860
2861           DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
2862           return;
2863         }
2864     }
2865   default_unique_section (decl, reloc);
2866 }
2867
#ifdef COMMON_ASM_OP
/* Write to FILE the assembler directive declaring NAME as an
   uninitialized external linkage data object of SIZE bytes, with ALIGN
   given in bits.

   In the medium code models an object larger than
   ix86_section_threshold is announced with .largecomm rather than
   COMMON_ASM_OP.  */
void
x86_elf_aligned_common (FILE *file,
                        const char *name, unsigned HOST_WIDE_INT size,
                        int align)
{
  const char *directive = COMMON_ASM_OP;

  if (size > (unsigned int)ix86_section_threshold
      && (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC))
    directive = ".largecomm\t";

  fprintf (file, "%s", directive);
  assemble_name (file, name);
  fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
           size, align / BITS_PER_UNIT);
}
#endif
2889
2890 /* Utility function for targets to use in implementing
2891    ASM_OUTPUT_ALIGNED_BSS.  */
2892
2893 void
2894 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
2895                         const char *name, unsigned HOST_WIDE_INT size,
2896                         int align)
2897 {
2898   if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2899       && size > (unsigned int)ix86_section_threshold)
2900     switch_to_section (get_named_section (decl, ".lbss", 0));
2901   else
2902     switch_to_section (bss_section);
2903   ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
2904 #ifdef ASM_DECLARE_OBJECT_NAME
2905   last_assemble_variable_decl = decl;
2906   ASM_DECLARE_OBJECT_NAME (file, name, decl);
2907 #else
2908   /* Standard thing is just output label for the object.  */
2909   ASM_OUTPUT_LABEL (file, name);
2910 #endif /* ASM_DECLARE_OBJECT_NAME */
2911   ASM_OUTPUT_SKIP (file, size ? size : 1);
2912 }
2913 \f
2914 void
2915 optimization_options (int level, int size ATTRIBUTE_UNUSED)
2916 {
2917   /* For -O2 and beyond, turn off -fschedule-insns by default.  It tends to
2918      make the problem with not enough registers even worse.  */
2919 #ifdef INSN_SCHEDULING
2920   if (level > 1)
2921     flag_schedule_insns = 0;
2922 #endif
2923
2924   if (TARGET_MACHO)
2925     /* The Darwin libraries never set errno, so we might as well
2926        avoid calling them when that's the only reason we would.  */
2927     flag_errno_math = 0;
2928
2929   /* The default values of these switches depend on the TARGET_64BIT
2930      that is not known at this moment.  Mark these values with 2 and
2931      let user the to override these.  In case there is no command line option
2932      specifying them, we will set the defaults in override_options.  */
2933   if (optimize >= 1)
2934     flag_omit_frame_pointer = 2;
2935   flag_pcc_struct_return = 2;
2936   flag_asynchronous_unwind_tables = 2;
2937   flag_vect_cost_model = 1;
2938 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2939   SUBTARGET_OPTIMIZATION_OPTIONS;
2940 #endif
2941 }
2942 \f
2943 /* Decide whether we can make a sibling call to a function.  DECL is the
2944    declaration of the function being targeted by the call and EXP is the
2945    CALL_EXPR representing the call.  */
2946
2947 static bool
2948 ix86_function_ok_for_sibcall (tree decl, tree exp)
2949 {
2950   tree func;
2951   rtx a, b;
2952
2953   /* If we are generating position-independent code, we cannot sibcall
2954      optimize any indirect call, or a direct call to a global function,
2955      as the PLT requires %ebx be live.  */
2956   if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
2957     return false;
2958
2959   if (decl)
2960     func = decl;
2961   else
2962     {
2963       func = TREE_TYPE (CALL_EXPR_FN (exp));
2964       if (POINTER_TYPE_P (func))
2965         func = TREE_TYPE (func);
2966     }
2967
2968   /* Check that the return value locations are the same.  Like
2969      if we are returning floats on the 80387 register stack, we cannot
2970      make a sibcall from a function that doesn't return a float to a
2971      function that does or, conversely, from a function that does return
2972      a float to a function that doesn't; the necessary stack adjustment
2973      would not be executed.  This is also the place we notice
2974      differences in the return value ABI.  Note that it is ok for one
2975      of the functions to have void return type as long as the return
2976      value of the other is passed in a register.  */
2977   a = ix86_function_value (TREE_TYPE (exp), func, false);
2978   b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
2979                            cfun->decl, false);
2980   if (STACK_REG_P (a) || STACK_REG_P (b))
2981     {
2982       if (!rtx_equal_p (a, b))
2983         return false;
2984     }
2985   else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
2986     ;
2987   else if (!rtx_equal_p (a, b))
2988     return false;
2989
2990   /* If this call is indirect, we'll need to be able to use a call-clobbered
2991      register for the address of the target function.  Make sure that all
2992      such registers are not used for passing parameters.  */
2993   if (!decl && !TARGET_64BIT)
2994     {
2995       tree type;
2996
2997       /* We're looking at the CALL_EXPR, we need the type of the function.  */
2998       type = CALL_EXPR_FN (exp);                /* pointer expression */
2999       type = TREE_TYPE (type);                  /* pointer type */
3000       type = TREE_TYPE (type);                  /* function type */
3001
3002       if (ix86_function_regparm (type, NULL) >= 3)
3003         {
3004           /* ??? Need to count the actual number of registers to be used,
3005              not the possible number of registers.  Fix later.  */
3006           return false;
3007         }
3008     }
3009
3010   /* Dllimport'd functions are also called indirectly.  */
3011   if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
3012       && decl && DECL_DLLIMPORT_P (decl)
3013       && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
3014     return false;
3015
3016   /* If we forced aligned the stack, then sibcalling would unalign the
3017      stack, which may break the called function.  */
3018   if (cfun->machine->force_align_arg_pointer)
3019     return false;
3020
3021   /* Otherwise okay.  That also includes certain types of indirect calls.  */
3022   return true;
3023 }
3024
3025 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
3026    calling convention attributes;
3027    arguments as in struct attribute_spec.handler.  */
3028
3029 static tree
3030 ix86_handle_cconv_attribute (tree *node, tree name,
3031                                    tree args,
3032                                    int flags ATTRIBUTE_UNUSED,
3033                                    bool *no_add_attrs)
3034 {
3035   if (TREE_CODE (*node) != FUNCTION_TYPE
3036       && TREE_CODE (*node) != METHOD_TYPE
3037       && TREE_CODE (*node) != FIELD_DECL
3038       && TREE_CODE (*node) != TYPE_DECL)
3039     {
3040       warning (OPT_Wattributes, "%qs attribute only applies to functions",
3041                IDENTIFIER_POINTER (name));
3042       *no_add_attrs = true;
3043       return NULL_TREE;
3044     }
3045
3046   /* Can combine regparm with all attributes but fastcall.  */
3047   if (is_attribute_p ("regparm", name))
3048     {
3049       tree cst;
3050
3051       if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
3052         {
3053           error ("fastcall and regparm attributes are not compatible");
3054         }
3055
3056       cst = TREE_VALUE (args);
3057       if (TREE_CODE (cst) != INTEGER_CST)
3058         {
3059           warning (OPT_Wattributes,
3060                    "%qs attribute requires an integer constant argument",
3061                    IDENTIFIER_POINTER (name));
3062           *no_add_attrs = true;
3063         }
3064       else if (compare_tree_int (cst, REGPARM_MAX) > 0)
3065         {
3066           warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
3067                    IDENTIFIER_POINTER (name), REGPARM_MAX);
3068           *no_add_attrs = true;
3069         }
3070
3071       if (!TARGET_64BIT
3072           && lookup_attribute (ix86_force_align_arg_pointer_string,
3073                                TYPE_ATTRIBUTES (*node))
3074           && compare_tree_int (cst, REGPARM_MAX-1))
3075         {
3076           error ("%s functions limited to %d register parameters",
3077                  ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
3078         }
3079
3080       return NULL_TREE;
3081     }
3082
3083   if (TARGET_64BIT)
3084     {
3085       /* Do not warn when emulating the MS ABI.  */
3086       if (!TARGET_64BIT_MS_ABI)
3087         warning (OPT_Wattributes, "%qs attribute ignored",
3088                  IDENTIFIER_POINTER (name));
3089       *no_add_attrs = true;
3090       return NULL_TREE;
3091     }
3092
3093   /* Can combine fastcall with stdcall (redundant) and sseregparm.  */
3094   if (is_attribute_p ("fastcall", name))
3095     {
3096       if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
3097         {
3098           error ("fastcall and cdecl attributes are not compatible");
3099         }
3100       if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
3101         {
3102           error ("fastcall and stdcall attributes are not compatible");
3103         }
3104       if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
3105         {
3106           error ("fastcall and regparm attributes are not compatible");
3107         }
3108     }
3109
3110   /* Can combine stdcall with fastcall (redundant), regparm and
3111      sseregparm.  */
3112   else if (is_attribute_p ("stdcall", name))
3113     {
3114       if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
3115         {
3116           error ("stdcall and cdecl attributes are not compatible");
3117         }
3118       if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
3119         {
3120           error ("stdcall and fastcall attributes are not compatible");
3121         }
3122     }
3123
3124   /* Can combine cdecl with regparm and sseregparm.  */
3125   else if (is_attribute_p ("cdecl", name))
3126     {
3127       if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
3128         {
3129           error ("stdcall and cdecl attributes are not compatible");
3130         }
3131       if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
3132         {
3133           error ("fastcall and cdecl attributes are not compatible");
3134         }
3135     }
3136
3137   /* Can combine sseregparm with all attributes.  */
3138
3139   return NULL_TREE;
3140 }
3141
3142 /* Return 0 if the attributes for two types are incompatible, 1 if they
3143    are compatible, and 2 if they are nearly compatible (which causes a
3144    warning to be generated).  */
3145
3146 static int
3147 ix86_comp_type_attributes (const_tree type1, const_tree type2)
3148 {
3149   /* Check for mismatch of non-default calling convention.  */
3150   const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
3151
3152   if (TREE_CODE (type1) != FUNCTION_TYPE
3153       && TREE_CODE (type1) != METHOD_TYPE)
3154     return 1;
3155
3156   /* Check for mismatched fastcall/regparm types.  */
3157   if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
3158        != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
3159       || (ix86_function_regparm (type1, NULL)
3160           != ix86_function_regparm (type2, NULL)))
3161     return 0;
3162
3163   /* Check for mismatched sseregparm types.  */
3164   if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
3165       != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
3166     return 0;
3167
3168   /* Check for mismatched return types (cdecl vs stdcall).  */
3169   if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
3170       != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
3171     return 0;
3172
3173   return 1;
3174 }
3175 \f
3176 /* Return the regparm value for a function with the indicated TYPE and DECL.
3177    DECL may be NULL when calling function indirectly
3178    or considering a libcall.  */
3179
3180 static int
3181 ix86_function_regparm (const_tree type, const_tree decl)
3182 {
3183   tree attr;
3184   int regparm = ix86_regparm;
3185
3186   if (TARGET_64BIT)
3187     return regparm;
3188
3189   attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
3190   if (attr)
3191     return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
3192
3193   if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
3194     return 2;
3195
3196   /* Use register calling convention for local functions when possible.  */
3197   if (decl && TREE_CODE (decl) == FUNCTION_DECL
3198       && flag_unit_at_a_time && !profile_flag)
3199     {
3200       /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified.  */
3201       struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
3202       if (i && i->local)
3203         {
3204           int local_regparm, globals = 0, regno;
3205           struct function *f;
3206
3207           /* Make sure no regparm register is taken by a
3208              fixed register variable.  */
3209           for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
3210             if (fixed_regs[local_regparm])
3211               break;
3212
3213           /* We can't use regparm(3) for nested functions as these use
3214              static chain pointer in third argument.  */
3215           if (local_regparm == 3
3216               && (decl_function_context (decl)
3217                   || ix86_force_align_arg_pointer)
3218               && !DECL_NO_STATIC_CHAIN (decl))
3219             local_regparm = 2;
3220
3221           /* If the function realigns its stackpointer, the prologue will
3222              clobber %ecx.  If we've already generated code for the callee,
3223              the callee DECL_STRUCT_FUNCTION is gone, so we fall back to
3224              scanning the attributes for the self-realigning property.  */
3225           f = DECL_STRUCT_FUNCTION (decl);
3226           if (local_regparm == 3
3227               && (f ? !!f->machine->force_align_arg_pointer
3228                   : !!lookup_attribute (ix86_force_align_arg_pointer_string,
3229                                         TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
3230             local_regparm = 2;
3231
3232           /* Each fixed register usage increases register pressure,
3233              so less registers should be used for argument passing.
3234              This functionality can be overriden by an explicit
3235              regparm value.  */
3236           for (regno = 0; regno <= DI_REG; regno++)
3237             if (fixed_regs[regno])
3238               globals++;
3239
3240           local_regparm
3241             = globals < local_regparm ? local_regparm - globals : 0;
3242
3243           if (local_regparm > regparm)
3244             regparm = local_regparm;
3245         }
3246     }
3247
3248   return regparm;
3249 }
3250
3251 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
3252    DFmode (2) arguments in SSE registers for a function with the
3253    indicated TYPE and DECL.  DECL may be NULL when calling function
3254    indirectly or considering a libcall.  Otherwise return 0.  */
3255
3256 static int
3257 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
3258 {
3259   gcc_assert (!TARGET_64BIT);
3260
3261   /* Use SSE registers to pass SFmode and DFmode arguments if requested
3262      by the sseregparm attribute.  */
3263   if (TARGET_SSEREGPARM
3264       || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
3265     {
3266       if (!TARGET_SSE)
3267         {
3268           if (warn)
3269             {
3270               if (decl)
3271                 error ("Calling %qD with attribute sseregparm without "
3272                        "SSE/SSE2 enabled", decl);
3273               else
3274                 error ("Calling %qT with attribute sseregparm without "
3275                        "SSE/SSE2 enabled", type);
3276             }
3277           return 0;
3278         }
3279
3280       return 2;
3281     }
3282
3283   /* For local functions, pass up to SSE_REGPARM_MAX SFmode
3284      (and DFmode for SSE2) arguments in SSE registers.  */
3285   if (decl && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
3286     {
3287       /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified.  */
3288       struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
3289       if (i && i->local)
3290         return TARGET_SSE2 ? 2 : 1;
3291     }
3292
3293   return 0;
3294 }
3295
3296 /* Return true if EAX is live at the start of the function.  Used by
3297    ix86_expand_prologue to determine if we need special help before
3298    calling allocate_stack_worker.  */
3299
3300 static bool
3301 ix86_eax_live_at_start_p (void)
3302 {
3303   /* Cheat.  Don't bother working forward from ix86_function_regparm
3304      to the function type to whether an actual argument is located in
3305      eax.  Instead just look at cfg info, which is still close enough
3306      to correct at this point.  This gives false positives for broken
3307      functions that might use uninitialized data that happens to be
3308      allocated in eax, but who cares?  */
3309   return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
3310 }
3311
3312 /* Value is the number of bytes of arguments automatically
3313    popped when returning from a subroutine call.
3314    FUNDECL is the declaration node of the function (as a tree),
3315    FUNTYPE is the data type of the function (as a tree),
3316    or for a library call it is an identifier node for the subroutine name.
3317    SIZE is the number of bytes of arguments passed on the stack.
3318
3319    On the 80386, the RTD insn may be used to pop them if the number
3320      of args is fixed, but if the number is variable then the caller
3321      must pop them all.  RTD can't be used for library calls now
3322      because the library is compiled with the Unix compiler.
3323    Use of RTD is a selectable option, since it is incompatible with
3324    standard Unix calling sequences.  If the option is not selected,
3325    the caller must always pop the args.
3326
3327    The attribute stdcall is equivalent to RTD on a per module basis.  */
3328
3329 int
3330 ix86_return_pops_args (tree fundecl, tree funtype, int size)
3331 {
3332   int rtd;
3333
3334   /* None of the 64-bit ABIs pop arguments.  */
3335   if (TARGET_64BIT)
3336     return 0;
3337
3338   rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
3339
3340   /* Cdecl functions override -mrtd, and never pop the stack.  */
3341   if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
3342     {
3343       /* Stdcall and fastcall functions will pop the stack if not
3344          variable args.  */
3345       if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
3346           || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
3347         rtd = 1;
3348
3349       if (rtd && ! stdarg_p (funtype))
3350         return size;
3351     }
3352
3353   /* Lose any fake structure return argument if it is passed on the stack.  */
3354   if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
3355       && !KEEP_AGGREGATE_RETURN_POINTER)
3356     {
3357       int nregs = ix86_function_regparm (funtype, fundecl);
3358       if (nregs == 0)
3359         return GET_MODE_SIZE (Pmode);
3360     }
3361
3362   return 0;
3363 }
3364 \f
3365 /* Argument support functions.  */
3366
3367 /* Return true when register may be used to pass function parameters.  */
3368 bool
3369 ix86_function_arg_regno_p (int regno)
3370 {
3371   int i;
3372   const int *parm_regs;
3373
3374   if (!TARGET_64BIT)
3375     {
3376       if (TARGET_MACHO)
3377         return (regno < REGPARM_MAX
3378                 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
3379       else
3380         return (regno < REGPARM_MAX
3381                 || (TARGET_MMX && MMX_REGNO_P (regno)
3382                     && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
3383                 || (TARGET_SSE && SSE_REGNO_P (regno)
3384                     && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
3385     }
3386
3387   if (TARGET_MACHO)
3388     {
3389       if (SSE_REGNO_P (regno) && TARGET_SSE)
3390         return true;
3391     }
3392   else
3393     {
3394       if (TARGET_SSE && SSE_REGNO_P (regno)
3395           && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
3396         return true;
3397     }
3398
3399   /* RAX is used as hidden argument to va_arg functions.  */
3400   if (!TARGET_64BIT_MS_ABI && regno == AX_REG)
3401     return true;
3402
3403   if (TARGET_64BIT_MS_ABI)
3404     parm_regs = x86_64_ms_abi_int_parameter_registers;
3405   else
3406     parm_regs = x86_64_int_parameter_registers;
3407   for (i = 0; i < REGPARM_MAX; i++)
3408     if (regno == parm_regs[i])
3409       return true;
3410   return false;
3411 }
3412
3413 /* Return if we do not know how to pass TYPE solely in registers.  */
3414
3415 static bool
3416 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
3417 {
3418   if (must_pass_in_stack_var_size_or_pad (mode, type))
3419     return true;
3420
3421   /* For 32-bit, we want TImode aggregates to go on the stack.  But watch out!
3422      The layout_type routine is crafty and tries to trick us into passing
3423      currently unsupported vector types on the stack by using TImode.  */
3424   return (!TARGET_64BIT && mode == TImode
3425           && type && TREE_CODE (type) != VECTOR_TYPE);
3426 }
3427
3428 /* Initialize a variable CUM of type CUMULATIVE_ARGS
3429    for a call to a function whose data type is FNTYPE.
3430    For a library call, FNTYPE is 0.  */
3431
3432 void
3433 init_cumulative_args (CUMULATIVE_ARGS *cum,  /* Argument info to initialize */
3434                       tree fntype,      /* tree ptr for function decl */
3435                       rtx libname,      /* SYMBOL_REF of library name or 0 */
3436                       tree fndecl)
3437 {
3438   struct cgraph_local_info *i = fndecl ? cgraph_local_info (fndecl) : NULL;
3439   memset (cum, 0, sizeof (*cum));
3440
3441   /* Set up the number of registers to use for passing arguments.  */
3442   cum->nregs = ix86_regparm;
3443   if (TARGET_SSE)
3444     cum->sse_nregs = SSE_REGPARM_MAX;
3445   if (TARGET_MMX)
3446     cum->mmx_nregs = MMX_REGPARM_MAX;
3447   cum->warn_sse = true;
3448   cum->warn_mmx = true;
3449
3450   /* Because type might mismatch in between caller and callee, we need to
3451      use actual type of function for local calls.
3452      FIXME: cgraph_analyze can be told to actually record if function uses
3453      va_start so for local functions maybe_vaarg can be made aggressive
3454      helping K&R code.
3455      FIXME: once typesytem is fixed, we won't need this code anymore.  */
3456   if (i && i->local)
3457     fntype = TREE_TYPE (fndecl);
3458   cum->maybe_vaarg = (fntype
3459                       ? (!prototype_p (fntype) || stdarg_p (fntype))
3460                       : !libname);
3461
3462   if (!TARGET_64BIT)
3463     {
3464       /* If there are variable arguments, then we won't pass anything
3465          in registers in 32-bit mode. */
3466       if (cum->maybe_vaarg)
3467         {
3468           cum->nregs = 0;
3469           cum->sse_nregs = 0;
3470           cum->mmx_nregs = 0;
3471           cum->warn_sse = 0;
3472           cum->warn_mmx = 0;
3473           return;
3474         }
3475
3476       /* Use ecx and edx registers if function has fastcall attribute,
3477          else look for regparm information.  */
3478       if (fntype)
3479         {
3480           if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
3481             {
3482               cum->nregs = 2;
3483               cum->fastcall = 1;
3484             }
3485           else
3486             cum->nregs = ix86_function_regparm (fntype, fndecl);
3487         }
3488
3489       /* Set up the number of SSE registers used for passing SFmode
3490          and DFmode arguments.  Warn for mismatching ABI.  */
3491       cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
3492     }
3493 }
3494
3495 /* Return the "natural" mode for TYPE.  In most cases, this is just TYPE_MODE.
3496    But in the case of vector types, it is some vector mode.
3497
3498    When we have only some of our vector isa extensions enabled, then there
3499    are some modes for which vector_mode_supported_p is false.  For these
3500    modes, the generic vector support in gcc will choose some non-vector mode
3501    in order to implement the type.  By computing the natural mode, we'll
3502    select the proper ABI location for the operand and not depend on whatever
3503    the middle-end decides to do with these vector types.  */
3504
3505 static enum machine_mode
3506 type_natural_mode (const_tree type)
3507 {
3508   enum machine_mode mode = TYPE_MODE (type);
3509
3510   if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
3511     {
3512       HOST_WIDE_INT size = int_size_in_bytes (type);
3513       if ((size == 8 || size == 16)
3514           /* ??? Generic code allows us to create width 1 vectors.  Ignore.  */
3515           && TYPE_VECTOR_SUBPARTS (type) > 1)
3516         {
3517           enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
3518
3519           if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
3520             mode = MIN_MODE_VECTOR_FLOAT;
3521           else
3522             mode = MIN_MODE_VECTOR_INT;
3523
3524           /* Get the mode which has this inner mode and number of units.  */
3525           for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
3526             if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
3527                 && GET_MODE_INNER (mode) == innermode)
3528               return mode;
3529
3530           gcc_unreachable ();
3531         }
3532     }
3533
3534   return mode;
3535 }
3536
3537 /* We want to pass a value in REGNO whose "natural" mode is MODE.  However,
3538    this may not agree with the mode that the type system has chosen for the
3539    register, which is ORIG_MODE.  If ORIG_MODE is not BLKmode, then we can
3540    go ahead and use it.  Otherwise we have to build a PARALLEL instead.  */
3541
3542 static rtx
3543 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
3544                      unsigned int regno)
3545 {
3546   rtx tmp;
3547
3548   if (orig_mode != BLKmode)
3549     tmp = gen_rtx_REG (orig_mode, regno);
3550   else
3551     {
3552       tmp = gen_rtx_REG (mode, regno);
3553       tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
3554       tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
3555     }
3556
3557   return tmp;
3558 }
3559
/* x86-64 register passing implementation.  See the x86-64 psABI for details.
   The goal of this code is to classify each eightbyte of an incoming argument
   by register class and to assign registers accordingly.  */
3563
3564 /* Return the union class of CLASS1 and CLASS2.
3565    See the x86-64 PS ABI for details.  */
3566
3567 static enum x86_64_reg_class
3568 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
3569 {
3570   /* Rule #1: If both classes are equal, this is the resulting class.  */
3571   if (class1 == class2)
3572     return class1;
3573
3574   /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
3575      the other class.  */
3576   if (class1 == X86_64_NO_CLASS)
3577     return class2;
3578   if (class2 == X86_64_NO_CLASS)
3579     return class1;
3580
3581   /* Rule #3: If one of the classes is MEMORY, the result is MEMORY.  */
3582   if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
3583     return X86_64_MEMORY_CLASS;
3584
3585   /* Rule #4: If one of the classes is INTEGER, the result is INTEGER.  */
3586   if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
3587       || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
3588     return X86_64_INTEGERSI_CLASS;
3589   if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
3590       || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
3591     return X86_64_INTEGER_CLASS;
3592
3593   /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
3594      MEMORY is used.  */
3595   if (class1 == X86_64_X87_CLASS
3596       || class1 == X86_64_X87UP_CLASS
3597       || class1 == X86_64_COMPLEX_X87_CLASS
3598       || class2 == X86_64_X87_CLASS
3599       || class2 == X86_64_X87UP_CLASS
3600       || class2 == X86_64_COMPLEX_X87_CLASS)
3601     return X86_64_MEMORY_CLASS;
3602
3603   /* Rule #6: Otherwise class SSE is used.  */
3604   return X86_64_SSE_CLASS;
3605 }
3606
3607 /* Classify the argument of type TYPE and mode MODE.
3608    CLASSES will be filled by the register class used to pass each word
3609    of the operand.  The number of words is returned.  In case the parameter
3610    should be passed in memory, 0 is returned. As a special case for zero
3611    sized containers, classes[0] will be NO_CLASS and 1 is returned.
3612
3613    BIT_OFFSET is used internally for handling records and specifies offset
3614    of the offset in bits modulo 256 to avoid overflow cases.
3615
3616    See the x86-64 PS ABI for details.
3617 */
3618
3619 static int
3620 classify_argument (enum machine_mode mode, const_tree type,
3621                    enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
3622 {
3623   HOST_WIDE_INT bytes =
3624     (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3625   int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3626
3627   /* Variable sized entities are always passed/returned in memory.  */
3628   if (bytes < 0)
3629     return 0;
3630
3631   if (mode != VOIDmode
3632       && targetm.calls.must_pass_in_stack (mode, type))
3633     return 0;
3634
3635   if (type && AGGREGATE_TYPE_P (type))
3636     {
3637       int i;
3638       tree field;
3639       enum x86_64_reg_class subclasses[MAX_CLASSES];
3640
3641       /* On x86-64 we pass structures larger than 16 bytes on the stack.  */
3642       if (bytes > 16)
3643         return 0;
3644
3645       for (i = 0; i < words; i++)
3646         classes[i] = X86_64_NO_CLASS;
3647
3648       /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
3649          signalize memory class, so handle it as special case.  */
3650       if (!words)
3651         {
3652           classes[0] = X86_64_NO_CLASS;
3653           return 1;
3654         }
3655
3656       /* Classify each field of record and merge classes.  */
3657       switch (TREE_CODE (type))
3658         {
3659         case RECORD_TYPE:
3660           /* And now merge the fields of structure.  */
3661           for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3662             {
3663               if (TREE_CODE (field) == FIELD_DECL)
3664                 {
3665                   int num;
3666
3667                   if (TREE_TYPE (field) == error_mark_node)
3668                     continue;
3669
3670                   /* Bitfields are always classified as integer.  Handle them
3671                      early, since later code would consider them to be
3672                      misaligned integers.  */
3673                   if (DECL_BIT_FIELD (field))
3674                     {
3675                       for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3676                            i < ((int_bit_position (field) + (bit_offset % 64))
3677                                 + tree_low_cst (DECL_SIZE (field), 0)
3678                                 + 63) / 8 / 8; i++)
3679                         classes[i] =
3680                           merge_classes (X86_64_INTEGER_CLASS,
3681                                          classes[i]);
3682                     }
3683                   else
3684                     {
3685                       num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3686                                                TREE_TYPE (field), subclasses,
3687                                                (int_bit_position (field)
3688                                                 + bit_offset) % 256);
3689                       if (!num)
3690                         return 0;
3691                       for (i = 0; i < num; i++)
3692                         {
3693                           int pos =
3694                             (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3695                           classes[i + pos] =
3696                             merge_classes (subclasses[i], classes[i + pos]);
3697                         }
3698                     }
3699                 }
3700             }
3701           break;
3702
3703         case ARRAY_TYPE:
3704           /* Arrays are handled as small records.  */
3705           {
3706             int num;
3707             num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
3708                                      TREE_TYPE (type), subclasses, bit_offset);
3709             if (!num)
3710               return 0;
3711
3712             /* The partial classes are now full classes.  */
3713             if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
3714               subclasses[0] = X86_64_SSE_CLASS;
3715             if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
3716               subclasses[0] = X86_64_INTEGER_CLASS;
3717
3718             for (i = 0; i < words; i++)
3719               classes[i] = subclasses[i % num];
3720
3721             break;
3722           }
3723         case UNION_TYPE:
3724         case QUAL_UNION_TYPE:
3725           /* Unions are similar to RECORD_TYPE but offset is always 0.
3726              */
3727           for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3728             {
3729               if (TREE_CODE (field) == FIELD_DECL)
3730                 {
3731                   int num;
3732
3733                   if (TREE_TYPE (field) == error_mark_node)
3734                     continue;
3735
3736                   num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3737                                            TREE_TYPE (field), subclasses,
3738                                            bit_offset);
3739                   if (!num)
3740                     return 0;
3741                   for (i = 0; i < num; i++)
3742                     classes[i] = merge_classes (subclasses[i], classes[i]);
3743                 }
3744             }
3745           break;
3746
3747         default:
3748           gcc_unreachable ();
3749         }
3750
3751       /* Final merger cleanup.  */
3752       for (i = 0; i < words; i++)
3753         {
3754           /* If one class is MEMORY, everything should be passed in
3755              memory.  */
3756           if (classes[i] == X86_64_MEMORY_CLASS)
3757             return 0;
3758
3759           /* The X86_64_SSEUP_CLASS should be always preceded by
3760              X86_64_SSE_CLASS.  */
3761           if (classes[i] == X86_64_SSEUP_CLASS
3762               && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
3763             classes[i] = X86_64_SSE_CLASS;
3764
3765           /*  X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS.  */
3766           if (classes[i] == X86_64_X87UP_CLASS
3767               && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
3768             classes[i] = X86_64_SSE_CLASS;
3769         }
3770       return words;
3771     }
3772
3773   /* Compute alignment needed.  We align all types to natural boundaries with
3774      exception of XFmode that is aligned to 64bits.  */
3775   if (mode != VOIDmode && mode != BLKmode)
3776     {
3777       int mode_alignment = GET_MODE_BITSIZE (mode);
3778
3779       if (mode == XFmode)
3780         mode_alignment = 128;
3781       else if (mode == XCmode)
3782         mode_alignment = 256;
3783       if (COMPLEX_MODE_P (mode))
3784         mode_alignment /= 2;
3785       /* Misaligned fields are always returned in memory.  */
3786       if (bit_offset % mode_alignment)
3787         return 0;
3788     }
3789
3790   /* for V1xx modes, just use the base mode */
3791   if (VECTOR_MODE_P (mode)
3792       && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
3793     mode = GET_MODE_INNER (mode);
3794
3795   /* Classification of atomic types.  */
3796   switch (mode)
3797     {
3798     case SDmode:
3799     case DDmode:
3800       classes[0] = X86_64_SSE_CLASS;
3801       return 1;
3802     case TDmode:
3803       classes[0] = X86_64_SSE_CLASS;
3804       classes[1] = X86_64_SSEUP_CLASS;
3805       return 2;
3806     case DImode:
3807     case SImode:
3808     case HImode:
3809     case QImode:
3810     case CSImode:
3811     case CHImode:
3812     case CQImode:
3813       if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3814         classes[0] = X86_64_INTEGERSI_CLASS;
3815       else
3816         classes[0] = X86_64_INTEGER_CLASS;
3817       return 1;
3818     case CDImode:
3819     case TImode:
3820       classes[0] = classes[1] = X86_64_INTEGER_CLASS;
3821       return 2;
3822     case CTImode:
3823       return 0;
3824     case SFmode:
3825       if (!(bit_offset % 64))
3826         classes[0] = X86_64_SSESF_CLASS;
3827       else
3828         classes[0] = X86_64_SSE_CLASS;
3829       return 1;
3830     case DFmode:
3831       classes[0] = X86_64_SSEDF_CLASS;
3832       return 1;
3833     case XFmode:
3834       classes[0] = X86_64_X87_CLASS;
3835       classes[1] = X86_64_X87UP_CLASS;
3836       return 2;
3837     case TFmode:
3838       classes[0] = X86_64_SSE_CLASS;
3839       classes[1] = X86_64_SSEUP_CLASS;
3840       return 2;
3841     case SCmode:
3842       classes[0] = X86_64_SSE_CLASS;
3843       return 1;
3844     case DCmode:
3845       classes[0] = X86_64_SSEDF_CLASS;
3846       classes[1] = X86_64_SSEDF_CLASS;
3847       return 2;
3848     case XCmode:
3849       classes[0] = X86_64_COMPLEX_X87_CLASS;
3850       return 1;
3851     case TCmode:
3852       /* This modes is larger than 16 bytes.  */
3853       return 0;
3854     case V4SFmode:
3855     case V4SImode:
3856     case V16QImode:
3857     case V8HImode:
3858     case V2DFmode:
3859     case V2DImode:
3860       classes[0] = X86_64_SSE_CLASS;
3861       classes[1] = X86_64_SSEUP_CLASS;
3862       return 2;
3863     case V2SFmode:
3864     case V2SImode:
3865     case V4HImode:
3866     case V8QImode:
3867       classes[0] = X86_64_SSE_CLASS;
3868       return 1;
3869     case BLKmode:
3870     case VOIDmode:
3871       return 0;
3872     default:
3873       gcc_assert (VECTOR_MODE_P (mode));
3874
3875       if (bytes > 16)
3876         return 0;
3877
3878       gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
3879
3880       if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3881         classes[0] = X86_64_INTEGERSI_CLASS;
3882       else
3883         classes[0] = X86_64_INTEGER_CLASS;
3884       classes[1] = X86_64_INTEGER_CLASS;
3885       return 1 + (bytes > 8);
3886     }
3887 }
3888
3889 /* Examine the argument and return set number of register required in each
3890    class.  Return 0 iff parameter should be passed in memory.  */
3891 static int
3892 examine_argument (enum machine_mode mode, const_tree type, int in_return,
3893                   int *int_nregs, int *sse_nregs)
3894 {
3895   enum x86_64_reg_class regclass[MAX_CLASSES];
3896   int n = classify_argument (mode, type, regclass, 0);
3897
3898   *int_nregs = 0;
3899   *sse_nregs = 0;
3900   if (!n)
3901     return 0;
3902   for (n--; n >= 0; n--)
3903     switch (regclass[n])
3904       {
3905       case X86_64_INTEGER_CLASS:
3906       case X86_64_INTEGERSI_CLASS:
3907         (*int_nregs)++;
3908         break;
3909       case X86_64_SSE_CLASS:
3910       case X86_64_SSESF_CLASS:
3911       case X86_64_SSEDF_CLASS:
3912         (*sse_nregs)++;
3913         break;
3914       case X86_64_NO_CLASS:
3915       case X86_64_SSEUP_CLASS:
3916         break;
3917       case X86_64_X87_CLASS:
3918       case X86_64_X87UP_CLASS:
3919         if (!in_return)
3920           return 0;
3921         break;
3922       case X86_64_COMPLEX_X87_CLASS:
3923         return in_return ? 2 : 0;
3924       case X86_64_MEMORY_CLASS:
3925         gcc_unreachable ();
3926       }
3927   return 1;
3928 }
3929
3930 /* Construct container for the argument used by GCC interface.  See
3931    FUNCTION_ARG for the detailed description.  */
3932
3933 static rtx
3934 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
3935                      const_tree type, int in_return, int nintregs, int nsseregs,
3936                      const int *intreg, int sse_regno)
3937 {
3938   /* The following variables hold the static issued_error state.  */
3939   static bool issued_sse_arg_error;
3940   static bool issued_sse_ret_error;
3941   static bool issued_x87_ret_error;
3942
3943   enum machine_mode tmpmode;
3944   int bytes =
3945     (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3946   enum x86_64_reg_class regclass[MAX_CLASSES];
3947   int n;
3948   int i;
3949   int nexps = 0;
3950   int needed_sseregs, needed_intregs;
3951   rtx exp[MAX_CLASSES];
3952   rtx ret;
3953
3954   n = classify_argument (mode, type, regclass, 0);
3955   if (!n)
3956     return NULL;
3957   if (!examine_argument (mode, type, in_return, &needed_intregs,
3958                          &needed_sseregs))
3959     return NULL;
3960   if (needed_intregs > nintregs || needed_sseregs > nsseregs)
3961     return NULL;
3962
3963   /* We allowed the user to turn off SSE for kernel mode.  Don't crash if
3964      some less clueful developer tries to use floating-point anyway.  */
3965   if (needed_sseregs && !TARGET_SSE)
3966     {
3967       if (in_return)
3968         {
3969           if (!issued_sse_ret_error)
3970             {
3971               error ("SSE register return with SSE disabled");
3972               issued_sse_ret_error = true;
3973             }
3974         }
3975       else if (!issued_sse_arg_error)
3976         {
3977           error ("SSE register argument with SSE disabled");
3978           issued_sse_arg_error = true;
3979         }
3980       return NULL;
3981     }
3982
3983   /* Likewise, error if the ABI requires us to return values in the
3984      x87 registers and the user specified -mno-80387.  */
3985   if (!TARGET_80387 && in_return)
3986     for (i = 0; i < n; i++)
3987       if (regclass[i] == X86_64_X87_CLASS
3988           || regclass[i] == X86_64_X87UP_CLASS
3989           || regclass[i] == X86_64_COMPLEX_X87_CLASS)
3990         {
3991           if (!issued_x87_ret_error)
3992             {
3993               error ("x87 register return with x87 disabled");
3994               issued_x87_ret_error = true;
3995             }
3996           return NULL;
3997         }
3998
3999   /* First construct simple cases.  Avoid SCmode, since we want to use
4000      single register to pass this type.  */
4001   if (n == 1 && mode != SCmode)
4002     switch (regclass[0])
4003       {
4004       case X86_64_INTEGER_CLASS:
4005       case X86_64_INTEGERSI_CLASS:
4006         return gen_rtx_REG (mode, intreg[0]);
4007       case X86_64_SSE_CLASS:
4008       case X86_64_SSESF_CLASS:
4009       case X86_64_SSEDF_CLASS:
4010         return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
4011       case X86_64_X87_CLASS:
4012       case X86_64_COMPLEX_X87_CLASS:
4013         return gen_rtx_REG (mode, FIRST_STACK_REG);
4014       case X86_64_NO_CLASS:
4015         /* Zero sized array, struct or class.  */
4016         return NULL;
4017       default:
4018         gcc_unreachable ();
4019       }
4020   if (n == 2 && regclass[0] == X86_64_SSE_CLASS
4021       && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
4022     return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
4023
4024   if (n == 2
4025       && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
4026     return gen_rtx_REG (XFmode, FIRST_STACK_REG);
4027   if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
4028       && regclass[1] == X86_64_INTEGER_CLASS
4029       && (mode == CDImode || mode == TImode || mode == TFmode)
4030       && intreg[0] + 1 == intreg[1])
4031     return gen_rtx_REG (mode, intreg[0]);
4032
4033   /* Otherwise figure out the entries of the PARALLEL.  */
4034   for (i = 0; i < n; i++)
4035     {
4036       switch (regclass[i])
4037         {
4038           case X86_64_NO_CLASS:
4039             break;
4040           case X86_64_INTEGER_CLASS:
4041           case X86_64_INTEGERSI_CLASS:
4042             /* Merge TImodes on aligned occasions here too.  */
4043             if (i * 8 + 8 > bytes)
4044               tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
4045             else if (regclass[i] == X86_64_INTEGERSI_CLASS)
4046               tmpmode = SImode;
4047             else
4048               tmpmode = DImode;
4049             /* We've requested 24 bytes we don't have mode for.  Use DImode.  */
4050             if (tmpmode == BLKmode)
4051               tmpmode = DImode;
4052             exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
4053                                                gen_rtx_REG (tmpmode, *intreg),
4054                                                GEN_INT (i*8));
4055             intreg++;
4056             break;
4057           case X86_64_SSESF_CLASS:
4058             exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
4059                                                gen_rtx_REG (SFmode,
4060                                                             SSE_REGNO (sse_regno)),
4061                                                GEN_INT (i*8));
4062             sse_regno++;
4063             break;
4064           case X86_64_SSEDF_CLASS:
4065             exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
4066                                                gen_rtx_REG (DFmode,
4067                                                             SSE_REGNO (sse_regno)),
4068                                                GEN_INT (i*8));
4069             sse_regno++;
4070             break;
4071           case X86_64_SSE_CLASS:
4072             if (i < n - 1 && regclass[i + 1] == X86_64_SSEUP_CLASS)
4073               tmpmode = TImode;
4074             else
4075               tmpmode = DImode;
4076             exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
4077                                                gen_rtx_REG (tmpmode,
4078                                                             SSE_REGNO (sse_regno)),
4079                                                GEN_INT (i*8));
4080             if (tmpmode == TImode)
4081               i++;
4082             sse_regno++;
4083             break;
4084           default:
4085             gcc_unreachable ();
4086         }
4087     }
4088
4089   /* Empty aligned struct, union or class.  */
4090   if (nexps == 0)
4091     return NULL;
4092
4093   ret =  gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
4094   for (i = 0; i < nexps; i++)
4095     XVECEXP (ret, 0, i) = exp [i];
4096   return ret;
4097 }
4098
4099 /* Update the data in CUM to advance over an argument of mode MODE
4100    and data type TYPE.  (TYPE is null for libcalls where that information
4101    may not be available.)  */
4102
4103 static void
4104 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4105                          tree type, HOST_WIDE_INT bytes, HOST_WIDE_INT words)
4106 {
4107   switch (mode)
4108     {
4109     default:
4110       break;
4111
4112     case BLKmode:
4113       if (bytes < 0)
4114         break;
4115       /* FALLTHRU */
4116
4117     case DImode:
4118     case SImode:
4119     case HImode:
4120     case QImode:
4121       cum->words += words;
4122       cum->nregs -= words;
4123       cum->regno += words;
4124
4125       if (cum->nregs <= 0)
4126         {
4127           cum->nregs = 0;
4128           cum->regno = 0;
4129         }
4130       break;
4131
4132     case DFmode:
4133       if (cum->float_in_sse < 2)
4134         break;
4135     case SFmode:
4136       if (cum->float_in_sse < 1)
4137         break;
4138       /* FALLTHRU */
4139
4140     case TImode:
4141     case V16QImode:
4142     case V8HImode:
4143     case V4SImode:
4144     case V2DImode:
4145     case V4SFmode:
4146     case V2DFmode:
4147       if (!type || !AGGREGATE_TYPE_P (type))
4148         {
4149           cum->sse_words += words;
4150           cum->sse_nregs -= 1;
4151           cum->sse_regno += 1;
4152           if (cum->sse_nregs <= 0)
4153             {
4154               cum->sse_nregs = 0;
4155               cum->sse_regno = 0;
4156             }
4157         }
4158       break;
4159
4160     case V8QImode:
4161     case V4HImode:
4162     case V2SImode:
4163     case V2SFmode:
4164       if (!type || !AGGREGATE_TYPE_P (type))
4165         {
4166           cum->mmx_words += words;
4167           cum->mmx_nregs -= 1;
4168           cum->mmx_regno += 1;
4169           if (cum->mmx_nregs <= 0)
4170             {
4171               cum->mmx_nregs = 0;
4172               cum->mmx_regno = 0;
4173             }
4174         }
4175       break;
4176     }
4177 }
4178
4179 static void
4180 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4181                          tree type, HOST_WIDE_INT words)
4182 {
4183   int int_nregs, sse_nregs;
4184
4185   if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
4186     cum->words += words;
4187   else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
4188     {
4189       cum->nregs -= int_nregs;
4190       cum->sse_nregs -= sse_nregs;
4191       cum->regno += int_nregs;
4192       cum->sse_regno += sse_nregs;
4193     }
4194   else
4195     cum->words += words;
4196 }
4197
4198 static void
4199 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
4200                             HOST_WIDE_INT words)
4201 {
4202   /* Otherwise, this should be passed indirect.  */
4203   gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
4204
4205   cum->words += words;
4206   if (cum->nregs > 0)
4207     {
4208       cum->nregs -= 1;
4209       cum->regno += 1;
4210     }
4211 }
4212
4213 void
4214 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4215                       tree type, int named ATTRIBUTE_UNUSED)
4216 {
4217   HOST_WIDE_INT bytes, words;
4218
4219   if (mode == BLKmode)
4220     bytes = int_size_in_bytes (type);
4221   else
4222     bytes = GET_MODE_SIZE (mode);
4223   words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4224
4225   if (type)
4226     mode = type_natural_mode (type);
4227
4228   if (TARGET_64BIT_MS_ABI)
4229     function_arg_advance_ms_64 (cum, bytes, words);
4230   else if (TARGET_64BIT)
4231     function_arg_advance_64 (cum, mode, type, words);
4232   else
4233     function_arg_advance_32 (cum, mode, type, bytes, words);
4234 }
4235
4236 /* Define where to put the arguments to a function.
4237    Value is zero to push the argument on the stack,
4238    or a hard register in which to store the argument.
4239
4240    MODE is the argument's machine mode.
4241    TYPE is the data type of the argument (as a tree).
4242     This is null for libcalls where that information may
4243     not be available.
4244    CUM is a variable of type CUMULATIVE_ARGS which gives info about
4245     the preceding args and about the function being called.
4246    NAMED is nonzero if this argument is a named parameter
4247     (otherwise it is an extra parameter matching an ellipsis).  */
4248
4249 static rtx
4250 function_arg_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4251                  enum machine_mode orig_mode, tree type,
4252                  HOST_WIDE_INT bytes, HOST_WIDE_INT words)
4253 {
4254   static bool warnedsse, warnedmmx;
4255
4256   /* Avoid the AL settings for the Unix64 ABI.  */
4257   if (mode == VOIDmode)
4258     return constm1_rtx;
4259
4260   switch (mode)
4261     {
4262     default:
4263       break;
4264
4265     case BLKmode:
4266       if (bytes < 0)
4267         break;
4268       /* FALLTHRU */
4269     case DImode:
4270     case SImode:
4271     case HImode:
4272     case QImode:
4273       if (words <= cum->nregs)
4274         {
4275           int regno = cum->regno;
4276
4277           /* Fastcall allocates the first two DWORD (SImode) or
4278             smaller arguments to ECX and EDX if it isn't an
4279             aggregate type .  */
4280           if (cum->fastcall)
4281             {
4282               if (mode == BLKmode
4283                   || mode == DImode
4284                   || (type && AGGREGATE_TYPE_P (type)))
4285                 break;
4286
4287               /* ECX not EAX is the first allocated register.  */
4288               if (regno == AX_REG)
4289                 regno = CX_REG;
4290             }
4291           return gen_rtx_REG (mode, regno);
4292         }
4293       break;
4294
4295     case DFmode:
4296       if (cum->float_in_sse < 2)
4297         break;
4298     case SFmode:
4299       if (cum->float_in_sse < 1)
4300         break;
4301       /* FALLTHRU */
4302     case TImode:
4303     case V16QImode:
4304     case V8HImode:
4305     case V4SImode:
4306     case V2DImode:
4307     case V4SFmode:
4308     case V2DFmode:
4309       if (!type || !AGGREGATE_TYPE_P (type))
4310         {
4311           if (!TARGET_SSE && !warnedsse && cum->warn_sse)
4312             {
4313               warnedsse = true;
4314               warning (0, "SSE vector argument without SSE enabled "
4315                        "changes the ABI");
4316             }
4317           if (cum->sse_nregs)
4318             return gen_reg_or_parallel (mode, orig_mode,
4319                                         cum->sse_regno + FIRST_SSE_REG);
4320         }
4321       break;
4322
4323     case V8QImode:
4324     case V4HImode:
4325     case V2SImode:
4326     case V2SFmode:
4327       if (!type || !AGGREGATE_TYPE_P (type))
4328         {
4329           if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
4330             {
4331               warnedmmx = true;
4332               warning (0, "MMX vector argument without MMX enabled "
4333                        "changes the ABI");
4334             }
4335           if (cum->mmx_nregs)
4336             return gen_reg_or_parallel (mode, orig_mode,
4337                                         cum->mmx_regno + FIRST_MMX_REG);
4338         }
4339       break;
4340     }
4341
4342   return NULL_RTX;
4343 }
4344
4345 static rtx
4346 function_arg_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4347                  enum machine_mode orig_mode, tree type)
4348 {
4349   /* Handle a hidden AL argument containing number of registers
4350      for varargs x86-64 functions.  */
4351   if (mode == VOIDmode)
4352     return GEN_INT (cum->maybe_vaarg
4353                     ? (cum->sse_nregs < 0
4354                        ? SSE_REGPARM_MAX
4355                        : cum->sse_regno)
4356                     : -1);
4357
4358   return construct_container (mode, orig_mode, type, 0, cum->nregs,
4359                               cum->sse_nregs,
4360                               &x86_64_int_parameter_registers [cum->regno],
4361                               cum->sse_regno);
4362 }
4363
4364 static rtx
4365 function_arg_ms_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4366                     enum machine_mode orig_mode, int named)
4367 {
4368   unsigned int regno;
4369
4370   /* Avoid the AL settings for the Unix64 ABI.  */
4371   if (mode == VOIDmode)
4372     return constm1_rtx;
4373
4374   /* If we've run out of registers, it goes on the stack.  */
4375   if (cum->nregs == 0)
4376     return NULL_RTX;
4377
4378   regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
4379
4380   /* Only floating point modes are passed in anything but integer regs.  */
4381   if (TARGET_SSE && (mode == SFmode || mode == DFmode))
4382     {
4383       if (named)
4384         regno = cum->regno + FIRST_SSE_REG;
4385       else
4386         {
4387           rtx t1, t2;
4388
4389           /* Unnamed floating parameters are passed in both the
4390              SSE and integer registers.  */
4391           t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
4392           t2 = gen_rtx_REG (mode, regno);
4393           t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
4394           t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
4395           return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
4396         }
4397     }
4398
4399   return gen_reg_or_parallel (mode, orig_mode, regno);
4400 }
4401
4402 rtx
4403 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
4404               tree type, int named)
4405 {
4406   enum machine_mode mode = omode;
4407   HOST_WIDE_INT bytes, words;
4408
4409   if (mode == BLKmode)
4410     bytes = int_size_in_bytes (type);
4411   else
4412     bytes = GET_MODE_SIZE (mode);
4413   words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4414
4415   /* To simplify the code below, represent vector types with a vector mode
4416      even if MMX/SSE are not active.  */
4417   if (type && TREE_CODE (type) == VECTOR_TYPE)
4418     mode = type_natural_mode (type);
4419
4420   if (TARGET_64BIT_MS_ABI)
4421     return function_arg_ms_64 (cum, mode, omode, named);
4422   else if (TARGET_64BIT)
4423     return function_arg_64 (cum, mode, omode, type);
4424   else
4425     return function_arg_32 (cum, mode, omode, type, bytes, words);
4426 }
4427
4428 /* A C expression that indicates when an argument must be passed by
4429    reference.  If nonzero for an argument, a copy of that argument is
4430    made in memory and a pointer to the argument is passed instead of
4431    the argument itself.  The pointer is passed in whatever way is
4432    appropriate for passing a pointer to that type.  */
4433
4434 static bool
4435 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
4436                         enum machine_mode mode ATTRIBUTE_UNUSED,
4437                         const_tree type, bool named ATTRIBUTE_UNUSED)
4438 {
4439   if (TARGET_64BIT_MS_ABI)
4440     {
4441       if (type)
4442         {
4443           /* Arrays are passed by reference.  */
4444           if (TREE_CODE (type) == ARRAY_TYPE)
4445             return true;
4446
4447           if (AGGREGATE_TYPE_P (type))
4448             {
4449               /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
4450                  are passed by reference.  */
4451               int el2 = exact_log2 (int_size_in_bytes (type));
4452               return !(el2 >= 0 && el2 <= 3);
4453             }
4454         }
4455
4456       /* __m128 is passed by reference.  */
4457       /* ??? How to handle complex?  For now treat them as structs,
4458          and pass them by reference if they're too large.  */
4459       if (GET_MODE_SIZE (mode) > 8)
4460         return true;
4461     }
4462   else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
4463     return 1;
4464
4465   return 0;
4466 }
4467
4468 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
4469    ABI.  Only called if TARGET_SSE.  */
4470 static bool
4471 contains_128bit_aligned_vector_p (tree type)
4472 {
4473   enum machine_mode mode = TYPE_MODE (type);
4474   if (SSE_REG_MODE_P (mode)
4475       && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
4476     return true;
4477   if (TYPE_ALIGN (type) < 128)
4478     return false;
4479
4480   if (AGGREGATE_TYPE_P (type))
4481     {
4482       /* Walk the aggregates recursively.  */
4483       switch (TREE_CODE (type))
4484         {
4485         case RECORD_TYPE:
4486         case UNION_TYPE:
4487         case QUAL_UNION_TYPE:
4488           {
4489             tree field;
4490
4491             /* Walk all the structure fields.  */
4492             for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4493               {
4494                 if (TREE_CODE (field) == FIELD_DECL
4495                     && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
4496                   return true;
4497               }
4498             break;
4499           }
4500
4501         case ARRAY_TYPE:
4502           /* Just for use if some languages passes arrays by value.  */
4503           if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
4504             return true;
4505           break;
4506
4507         default:
4508           gcc_unreachable ();
4509         }
4510     }
4511   return false;
4512 }
4513
4514 /* Gives the alignment boundary, in bits, of an argument with the
4515    specified mode and type.  */
4516
4517 int
4518 ix86_function_arg_boundary (enum machine_mode mode, tree type)
4519 {
4520   int align;
4521   if (type)
4522     align = TYPE_ALIGN (type);
4523   else
4524     align = GET_MODE_ALIGNMENT (mode);
4525   if (align < PARM_BOUNDARY)
4526     align = PARM_BOUNDARY;
4527   if (!TARGET_64BIT)
4528     {
4529       /* i386 ABI defines all arguments to be 4 byte aligned.  We have to
4530          make an exception for SSE modes since these require 128bit
4531          alignment.
4532
4533          The handling here differs from field_alignment.  ICC aligns MMX
4534          arguments to 4 byte boundaries, while structure fields are aligned
4535          to 8 byte boundaries.  */
4536       if (!TARGET_SSE)
4537         align = PARM_BOUNDARY;
4538       else if (!type)
4539         {
4540           if (!SSE_REG_MODE_P (mode))
4541             align = PARM_BOUNDARY;
4542         }
4543       else
4544         {
4545           if (!contains_128bit_aligned_vector_p (type))
4546             align = PARM_BOUNDARY;
4547         }
4548     }
4549   if (align > 128)
4550     align = 128;
4551   return align;
4552 }
4553
4554 /* Return true if N is a possible register number of function value.  */
4555
4556 bool
4557 ix86_function_value_regno_p (int regno)
4558 {
4559   switch (regno)
4560     {
4561     case 0:
4562       return true;
4563
4564     case FIRST_FLOAT_REG:
4565       if (TARGET_64BIT_MS_ABI)
4566         return false;
4567       return TARGET_FLOAT_RETURNS_IN_80387;
4568
4569     case FIRST_SSE_REG:
4570       return TARGET_SSE;
4571
4572     case FIRST_MMX_REG:
4573       if (TARGET_MACHO || TARGET_64BIT)
4574         return false;
4575       return TARGET_MMX;
4576     }
4577
4578   return false;
4579 }
4580
4581 /* Define how to find the value returned by a function.
4582    VALTYPE is the data type of the value (as a tree).
4583    If the precise function being called is known, FUNC is its FUNCTION_DECL;
4584    otherwise, FUNC is 0.  */
4585
4586 static rtx
4587 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
4588                    const_tree fntype, const_tree fn)
4589 {
4590   unsigned int regno;
4591
4592   /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4593      we normally prevent this case when mmx is not available.  However
4594      some ABIs may require the result to be returned like DImode.  */
4595   if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4596     regno = TARGET_MMX ? FIRST_MMX_REG : 0;
4597
4598   /* 16-byte vector modes in %xmm0.  See ix86_return_in_memory for where
4599      we prevent this case when sse is not available.  However some ABIs
4600      may require the result to be returned like integer TImode.  */
4601   else if (mode == TImode
4602            || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4603     regno = TARGET_SSE ? FIRST_SSE_REG : 0;
4604
4605   /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387).  */
4606   else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
4607     regno = FIRST_FLOAT_REG;
4608   else
4609     /* Most things go in %eax.  */
4610     regno = AX_REG;
4611
4612   /* Override FP return register with %xmm0 for local functions when
4613      SSE math is enabled or for functions with sseregparm attribute.  */
4614   if ((fn || fntype) && (mode == SFmode || mode == DFmode))
4615     {
4616       int sse_level = ix86_function_sseregparm (fntype, fn, false);
4617       if ((sse_level >= 1 && mode == SFmode)
4618           || (sse_level == 2 && mode == DFmode))
4619         regno = FIRST_SSE_REG;
4620     }
4621
4622   return gen_rtx_REG (orig_mode, regno);
4623 }
4624
4625 static rtx
4626 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
4627                    const_tree valtype)
4628 {
4629   rtx ret;
4630
4631   /* Handle libcalls, which don't provide a type node.  */
4632   if (valtype == NULL)
4633     {
4634       switch (mode)
4635         {
4636         case SFmode:
4637         case SCmode:
4638         case DFmode:
4639         case DCmode:
4640         case TFmode:
4641         case SDmode:
4642         case DDmode:
4643         case TDmode:
4644           return gen_rtx_REG (mode, FIRST_SSE_REG);
4645         case XFmode:
4646         case XCmode:
4647           return gen_rtx_REG (mode, FIRST_FLOAT_REG);
4648         case TCmode:
4649           return NULL;
4650         default:
4651           return gen_rtx_REG (mode, AX_REG);
4652         }
4653     }
4654
4655   ret = construct_container (mode, orig_mode, valtype, 1,
4656                              REGPARM_MAX, SSE_REGPARM_MAX,
4657                              x86_64_int_return_registers, 0);
4658
4659   /* For zero sized structures, construct_container returns NULL, but we
4660      need to keep rest of compiler happy by returning meaningful value.  */
4661   if (!ret)
4662     ret = gen_rtx_REG (orig_mode, AX_REG);
4663
4664   return ret;
4665 }
4666
4667 static rtx
4668 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
4669 {
4670   unsigned int regno = AX_REG;
4671
4672   if (TARGET_SSE)
4673     {
4674       if (mode == SFmode || mode == DFmode)
4675         regno = FIRST_SSE_REG;
4676       else if (VECTOR_MODE_P (mode) || GET_MODE_SIZE (mode) == 16)
4677         regno = FIRST_SSE_REG;
4678     }
4679
4680   return gen_rtx_REG (orig_mode, regno);
4681 }
4682
4683 static rtx
4684 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
4685                        enum machine_mode orig_mode, enum machine_mode mode)
4686 {
4687   const_tree fn, fntype;
4688
4689   fn = NULL_TREE;
4690   if (fntype_or_decl && DECL_P (fntype_or_decl))
4691     fn = fntype_or_decl;
4692   fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
4693
4694   if (TARGET_64BIT_MS_ABI)
4695     return function_value_ms_64 (orig_mode, mode);
4696   else if (TARGET_64BIT)
4697     return function_value_64 (orig_mode, mode, valtype);
4698   else
4699     return function_value_32 (orig_mode, mode, fntype, fn);
4700 }
4701
4702 static rtx
4703 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
4704                      bool outgoing ATTRIBUTE_UNUSED)
4705 {
4706   enum machine_mode mode, orig_mode;
4707
4708   orig_mode = TYPE_MODE (valtype);
4709   mode = type_natural_mode (valtype);
4710   return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
4711 }
4712
4713 rtx
4714 ix86_libcall_value (enum machine_mode mode)
4715 {
4716   return ix86_function_value_1 (NULL, NULL, mode, mode);
4717 }
4718
4719 /* Return true iff type is returned in memory.  */
4720
4721 static int
4722 return_in_memory_32 (const_tree type, enum machine_mode mode)
4723 {
4724   HOST_WIDE_INT size;
4725
4726   if (mode == BLKmode)
4727     return 1;
4728
4729   size = int_size_in_bytes (type);
4730
4731   if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
4732     return 0;
4733
4734   if (VECTOR_MODE_P (mode) || mode == TImode)
4735     {
4736       /* User-created vectors small enough to fit in EAX.  */
4737       if (size < 8)
4738         return 0;
4739
4740       /* MMX/3dNow values are returned in MM0,
4741          except when it doesn't exits.  */
4742       if (size == 8)
4743         return (TARGET_MMX ? 0 : 1);
4744
4745       /* SSE values are returned in XMM0, except when it doesn't exist.  */
4746       if (size == 16)
4747         return (TARGET_SSE ? 0 : 1);
4748     }
4749
4750   if (mode == XFmode)
4751     return 0;
4752
4753   if (mode == TDmode)
4754     return 1;
4755
4756   if (size > 12)
4757     return 1;
4758   return 0;
4759 }
4760
4761 static int
4762 return_in_memory_64 (const_tree type, enum machine_mode mode)
4763 {
4764   int needed_intregs, needed_sseregs;
4765   return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
4766 }
4767
4768 static int
4769 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
4770 {
4771   HOST_WIDE_INT size = int_size_in_bytes (type);
4772
4773   /* __m128 and friends are returned in xmm0.  */
4774   if (!COMPLEX_MODE_P (mode) && size == 16 && VECTOR_MODE_P (mode))
4775     return 0;
4776
4777   /* Otherwise, the size must be exactly in [1248]. But not for complex. */
4778   return (size != 1 && size != 2 && size != 4 && size != 8)
4779          || COMPLEX_MODE_P (mode);
4780 }
4781
4782 int
4783 ix86_return_in_memory (const_tree type)
4784 {
4785   const enum machine_mode mode = type_natural_mode (type);
4786
4787   if (TARGET_64BIT_MS_ABI)
4788     return return_in_memory_ms_64 (type, mode);
4789   else if (TARGET_64BIT)
4790     return return_in_memory_64 (type, mode);
4791   else
4792     return return_in_memory_32 (type, mode);
4793 }
4794
4795 /* Return false iff TYPE is returned in memory.  This version is used
4796    on Solaris 10.  It is similar to the generic ix86_return_in_memory,
4797    but differs notably in that when MMX is available, 8-byte vectors
4798    are returned in memory, rather than in MMX registers.  */
4799
4800 int
4801 ix86_sol10_return_in_memory (const_tree type)
4802 {
4803   int size;
4804   enum machine_mode mode = type_natural_mode (type);
4805
4806   if (TARGET_64BIT)
4807     return return_in_memory_64 (type, mode);
4808
4809   if (mode == BLKmode)
4810     return 1;
4811
4812   size = int_size_in_bytes (type);
4813
4814   if (VECTOR_MODE_P (mode))
4815     {
4816       /* Return in memory only if MMX registers *are* available.  This
4817          seems backwards, but it is consistent with the existing
4818          Solaris x86 ABI.  */
4819       if (size == 8)
4820         return TARGET_MMX;
4821       if (size == 16)
4822         return !TARGET_SSE;
4823     }
4824   else if (mode == TImode)
4825     return !TARGET_SSE;
4826   else if (mode == XFmode)
4827     return 0;
4828
4829   return size > 12;
4830 }
4831
4832 /* When returning SSE vector types, we have a choice of either
4833      (1) being abi incompatible with a -march switch, or
4834      (2) generating an error.
4835    Given no good solution, I think the safest thing is one warning.
4836    The user won't be able to use -Werror, but....
4837
4838    Choose the STRUCT_VALUE_RTX hook because that's (at present) only
4839    called in response to actually generating a caller or callee that
4840    uses such a type.  As opposed to RETURN_IN_MEMORY, which is called
4841    via aggregate_value_p for general type probing from tree-ssa.  */
4842
4843 static rtx
4844 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
4845 {
4846   static bool warnedsse, warnedmmx;
4847
4848   if (!TARGET_64BIT && type)
4849     {
4850       /* Look at the return type of the function, not the function type.  */
4851       enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
4852
4853       if (!TARGET_SSE && !warnedsse)
4854         {
4855           if (mode == TImode
4856               || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4857             {
4858               warnedsse = true;
4859               warning (0, "SSE vector return without SSE enabled "
4860                        "changes the ABI");
4861             }
4862         }
4863
4864       if (!TARGET_MMX && !warnedmmx)
4865         {
4866           if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4867             {
4868               warnedmmx = true;
4869               warning (0, "MMX vector return without MMX enabled "
4870                        "changes the ABI");
4871             }
4872         }
4873     }
4874
4875   return NULL;
4876 }
4877
4878 \f
4879 /* Create the va_list data type.  */
4880
4881 static tree
4882 ix86_build_builtin_va_list (void)
4883 {
4884   tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
4885
4886   /* For i386 we use plain pointer to argument area.  */
4887   if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
4888     return build_pointer_type (char_type_node);
4889
4890   record = (*lang_hooks.types.make_type) (RECORD_TYPE);
4891   type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
4892
4893   f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
4894                       unsigned_type_node);
4895   f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
4896                       unsigned_type_node);
4897   f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
4898                       ptr_type_node);
4899   f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
4900                       ptr_type_node);
4901
4902   va_list_gpr_counter_field = f_gpr;
4903   va_list_fpr_counter_field = f_fpr;
4904
4905   DECL_FIELD_CONTEXT (f_gpr) = record;
4906   DECL_FIELD_CONTEXT (f_fpr) = record;
4907   DECL_FIELD_CONTEXT (f_ovf) = record;
4908   DECL_FIELD_CONTEXT (f_sav) = record;
4909
4910   TREE_CHAIN (record) = type_decl;
4911   TYPE_NAME (record) = type_decl;
4912   TYPE_FIELDS (record) = f_gpr;
4913   TREE_CHAIN (f_gpr) = f_fpr;
4914   TREE_CHAIN (f_fpr) = f_ovf;
4915   TREE_CHAIN (f_ovf) = f_sav;
4916
4917   layout_type (record);
4918
4919   /* The correct type is an array type of one element.  */
4920   return build_array_type (record, build_index_type (size_zero_node));
4921 }
4922
4923 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.  */
4924
4925 static void
4926 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
4927 {
4928   rtx save_area, mem;
4929   rtx label;
4930   rtx label_ref;
4931   rtx tmp_reg;
4932   rtx nsse_reg;
4933   alias_set_type set;
4934   int i;
4935
4936   if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
4937     return;
4938
4939   /* Indicate to allocate space on the stack for varargs save area.  */
4940   ix86_save_varrargs_registers = 1;
4941   /* We need 16-byte stack alignment to save SSE registers.  If user
4942      asked for lower preferred_stack_boundary, lets just hope that he knows
4943      what he is doing and won't varargs SSE values.
4944
4945      We also may end up assuming that only 64bit values are stored in SSE
4946      register let some floating point program work.  */
4947   if (ix86_preferred_stack_boundary >= 128)
4948     cfun->stack_alignment_needed = 128;
4949
4950   save_area = frame_pointer_rtx;
4951   set = get_varargs_alias_set ();
4952
4953   for (i = cum->regno;
4954        i < ix86_regparm
4955        && i < cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
4956        i++)
4957     {
4958       mem = gen_rtx_MEM (Pmode,
4959                          plus_constant (save_area, i * UNITS_PER_WORD));
4960       MEM_NOTRAP_P (mem) = 1;
4961       set_mem_alias_set (mem, set);
4962       emit_move_insn (mem, gen_rtx_REG (Pmode,
4963                                         x86_64_int_parameter_registers[i]));
4964     }
4965
4966   if (cum->sse_nregs && cfun->va_list_fpr_size)
4967     {
4968       /* Now emit code to save SSE registers.  The AX parameter contains number
4969          of SSE parameter registers used to call this function.  We use
4970          sse_prologue_save insn template that produces computed jump across
4971          SSE saves.  We need some preparation work to get this working.  */
4972
4973       label = gen_label_rtx ();
4974       label_ref = gen_rtx_LABEL_REF (Pmode, label);
4975
4976       /* Compute address to jump to :
4977          label - 5*eax + nnamed_sse_arguments*5  */
4978       tmp_reg = gen_reg_rtx (Pmode);
4979       nsse_reg = gen_reg_rtx (Pmode);
4980       emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, AX_REG)));
4981       emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4982                               gen_rtx_MULT (Pmode, nsse_reg,
4983                                             GEN_INT (4))));
4984       if (cum->sse_regno)
4985         emit_move_insn
4986           (nsse_reg,
4987            gen_rtx_CONST (DImode,
4988                           gen_rtx_PLUS (DImode,
4989                                         label_ref,
4990                                         GEN_INT (cum->sse_regno * 4))));
4991       else
4992         emit_move_insn (nsse_reg, label_ref);
4993       emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
4994
4995       /* Compute address of memory block we save into.  We always use pointer
4996          pointing 127 bytes after first byte to store - this is needed to keep
4997          instruction size limited by 4 bytes.  */
4998       tmp_reg = gen_reg_rtx (Pmode);
4999       emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
5000                               plus_constant (save_area,
5001                                              8 * REGPARM_MAX + 127)));
5002       mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
5003       MEM_NOTRAP_P (mem) = 1;
5004       set_mem_alias_set (mem, set);
5005       set_mem_align (mem, BITS_PER_WORD);
5006
5007       /* And finally do the dirty job!  */
5008       emit_insn (gen_sse_prologue_save (mem, nsse_reg,
5009                                         GEN_INT (cum->sse_regno), label));
5010     }
5011 }
5012
5013 static void
5014 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
5015 {
5016   alias_set_type set = get_varargs_alias_set ();
5017   int i;
5018
5019   for (i = cum->regno; i < REGPARM_MAX; i++)
5020     {
5021       rtx reg, mem;
5022
5023       mem = gen_rtx_MEM (Pmode,
5024                          plus_constant (virtual_incoming_args_rtx,
5025                                         i * UNITS_PER_WORD));
5026       MEM_NOTRAP_P (mem) = 1;
5027       set_mem_alias_set (mem, set);
5028
5029       reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
5030       emit_move_insn (mem, reg);
5031     }
5032 }
5033
5034 static void
5035 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5036                              tree type, int *pretend_size ATTRIBUTE_UNUSED,
5037                              int no_rtl)
5038 {
5039   CUMULATIVE_ARGS next_cum;
5040   tree fntype;
5041
5042   /* This argument doesn't appear to be used anymore.  Which is good,
5043      because the old code here didn't suppress rtl generation.  */
5044   gcc_assert (!no_rtl);
5045
5046   if (!TARGET_64BIT)
5047     return;
5048
5049   fntype = TREE_TYPE (current_function_decl);
5050
5051   /* For varargs, we do not want to skip the dummy va_dcl argument.
5052      For stdargs, we do want to skip the last named argument.  */
5053   next_cum = *cum;
5054   if (stdarg_p (fntype))
5055     function_arg_advance (&next_cum, mode, type, 1);
5056
5057   if (TARGET_64BIT_MS_ABI)
5058     setup_incoming_varargs_ms_64 (&next_cum);
5059   else
5060     setup_incoming_varargs_64 (&next_cum);
5061 }
5062
5063 /* Implement va_start.  */
5064
5065 static void
5066 ix86_va_start (tree valist, rtx nextarg)
5067 {
5068   HOST_WIDE_INT words, n_gpr, n_fpr;
5069   tree f_gpr, f_fpr, f_ovf, f_sav;
5070   tree gpr, fpr, ovf, sav, t;
5071   tree type;
5072
5073   /* Only 64bit target needs something special.  */
5074   if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
5075     {
5076       std_expand_builtin_va_start (valist, nextarg);
5077       return;
5078     }
5079
5080   f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
5081   f_fpr = TREE_CHAIN (f_gpr);
5082   f_ovf = TREE_CHAIN (f_fpr);
5083   f_sav = TREE_CHAIN (f_ovf);
5084
5085   valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
5086   gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
5087   fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
5088   ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
5089   sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
5090
5091   /* Count number of gp and fp argument registers used.  */
5092   words = current_function_args_info.words;
5093   n_gpr = current_function_args_info.regno;
5094   n_fpr = current_function_args_info.sse_regno;
5095
5096   if (cfun->va_list_gpr_size)
5097     {
5098       type = TREE_TYPE (gpr);
5099       t = build2 (GIMPLE_MODIFY_STMT, type, gpr,
5100                   build_int_cst (type, n_gpr * 8));
5101       TREE_SIDE_EFFECTS (t) = 1;
5102       expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5103     }
5104
5105   if (cfun->va_list_fpr_size)
5106     {
5107       type = TREE_TYPE (fpr);
5108       t = build2 (GIMPLE_MODIFY_STMT, type, fpr,
5109                   build_int_cst (type, n_fpr * 16 + 8*REGPARM_MAX));
5110       TREE_SIDE_EFFECTS (t) = 1;
5111       expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5112     }
5113
5114   /* Find the overflow area.  */
5115   type = TREE_TYPE (ovf);
5116   t = make_tree (type, virtual_incoming_args_rtx);
5117   if (words != 0)
5118     t = build2 (POINTER_PLUS_EXPR, type, t,
5119                 size_int (words * UNITS_PER_WORD));
5120   t = build2 (GIMPLE_MODIFY_STMT, type, ovf, t);
5121   TREE_SIDE_EFFECTS (t) = 1;
5122   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5123
5124   if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
5125     {
5126       /* Find the register save area.
5127          Prologue of the function save it right above stack frame.  */
5128       type = TREE_TYPE (sav);
5129       t = make_tree (type, frame_pointer_rtx);
5130       t = build2 (GIMPLE_MODIFY_STMT, type, sav, t);
5131       TREE_SIDE_EFFECTS (t) = 1;
5132       expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5133     }
5134 }
5135
5136 /* Implement va_arg.
     
        Gimplify the fetch of one argument of type TYPE from the va_list
        VALIST.  The statements that locate the argument are appended to
        PRE_P; POST_P is only forwarded to the generic implementation.  The
        return value is an expression that dereferences the computed
        address (twice when the argument is passed by reference).
     
        On 32-bit targets and for the 64-bit MS ABI the generic
        std_gimplify_va_arg_expr is used.  Otherwise the generated code first
        tries the register save area (when construct_container yields a
        container) and falls back to the overflow area.  */
5137
5138 static tree
5139 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
5140 {
5141   static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
5142   tree f_gpr, f_fpr, f_ovf, f_sav;
5143   tree gpr, fpr, ovf, sav, t;
5144   int size, rsize;
5145   tree lab_false, lab_over = NULL_TREE;
5146   tree addr, t2;
5147   rtx container;
5148   int indirect_p = 0;
5149   tree ptrtype;
5150   enum machine_mode nat_mode;
5151
5152   /* Only 64bit target needs something special.  */
5153   if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
5154     return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
5155
       /* Fetch the four fields of the va_list record by walking its field
          chain, then build references to them inside *VALIST.  */
5156   f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
5157   f_fpr = TREE_CHAIN (f_gpr);
5158   f_ovf = TREE_CHAIN (f_fpr);
5159   f_sav = TREE_CHAIN (f_ovf);
5160
5161   valist = build_va_arg_indirect_ref (valist);
5162   gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
5163   fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
5164   ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
5165   sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
5166
       /* An argument passed by reference is fetched as a pointer here and
          dereferenced one extra time just before returning.  */
5167   indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
5168   if (indirect_p)
5169     type = build_pointer_type (type);
5170   size = int_size_in_bytes (type);
       /* Size of the argument in words, rounded up.  */
5171   rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5172
5173   nat_mode = type_natural_mode (type);
5174   container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
5175                                    REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
5176
5177   /* Pull the value out of the saved registers.  */
5178
5179   addr = create_tmp_var (ptr_type_node, "addr");
5180   DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
5181
       /* The register path is only generated when a container was built;
          otherwise only the overflow-area code below is emitted.  */
5182   if (container)
5183     {
5184       int needed_intregs, needed_sseregs;
5185       bool need_temp;
5186       tree int_addr, sse_addr;
5187
5188       lab_false = create_artificial_label ();
5189       lab_over = create_artificial_label ();
5190
5191       examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
5192
           /* For a container that is not a single register, a temporary is
              required when the type is aligned to more than 64 bits and
              integer registers are needed, or to more than 128 bits.  */
5193       need_temp = (!REG_P (container)
5194                    && ((needed_intregs && TYPE_ALIGN (type) > 64)
5195                        || TYPE_ALIGN (type) > 128));
5196
5197       /* In case we are passing structure, verify that it is consecutive block
5198          on the register save area.  If not we need to do moves.  */
5199       if (!need_temp && !REG_P (container))
5200         {
5201           /* Verify that all registers are strictly consecutive  */
5202           if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
5203             {
5204               int i;
5205
5206               for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
5207                 {
5208                   rtx slot = XVECEXP (container, 0, i);
5209                   if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
5210                       || INTVAL (XEXP (slot, 1)) != i * 16)
5211                     need_temp = 1;
5212                 }
5213             }
5214           else
5215             {
5216               int i;
5217
5218               for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
5219                 {
5220                   rtx slot = XVECEXP (container, 0, i);
5221                   if (REGNO (XEXP (slot, 0)) != (unsigned int) i
5222                       || INTVAL (XEXP (slot, 1)) != i * 8)
5223                     need_temp = 1;
5224                 }
5225             }
5226         }
           /* Without a temporary the value is read in place, so ADDR itself
              receives the save-area address; with one, separate address
              temporaries feed the piecewise copy below.  */
5227       if (!need_temp)
5228         {
5229           int_addr = addr;
5230           sse_addr = addr;
5231         }
5232       else
5233         {
5234           int_addr = create_tmp_var (ptr_type_node, "int_addr");
5235           DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
5236           sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
5237           DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
5238         }
5239
5240       /* First ensure that we fit completely in registers.  */
5241       if (needed_intregs)
5242         {
               /* if (gpr >= (REGPARM_MAX - needed_intregs + 1) * 8)
                    goto lab_false;  */
5243           t = build_int_cst (TREE_TYPE (gpr),
5244                              (REGPARM_MAX - needed_intregs + 1) * 8);
5245           t = build2 (GE_EXPR, boolean_type_node, gpr, t);
5246           t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
5247           t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
5248           gimplify_and_add (t, pre_p);
5249         }
5250       if (needed_sseregs)
5251         {
               /* Same test for fpr, whose offsets start after the
                  REGPARM_MAX * 8 bytes of the integer part.  */
5252           t = build_int_cst (TREE_TYPE (fpr),
5253                              (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
5254                              + REGPARM_MAX * 8);
5255           t = build2 (GE_EXPR, boolean_type_node, fpr, t);
5256           t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
5257           t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
5258           gimplify_and_add (t, pre_p);
5259         }
5260
5261       /* Compute index to start of area used for integer regs.  */
5262       if (needed_intregs)
5263         {
5264           /* int_addr = gpr + sav; */
5265           t = fold_convert (sizetype, gpr);
5266           t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
5267           t = build2 (GIMPLE_MODIFY_STMT, void_type_node, int_addr, t);
5268           gimplify_and_add (t, pre_p);
5269         }
5270       if (needed_sseregs)
5271         {
5272           /* sse_addr = fpr + sav; */
5273           t = fold_convert (sizetype, fpr);
5274           t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
5275           t = build2 (GIMPLE_MODIFY_STMT, void_type_node, sse_addr, t);
5276           gimplify_and_add (t, pre_p);
5277         }
5278       if (need_temp)
5279         {
5280           int i;
5281           tree temp = create_tmp_var (type, "va_arg_tmp");
5282
5283           /* addr = &temp; */
5284           t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
5285           t = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
5286           gimplify_and_add (t, pre_p);
5287
               /* Copy each piece of the container from its slot in the save
                  area to its offset inside the temporary.  */
5288           for (i = 0; i < XVECLEN (container, 0); i++)
5289             {
5290               rtx slot = XVECEXP (container, 0, i);
5291               rtx reg = XEXP (slot, 0);
5292               enum machine_mode mode = GET_MODE (reg);
5293               tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
5294               tree addr_type = build_pointer_type (piece_type);
5295               tree src_addr, src;
5296               int src_offset;
5297               tree dest_addr, dest;
5298
5299               if (SSE_REGNO_P (REGNO (reg)))
5300                 {
5301                   src_addr = sse_addr;
5302                   src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
5303                 }
5304               else
5305                 {
5306                   src_addr = int_addr;
5307                   src_offset = REGNO (reg) * 8;
5308                 }
5309               src_addr = fold_convert (addr_type, src_addr);
5310               src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
5311                                       size_int (src_offset));
5312               src = build_va_arg_indirect_ref (src_addr);
5313
5314               dest_addr = fold_convert (addr_type, addr);
5315               dest_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, dest_addr,
5316                                        size_int (INTVAL (XEXP (slot, 1))));
5317               dest = build_va_arg_indirect_ref (dest_addr);
5318
5319               t = build2 (GIMPLE_MODIFY_STMT, void_type_node, dest, src);
5320               gimplify_and_add (t, pre_p);
5321             }
5322         }
5323
           /* Advance the offsets past the registers just consumed: 8 bytes
              per integer register, 16 bytes per SSE register.  */
5324       if (needed_intregs)
5325         {
5326           t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
5327                       build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
5328           t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (gpr), gpr, t);
5329           gimplify_and_add (t, pre_p);
5330         }
5331       if (needed_sseregs)
5332         {
5333           t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
5334                       build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
5335           t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (fpr), fpr, t);
5336           gimplify_and_add (t, pre_p);
5337         }
5338
           /* The register path is complete: jump over the overflow-area
              code, which begins at lab_false.  */
5339       t = build1 (GOTO_EXPR, void_type_node, lab_over);
5340       gimplify_and_add (t, pre_p);
5341
5342       t = build1 (LABEL_EXPR, void_type_node, lab_false);
5343       append_to_statement_list (t, pre_p);
5344     }
5345
5346   /* ... otherwise out of the overflow area.  */
5347
5348   /* Care for on-stack alignment if needed.  */
5349   if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
5350       || integer_zerop (TYPE_SIZE (type)))
5351     t = ovf;
5352  else
5353     {
           /* Round ovf up to a multiple of ALIGN bytes:
              (ovf + align - 1) & -align.  */
5354       HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
5355       t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
5356                   size_int (align - 1));
5357       t = fold_convert (sizetype, t);
5358       t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5359                   size_int (-align));
5360       t = fold_convert (TREE_TYPE (ovf), t);
5361     }
5362   gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
5363
       /* addr = the (possibly aligned) overflow pointer.  */
5364   t2 = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
5365   gimplify_and_add (t2, pre_p);
5366
       /* ovf = that pointer advanced by the argument's size in whole
          words.  */
5367   t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
5368               size_int (rsize * UNITS_PER_WORD));
5369   t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (ovf), ovf, t);
5370   gimplify_and_add (t, pre_p);
5371
5372   if (container)
5373     {
5374       t = build1 (LABEL_EXPR, void_type_node, lab_over);
5375       append_to_statement_list (t, pre_p);
5376     }
5377
5378   ptrtype = build_pointer_type (type);
5379   addr = fold_convert (ptrtype, addr);
5380
       /* For by-reference arguments ADDR points at the stored pointer, so
          one more dereference is needed to reach the object.  */
5381   if (indirect_p)
5382     addr = build_va_arg_indirect_ref (addr);
5383   return build_va_arg_indirect_ref (addr);
5384 }
5385 \f
5386 /* Return nonzero if OPNUM's MEM should be matched
5387    in movabs* patterns.  */
5388
5389 int
5390 ix86_check_movabs (rtx insn, int opnum)
5391 {
5392   rtx set, mem;
5393
5394   set = PATTERN (insn);
5395   if (GET_CODE (set) == PARALLEL)
5396     set = XVECEXP (set, 0, 0);
5397   gcc_assert (GET_CODE (set) == SET);
5398   mem = XEXP (set, opnum);
5399   while (GET_CODE (mem) == SUBREG)
5400     mem = SUBREG_REG (mem);
5401   gcc_assert (MEM_P (mem));
5402   return (volatile_ok || !MEM_VOLATILE_P (mem));
5403 }
5404 \f
5405 /* Initialize the table of extra 80387 mathematical constants.  */
5406
5407 static void
5408 init_ext_80387_constants (void)
5409 {
5410   static const char * cst[5] =
5411   {
5412     "0.3010299956639811952256464283594894482",  /* 0: fldlg2  */
5413     "0.6931471805599453094286904741849753009",  /* 1: fldln2  */
5414     "1.4426950408889634073876517827983434472",  /* 2: fldl2e  */
5415     "3.3219280948873623478083405569094566090",  /* 3: fldl2t  */
5416     "3.1415926535897932385128089594061862044",  /* 4: fldpi   */
5417   };
5418   int i;
5419
5420   for (i = 0; i < 5; i++)
5421     {
5422       real_from_string (&ext_80387_constants_table[i], cst[i]);
5423       /* Ensure each constant is rounded to XFmode precision.  */
5424       real_convert (&ext_80387_constants_table[i],
5425                     XFmode, &ext_80387_constants_table[i]);
5426     }
5427
5428   ext_80387_constants_init = 1;
5429 }
5430
5431 /* Return true if the constant is something that can be loaded with
5432    a special instruction.  */
5433
5434 int
5435 standard_80387_constant_p (rtx x)
5436 {
5437   enum machine_mode mode = GET_MODE (x);
5438
5439   REAL_VALUE_TYPE r;
5440
5441   if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
5442     return -1;
5443
5444   if (x == CONST0_RTX (mode))
5445     return 1;
5446   if (x == CONST1_RTX (mode))
5447     return 2;
5448
5449   REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5450
5451   /* For XFmode constants, try to find a special 80387 instruction when
5452      optimizing for size or on those CPUs that benefit from them.  */
5453   if (mode == XFmode
5454       && (optimize_size || TARGET_EXT_80387_CONSTANTS))
5455     {
5456       int i;
5457
5458       if (! ext_80387_constants_init)
5459         init_ext_80387_constants ();
5460
5461       for (i = 0; i < 5; i++)
5462         if (real_identical (&r, &ext_80387_constants_table[i]))
5463           return i + 3;
5464     }
5465
5466   /* Load of the constant -0.0 or -1.0 will be split as
5467      fldz;fchs or fld1;fchs sequence.  */
5468   if (real_isnegzero (&r))
5469     return 8;
5470   if (real_identical (&r, &dconstm1))
5471     return 9;
5472
5473   return 0;
5474 }
5475
5476 /* Return the opcode of the special instruction to be used to load
5477    the constant X.  */
5478
5479 const char *
5480 standard_80387_constant_opcode (rtx x)
5481 {
5482   switch (standard_80387_constant_p (x))
5483     {
5484     case 1:
5485       return "fldz";
5486     case 2:
5487       return "fld1";
5488     case 3:
5489       return "fldlg2";
5490     case 4:
5491       return "fldln2";
5492     case 5:
5493       return "fldl2e";
5494     case 6:
5495       return "fldl2t";
5496     case 7:
5497       return "fldpi";
5498     case 8:
5499     case 9:
5500       return "#";
5501     default:
5502       gcc_unreachable ();
5503     }
5504 }
5505
5506 /* Return the CONST_DOUBLE representing the 80387 constant that is
5507    loaded by the specified special instruction.  The argument IDX
5508    matches the return value from standard_80387_constant_p.  */
5509
5510 rtx
5511 standard_80387_constant_rtx (int idx)
5512 {
5513   int i;
5514
5515   if (! ext_80387_constants_init)
5516     init_ext_80387_constants ();
5517
5518   switch (idx)
5519     {
5520     case 3:
5521     case 4:
5522     case 5:
5523     case 6:
5524     case 7:
5525       i = idx - 3;
5526       break;
5527
5528     default:
5529       gcc_unreachable ();
5530     }
5531
5532   return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
5533                                        XFmode);
5534 }
5535
5536 /* Return 1 if mode is a valid mode for sse.  */
5537 static int
5538 standard_sse_mode_p (enum machine_mode mode)
5539 {
5540   switch (mode)
5541     {
5542     case V16QImode:
5543     case V8HImode:
5544     case V4SImode:
5545     case V2DImode:
5546     case V4SFmode:
5547     case V2DFmode:
5548       return 1;
5549
5550     default:
5551       return 0;
5552     }
5553 }
5554
5555 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
5556  */
5557 int
5558 standard_sse_constant_p (rtx x)
5559 {
5560   enum machine_mode mode = GET_MODE (x);
5561
5562   if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
5563     return 1;
5564   if (vector_all_ones_operand (x, mode)
5565       && standard_sse_mode_p (mode))
5566     return TARGET_SSE2 ? 2 : -1;
5567
5568   return 0;
5569 }
5570
5571 /* Return the opcode of the special instruction to be used to load
5572    the constant X.  */
5573
5574 const char *
5575 standard_sse_constant_opcode (rtx insn, rtx x)
5576 {
5577   switch (standard_sse_constant_p (x))
5578     {
5579     case 1:
5580       if (get_attr_mode (insn) == MODE_V4SF)
5581         return "xorps\t%0, %0";
5582       else if (get_attr_mode (insn) == MODE_V2DF)
5583         return "xorpd\t%0, %0";
5584       else
5585         return "pxor\t%0, %0";
5586     case 2:
5587       return "pcmpeqd\t%0, %0";
5588     }
5589   gcc_unreachable ();
5590 }
5591
5592 /* Returns 1 if OP contains a symbol reference */
5593
5594 int
5595 symbolic_reference_mentioned_p (rtx op)
5596 {
5597   const char *fmt;
5598   int i;
5599
5600   if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
5601     return 1;
5602
5603   fmt = GET_RTX_FORMAT (GET_CODE (op));
5604   for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
5605     {
5606       if (fmt[i] == 'E')
5607         {
5608           int j;
5609
5610           for (j = XVECLEN (op, i) - 1; j >= 0; j--)
5611             if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
5612               return 1;
5613         }
5614
5615       else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
5616         return 1;
5617     }
5618
5619   return 0;
5620 }
5621
5622 /* Return 1 if it is appropriate to emit `ret' instructions in the
5623    body of a function.  Do this only if the epilogue is simple, needing a
5624    couple of insns.  Prior to reloading, we can't tell how many registers
5625    must be saved, so return 0 then.  Return 0 if there is no frame
5626    marker to de-allocate.  */
5627
5628 int
5629 ix86_can_use_return_insn_p (void)
5630 {
5631   struct ix86_frame frame;
5632
5633   if (! reload_completed || frame_pointer_needed)
5634     return 0;
5635
5636   /* Don't allow more than 32 pop, since that's all we can do
5637      with one instruction.  */
5638   if (current_function_pops_args
5639       && current_function_args_size >= 32768)
5640     return 0;
5641
5642   ix86_compute_frame_layout (&frame);
5643   return frame.to_allocate == 0 && frame.nregs == 0;
5644 }
5645 \f
5646 /* Value should be nonzero if functions must have frame pointers.
5647    Zero means the frame pointer need not be set up (and parms may
5648    be accessed via the stack pointer) in functions that seem suitable.  */
5649
5650 int
5651 ix86_frame_pointer_required (void)
5652 {
5653   /* If we accessed previous frames, then the generated code expects
5654      to be able to access the saved ebp value in our frame.  */
5655   if (cfun->machine->accesses_prev_frame)
5656     return 1;
5657
5658   /* Several x86 os'es need a frame pointer for other reasons,
5659      usually pertaining to setjmp.  */
5660   if (SUBTARGET_FRAME_POINTER_REQUIRED)
5661     return 1;
5662
5663   /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
5664      the frame pointer by default.  Turn it back on now if we've not
5665      got a leaf function.  */
5666   if (TARGET_OMIT_LEAF_FRAME_POINTER
5667       && (!current_function_is_leaf
5668           || ix86_current_function_calls_tls_descriptor))
5669     return 1;
5670
5671   if (current_function_profile)
5672     return 1;
5673
5674   return 0;
5675 }
5676
5677 /* Record that the current function accesses previous call frames.  */
5678
5679 void
5680 ix86_setup_frame_addresses (void)
5681 {
5682   cfun->machine->accesses_prev_frame = 1;
5683 }
5684 \f
/* USE_HIDDEN_LINKONCE is 1 on Mach-O targets, and elsewhere when
   HAVE_GAS_HIDDEN is defined and SUPPORTS_ONE_ONLY is nonzero; it is 0
   otherwise.  It selects how the pc thunks are named and emitted.  */
#if !TARGET_MACHO && !(defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0))
# define USE_HIDDEN_LINKONCE 0
#else
# define USE_HIDDEN_LINKONCE 1
#endif

/* Bit mask indexed by hard register number: bit N is set when a pc
   thunk for register N has been requested.  */
static int pic_labels_used;
5692
5693 /* Fills in the label name that should be used for a pc thunk for
5694    the given register.  */
5695
5696 static void
5697 get_pc_thunk_name (char name[32], unsigned int regno)
5698 {
5699   gcc_assert (!TARGET_64BIT);
5700
5701   if (USE_HIDDEN_LINKONCE)
5702     sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
5703   else
5704     ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
5705 }
5706
5707
5708 /* This function generates code for -fpic that loads %ebx with
5709    the return address of the caller and then returns.  */
5710
5711 void
5712 ix86_file_end (void)
5713 {
5714   rtx xops[2];
5715   int regno;
5716
5717   for (regno = 0; regno < 8; ++regno)
5718     {
5719       char name[32];
5720
5721       if (! ((pic_labels_used >> regno) & 1))
5722         continue;
5723
5724       get_pc_thunk_name (name, regno);
5725
5726 #if TARGET_MACHO
5727       if (TARGET_MACHO)
5728         {
5729           switch_to_section (darwin_sections[text_coal_section]);
5730           fputs ("\t.weak_definition\t", asm_out_file);
5731           assemble_name (asm_out_file, name);
5732           fputs ("\n\t.private_extern\t", asm_out_file);
5733           assemble_name (asm_out_file, name);
5734           fputs ("\n", asm_out_file);
5735           ASM_OUTPUT_LABEL (asm_out_file, name);
5736         }
5737       else
5738 #endif
5739       if (USE_HIDDEN_LINKONCE)
5740         {
5741           tree decl;
5742
5743           decl = build_decl (FUNCTION_DECL, get_identifier (name),
5744                              error_mark_node);
5745           TREE_PUBLIC (decl) = 1;
5746           TREE_STATIC (decl) = 1;
5747           DECL_ONE_ONLY (decl) = 1;
5748
5749           (*targetm.asm_out.unique_section) (decl, 0);
5750           switch_to_section (get_named_section (decl, NULL, 0));
5751
5752           (*targetm.asm_out.globalize_label) (asm_out_file, name);
5753           fputs ("\t.hidden\t", asm_out_file);
5754           assemble_name (asm_out_file, name);
5755           fputc ('\n', asm_out_file);
5756           ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
5757         }
5758       else
5759         {
5760           switch_to_section (text_section);
5761           ASM_OUTPUT_LABEL (asm_out_file, name);
5762         }
5763
5764       xops[0] = gen_rtx_REG (SImode, regno);
5765       xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
5766       output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
5767       output_asm_insn ("ret", xops);
5768     }
5769
5770   if (NEED_INDICATE_EXEC_STACK)
5771     file_end_indicate_exec_stack ();
5772 }
5773
5774 /* Emit code for the SET_GOT patterns.  */
5775
5776 const char *
5777 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
5778 {
5779   rtx xops[3];
5780
5781   xops[0] = dest;
5782
5783   if (TARGET_VXWORKS_RTP && flag_pic)
5784     {
5785       /* Load (*VXWORKS_GOTT_BASE) into the PIC register.  */
5786       xops[2] = gen_rtx_MEM (Pmode,
5787                              gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
5788       output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5789
5790       /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
5791          Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
5792          an unadorned address.  */
5793       xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5794       SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
5795       output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
5796       return "";
5797     }
5798
5799   xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
5800
5801   if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
5802     {
5803       xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
5804
5805       if (!flag_pic)
5806         output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5807       else
5808         output_asm_insn ("call\t%a2", xops);
5809
5810 #if TARGET_MACHO
5811       /* Output the Mach-O "canonical" label name ("Lxx$pb") here too.  This
5812          is what will be referenced by the Mach-O PIC subsystem.  */
5813       if (!label)
5814         ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5815 #endif
5816
5817       (*targetm.asm_out.internal_label) (asm_out_file, "L",
5818                                  CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
5819
5820       if (flag_pic)
5821         output_asm_insn ("pop{l}\t%0", xops);
5822     }
5823   else
5824     {
5825       char name[32];
5826       get_pc_thunk_name (name, REGNO (dest));
5827       pic_labels_used |= 1 << REGNO (dest);
5828
5829       xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
5830       xops[2] = gen_rtx_MEM (QImode, xops[2]);
5831       output_asm_insn ("call\t%X2", xops);
5832       /* Output the Mach-O "canonical" label name ("Lxx$pb") here too.  This
5833          is what will be referenced by the Mach-O PIC subsystem.  */
5834 #if TARGET_MACHO
5835       if (!label)
5836         ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5837       else
5838         targetm.asm_out.internal_label (asm_out_file, "L",
5839                                            CODE_LABEL_NUMBER (label));
5840 #endif
5841     }
5842
5843   if (TARGET_MACHO)
5844     return "";
5845
5846   if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
5847     output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
5848   else
5849     output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
5850
5851   return "";
5852 }
5853
5854 /* Generate an "push" pattern for input ARG.  */
5855
5856 static rtx
5857 gen_push (rtx arg)
5858 {
5859   return gen_rtx_SET (VOIDmode,
5860                       gen_rtx_MEM (Pmode,
5861                                    gen_rtx_PRE_DEC (Pmode,
5862                                                     stack_pointer_rtx)),
5863                       arg);
5864 }
5865
5866 /* Return >= 0 if there is an unused call-clobbered register available
5867    for the entire function.  */
5868
5869 static unsigned int
5870 ix86_select_alt_pic_regnum (void)
5871 {
5872   if (current_function_is_leaf && !current_function_profile
5873       && !ix86_current_function_calls_tls_descriptor)
5874     {
5875       int i;
5876       for (i = 2; i >= 0; --i)
5877         if (!df_regs_ever_live_p (i))
5878           return i;
5879     }
5880
5881   return INVALID_REGNUM;
5882 }
5883
5884 /* Return 1 if we need to save REGNO.  */
5885 static int
5886 ix86_save_reg (unsigned int regno, int maybe_eh_return)
5887 {
5888   if (pic_offset_table_rtx
5889       && regno == REAL_PIC_OFFSET_TABLE_REGNUM
5890       && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
5891           || current_function_profile
5892           || current_function_calls_eh_return
5893           || current_function_uses_const_pool))
5894     {
5895       if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
5896         return 0;
5897       return 1;
5898     }
5899
5900   if (current_function_calls_eh_return && maybe_eh_return)
5901     {
5902       unsigned i;
5903       for (i = 0; ; i++)
5904         {
5905           unsigned test = EH_RETURN_DATA_REGNO (i);
5906           if (test == INVALID_REGNUM)
5907             break;
5908           if (test == regno)
5909             return 1;
5910         }
5911     }
5912
5913   if (cfun->machine->force_align_arg_pointer
5914       && regno == REGNO (cfun->machine->force_align_arg_pointer))
5915     return 1;
5916
5917   return (df_regs_ever_live_p (regno)
5918           && !call_used_regs[regno]
5919           && !fixed_regs[regno]
5920           && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
5921 }
5922
5923 /* Return number of registers to be saved on the stack.  */
5924
5925 static int
5926 ix86_nsaved_regs (void)
5927 {
5928   int nregs = 0;
5929   int regno;
5930
5931   for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5932     if (ix86_save_reg (regno, true))
5933       nregs++;
5934   return nregs;
5935 }
5936
5937 /* Return the offset between two registers, one to be eliminated, and the other
5938    its replacement, at the start of a routine.  */
5939
5940 HOST_WIDE_INT
5941 ix86_initial_elimination_offset (int from, int to)
5942 {
5943   struct ix86_frame frame;
5944   ix86_compute_frame_layout (&frame);
5945
5946   if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5947     return frame.hard_frame_pointer_offset;
5948   else if (from == FRAME_POINTER_REGNUM
5949            && to == HARD_FRAME_POINTER_REGNUM)
5950     return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
5951   else
5952     {
5953       gcc_assert (to == STACK_POINTER_REGNUM);
5954
5955       if (from == ARG_POINTER_REGNUM)
5956         return frame.stack_pointer_offset;
5957
5958       gcc_assert (from == FRAME_POINTER_REGNUM);
5959       return frame.stack_pointer_offset - frame.frame_pointer_offset;
5960     }
5961 }
5962
5963 /* Fill structure ix86_frame about frame of currently computed function.

        The layout is built by accumulating a running OFFSET that starts
        just past the return address (and the saved frame pointer when one
        is needed) and then grows over the register save area, the va-arg
        area, alignment padding, the local frame, the outgoing argument
        area and the final alignment padding.  The fields written in *FRAME
        are nregs, save_regs_using_mov, hard_frame_pointer_offset,
        va_arg_size, padding1, frame_pointer_offset,
        outgoing_arguments_size, padding2, stack_pointer_offset,
        to_allocate and red_zone_size.

        Side effect: when not optimizing for size and the number of saved
        registers differs from the cached count,
        cfun->machine->use_fast_prologue_epilogue_nregs and
        cfun->machine->use_fast_prologue_epilogue are updated.  */
5964
5965 static void
5966 ix86_compute_frame_layout (struct ix86_frame *frame)
5967 {
5968   HOST_WIDE_INT total_size;
5969   unsigned int stack_alignment_needed;
5970   HOST_WIDE_INT offset;
5971   unsigned int preferred_alignment;
5972   HOST_WIDE_INT size = get_frame_size ();
5973
5974   frame->nregs = ix86_nsaved_regs ();
       /* NOTE(review): total_size is assigned here but is not read again
          anywhere in this function.  */
5975   total_size = size;
5976
       /* Scale both alignments down by BITS_PER_UNIT.  */
5977   stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
5978   preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
5979
5980   /* During reload iteration the amount of registers saved can change.
5981      Recompute the value as needed.  Do not recompute when amount of registers
5982      didn't change as reload does multiple calls to the function and does not
5983      expect the decision to change within single iteration.  */
5984   if (!optimize_size
5985       && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
5986     {
5987       int count = frame->nregs;
5988
5989       cfun->machine->use_fast_prologue_epilogue_nregs = count;
5990       /* The fast prologue uses move instead of push to save registers.  This
5991          is significantly longer, but also executes faster as modern hardware
5992          can execute the moves in parallel, but can't do that for push/pop.
5993
5994          Be careful about choosing what prologue to emit:  When function takes
5995          many instructions to execute we may use slow version as well as in
5996          case function is known to be outside hot spot (this is known with
5997          feedback only).  Weight the size of function by number of registers
5998          to save as it is cheap to use one or two push instructions but very
5999          slow to use many of them.  */
6000       if (count)
6001         count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
6002       if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
6003           || (flag_branch_probabilities
6004               && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
6005         cfun->machine->use_fast_prologue_epilogue = false;
6006       else
6007         cfun->machine->use_fast_prologue_epilogue
6008            = !expensive_function_p (count);
6009     }
       /* Registers are saved with MOV only when the target asks for it and
          the fast prologue/epilogue is in use; the choice may still be
          revoked further down.  */
6010   if (TARGET_PROLOGUE_USING_MOVE
6011       && cfun->machine->use_fast_prologue_epilogue)
6012     frame->save_regs_using_mov = true;
6013   else
6014     frame->save_regs_using_mov = false;
6015
6016
6017   /* Skip return address and saved base pointer.  */
6018   offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
6019
6020   frame->hard_frame_pointer_offset = offset;
6021
6022   /* Do some sanity checking of stack_alignment_needed and
6023      preferred_alignment, since i386 port is the only using those features
6024      that may break easily.  */
6025
6026   gcc_assert (!size || stack_alignment_needed);
6027   gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
6028   gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
6029   gcc_assert (stack_alignment_needed
6030               <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
6031
       /* Never align to less than STACK_BOUNDARY.  */
6032   if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
6033     stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
6034
6035   /* Register save area */
6036   offset += frame->nregs * UNITS_PER_WORD;
6037
6038   /* Va-arg area */
6039   if (ix86_save_varrargs_registers)
6040     {
6041       offset += X86_64_VARARGS_SIZE;
6042       frame->va_arg_size = X86_64_VARARGS_SIZE;
6043     }
6044   else
6045     frame->va_arg_size = 0;
6046
6047   /* Align start of frame for local function.  */
6048   frame->padding1 = ((offset + stack_alignment_needed - 1)
6049                      & -stack_alignment_needed) - offset;
6050
6051   offset += frame->padding1;
6052
6053   /* Frame pointer points here.  */
6054   frame->frame_pointer_offset = offset;
6055
6056   offset += size;
6057
6058   /* Add outgoing arguments area.  Can be skipped if we eliminated
6059      all the function calls as dead code.
6060      Skipping is however impossible when function calls alloca.  Alloca
6061      expander assumes that last current_function_outgoing_args_size
6062      of stack frame are unused.  */
6063   if (ACCUMULATE_OUTGOING_ARGS
6064       && (!current_function_is_leaf || current_function_calls_alloca
6065           || ix86_current_function_calls_tls_descriptor))
6066     {
6067       offset += current_function_outgoing_args_size;
6068       frame->outgoing_arguments_size = current_function_outgoing_args_size;
6069     }
6070   else
6071     frame->outgoing_arguments_size = 0;
6072
6073   /* Align stack boundary.  Only needed if we're calling another function
6074      or using alloca.  */
6075   if (!current_function_is_leaf || current_function_calls_alloca
6076       || ix86_current_function_calls_tls_descriptor)
6077     frame->padding2 = ((offset + preferred_alignment - 1)
6078                        & -preferred_alignment) - offset;
6079   else
6080     frame->padding2 = 0;
6081
6082   offset += frame->padding2;
6083
6084   /* We've reached end of stack frame.  */
6085   frame->stack_pointer_offset = offset;
6086
6087   /* Size prologue needs to allocate.  */
6088   frame->to_allocate =
6089     (size + frame->padding1 + frame->padding2
6090      + frame->outgoing_arguments_size + frame->va_arg_size);
6091
       /* MOV-based saves are dropped when there is nothing to allocate and
          at most one register to save, or when a 64-bit frame reaches
          0x80000000.  */
6092   if ((!frame->to_allocate && frame->nregs <= 1)
6093       || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
6094     frame->save_regs_using_mov = false;
6095
       /* A leaf function whose stack pointer is unchanging and that makes
          no TLS descriptor call may keep its frame (plus the MOV-saved
          registers) in the red zone, capped at
          RED_ZONE_SIZE - RED_ZONE_RESERVE.  The red zone part is then
          taken off both to_allocate and stack_pointer_offset.  */
6096   if (TARGET_RED_ZONE && current_function_sp_is_unchanging
6097       && current_function_is_leaf
6098       && !ix86_current_function_calls_tls_descriptor)
6099     {
6100       frame->red_zone_size = frame->to_allocate;
6101       if (frame->save_regs_using_mov)
6102         frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
6103       if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
6104         frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
6105     }
6106   else
6107     frame->red_zone_size = 0;
6108   frame->to_allocate -= frame->red_zone_size;
6109   frame->stack_pointer_offset -= frame->red_zone_size;
       /* Disabled debugging dump of the computed layout.  */
6110 #if 0
6111   fprintf (stderr, "\n");
6112   fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
6113   fprintf (stderr, "size: %ld\n", (long)size);
6114   fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed);
6115   fprintf (stderr, "padding1: %ld\n", (long)frame->padding1);
6116   fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size);
6117   fprintf (stderr, "padding2: %ld\n", (long)frame->padding2);
6118   fprintf (stderr, "to_allocate: %ld\n", (long)frame->to_allocate);
6119   fprintf (stderr, "red_zone_size: %ld\n", (long)frame->red_zone_size);
6120   fprintf (stderr, "frame_pointer_offset: %ld\n", (long)frame->frame_pointer_offset);
6121   fprintf (stderr, "hard_frame_pointer_offset: %ld\n",
6122            (long)frame->hard_frame_pointer_offset);
6123   fprintf (stderr, "stack_pointer_offset: %ld\n", (long)frame->stack_pointer_offset);
6124   fprintf (stderr, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf);
6125   fprintf (stderr, "current_function_calls_alloca: %ld\n", (long)current_function_calls_alloca);
6126   fprintf (stderr, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor);
6127 #endif
6128 }
6129
6130 /* Emit code to save registers in the prologue.  */
6131
6132 static void
6133 ix86_emit_save_regs (void)
6134 {
6135   unsigned int regno;
6136   rtx insn;
6137
6138   for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
6139     if (ix86_save_reg (regno, true))
6140       {
6141         insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
6142         RTX_FRAME_RELATED_P (insn) = 1;
6143       }
6144 }
6145
6146 /* Emit code to save registers using MOV insns.  First register
6147    is restored from POINTER + OFFSET.  */
6148 static void
6149 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
6150 {
6151   unsigned int regno;
6152   rtx insn;
6153
6154   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6155     if (ix86_save_reg (regno, true))
6156       {
6157         insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
6158                                                Pmode, offset),
6159                                gen_rtx_REG (Pmode, regno));
6160         RTX_FRAME_RELATED_P (insn) = 1;
6161         offset += UNITS_PER_WORD;
6162       }
6163 }
6164
6165 /* Expand prologue or epilogue stack adjustment.
6166    The pattern exist to put a dependency on all ebp-based memory accesses.
6167    STYLE should be negative if instructions should be marked as frame related,
6168    zero if %r11 register is live and cannot be freely used and positive
6169    otherwise.  */
6170
6171 static void
6172 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
6173 {
6174   rtx insn;
6175
6176   if (! TARGET_64BIT)
6177     insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
6178   else if (x86_64_immediate_operand (offset, DImode))
6179     insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
6180   else
6181     {
6182       rtx r11;
6183       /* r11 is used by indirect sibcall return as well, set before the
6184          epilogue and used after the epilogue.  ATM indirect sibcall
6185          shouldn't be used together with huge frame sizes in one
6186          function because of the frame_size check in sibcall.c.  */
6187       gcc_assert (style);
6188       r11 = gen_rtx_REG (DImode, R11_REG);
6189       insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
6190       if (style < 0)
6191         RTX_FRAME_RELATED_P (insn) = 1;
6192       insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
6193                                                                offset));
6194     }
6195   if (style < 0)
6196     RTX_FRAME_RELATED_P (insn) = 1;
6197 }
6198
6199 /* Handle the TARGET_INTERNAL_ARG_POINTER hook.  */
6200
6201 static rtx
6202 ix86_internal_arg_pointer (void)
6203 {
6204   bool has_force_align_arg_pointer =
6205     (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
6206                             TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
6207   if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
6208        && DECL_NAME (current_function_decl)
6209        && MAIN_NAME_P (DECL_NAME (current_function_decl))
6210        && DECL_FILE_SCOPE_P (current_function_decl))
6211       || ix86_force_align_arg_pointer
6212       || has_force_align_arg_pointer)
6213     {
6214       /* Nested functions can't realign the stack due to a register
6215          conflict.  */
6216       if (DECL_CONTEXT (current_function_decl)
6217           && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)
6218         {
6219           if (ix86_force_align_arg_pointer)
6220             warning (0, "-mstackrealign ignored for nested functions");
6221           if (has_force_align_arg_pointer)
6222             error ("%s not supported for nested functions",
6223                    ix86_force_align_arg_pointer_string);
6224           return virtual_incoming_args_rtx;
6225         }
6226       cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, CX_REG);
6227       return copy_to_reg (cfun->machine->force_align_arg_pointer);
6228     }
6229   else
6230     return virtual_incoming_args_rtx;
6231 }
6232
6233 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
6234    This is called from dwarf2out.c to emit call frame instructions
6235    for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
6236 static void
6237 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
6238 {
6239   rtx unspec = SET_SRC (pattern);
6240   gcc_assert (GET_CODE (unspec) == UNSPEC);
6241
6242   switch (index)
6243     {
6244     case UNSPEC_REG_SAVE:
6245       dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
6246                               SET_DEST (pattern));
6247       break;
6248     case UNSPEC_DEF_CFA:
6249       dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
6250                          INTVAL (XVECEXP (unspec, 0, 0)));
6251       break;
6252     default:
6253       gcc_unreachable ();
6254     }
6255 }
6256
6257 /* Expand the prologue into a bunch of separate insns.

        The insns are emitted in this order:
          1. when cfun->machine->force_align_arg_pointer is set, capture
             the argument pointer, align the stack and re-push the return
             address, attaching the unwind notes described below;
          2. push and set up the hard frame pointer when one is needed;
          3. save registers with PUSH, or account for the save area in the
             allocation when MOV is used;
          4. allocate the frame, either with a plain stack adjustment or
             through the allocate_stack_worker pattern;
          5. save registers with MOV if that was chosen (this happens
             before step 4 in the red zone case);
          6. load the PIC register when it is used;
          7. when profiling, emit a blockage (and a use of the PIC
             register) so that nothing is scheduled across it.  */
6258
6259 void
6260 ix86_expand_prologue (void)
6261 {
6262   rtx insn;
6263   bool pic_reg_used;
6264   struct ix86_frame frame;
6265   HOST_WIDE_INT allocate;
6266
6267   ix86_compute_frame_layout (&frame);
6268
6269   if (cfun->machine->force_align_arg_pointer)
6270     {
6271       rtx x, y;
6272
6273       /* Grab the argument pointer.  */
6274       x = plus_constant (stack_pointer_rtx, 4);
6275       y = cfun->machine->force_align_arg_pointer;
6276       insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
6277       RTX_FRAME_RELATED_P (insn) = 1;
6278
6279       /* The unwind info consists of two parts: install the fafp as the cfa,
6280          and record the fafp as the "save register" of the stack pointer.
6281          The later is there in order that the unwinder can see where it
6282          should restore the stack pointer across the and insn.  */
6283       x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
6284       x = gen_rtx_SET (VOIDmode, y, x);
6285       RTX_FRAME_RELATED_P (x) = 1;
6286       y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
6287                           UNSPEC_REG_SAVE);
6288       y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
6289       RTX_FRAME_RELATED_P (y) = 1;
6290       x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
6291       x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
6292       REG_NOTES (insn) = x;
6293
6294       /* Align the stack.  */
6295       emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
6296                              GEN_INT (-16)));
6297
6298       /* And here we cheat like madmen with the unwind info.  We force the
6299          cfa register back to sp+4, which is exactly what it was at the
6300          start of the function.  Re-pushing the return address results in
6301          the return at the same spot relative to the cfa, and thus is
6302          correct wrt the unwind info.  */
6303       x = cfun->machine->force_align_arg_pointer;
6304       x = gen_frame_mem (Pmode, plus_constant (x, -4));
6305       insn = emit_insn (gen_push (x));
6306       RTX_FRAME_RELATED_P (insn) = 1;
6307
6308       x = GEN_INT (4);
6309       x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
6310       x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
6311       x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
6312       REG_NOTES (insn) = x;
6313     }
6314
6315   /* Note: AT&T enter does NOT have reversed args.  Enter is probably
6316      slower on all targets.  Also sdb doesn't like it.  */
6317
6318   if (frame_pointer_needed)
6319     {
6320       insn = emit_insn (gen_push (hard_frame_pointer_rtx));
6321       RTX_FRAME_RELATED_P (insn) = 1;
6322
6323       insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
6324       RTX_FRAME_RELATED_P (insn) = 1;
6325     }
6326
       /* ALLOCATE is the amount taken off the stack pointer below.  When
          registers are saved with MOV instead of PUSH it also has to
          cover the register save area.  */
6327   allocate = frame.to_allocate;
6328
6329   if (!frame.save_regs_using_mov)
6330     ix86_emit_save_regs ();
6331   else
6332     allocate += frame.nregs * UNITS_PER_WORD;
6333
6334   /* When using red zone we may start register saving before allocating
6335      the stack frame saving one cycle of the prologue. However I will
6336      avoid doing this if I am going to have to probe the stack since
6337      at least on x86_64 the stack probe can turn into a call that clobbers
6338      a red zone location */
6339   if (TARGET_RED_ZONE && frame.save_regs_using_mov
6340       && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT))
6341     ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
6342                                    : stack_pointer_rtx,
6343                                    -frame.nregs * UNITS_PER_WORD);
6344
       /* Allocate the frame: nothing to do, a plain stack adjustment, or,
          when TARGET_STACK_PROBE is set and the size reaches
          CHECK_STACK_LIMIT, the allocate_stack_worker pattern that takes
          the size in eax.  */
6345   if (allocate == 0)
6346     ;
6347   else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
6348     pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6349                                GEN_INT (-allocate), -1);
6350   else
6351     {
6352       /* Only valid for Win32.  */
6353       rtx eax = gen_rtx_REG (Pmode, AX_REG);
6354       bool eax_live;
6355       rtx t;
6356
6357       gcc_assert (!TARGET_64BIT || TARGET_64BIT_MS_ABI);
6358
6359       if (TARGET_64BIT_MS_ABI)
6360         eax_live = false;
6361       else
6362         eax_live = ix86_eax_live_at_start_p ();
6363
           /* A live eax is preserved with a push; that push already
              provides one word of the requested allocation.  */
6364       if (eax_live)
6365         {
6366           emit_insn (gen_push (eax));
6367           allocate -= UNITS_PER_WORD;
6368         }
6369
6370       emit_move_insn (eax, GEN_INT (allocate));
6371
6372       if (TARGET_64BIT)
6373         insn = gen_allocate_stack_worker_64 (eax);
6374       else
6375         insn = gen_allocate_stack_worker_32 (eax);
6376       insn = emit_insn (insn);
6377       RTX_FRAME_RELATED_P (insn) = 1;
           /* Describe the worker's effect on the stack pointer in a
              REG_FRAME_RELATED_EXPR note.  */
6378       t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
6379       t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
6380       REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
6381                                             t, REG_NOTES (insn));
6382
           /* Reload the eax value that was pushed above.  */
6383       if (eax_live)
6384         {
6385           if (frame_pointer_needed)
6386             t = plus_constant (hard_frame_pointer_rtx,
6387                                allocate
6388                                - frame.to_allocate
6389                                - frame.nregs * UNITS_PER_WORD);
6390           else
6391             t = plus_constant (stack_pointer_rtx, allocate);
6392           emit_move_insn (eax, gen_rtx_MEM (Pmode, t));
6393         }
6394     }
6395
       /* MOV-based saves that were not already emitted ahead of the
          allocation (the red zone case above) are emitted now.  */
6396   if (frame.save_regs_using_mov
6397       && !(TARGET_RED_ZONE
6398          && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)))
6399     {
6400       if (!frame_pointer_needed || !frame.to_allocate)
6401         ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
6402       else
6403         ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
6404                                        -frame.nregs * UNITS_PER_WORD);
6405     }
6406
       /* Decide whether the PIC register has to be set up, switching
          pic_offset_table_rtx to an alternate register when
          ix86_select_alt_pic_regnum finds one.  */
6407   pic_reg_used = false;
6408   if (pic_offset_table_rtx
6409       && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
6410           || current_function_profile))
6411     {
6412       unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
6413
6414       if (alt_pic_reg_used != INVALID_REGNUM)
6415         SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
6416
6417       pic_reg_used = true;
6418     }
6419
6420   if (pic_reg_used)
6421     {
6422       if (TARGET_64BIT)
6423         {
6424           if (ix86_cmodel == CM_LARGE_PIC)
6425             {
                   /* Large PIC model: the PIC register is formed by
                      adding a GOT offset, held in r11, to the value set
                      by the set_rip pattern for LABEL.  */
6426               rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
6427               rtx label = gen_label_rtx ();
6428               emit_label (label);
6429               LABEL_PRESERVE_P (label) = 1;
6430               gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
6431               insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
6432               insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
6433               insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
6434                                             pic_offset_table_rtx, tmp_reg));
6435             }
6436           else
6437             insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
6438         }
6439       else
6440         insn = emit_insn (gen_set_got (pic_offset_table_rtx));
6441     }
6442
6443   /* Prevent function calls from being scheduled before the call to mcount.
6444      In the pic_reg_used case, make sure that the got load isn't deleted.  */
6445   if (current_function_profile)
6446     {
6447       if (pic_reg_used)
6448         emit_insn (gen_prologue_use (pic_offset_table_rtx));
6449       emit_insn (gen_blockage ());
6450     }
6451 }
6452
6453 /* Emit code to restore saved registers using MOV insns.  First register
6454    is restored from POINTER + OFFSET.  */
6455 static void
6456 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
6457                                   int maybe_eh_return)
6458 {
6459   int regno;
6460   rtx base_address = gen_rtx_MEM (Pmode, pointer);
6461
6462   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6463     if (ix86_save_reg (regno, maybe_eh_return))
6464       {
6465         /* Ensure that adjust_address won't be forced to produce pointer
6466            out of range allowed by x86-64 instruction set.  */
6467         if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
6468           {
6469             rtx r11;
6470
6471             r11 = gen_rtx_REG (DImode, R11_REG);
6472             emit_move_insn (r11, GEN_INT (offset));
6473             emit_insn (gen_adddi3 (r11, r11, pointer));
6474             base_address = gen_rtx_MEM (Pmode, r11);
6475             offset = 0;
6476           }
6477         emit_move_insn (gen_rtx_REG (Pmode, regno),
6478                         adjust_address (base_address, Pmode, offset));
6479         offset += UNITS_PER_WORD;
6480       }
6481 }
6482
6483 /* Restore function stack, frame, and registers.

        STYLE selects the kind of epilogue: 2 is the eh_return epilogue
        (all saved registers are restored and EH_RETURN_STACKADJ_RTX is
        added to the stack pointer), 0 is a sibcall epilogue (no return
        insn is emitted), and any other value is a normal return.  STYLE is
        also passed on to pro_epilogue_adjust_stack.

        Registers are restored either with MOV insns followed by a single
        stack/frame release, or by releasing the frame first and then
        popping the registers one by one.  */
6484
6485 void
6486 ix86_expand_epilogue (int style)
6487 {
6488   int regno;
       /* The stack pointer can be used as is unless a frame pointer is in
          use and the stack pointer is not known to be unchanging.  */
6489   int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
6490   struct ix86_frame frame;
6491   HOST_WIDE_INT offset;
6492
6493   ix86_compute_frame_layout (&frame);
6494
6495   /* Calculate start of saved registers relative to ebp.  Special care
6496      must be taken for the normal return case of a function using
6497      eh_return: the eax and edx registers are marked as saved, but not
6498      restored along this path.  */
6499   offset = frame.nregs;
6500   if (current_function_calls_eh_return && style != 2)
6501     offset -= 2;
6502   offset *= -UNITS_PER_WORD;
6503
6504   /* If we're only restoring one register and sp is not valid then
6505      using a move instruction to restore the register since it's
6506      less work than reloading sp and popping the register.
6507
6508      The default code result in stack adjustment using add/lea instruction,
6509      while this code results in LEAVE instruction (or discrete equivalent),
6510      so it is profitable in some other cases as well.  Especially when there
6511      are no registers to restore.  We also use this code when TARGET_USE_LEAVE
6512      and there is exactly one register to pop. This heuristic may need some
6513      tuning in future.  */
6514   if ((!sp_valid && frame.nregs <= 1)
6515       || (TARGET_EPILOGUE_USING_MOVE
6516           && cfun->machine->use_fast_prologue_epilogue
6517           && (frame.nregs > 1 || frame.to_allocate))
6518       || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
6519       || (frame_pointer_needed && TARGET_USE_LEAVE
6520           && cfun->machine->use_fast_prologue_epilogue
6521           && frame.nregs == 1)
6522       || current_function_calls_eh_return)
6523     {
6524       /* Restore registers.  We can use ebp or esp to address the memory
6525          locations.  If both are available, default to ebp, since offsets
6526          are known to be small.  Only exception is esp pointing directly to the
6527          end of block of saved registers, where we may simplify addressing
6528          mode.  */
6529
6530       if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
6531         ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
6532                                           frame.to_allocate, style == 2);
6533       else
6534         ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
6535                                           offset, style == 2);
6536
6537       /* eh_return epilogues need %ecx added to the stack pointer.  */
6538       if (style == 2)
6539         {
6540           rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
6541
6542           if (frame_pointer_needed)
6543             {
                   /* SA becomes frame pointer + SA + one word, the saved
                      frame pointer is reloaded, and the stack pointer is
                      then set from SA.  */
6544               tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
6545               tmp = plus_constant (tmp, UNITS_PER_WORD);
6546               emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
6547
6548               tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
6549               emit_move_insn (hard_frame_pointer_rtx, tmp);
6550
6551               pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
6552                                          const0_rtx, style);
6553             }
6554           else
6555             {
                   /* Without a frame pointer the stack pointer is bumped
                      by SA plus the whole frame and register save area
                      in one insn.  */
6556               tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
6557               tmp = plus_constant (tmp, (frame.to_allocate
6558                                          + frame.nregs * UNITS_PER_WORD));
6559               emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
6560             }
6561         }
6562       else if (!frame_pointer_needed)
6563         pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6564                                    GEN_INT (frame.to_allocate
6565                                             + frame.nregs * UNITS_PER_WORD),
6566                                    style);
6567       /* If not an i386, mov & pop is faster than "leave".  */
6568       else if (TARGET_USE_LEAVE || optimize_size
6569                || !cfun->machine->use_fast_prologue_epilogue)
6570         emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
6571       else
6572         {
6573           pro_epilogue_adjust_stack (stack_pointer_rtx,
6574                                      hard_frame_pointer_rtx,
6575                                      const0_rtx, style);
6576           if (TARGET_64BIT)
6577             emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6578           else
6579             emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
6580         }
6581     }
6582   else
6583     {
6584       /* First step is to deallocate the stack frame so that we can
6585          pop the registers.  */
6586       if (!sp_valid)
6587         {
6588           gcc_assert (frame_pointer_needed);
6589           pro_epilogue_adjust_stack (stack_pointer_rtx,
6590                                      hard_frame_pointer_rtx,
6591                                      GEN_INT (offset), style);
6592         }
6593       else if (frame.to_allocate)
6594         pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6595                                    GEN_INT (frame.to_allocate), style);
6596
           /* Pop the saved registers in increasing register number
              order.  */
6597       for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6598         if (ix86_save_reg (regno, false))
6599           {
6600             if (TARGET_64BIT)
6601               emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
6602             else
6603               emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
6604           }
6605       if (frame_pointer_needed)
6606         {
6607           /* Leave results in shorter dependency chains on CPUs that are
6608              able to grok it fast.  */
6609           if (TARGET_USE_LEAVE)
6610             emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
6611           else if (TARGET_64BIT)
6612             emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6613           else
6614             emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
6615         }
6616     }
6617
       /* With a forced-alignment argument pointer, the stack pointer is
          recomputed as that pointer minus 4.  */
6618   if (cfun->machine->force_align_arg_pointer)
6619     {
6620       emit_insn (gen_addsi3 (stack_pointer_rtx,
6621                              cfun->machine->force_align_arg_pointer,
6622                              GEN_INT (-4)));
6623     }
6624
6625   /* Sibcall epilogues don't want a return instruction.  */
6626   if (style == 0)
6627     return;
6628
       /* Emit the return, popping the callee-popped argument bytes when
          there are any.  */
6629   if (current_function_pops_args && current_function_args_size)
6630     {
6631       rtx popc = GEN_INT (current_function_pops_args);
6632
6633       /* i386 can only pop 64K bytes.  If asked to pop more, pop
6634          return address, do explicit add, and jump indirectly to the
6635          caller.  */
6636
6637       if (current_function_pops_args >= 65536)
6638         {
6639           rtx ecx = gen_rtx_REG (SImode, CX_REG);
6640
6641           /* There is no "pascal" calling convention in any 64bit ABI.  */
6642           gcc_assert (!TARGET_64BIT);
6643
6644           emit_insn (gen_popsi1 (ecx));
6645           emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
6646           emit_jump_insn (gen_return_indirect_internal (ecx));
6647         }
6648       else
6649         emit_jump_insn (gen_return_pop_internal (popc));
6650     }
6651   else
6652     emit_jump_insn (gen_return_internal ());
6653 }
6654
6655 /* Reset from the function's potential modifications.  */
6656
6657 static void
6658 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6659                                HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6660 {
6661   if (pic_offset_table_rtx)
6662     SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
6663 #if TARGET_MACHO
6664   /* Mach-O doesn't support labels at the end of objects, so if
6665      it looks like we might want one, insert a NOP.  */
6666   {
6667     rtx insn = get_last_insn ();
6668     while (insn
6669            && NOTE_P (insn)
6670            && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
6671       insn = PREV_INSN (insn);
6672     if (insn
6673         && (LABEL_P (insn)
6674             || (NOTE_P (insn)
6675                 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
6676       fputs ("\tnop\n", file);
6677   }
6678 #endif
6679
6680 }
6681 \f
6682 /* Extract the parts of an RTL expression that is a valid memory address
6683    for an instruction.  Return 0 if the structure of the address is
6684    grossly off.  Return -1 if the address contains ASHIFT, so it is not
6685    strictly valid, but still used for computing length of lea instruction.  */
6686
6687 int
6688 ix86_decompose_address (rtx addr, struct ix86_address *out)
6689 {
6690   rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
6691   rtx base_reg, index_reg;
6692   HOST_WIDE_INT scale = 1;
6693   rtx scale_rtx = NULL_RTX;
6694   int retval = 1;
6695   enum ix86_address_seg seg = SEG_DEFAULT;
6696
6697   if (REG_P (addr) || GET_CODE (addr) == SUBREG)
6698     base = addr;
6699   else if (GET_CODE (addr) == PLUS)
6700     {
6701       rtx addends[4], op;
6702       int n = 0, i;
6703
6704       op = addr;
6705       do
6706         {
6707           if (n >= 4)
6708             return 0;
6709           addends[n++] = XEXP (op, 1);
6710           op = XEXP (op, 0);
6711         }
6712       while (GET_CODE (op) == PLUS);
6713       if (n >= 4)
6714         return 0;
6715       addends[n] = op;
6716
6717       for (i = n; i >= 0; --i)
6718         {
6719           op = addends[i];
6720           switch (GET_CODE (op))
6721             {
6722             case MULT:
6723               if (index)
6724                 return 0;
6725               index = XEXP (op, 0);
6726               scale_rtx = XEXP (op, 1);
6727               break;
6728
6729             case UNSPEC:
6730               if (XINT (op, 1) == UNSPEC_TP
6731                   && TARGET_TLS_DIRECT_SEG_REFS
6732                   && seg == SEG_DEFAULT)
6733                 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
6734               else
6735                 return 0;
6736               break;
6737
6738             case REG:
6739             case SUBREG:
6740               if (!base)
6741                 base = op;
6742               else if (!index)
6743                 index = op;
6744               else
6745                 return 0;
6746               break;
6747
6748             case CONST:
6749             case CONST_INT:
6750             case SYMBOL_REF:
6751             case LABEL_REF:
6752               if (disp)
6753                 return 0;
6754               disp = op;
6755               break;
6756
6757             default:
6758               return 0;
6759             }
6760         }
6761     }
6762   else if (GET_CODE (addr) == MULT)
6763     {
6764       index = XEXP (addr, 0);           /* index*scale */
6765       scale_rtx = XEXP (addr, 1);
6766     }
6767   else if (GET_CODE (addr) == ASHIFT)
6768     {
6769       rtx tmp;
6770
6771       /* We're called for lea too, which implements ashift on occasion.  */
6772       index = XEXP (addr, 0);
6773       tmp = XEXP (addr, 1);
6774       if (!CONST_INT_P (tmp))
6775         return 0;
6776       scale = INTVAL (tmp);
6777       if ((unsigned HOST_WIDE_INT) scale > 3)
6778         return 0;
6779       scale = 1 << scale;
6780       retval = -1;
6781     }
6782   else
6783     disp = addr;                        /* displacement */
6784
6785   /* Extract the integral value of scale.  */
6786   if (scale_rtx)
6787     {
6788       if (!CONST_INT_P (scale_rtx))
6789         return 0;
6790       scale = INTVAL (scale_rtx);
6791     }
6792
6793   base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
6794   index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
6795
6796   /* Allow arg pointer and stack pointer as index if there is not scaling.  */
6797   if (base_reg && index_reg && scale == 1
6798       && (index_reg == arg_pointer_rtx
6799           || index_reg == frame_pointer_rtx
6800           || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
6801     {
6802       rtx tmp;
6803       tmp = base, base = index, index = tmp;
6804       tmp = base_reg, base_reg = index_reg, index_reg = tmp;
6805     }
6806
6807   /* Special case: %ebp cannot be encoded as a base without a displacement.  */
6808   if ((base_reg == hard_frame_pointer_rtx
6809        || base_reg == frame_pointer_rtx
6810        || base_reg == arg_pointer_rtx) && !disp)
6811     disp = const0_rtx;
6812
6813   /* Special case: on K6, [%esi] makes the instruction vector decoded.
6814      Avoid this by transforming to [%esi+0].  */
6815   if (TARGET_K6 && !optimize_size
6816       && base_reg && !index_reg && !disp
6817       && REG_P (base_reg)
6818       && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
6819     disp = const0_rtx;
6820
6821   /* Special case: encode reg+reg instead of reg*2.  */
6822   if (!base && index && scale && scale == 2)
6823     base = index, base_reg = index_reg, scale = 1;
6824
6825   /* Special case: scaling cannot be encoded without base or displacement.  */
6826   if (!base && !disp && index && scale != 1)
6827     disp = const0_rtx;
6828
6829   out->base = base;
6830   out->index = index;
6831   out->disp = disp;
6832   out->scale = scale;
6833   out->seg = seg;
6834
6835   return retval;
6836 }
6837 \f
6838 /* Return cost of the memory address x.
6839    For i386, it is better to use a complex address than let gcc copy
6840    the address into a reg and make a new pseudo.  But not if the address
6841    requires to two regs - that would mean more pseudos with longer
6842    lifetimes.  */
6843 static int
6844 ix86_address_cost (rtx x)
6845 {
6846   struct ix86_address parts;
6847   int cost = 1;
6848   int ok = ix86_decompose_address (x, &parts);
6849
6850   gcc_assert (ok);
6851
6852   if (parts.base && GET_CODE (parts.base) == SUBREG)
6853     parts.base = SUBREG_REG (parts.base);
6854   if (parts.index && GET_CODE (parts.index) == SUBREG)
6855     parts.index = SUBREG_REG (parts.index);
6856
6857   /* Attempt to minimize number of registers in the address.  */
6858   if ((parts.base
6859        && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
6860       || (parts.index
6861           && (!REG_P (parts.index)
6862               || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
6863     cost++;
6864
6865   if (parts.base
6866       && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
6867       && parts.index
6868       && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
6869       && parts.base != parts.index)
6870     cost++;
6871
6872   /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
6873      since it's predecode logic can't detect the length of instructions
6874      and it degenerates to vector decoded.  Increase cost of such
6875      addresses here.  The penalty is minimally 2 cycles.  It may be worthwhile
6876      to split such addresses or even refuse such addresses at all.
6877
6878      Following addressing modes are affected:
6879       [base+scale*index]
6880       [scale*index+disp]
6881       [base+index]
6882
6883      The first and last case  may be avoidable by explicitly coding the zero in
6884      memory address, but I don't have AMD-K6 machine handy to check this
6885      theory.  */
6886
6887   if (TARGET_K6
6888       && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
6889           || (parts.disp && !parts.base && parts.index && parts.scale != 1)
6890           || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
6891     cost += 10;
6892
6893   return cost;
6894 }
6895 \f
6896 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
6897    this is used for to form addresses to local data when -fPIC is in
6898    use.  */
6899
6900 static bool
6901 darwin_local_data_pic (rtx disp)
6902 {
6903   if (GET_CODE (disp) == MINUS)
6904     {
6905       if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
6906           || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
6907         if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
6908           {
6909             const char *sym_name = XSTR (XEXP (disp, 1), 0);
6910             if (! strcmp (sym_name, "<pic base>"))
6911               return true;
6912           }
6913     }
6914
6915   return false;
6916 }
6917
6918 /* Determine if a given RTX is a valid constant.  We already know this
6919    satisfies CONSTANT_P.  */
6920
6921 bool
6922 legitimate_constant_p (rtx x)
6923 {
6924   switch (GET_CODE (x))
6925     {
6926     case CONST:
6927       x = XEXP (x, 0);
6928
6929       if (GET_CODE (x) == PLUS)
6930         {
6931           if (!CONST_INT_P (XEXP (x, 1)))
6932             return false;
6933           x = XEXP (x, 0);
6934         }
6935
6936       if (TARGET_MACHO && darwin_local_data_pic (x))
6937         return true;
6938
6939       /* Only some unspecs are valid as "constants".  */
6940       if (GET_CODE (x) == UNSPEC)
6941         switch (XINT (x, 1))
6942           {
6943           case UNSPEC_GOT:
6944           case UNSPEC_GOTOFF:
6945           case UNSPEC_PLTOFF:
6946             return TARGET_64BIT;
6947           case UNSPEC_TPOFF:
6948           case UNSPEC_NTPOFF:
6949             x = XVECEXP (x, 0, 0);
6950             return (GET_CODE (x) == SYMBOL_REF
6951                     && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6952           case UNSPEC_DTPOFF:
6953             x = XVECEXP (x, 0, 0);
6954             return (GET_CODE (x) == SYMBOL_REF
6955                     && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
6956           default:
6957             return false;
6958           }
6959
6960       /* We must have drilled down to a symbol.  */
6961       if (GET_CODE (x) == LABEL_REF)
6962         return true;
6963       if (GET_CODE (x) != SYMBOL_REF)
6964         return false;
6965       /* FALLTHRU */
6966
6967     case SYMBOL_REF:
6968       /* TLS symbols are never valid.  */
6969       if (SYMBOL_REF_TLS_MODEL (x))
6970         return false;
6971
6972       /* DLLIMPORT symbols are never valid.  */
6973       if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
6974           && SYMBOL_REF_DLLIMPORT_P (x))
6975         return false;
6976       break;
6977
6978     case CONST_DOUBLE:
6979       if (GET_MODE (x) == TImode
6980           && x != CONST0_RTX (TImode)
6981           && !TARGET_64BIT)
6982         return false;
6983       break;
6984
6985     case CONST_VECTOR:
6986       if (x == CONST0_RTX (GET_MODE (x)))
6987         return true;
6988       return false;
6989
6990     default:
6991       break;
6992     }
6993
6994   /* Otherwise we handle everything else in the move patterns.  */
6995   return true;
6996 }
6997
6998 /* Determine if it's legal to put X into the constant pool.  This
6999    is not possible for the address of thread-local symbols, which
7000    is checked above.  */
7001
7002 static bool
7003 ix86_cannot_force_const_mem (rtx x)
7004 {
7005   /* We can always put integral constants and vectors in memory.  */
7006   switch (GET_CODE (x))
7007     {
7008     case CONST_INT:
7009     case CONST_DOUBLE:
7010     case CONST_VECTOR:
7011       return false;
7012
7013     default:
7014       break;
7015     }
7016   return !legitimate_constant_p (x);
7017 }
7018
7019 /* Determine if a given RTX is a valid constant address.  */
7020
7021 bool
7022 constant_address_p (rtx x)
7023 {
7024   return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
7025 }
7026
7027 /* Nonzero if the constant value X is a legitimate general operand
7028    when generating PIC code.  It is given that flag_pic is on and
7029    that X satisfies CONSTANT_P or is a CONST_DOUBLE.  */
7030
7031 bool
7032 legitimate_pic_operand_p (rtx x)
7033 {
7034   rtx inner;
7035
7036   switch (GET_CODE (x))
7037     {
7038     case CONST:
7039       inner = XEXP (x, 0);
7040       if (GET_CODE (inner) == PLUS
7041           && CONST_INT_P (XEXP (inner, 1)))
7042         inner = XEXP (inner, 0);
7043
7044       /* Only some unspecs are valid as "constants".  */
7045       if (GET_CODE (inner) == UNSPEC)
7046         switch (XINT (inner, 1))
7047           {
7048           case UNSPEC_GOT:
7049           case UNSPEC_GOTOFF:
7050           case UNSPEC_PLTOFF:
7051             return TARGET_64BIT;
7052           case UNSPEC_TPOFF:
7053             x = XVECEXP (inner, 0, 0);
7054             return (GET_CODE (x) == SYMBOL_REF
7055                     && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
7056           default:
7057             return false;
7058           }
7059       /* FALLTHRU */
7060
7061     case SYMBOL_REF:
7062     case LABEL_REF:
7063       return legitimate_pic_address_disp_p (x);
7064
7065     default:
7066       return true;
7067     }
7068 }
7069
7070 /* Determine if a given CONST RTX is a valid memory displacement
7071    in PIC mode.  */
7072
7073 int
7074 legitimate_pic_address_disp_p (rtx disp)
7075 {
7076   bool saw_plus;
7077
7078   /* In 64bit mode we can allow direct addresses of symbols and labels
7079      when they are not dynamic symbols.  */
7080   if (TARGET_64BIT)
7081     {
7082       rtx op0 = disp, op1;
7083
7084       switch (GET_CODE (disp))
7085         {
7086         case LABEL_REF:
7087           return true;
7088
7089         case CONST:
7090           if (GET_CODE (XEXP (disp, 0)) != PLUS)
7091             break;
7092           op0 = XEXP (XEXP (disp, 0), 0);
7093           op1 = XEXP (XEXP (disp, 0), 1);
7094           if (!CONST_INT_P (op1)
7095               || INTVAL (op1) >= 16*1024*1024
7096               || INTVAL (op1) < -16*1024*1024)
7097             break;
7098           if (GET_CODE (op0) == LABEL_REF)
7099             return true;
7100           if (GET_CODE (op0) != SYMBOL_REF)
7101             break;
7102           /* FALLTHRU */
7103
7104         case SYMBOL_REF:
7105           /* TLS references should always be enclosed in UNSPEC.  */
7106           if (SYMBOL_REF_TLS_MODEL (op0))
7107             return false;
7108           if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
7109               && ix86_cmodel != CM_LARGE_PIC)
7110             return true;
7111           break;
7112
7113         default:
7114           break;
7115         }
7116     }
7117   if (GET_CODE (disp) != CONST)
7118     return 0;
7119   disp = XEXP (disp, 0);
7120
7121   if (TARGET_64BIT)
7122     {
7123       /* We are unsafe to allow PLUS expressions.  This limit allowed distance
7124          of GOT tables.  We should not need these anyway.  */
7125       if (GET_CODE (disp) != UNSPEC
7126           || (XINT (disp, 1) != UNSPEC_GOTPCREL
7127               && XINT (disp, 1) != UNSPEC_GOTOFF
7128               && XINT (disp, 1) != UNSPEC_PLTOFF))
7129         return 0;
7130
7131       if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
7132           && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
7133         return 0;
7134       return 1;
7135     }
7136
7137   saw_plus = false;
7138   if (GET_CODE (disp) == PLUS)
7139     {
7140       if (!CONST_INT_P (XEXP (disp, 1)))
7141         return 0;
7142       disp = XEXP (disp, 0);
7143       saw_plus = true;
7144     }
7145
7146   if (TARGET_MACHO && darwin_local_data_pic (disp))
7147     return 1;
7148
7149   if (GET_CODE (disp) != UNSPEC)
7150     return 0;
7151
7152   switch (XINT (disp, 1))
7153     {
7154     case UNSPEC_GOT:
7155       if (saw_plus)
7156         return false;
7157       /* We need to check for both symbols and labels because VxWorks loads
7158          text labels with @GOT rather than @GOTOFF.  See gotoff_operand for
7159          details.  */
7160       return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
7161               || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
7162     case UNSPEC_GOTOFF:
7163       /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
7164          While ABI specify also 32bit relocation but we don't produce it in
7165          small PIC model at all.  */
7166       if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
7167            || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
7168           && !TARGET_64BIT)
7169         return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
7170       return false;
7171     case UNSPEC_GOTTPOFF:
7172     case UNSPEC_GOTNTPOFF:
7173     case UNSPEC_INDNTPOFF:
7174       if (saw_plus)
7175         return false;
7176       disp = XVECEXP (disp, 0, 0);
7177       return (GET_CODE (disp) == SYMBOL_REF
7178               && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
7179     case UNSPEC_NTPOFF:
7180       disp = XVECEXP (disp, 0, 0);
7181       return (GET_CODE (disp) == SYMBOL_REF
7182               && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
7183     case UNSPEC_DTPOFF:
7184       disp = XVECEXP (disp, 0, 0);
7185       return (GET_CODE (disp) == SYMBOL_REF
7186               && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
7187     }
7188
7189   return 0;
7190 }
7191
7192 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
7193    memory address for an instruction.  The MODE argument is the machine mode
7194    for the MEM expression that wants to use this address.
7195
7196    It only recognizes address in canonical form.  LEGITIMIZE_ADDRESS should
7197    convert common non-canonical forms to canonical form so that they will
7198    be recognized.  */
7199
7200 int
7201 legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
7202                       rtx addr, int strict)
7203 {
7204   struct ix86_address parts;
7205   rtx base, index, disp;
7206   HOST_WIDE_INT scale;
7207   const char *reason = NULL;
7208   rtx reason_rtx = NULL_RTX;
7209
7210   if (ix86_decompose_address (addr, &parts) <= 0)
7211     {
7212       reason = "decomposition failed";
7213       goto report_error;
7214     }
7215
7216   base = parts.base;
7217   index = parts.index;
7218   disp = parts.disp;
7219   scale = parts.scale;
7220
7221   /* Validate base register.
7222
7223      Don't allow SUBREG's that span more than a word here.  It can lead to spill
7224      failures when the base is one word out of a two word structure, which is
7225      represented internally as a DImode int.  */
7226
7227   if (base)
7228     {
7229       rtx reg;
7230       reason_rtx = base;
7231
7232       if (REG_P (base))
7233         reg = base;
7234       else if (GET_CODE (base) == SUBREG
7235                && REG_P (SUBREG_REG (base))
7236                && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
7237                   <= UNITS_PER_WORD)
7238         reg = SUBREG_REG (base);
7239       else
7240         {
7241           reason = "base is not a register";
7242           goto report_error;
7243         }
7244
7245       if (GET_MODE (base) != Pmode)
7246         {
7247           reason = "base is not in Pmode";
7248           goto report_error;
7249         }
7250
7251       if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
7252           || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
7253         {
7254           reason = "base is not valid";
7255           goto report_error;
7256         }
7257     }
7258
7259   /* Validate index register.
7260
7261      Don't allow SUBREG's that span more than a word here -- same as above.  */
7262
7263   if (index)
7264     {
7265       rtx reg;
7266       reason_rtx = index;
7267
7268       if (REG_P (index))
7269         reg = index;
7270       else if (GET_CODE (index) == SUBREG
7271                && REG_P (SUBREG_REG (index))
7272                && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
7273                   <= UNITS_PER_WORD)
7274         reg = SUBREG_REG (index);
7275       else
7276         {
7277           reason = "index is not a register";
7278           goto report_error;
7279         }
7280
7281       if (GET_MODE (index) != Pmode)
7282         {
7283           reason = "index is not in Pmode";
7284           goto report_error;
7285         }
7286
7287       if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
7288           || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
7289         {
7290           reason = "index is not valid";
7291           goto report_error;
7292         }
7293     }
7294
7295   /* Validate scale factor.  */
7296   if (scale != 1)
7297     {
7298       reason_rtx = GEN_INT (scale);
7299       if (!index)
7300         {
7301           reason = "scale without index";
7302           goto report_error;
7303         }
7304
7305       if (scale != 2 && scale != 4 && scale != 8)
7306         {
7307           reason = "scale is not a valid multiplier";
7308           goto report_error;
7309         }
7310     }
7311
7312   /* Validate displacement.  */
7313   if (disp)
7314     {
7315       reason_rtx = disp;
7316
7317       if (GET_CODE (disp) == CONST
7318           && GET_CODE (XEXP (disp, 0)) == UNSPEC)
7319         switch (XINT (XEXP (disp, 0), 1))
7320           {
7321           /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
7322              used.  While ABI specify also 32bit relocations, we don't produce
7323              them at all and use IP relative instead.  */
7324           case UNSPEC_GOT:
7325           case UNSPEC_GOTOFF:
7326             gcc_assert (flag_pic);
7327             if (!TARGET_64BIT)
7328               goto is_legitimate_pic;
7329             reason = "64bit address unspec";
7330             goto report_error;
7331
7332           case UNSPEC_GOTPCREL:
7333             gcc_assert (flag_pic);
7334             goto is_legitimate_pic;
7335
7336           case UNSPEC_GOTTPOFF:
7337           case UNSPEC_GOTNTPOFF:
7338           case UNSPEC_INDNTPOFF:
7339           case UNSPEC_NTPOFF:
7340           case UNSPEC_DTPOFF:
7341             break;
7342
7343           default:
7344             reason = "invalid address unspec";
7345             goto report_error;
7346           }
7347
7348       else if (SYMBOLIC_CONST (disp)
7349                && (flag_pic
7350                    || (TARGET_MACHO
7351 #if TARGET_MACHO
7352                        && MACHOPIC_INDIRECT
7353                        && !machopic_operand_p (disp)
7354 #endif
7355                )))
7356         {
7357
7358         is_legitimate_pic:
7359           if (TARGET_64BIT && (index || base))
7360             {
7361               /* foo@dtpoff(%rX) is ok.  */
7362               if (GET_CODE (disp) != CONST
7363                   || GET_CODE (XEXP (disp, 0)) != PLUS
7364                   || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
7365                   || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
7366                   || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
7367                       && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
7368                 {
7369                   reason = "non-constant pic memory reference";
7370                   goto report_error;
7371                 }
7372             }
7373           else if (! legitimate_pic_address_disp_p (disp))
7374             {
7375               reason = "displacement is an invalid pic construct";
7376               goto report_error;
7377             }
7378
7379           /* This code used to verify that a symbolic pic displacement
7380              includes the pic_offset_table_rtx register.
7381
7382              While this is good idea, unfortunately these constructs may
7383              be created by "adds using lea" optimization for incorrect
7384              code like:
7385
7386              int a;
7387              int foo(int i)
7388                {
7389                  return *(&a+i);
7390                }
7391
7392              This code is nonsensical, but results in addressing
7393              GOT table with pic_offset_table_rtx base.  We can't
7394              just refuse it easily, since it gets matched by
7395              "addsi3" pattern, that later gets split to lea in the
7396              case output register differs from input.  While this
7397              can be handled by separate addsi pattern for this case
7398              that never results in lea, this seems to be easier and
7399              correct fix for crash to disable this test.  */
7400         }
7401       else if (GET_CODE (disp) != LABEL_REF
7402                && !CONST_INT_P (disp)
7403                && (GET_CODE (disp) != CONST
7404                    || !legitimate_constant_p (disp))
7405                && (GET_CODE (disp) != SYMBOL_REF
7406                    || !legitimate_constant_p (disp)))
7407         {
7408           reason = "displacement is not constant";
7409           goto report_error;
7410         }
7411       else if (TARGET_64BIT
7412                && !x86_64_immediate_operand (disp, VOIDmode))
7413         {
7414           reason = "displacement is out of range";
7415           goto report_error;
7416         }
7417     }
7418
7419   /* Everything looks valid.  */
7420   return TRUE;
7421
7422  report_error:
7423   return FALSE;
7424 }
7425 \f
7426 /* Return a unique alias set for the GOT.  */
7427
7428 static alias_set_type
7429 ix86_GOT_alias_set (void)
7430 {
7431   static alias_set_type set = -1;
7432   if (set == -1)
7433     set = new_alias_set ();
7434   return set;
7435 }
7436
7437 /* Return a legitimate reference for ORIG (an address) using the
7438    register REG.  If REG is 0, a new pseudo is generated.
7439
7440    There are two types of references that must be handled:
7441
7442    1. Global data references must load the address from the GOT, via
7443       the PIC reg.  An insn is emitted to do this load, and the reg is
7444       returned.
7445
7446    2. Static data references, constant pool addresses, and code labels
7447       compute the address as an offset from the GOT, whose base is in
7448       the PIC reg.  Static data objects have SYMBOL_FLAG_LOCAL set to
7449       differentiate them from global data objects.  The returned
7450       address is the PIC reg + an unspec constant.
7451
7452    GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
7453    reg also appears in the address.  */
7454
7455 static rtx
7456 legitimize_pic_address (rtx orig, rtx reg)
7457 {
7458   rtx addr = orig;
7459   rtx new_rtx = orig;
7460   rtx base;
7461
7462 #if TARGET_MACHO
7463   if (TARGET_MACHO && !TARGET_64BIT)
7464     {
7465       if (reg == 0)
7466         reg = gen_reg_rtx (Pmode);
7467       /* Use the generic Mach-O PIC machinery.  */
7468       return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
7469     }
7470 #endif
7471
7472   if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
7473     new_rtx = addr;
7474   else if (TARGET_64BIT
7475            && ix86_cmodel != CM_SMALL_PIC
7476            && gotoff_operand (addr, Pmode))
7477     {
7478       rtx tmpreg;
7479       /* This symbol may be referenced via a displacement from the PIC
7480          base address (@GOTOFF).  */
7481
7482       if (reload_in_progress)
7483         df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7484       if (GET_CODE (addr) == CONST)
7485         addr = XEXP (addr, 0);
7486       if (GET_CODE (addr) == PLUS)
7487           {
7488             new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
7489                                       UNSPEC_GOTOFF);
7490             new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
7491           }
7492         else
7493           new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7494       new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7495       if (!reg)
7496         tmpreg = gen_reg_rtx (Pmode);
7497       else
7498         tmpreg = reg;
7499       emit_move_insn (tmpreg, new_rtx);
7500
7501       if (reg != 0)
7502         {
7503           new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
7504                                          tmpreg, 1, OPTAB_DIRECT);
7505           new_rtx = reg;
7506         }
7507       else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
7508     }
7509   else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
7510     {
7511       /* This symbol may be referenced via a displacement from the PIC
7512          base address (@GOTOFF).  */
7513
7514       if (reload_in_progress)
7515         df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7516       if (GET_CODE (addr) == CONST)
7517         addr = XEXP (addr, 0);
7518       if (GET_CODE (addr) == PLUS)
7519           {
7520             new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
7521                                       UNSPEC_GOTOFF);
7522             new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
7523           }
7524         else
7525           new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7526       new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7527       new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
7528
7529       if (reg != 0)
7530         {
7531           emit_move_insn (reg, new_rtx);
7532           new_rtx = reg;
7533         }
7534     }
7535   else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
7536            /* We can't use @GOTOFF for text labels on VxWorks;
7537               see gotoff_operand.  */
7538            || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
7539     {
7540       /* Given that we've already handled dllimport variables separately
7541          in legitimize_address, and all other variables should satisfy
7542          legitimate_pic_address_disp_p, we should never arrive here.  */
7543       gcc_assert (!TARGET_64BIT_MS_ABI);
7544
7545       if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
7546         {
7547           new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
7548           new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7549           new_rtx = gen_const_mem (Pmode, new_rtx);
7550           set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
7551
7552           if (reg == 0)
7553             reg = gen_reg_rtx (Pmode);
7554           /* Use directly gen_movsi, otherwise the address is loaded
7555              into register for CSE.  We don't want to CSE this addresses,
7556              instead we CSE addresses from the GOT table, so skip this.  */
7557           emit_insn (gen_movsi (reg, new_rtx));
7558           new_rtx = reg;
7559         }
7560       else
7561         {
7562           /* This symbol must be referenced via a load from the
7563              Global Offset Table (@GOT).  */
7564
7565           if (reload_in_progress)
7566             df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7567           new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
7568           new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7569           if (TARGET_64BIT)
7570             new_rtx = force_reg (Pmode, new_rtx);
7571           new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
7572           new_rtx = gen_const_mem (Pmode, new_rtx);
7573           set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
7574
7575           if (reg == 0)
7576             reg = gen_reg_rtx (Pmode);
7577           emit_move_insn (reg, new_rtx);
7578           new_rtx = reg;
7579         }
7580     }
7581   else
7582     {
7583       if (CONST_INT_P (addr)
7584           && !x86_64_immediate_operand (addr, VOIDmode))
7585         {
7586           if (reg)
7587             {
7588               emit_move_insn (reg, addr);
7589               new_rtx = reg;
7590             }
7591           else
7592             new_rtx = force_reg (Pmode, addr);
7593         }
7594       else if (GET_CODE (addr) == CONST)
7595         {
7596           addr = XEXP (addr, 0);
7597
7598           /* We must match stuff we generate before.  Assume the only
7599              unspecs that can get here are ours.  Not that we could do
7600              anything with them anyway....  */
7601           if (GET_CODE (addr) == UNSPEC
7602               || (GET_CODE (addr) == PLUS
7603                   && GET_CODE (XEXP (addr, 0)) == UNSPEC))
7604             return orig;
7605           gcc_assert (GET_CODE (addr) == PLUS);
7606         }
7607       if (GET_CODE (addr) == PLUS)
7608         {
7609           rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
7610
7611           /* Check first to see if this is a constant offset from a @GOTOFF
7612              symbol reference.  */
7613           if (gotoff_operand (op0, Pmode)
7614               && CONST_INT_P (op1))
7615             {
7616               if (!TARGET_64BIT)
7617                 {
7618                   if (reload_in_progress)
7619                     df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7620                   new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
7621                                             UNSPEC_GOTOFF);
7622                   new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
7623                   new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7624                   new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
7625
7626                   if (reg != 0)
7627                     {
7628                       emit_move_insn (reg, new_rtx);
7629                       new_rtx = reg;
7630                     }
7631                 }
7632               else
7633                 {
7634                   if (INTVAL (op1) < -16*1024*1024
7635                       || INTVAL (op1) >= 16*1024*1024)
7636                     {
7637                       if (!x86_64_immediate_operand (op1, Pmode))
7638                         op1 = force_reg (Pmode, op1);
7639                       new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
7640                     }
7641                 }
7642             }
7643           else
7644             {
7645               base = legitimize_pic_address (XEXP (addr, 0), reg);
7646               new_rtx  = legitimize_pic_address (XEXP (addr, 1),
7647                                                  base == reg ? NULL_RTX : reg);
7648
7649               if (CONST_INT_P (new_rtx))
7650                 new_rtx = plus_constant (base, INTVAL (new_rtx));
7651               else
7652                 {
7653                   if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
7654                     {
7655                       base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
7656                       new_rtx = XEXP (new_rtx, 1);
7657                     }
7658                   new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
7659                 }
7660             }
7661         }
7662     }
7663   return new_rtx;
7664 }
7665 \f
7666 /* Return an rtx for the thread pointer, an UNSPEC_TP of const0_rtx.  If
7667    TO_REG is nonzero, copy it into a new register and return that.  */
7668
7669 static rtx
7670 get_thread_pointer (int to_reg)
7671 {
7672   rtx thread_ptr, copy;
7673
7674   thread_ptr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
7675   if (to_reg)
7676     {
7677       copy = gen_reg_rtx (Pmode);
7678       emit_insn (gen_rtx_SET (VOIDmode, copy, thread_ptr));
7679       return copy;
7680     }
7681   return thread_ptr;
7682 }
7683
7684 /* A subroutine of legitimize_address and ix86_expand_move.  FOR_MOV is
7685    false if we expect this to be used for a memory address and true if
7686    we expect to load the address into a register.

        X is the thread-local symbol whose address is wanted and MODEL is the
        TLS access model to use for it.  Any insns that are needed are emitted
        here.  The value returned is a register holding the address, except
        that the initial-exec and local-exec models return a (plus BASE OFF)
        expression when TARGET_64BIT or TARGET_ANY_GNU_TLS holds.  Any other
        MODEL is treated as unreachable.  */
7687
7688 static rtx
7689 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
7690 {
7691   rtx dest, base, off, pic, tp;
7692   int type;
7693
7694   switch (model)
7695     {
7696     case TLS_MODEL_GLOBAL_DYNAMIC:
7697       dest = gen_reg_rtx (Pmode);
           /* TP is only wanted for GNU2 TLS, where it is added to the value
              the tls_global_dynamic pattern produces (see below).  */
7698       tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7699
7700       if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7701         {
7702           rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
7703
               /* Collect the call in a sequence of its own so that it can be
                  handed to emit_libcall_block as a unit.  */
7704           start_sequence ();
7705           emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
7706           insns = get_insns ();
7707           end_sequence ();
7708
7709           CONST_OR_PURE_CALL_P (insns) = 1;
7710           emit_libcall_block (insns, dest, rax, x);
7711         }
7712       else if (TARGET_64BIT && TARGET_GNU2_TLS)
7713         emit_insn (gen_tls_global_dynamic_64 (dest, x));
7714       else
7715         emit_insn (gen_tls_global_dynamic_32 (dest, x));
7716
7717       if (TARGET_GNU2_TLS)
7718         {
7719           dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
7720
               /* The insn just emitted by force_reg is noted as equivalent
                  to X.  */
7721           set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7722         }
7723       break;
7724
7725     case TLS_MODEL_LOCAL_DYNAMIC:
7726       base = gen_reg_rtx (Pmode);
7727       tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7728
7729       if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7730         {
7731           rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, note;
7732
7733           start_sequence ();
7734           emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
7735           insns = get_insns ();
7736           end_sequence ();
7737
               /* The equivalence passed to emit_libcall_block is a two-element
                  list: ix86_tls_get_addr () followed by const0_rtx.  */
7738           note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
7739           note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
7740           CONST_OR_PURE_CALL_P (insns) = 1;
7741           emit_libcall_block (insns, base, rax, note);
7742         }
7743       else if (TARGET_64BIT && TARGET_GNU2_TLS)
7744         emit_insn (gen_tls_local_dynamic_base_64 (base));
7745       else
7746         emit_insn (gen_tls_local_dynamic_base_32 (base));
7747
7748       if (TARGET_GNU2_TLS)
7749         {
               /* NOTE: this X shadows the parameter X inside this block.  */
7750           rtx x = ix86_tls_module_base ();
7751
7752           set_unique_reg_note (get_last_insn (), REG_EQUIV,
7753                                gen_rtx_MINUS (Pmode, x, tp));
7754         }
7755
           /* The address is BASE plus the UNSPEC_DTPOFF offset of X.  */
7756       off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
7757       off = gen_rtx_CONST (Pmode, off);
7758
7759       dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
7760
7761       if (TARGET_GNU2_TLS)
7762         {
7763           dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
7764
7765           set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7766         }
7767
7768       break;
7769
7770     case TLS_MODEL_INITIAL_EXEC:
           /* Pick PIC, the register (if any) that the offset's memory slot
              is addressed from, and TYPE, the unspec naming that slot.  */
7771       if (TARGET_64BIT)
7772         {
7773           pic = NULL;
7774           type = UNSPEC_GOTNTPOFF;
7775         }
7776       else if (flag_pic)
7777         {
7778           if (reload_in_progress)
7779             df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7780           pic = pic_offset_table_rtx;
7781           type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
7782         }
7783       else if (!TARGET_ANY_GNU_TLS)
7784         {
7785           pic = gen_reg_rtx (Pmode);
7786           emit_insn (gen_set_got (pic));
7787           type = UNSPEC_GOTTPOFF;
7788         }
7789       else
7790         {
7791           pic = NULL;
7792           type = UNSPEC_INDNTPOFF;
7793         }
7794
           /* OFF becomes a constant MEM, in the GOT alias set, whose address
              is the unspec, relative to PIC when there is one.  */
7795       off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
7796       off = gen_rtx_CONST (Pmode, off);
7797       if (pic)
7798         off = gen_rtx_PLUS (Pmode, pic, off);
7799       off = gen_const_mem (Pmode, off);
7800       set_mem_alias_set (off, ix86_GOT_alias_set ());
7801
7802       if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7803         {
               /* The thread pointer is forced into a register if FOR_MOV or
                  if TARGET_TLS_DIRECT_SEG_REFS is off.  */
7804           base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7805           off = force_reg (Pmode, off);
7806           return gen_rtx_PLUS (Pmode, base, off);
7807         }
7808       else
7809         {
               /* NOTE(review): presumably DEST = BASE - OFF, going by the
                  name and operand order of gen_subsi3 -- confirm.  */
7810           base = get_thread_pointer (true);
7811           dest = gen_reg_rtx (Pmode);
7812           emit_insn (gen_subsi3 (dest, base, off));
7813         }
7814       break;
7815
7816     case TLS_MODEL_LOCAL_EXEC:
           /* Here OFF is the constant unspec itself; nothing is loaded from
              memory.  */
7817       off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
7818                             (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7819                             ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
7820       off = gen_rtx_CONST (Pmode, off);
7821
7822       if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7823         {
7824           base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7825           return gen_rtx_PLUS (Pmode, base, off);
7826         }
7827       else
7828         {
7829           base = get_thread_pointer (true);
7830           dest = gen_reg_rtx (Pmode);
7831           emit_insn (gen_subsi3 (dest, base, off));
7832         }
7833       break;
7834
7835     default:
7836       gcc_unreachable ();
7837     }
7838
7839   return dest;
7840 }
7841
7842 /* Hash table mapping a dllimport'ed decl to the artificial VAR_DECL that
7843    stands for its __imp_ symbol.  Filled in by get_dllimport_decl.  */
7844
7845 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
7846   htab_t dllimport_map;
7847
7848 /* Return the __imp_ VAR_DECL for DECL, creating it on first request.  */
7849
7850 static tree
7851 get_dllimport_decl (tree decl)
7852 {
7853   struct tree_map *entry, key;
7854   void **slot;
7855   const char *asm_name, *prefix, *imp_id;
7856   size_t asm_len, prefix_len;
7857   char *buf;
7858   tree imp_decl;
7859   rtx sym, mem;
7860
7861   if (!dllimport_map)
7862     dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
7863
7864   key.hash = htab_hash_pointer (decl);
7865   key.base.from = decl;
7866   slot = htab_find_slot_with_hash (dllimport_map, &key, key.hash, INSERT);
7867   entry = (struct tree_map *) *slot;
7868   if (entry != NULL)
7869     return entry->to;
7870
7871   entry = GGC_NEW (struct tree_map);
7872   *slot = entry;
7873   entry->hash = key.hash;
7874   entry->base.from = decl;
7875   imp_decl = build_decl (VAR_DECL, NULL, ptr_type_node);
7876   entry->to = imp_decl;
7877   DECL_ARTIFICIAL (imp_decl) = 1;
7878   DECL_IGNORED_P (imp_decl) = 1;
7879   DECL_EXTERNAL (imp_decl) = 1;
7880   TREE_READONLY (imp_decl) = 1;
7881
7882   /* Name: an __imp_ prefix (shorter for fastcall) plus the stripped name.  */
7883   asm_name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
7884   asm_name = targetm.strip_name_encoding (asm_name);
7885   prefix = asm_name[0] == FASTCALL_PREFIX ? "*__imp_" : "*__imp__";
7886   asm_len = strlen (asm_name);
7887   prefix_len = strlen (prefix);
7888   buf = (char *) alloca (prefix_len + asm_len + 1);
7889   memcpy (buf, prefix, prefix_len);
7890   memcpy (buf + prefix_len, asm_name, asm_len + 1);
7891   imp_id = ggc_alloc_string (buf, prefix_len + asm_len);
7892   sym = gen_rtx_SYMBOL_REF (Pmode, imp_id);
7893   SET_SYMBOL_REF_DECL (sym, imp_decl);
7894   SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_LOCAL;
7895   mem = gen_const_mem (Pmode, sym);
7896   set_mem_alias_set (mem, ix86_GOT_alias_set ());
7897   SET_DECL_RTL (imp_decl, mem);
7898   SET_DECL_ASSEMBLER_NAME (imp_decl, get_identifier (imp_id));
7899   return imp_decl;
7900 }
7901
7902 /* Return the rtl of the dllimport decl that corresponds to SYMBOL, which
7903    must carry a decl.  If WANT_REG, the result is forced into a register.  */
7904
7905 static rtx
7906 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
7907 {
7908   tree sym_decl, imp_decl;
7909   rtx imp_rtl;
7910
7911   sym_decl = SYMBOL_REF_DECL (symbol);
7912   gcc_assert (sym_decl);
7913   imp_decl = get_dllimport_decl (sym_decl);
7914
7915   imp_rtl = DECL_RTL (imp_decl);
7916
7917   return want_reg ? force_reg (Pmode, imp_rtl) : imp_rtl;
7918 }
7919
7920 /* Try machine-dependent ways of modifying an illegitimate address
7921    to be legitimate.  If we find one, return the new, valid address.
7922    This macro is used in only one place: `memory_address' in explow.c.
7923
7924    OLDX is the address as it was before break_out_memory_refs was called.
7925    In some cases it is useful to look at this to decide what needs to be done.
7926
7927    MODE and WIN are passed so that this macro can use
7928    GO_IF_LEGITIMATE_ADDRESS.
7929
7930    It is always safe for this macro to do nothing.  It exists to recognize
7931    opportunities to optimize the output.
7932
7933    For the 80386, we handle X+REG by loading X into a register R and
7934    using R+REG.  R will go in a general reg and indexing will be used.
7935    However, if REG is a broken-out memory address or multiplication,
7936    nothing needs to be done because REG can certainly go in a general reg.
7937
7938    When -fpic is used, special handling is needed for symbolic references.
7939    See comments by legitimize_pic_address in i386.c for details.

        X is the address to work on; OLDX is not used here.  The value
        returned is the replacement address, or X itself (possibly with its
        operands rewritten in place) when nothing more can be done.

        NOTE(review): the wording above ("this macro", WIN) looks like a
        leftover from a macro implementation; this function takes no WIN
        argument and tests legitimacy with legitimate_address_p.  */
7940
7941 rtx
7942 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
7943 {
7944   int changed = 0;
7945   unsigned log;
7946
       /* Thread-local symbols, alone or with an addend, are handed to
          legitimize_tls_address.  LOG holds the TLS model here; further down
          it is reused as a shift count.  */
7947   log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
7948   if (log)
7949     return legitimize_tls_address (x, (enum tls_model) log, false);
7950   if (GET_CODE (x) == CONST
7951       && GET_CODE (XEXP (x, 0)) == PLUS
7952       && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7953       && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
7954     {
7955       rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
7956                                       (enum tls_model) log, false);
7957       return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
7958     }
7959
       /* Likewise for dllimport'ed symbols, alone or with an addend.  */
7960   if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
7961     {
7962       if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
7963         return legitimize_dllimport_symbol (x, true);
7964       if (GET_CODE (x) == CONST
7965           && GET_CODE (XEXP (x, 0)) == PLUS
7966           && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7967           && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
7968         {
7969           rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
7970           return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
7971         }
7972     }
7973
7974   if (flag_pic && SYMBOLIC_CONST (x))
7975     return legitimize_pic_address (x, 0);
7976
7977   /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
7978   if (GET_CODE (x) == ASHIFT
7979       && CONST_INT_P (XEXP (x, 1))
7980       && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
7981     {
7982       changed = 1;
7983       log = INTVAL (XEXP (x, 1));
7984       x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
7985                         GEN_INT (1 << log));
7986     }
7987
7988   if (GET_CODE (x) == PLUS)
7989     {
7990       /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
7991
7992       if (GET_CODE (XEXP (x, 0)) == ASHIFT
7993           && CONST_INT_P (XEXP (XEXP (x, 0), 1))
7994           && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
7995         {
7996           changed = 1;
7997           log = INTVAL (XEXP (XEXP (x, 0), 1));
7998           XEXP (x, 0) = gen_rtx_MULT (Pmode,
7999                                       force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
8000                                       GEN_INT (1 << log));
8001         }
8002
8003       if (GET_CODE (XEXP (x, 1)) == ASHIFT
8004           && CONST_INT_P (XEXP (XEXP (x, 1), 1))
8005           && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
8006         {
8007           changed = 1;
8008           log = INTVAL (XEXP (XEXP (x, 1), 1));
8009           XEXP (x, 1) = gen_rtx_MULT (Pmode,
8010                                       force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
8011                                       GEN_INT (1 << log));
8012         }
8013
8014       /* Put multiply first if it isn't already.  */
8015       if (GET_CODE (XEXP (x, 1)) == MULT)
8016         {
8017           rtx tmp = XEXP (x, 0);
8018           XEXP (x, 0) = XEXP (x, 1);
8019           XEXP (x, 1) = tmp;
8020           changed = 1;
8021         }
8022
8023       /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
8024          into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
8025          created by virtual register instantiation, register elimination, and
8026          similar optimizations.  */
8027       if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
8028         {
8029           changed = 1;
8030           x = gen_rtx_PLUS (Pmode,
8031                             gen_rtx_PLUS (Pmode, XEXP (x, 0),
8032                                           XEXP (XEXP (x, 1), 0)),
8033                             XEXP (XEXP (x, 1), 1));
8034         }
8035
8036       /* Canonicalize
8037          (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
8038          into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
8039       else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
8040                && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
8041                && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
8042                && CONSTANT_P (XEXP (x, 1)))
8043         {
8044           rtx constant;
8045           rtx other = NULL_RTX;
8046
               /* Of the two constant terms, CONSTANT is one that is a
                  CONST_INT and OTHER is the remaining one.  If neither is a
                  CONST_INT, nothing is changed.  */
8047           if (CONST_INT_P (XEXP (x, 1)))
8048             {
8049               constant = XEXP (x, 1);
8050               other = XEXP (XEXP (XEXP (x, 0), 1), 1);
8051             }
8052           else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
8053             {
8054               constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
8055               other = XEXP (x, 1);
8056             }
8057           else
8058             constant = 0;
8059
8060           if (constant)
8061             {
8062               changed = 1;
8063               x = gen_rtx_PLUS (Pmode,
8064                                 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
8065                                               XEXP (XEXP (XEXP (x, 0), 1), 0)),
8066                                 plus_constant (other, INTVAL (constant)));
8067             }
8068         }
8069
8070       if (changed && legitimate_address_p (mode, x, FALSE))
8071         return x;
8072
           /* Still not legitimate: run any MULT operand through
              force_operand.  */
8073       if (GET_CODE (XEXP (x, 0)) == MULT)
8074         {
8075           changed = 1;
8076           XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
8077         }
8078
8079       if (GET_CODE (XEXP (x, 1)) == MULT)
8080         {
8081           changed = 1;
8082           XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
8083         }
8084
8085       if (changed
8086           && REG_P (XEXP (x, 1))
8087           && REG_P (XEXP (x, 0)))
8088         return x;
8089
8090       if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
8091         {
8092           changed = 1;
8093           x = legitimize_pic_address (x, 0);
8094         }
8095
8096       if (changed && legitimate_address_p (mode, x, FALSE))
8097         return x;
8098
           /* If one operand is a register, compute the other one into a new
              register as well and return the sum of the two.  */
8099       if (REG_P (XEXP (x, 0)))
8100         {
8101           rtx temp = gen_reg_rtx (Pmode);
8102           rtx val  = force_operand (XEXP (x, 1), temp);
8103           if (val != temp)
8104             emit_move_insn (temp, val);
8105
8106           XEXP (x, 1) = temp;
8107           return x;
8108         }
8109
8110       else if (REG_P (XEXP (x, 1)))
8111         {
8112           rtx temp = gen_reg_rtx (Pmode);
8113           rtx val  = force_operand (XEXP (x, 0), temp);
8114           if (val != temp)
8115             emit_move_insn (temp, val);
8116
8117           XEXP (x, 0) = temp;
8118           return x;
8119         }
8120     }
8121
8122   return x;
8123 }
8124 \f
8125 /* Print an integer constant expression in assembler syntax.  Addition
8126    and subtraction are the only arithmetic that may appear in these
8127    expressions.  FILE is the stdio stream to write to, X is the rtx, and
8128    CODE is the operand print code from the output string.  */
8129
8130 static void
8131 output_pic_addr_const (FILE *file, rtx x, int code)
8132 {
8133   char buf[256];
8134
8135   switch (GET_CODE (x))
8136     {
8137     case PC:
8138       gcc_assert (flag_pic);
8139       putc ('.', file);
8140       break;
8141
8142     case SYMBOL_REF:
8143       if (! TARGET_MACHO || TARGET_64BIT)
8144         output_addr_const (file, x);
8145       else
8146         {
8147           const char *name = XSTR (x, 0);
8148
8149           /* Mark the decl as referenced so that cgraph will
8150              output the function.  */
8151           if (SYMBOL_REF_DECL (x))
8152             mark_decl_referenced (SYMBOL_REF_DECL (x));
8153
8154 #if TARGET_MACHO
8155           if (MACHOPIC_INDIRECT
8156               && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
8157             name = machopic_indirection_name (x, /*stub_p=*/true);
8158 #endif
8159           assemble_name (file, name);
8160         }
8161       if (!TARGET_MACHO && !TARGET_64BIT_MS_ABI
8162           && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
8163         fputs ("@PLT", file);
8164       break;
8165
8166     case LABEL_REF:
8167       x = XEXP (x, 0);
8168       /* FALLTHRU */
8169     case CODE_LABEL:
8170       ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
8171       assemble_name (asm_out_file, buf);
8172       break;
8173
8174     case CONST_INT:
8175       fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
8176       break;
8177
8178     case CONST:
8179       /* This used to output parentheses around the expression,
8180          but that does not work on the 386 (either ATT or BSD assembler).  */
8181       output_pic_addr_const (file, XEXP (x, 0), code);
8182       break;
8183
8184     case CONST_DOUBLE:
8185       if (GET_MODE (x) == VOIDmode)
8186         {
8187           /* We can use %d if the number is <32 bits and positive.  */
8188           if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
8189             fprintf (file, "0x%lx%08lx",
8190                      (unsigned long) CONST_DOUBLE_HIGH (x),
8191                      (unsigned long) CONST_DOUBLE_LOW (x));
8192           else
8193             fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
8194         }
8195       else
8196         /* We can't handle floating point constants;
8197            PRINT_OPERAND must handle them.  */
8198         output_operand_lossage ("floating constant misused");
8199       break;
8200
8201     case PLUS:
8202       /* Some assemblers need integer constants to appear first.  */
8203       if (CONST_INT_P (XEXP (x, 0)))
8204         {
8205           output_pic_addr_const (file, XEXP (x, 0), code);
8206           putc ('+', file);
8207           output_pic_addr_const (file, XEXP (x, 1), code);
8208         }
8209       else
8210         {
8211           gcc_assert (CONST_INT_P (XEXP (x, 1)));
8212           output_pic_addr_const (file, XEXP (x, 1), code);
8213           putc ('+', file);
8214           output_pic_addr_const (file, XEXP (x, 0), code);
8215         }
8216       break;
8217
8218     case MINUS:
8219       if (!TARGET_MACHO)
8220         putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
8221       output_pic_addr_const (file, XEXP (x, 0), code);
8222       putc ('-', file);
8223       output_pic_addr_const (file, XEXP (x, 1), code);
8224       if (!TARGET_MACHO)
8225         putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
8226       break;
8227
8228      case UNSPEC:
8229        gcc_assert (XVECLEN (x, 0) == 1);
8230        output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
8231        switch (XINT (x, 1))
8232         {
8233         case UNSPEC_GOT:
8234           fputs ("@GOT", file);
8235           break;
8236         case UNSPEC_GOTOFF:
8237           fputs ("@GOTOFF", file);
8238           break;
8239         case UNSPEC_PLTOFF:
8240           fputs ("@PLTOFF", file);
8241           break;
8242         case UNSPEC_GOTPCREL:
8243           fputs (ASSEMBLER_DIALECT == ASM_ATT ?
8244                  "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
8245           break;
8246         case UNSPEC_GOTTPOFF:
8247           /* FIXME: This might be @TPOFF in Sun ld too.  */
8248           fputs ("@GOTTPOFF", file);
8249           break;
8250         case UNSPEC_TPOFF:
8251           fputs ("@TPOFF", file);
8252           break;
8253         case UNSPEC_NTPOFF:
8254           if (TARGET_64BIT)
8255             fputs ("@TPOFF", file);
8256           else
8257             fputs ("@NTPOFF", file);
8258           break;
8259         case UNSPEC_DTPOFF:
8260           fputs ("@DTPOFF", file);
8261           break;
8262         case UNSPEC_GOTNTPOFF:
8263           if (TARGET_64BIT)
8264             fputs (ASSEMBLER_DIALECT == ASM_ATT ?
8265                    "@GOTTPOFF(%rip)": "@GOTTPOFF[rip]", file);
8266           else
8267             fputs ("@GOTNTPOFF", file);
8268           break;
8269         case UNSPEC_INDNTPOFF:
8270           fputs ("@INDNTPOFF", file);
8271           break;
8272         default:
8273           output_operand_lossage ("invalid UNSPEC as operand");
8274           break;
8275         }
8276        break;
8277
8278     default:
8279       output_operand_lossage ("invalid expression as operand");
8280     }
8281 }
8282
8283 /* Hook for TARGET_ASM_OUTPUT_DWARF_DTPREL, called from dwarf2out.c, which
8284    needs DTP-relative relocations.  Write to FILE an ASM_LONG directive
8285    for X@DTPOFF.  SIZE must be 4 or 8; for 8 a ", 0" operand is appended
8286    after the value.  */
8287
8288 static void ATTRIBUTE_UNUSED
8289 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
8290 {
8291   fputs (ASM_LONG, file);
8292   output_addr_const (file, x);
8293   fputs ("@DTPOFF", file);
8294
8295   if (size == 4)
8296     return;
8297
8298   if (size != 8)
8299     gcc_unreachable ();
8300
8301   fputs (", 0", file);
8302 }
8303
8304 /* In the name of slightly smaller debug output, and to cater to
8305    general assembler lossage, recognize PIC+GOTOFF and turn it back
8306    into a direct symbol reference.
8307
8308    On Darwin, this is necessary to avoid a crash, because Darwin
8309    has a different PIC label for each routine but the DWARF debugging
8310    information is not associated with any particular routine, so it's
8311    necessary to remove references to the PIC label from RTL stored by
8312    the DWARF output code.

        ORIG_X is an address or a MEM; for a MEM it is the address inside
        that is examined.  The value returned is the symbolic expression
        recovered from it, with any constant and register addends added back,
        or ORIG_X itself when nothing is recognized.  */
8313
8314 static rtx
8315 ix86_delegitimize_address (rtx orig_x)
8316 {
8317   rtx x = orig_x;
8318   /* reg_addend is NULL or a multiple of some register.  */
8319   rtx reg_addend = NULL_RTX;
8320   /* const_addend is NULL or a const_int.  */
8321   rtx const_addend = NULL_RTX;
8322   /* This is the result, or NULL.  */
8323   rtx result = NULL_RTX;
8324
8325   if (MEM_P (x))
8326     x = XEXP (x, 0);
8327
       /* 64-bit: only a MEM whose address is
          (const (unspec [SYM] UNSPEC_GOTPCREL)) is recognized, and the
          result is SYM.  */
8328   if (TARGET_64BIT)
8329     {
8330       if (GET_CODE (x) != CONST
8331           || GET_CODE (XEXP (x, 0)) != UNSPEC
8332           || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
8333           || !MEM_P (orig_x))
8334         return orig_x;
8335       return XVECEXP (XEXP (x, 0), 0, 0);
8336     }
8337
       /* Otherwise the address has to be a sum whose second operand is a
          CONST.  */
8338   if (GET_CODE (x) != PLUS
8339       || GET_CODE (XEXP (x, 1)) != CONST)
8340     return orig_x;
8341
8342   if (REG_P (XEXP (x, 0))
8343       && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
8344     /* %ebx + GOT/GOTOFF */
8345     ;
8346   else if (GET_CODE (XEXP (x, 0)) == PLUS)
8347     {
8348       /* %ebx + %reg * scale + GOT/GOTOFF */
8349       reg_addend = XEXP (x, 0);
           /* The PIC register may be either operand of the inner sum; the
              other operand is kept as REG_ADDEND.  */
8350       if (REG_P (XEXP (reg_addend, 0))
8351           && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
8352         reg_addend = XEXP (reg_addend, 1);
8353       else if (REG_P (XEXP (reg_addend, 1))
8354                && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
8355         reg_addend = XEXP (reg_addend, 0);
8356       else
8357         return orig_x;
8358       if (!REG_P (reg_addend)
8359           && GET_CODE (reg_addend) != MULT
8360           && GET_CODE (reg_addend) != ASHIFT)
8361         return orig_x;
8362     }
8363   else
8364     return orig_x;
8365
       /* Look inside the CONST, splitting off a CONST_INT addend if there
          is one.  */
8366   x = XEXP (XEXP (x, 1), 0);
8367   if (GET_CODE (x) == PLUS
8368       && CONST_INT_P (XEXP (x, 1)))
8369     {
8370       const_addend = XEXP (x, 1);
8371       x = XEXP (x, 0);
8372     }
8373
       /* UNSPEC_GOT is undone only under a MEM and UNSPEC_GOTOFF only
          outside of one.  */
8374   if (GET_CODE (x) == UNSPEC
8375       && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x))
8376           || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
8377     result = XVECEXP (x, 0, 0);
8378
8379   if (TARGET_MACHO && darwin_local_data_pic (x)
8380       && !MEM_P (orig_x))
8381     result = XEXP (x, 0);
8382
8383   if (! result)
8384     return orig_x;
8385
       /* Add back the addends that were split off above.  */
8386   if (const_addend)
8387     result = gen_rtx_PLUS (Pmode, result, const_addend);
8388   if (reg_addend)
8389     result = gen_rtx_PLUS (Pmode, reg_addend, result);
8390   return result;
8391 }
8392
8393 /* If X is a machine specific address (a symbol or label referenced as a
8394    displacement from the GOT by means of an UNSPEC), return the symbol or
8395    label it is based on.  Otherwise return X itself.  */
8396
8397 rtx
8398 ix86_find_base_term (rtx x)
8399 {
8400   rtx base;
8401
8402   if (!TARGET_64BIT)
8403     base = ix86_delegitimize_address (x);
8404   else
8405     {
8406       rtx inner;
8407
8408       if (GET_CODE (x) != CONST)
8409         return x;
8410
8411       /* Step over a CONST_INT or CONST_DOUBLE addend, if there is one.  */
8412       inner = XEXP (x, 0);
8413       if (GET_CODE (inner) == PLUS)
8414         {
8415           rtx addend = XEXP (inner, 1);
8416
8417           if (CONST_INT_P (addend) || GET_CODE (addend) == CONST_DOUBLE)
8418             inner = XEXP (inner, 0);
8419         }
8420
8421       /* What remains has to be an UNSPEC_GOTPCREL reference.  */
8422       if (GET_CODE (inner) != UNSPEC || XINT (inner, 1) != UNSPEC_GOTPCREL)
8423         return x;
8424       base = XVECEXP (inner, 0, 0);
8425     }
8426
8427   /* Only a symbol or a label qualifies as a base term.  */
8428   if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
8429     return base;
8430   return x;
8431 }
8432 \f
8433 static void
8434 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
8435                     int fp, FILE *file)
8436 {
8437   const char *suffix;
8438
8439   if (mode == CCFPmode || mode == CCFPUmode)
8440     {
8441       enum rtx_code second_code, bypass_code;
8442       ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
8443       gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
8444       code = ix86_fp_compare_code_to_integer (code);
8445       mode = CCmode;
8446     }
8447   if (reverse)
8448     code = reverse_condition (code);
8449
8450   switch (code)
8451     {
8452     case EQ:
8453       switch (mode)
8454         {
8455         case CCAmode:
8456           suffix = "a";
8457           break;
8458
8459         case CCCmode:
8460           suffix = "c";
8461           break;
8462
8463         case CCOmode:
8464           suffix = "o";
8465           break;
8466
8467         case CCSmode:
8468           suffix = "s";
8469           break;
8470
8471         default:
8472           suffix = "e";
8473         }
8474       break;
8475     case NE:
8476       switch (mode)
8477         {
8478         case CCAmode:
8479           suffix = "na";
8480           break;
8481
8482         case CCCmode:
8483           suffix = "nc";
8484           break;
8485
8486         case CCOmode:
8487           suffix = "no";
8488           break;
8489
8490         case CCSmode:
8491           suffix = "ns";
8492           break;
8493
8494         default:
8495           suffix = "ne";
8496         }
8497       break;
8498     case GT:
8499       gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
8500       suffix = "g";
8501       break;
8502     case GTU:
8503       /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
8504          Those same assemblers have the same but opposite lossage on cmov.  */
8505       if (mode == CCmode)
8506         suffix = fp ? "nbe" : "a";
8507       else if (mode == CCCmode)
8508         suffix = "b";
8509       else
8510         gcc_unreachable ();
8511       break;
8512     case LT:
8513       switch (mode)
8514         {
8515         case CCNOmode:
8516         case CCGOCmode:
8517           suffix = "s";
8518           break;
8519
8520         case CCmode:
8521         case CCGCmode:
8522           suffix = "l";
8523           break;
8524
8525         default:
8526           gcc_unreachable ();
8527         }
8528       break;
8529     case LTU:
8530       gcc_assert (mode == CCmode || mode == CCCmode);
8531       suffix = "b";
8532       break;
8533     case GE:
8534       switch (mode)
8535         {
8536         case CCNOmode:
8537         case CCGOCmode:
8538           suffix = "ns";
8539           break;
8540
8541         case CCmode:
8542         case CCGCmode:
8543           suffix = "ge";
8544           break;
8545
8546         default:
8547           gcc_unreachable ();
8548         }
8549       break;
8550     case GEU:
8551       /* ??? As above.  */
8552       gcc_assert (mode == CCmode || mode == CCCmode);
8553       suffix = fp ? "nb" : "ae";
8554       break;
8555     case LE:
8556       gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
8557       suffix = "le";
8558       break;
8559     case LEU:
8560       /* ??? As above.  */
8561       if (mode == CCmode)
8562         suffix = "be";
8563       else if (mode == CCCmode)
8564         suffix = fp ? "nb" : "ae";
8565       else
8566         gcc_unreachable ();
8567       break;
8568     case UNORDERED:
8569       suffix = fp ? "u" : "p";
8570       break;
8571     case ORDERED:
8572       suffix = fp ? "nu" : "np";
8573       break;
8574     default:
8575       gcc_unreachable ();
8576     }
8577   fputs (suffix, file);
8578 }
8579
8580 /* Print the name of register X to FILE based on its machine mode and number.
8581    If CODE is 'w', pretend the mode is HImode.
8582    If CODE is 'b', pretend the mode is QImode.
8583    If CODE is 'k', pretend the mode is SImode.
8584    If CODE is 'q', pretend the mode is DImode.
8585    If CODE is 'h', pretend the reg is the 'high' byte register.
8586    If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.  */
8587
8588 void
8589 print_reg (rtx x, int code, FILE *file)
8590 {
8591   gcc_assert (x == pc_rtx
8592               || (REGNO (x) != ARG_POINTER_REGNUM
8593                   && REGNO (x) != FRAME_POINTER_REGNUM
8594                   && REGNO (x) != FLAGS_REG
8595                   && REGNO (x) != FPSR_REG
8596                   && REGNO (x) != FPCR_REG));
8597
8598   if (ASSEMBLER_DIALECT == ASM_ATT)
8599     putc ('%', file);
8600
8601   if (x == pc_rtx)
8602     {
8603       gcc_assert (TARGET_64BIT);
8604       fputs ("rip", file);
8605       return;
8606     }
8607
8608   if (code == 'w' || MMX_REG_P (x))
8609     code = 2;
8610   else if (code == 'b')
8611     code = 1;
8612   else if (code == 'k')
8613     code = 4;
8614   else if (code == 'q')
8615     code = 8;
8616   else if (code == 'y')
8617     code = 3;
8618   else if (code == 'h')
8619     code = 0;
8620   else
8621     code = GET_MODE_SIZE (GET_MODE (x));
8622
8623   /* Irritatingly, AMD extended registers use different naming convention
8624      from the normal registers.  */
8625   if (REX_INT_REG_P (x))
8626     {
8627       gcc_assert (TARGET_64BIT);
8628       switch (code)
8629         {
8630           case 0:
8631             error ("extended registers have no high halves");
8632             break;
8633           case 1:
8634             fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
8635             break;
8636           case 2:
8637             fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
8638             break;
8639           case 4:
8640             fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
8641             break;
8642           case 8:
8643             fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
8644             break;
8645           default:
8646             error ("unsupported operand size for extended register");
8647             break;
8648         }
8649       return;
8650     }
8651   switch (code)
8652     {
8653     case 3:
8654       if (STACK_TOP_P (x))
8655         {
8656           fputs ("st(0)", file);
8657           break;
8658         }
8659       /* FALLTHRU */
8660     case 8:
8661     case 4:
8662     case 12:
8663       if (! ANY_FP_REG_P (x))
8664         putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
8665       /* FALLTHRU */
8666     case 16:
8667     case 2:
8668     normal:
8669       fputs (hi_reg_name[REGNO (x)], file);
8670       break;
8671     case 1:
8672       if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
8673         goto normal;
8674       fputs (qi_reg_name[REGNO (x)], file);
8675       break;
8676     case 0:
8677       if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
8678         goto normal;
8679       fputs (qi_high_reg_name[REGNO (x)], file);
8680       break;
8681     default:
8682       gcc_unreachable ();
8683     }
8684 }
8685
8686 /* Locate some local-dynamic symbol still in use by this function
8687    so that we can print its name in some tls_local_dynamic_base
8688    pattern.  */
8689
8690 static int
8691 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
8692 {
8693   rtx x = *px;
8694
8695   if (GET_CODE (x) == SYMBOL_REF
8696       && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
8697     {
8698       cfun->machine->some_ld_name = XSTR (x, 0);
8699       return 1;
8700     }
8701
8702   return 0;
8703 }
8704
8705 static const char *
8706 get_some_local_dynamic_name (void)
8707 {
8708   rtx insn;
8709
8710   if (cfun->machine->some_ld_name)
8711     return cfun->machine->some_ld_name;
8712
8713   for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
8714     if (INSN_P (insn)
8715         && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
8716       return cfun->machine->some_ld_name;
8717
8718   gcc_unreachable ();
8719 }
8720
8721 /* Meaning of CODE:
8722    L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
8723    C -- print opcode suffix for set/cmov insn.
8724    c -- like C, but print reversed condition
8725    F,f -- likewise, but for floating-point.
8726    O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
8727         otherwise nothing
8728    R -- print the prefix for register names.
8729    z -- print the opcode suffix for the size of the current operand.
8730    * -- print a star (in certain assembler syntax)
8731    A -- print an absolute memory reference.
8732    w -- print the operand as if it's a "word" (HImode) even if it isn't.
8733    s -- print a shift double count, followed by the assemblers argument
8734         delimiter.
8735    b -- print the QImode name of the register for the indicated operand.
8736         %b0 would print %al if operands[0] is reg 0.
8737    w --  likewise, print the HImode name of the register.
8738    k --  likewise, print the SImode name of the register.
8739    q --  likewise, print the DImode name of the register.
8740    h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
8741    y -- print "st(0)" instead of "st" as a register.
8742    D -- print condition for SSE cmp instruction.
8743    P -- if PIC, print an @PLT suffix.
8744    X -- don't print any sort of PIC '@' suffix for a symbol.
8745    & -- print some in-use local-dynamic symbol name.
8746    H -- print a memory address offset by 8; used for sse high-parts
8747    Y -- print condition for SSE5 com* instruction.
8748    + -- print a branch hint as 'cs' or 'ds' prefix
8749    ; -- print a semicolon (after prefixes due to bug in older gas).
8750  */
8751
8752 void
8753 print_operand (FILE *file, rtx x, int code)
8754 {
8755   if (code)
8756     {
8757       switch (code)
8758         {
8759         case '*':
8760           if (ASSEMBLER_DIALECT == ASM_ATT)
8761             putc ('*', file);
8762           return;
8763
8764         case '&':
8765           assemble_name (file, get_some_local_dynamic_name ());
8766           return;
8767
8768         case 'A':
8769           switch (ASSEMBLER_DIALECT)
8770             {
8771             case ASM_ATT:
8772               putc ('*', file);
8773               break;
8774
8775             case ASM_INTEL:
8776               /* Intel syntax. For absolute addresses, registers should not
8777                  be surrounded by braces.  */
8778               if (!REG_P (x))
8779                 {
8780                   putc ('[', file);
8781                   PRINT_OPERAND (file, x, 0);
8782                   putc (']', file);
8783                   return;
8784                 }
8785               break;
8786
8787             default:
8788               gcc_unreachable ();
8789             }
8790
8791           PRINT_OPERAND (file, x, 0);
8792           return;
8793
8794
8795         case 'L':
8796           if (ASSEMBLER_DIALECT == ASM_ATT)
8797             putc ('l', file);
8798           return;
8799
8800         case 'W':
8801           if (ASSEMBLER_DIALECT == ASM_ATT)
8802             putc ('w', file);
8803           return;
8804
8805         case 'B':
8806           if (ASSEMBLER_DIALECT == ASM_ATT)
8807             putc ('b', file);
8808           return;
8809
8810         case 'Q':
8811           if (ASSEMBLER_DIALECT == ASM_ATT)
8812             putc ('l', file);
8813           return;
8814
8815         case 'S':
8816           if (ASSEMBLER_DIALECT == ASM_ATT)
8817             putc ('s', file);
8818           return;
8819
8820         case 'T':
8821           if (ASSEMBLER_DIALECT == ASM_ATT)
8822             putc ('t', file);
8823           return;
8824
8825         case 'z':
8826           /* 387 opcodes don't get size suffixes if the operands are
8827              registers.  */
8828           if (STACK_REG_P (x))
8829             return;
8830
8831           /* Likewise if using Intel opcodes.  */
8832           if (ASSEMBLER_DIALECT == ASM_INTEL)
8833             return;
8834
8835           /* This is the size of op from size of operand.  */
8836           switch (GET_MODE_SIZE (GET_MODE (x)))
8837             {
8838             case 1:
8839               putc ('b', file);
8840               return;
8841
8842             case 2:
8843               if (MEM_P (x))
8844                 {
8845 #ifdef HAVE_GAS_FILDS_FISTS
8846                   putc ('s', file);
8847 #endif
8848                   return;
8849                 }
8850               else
8851                 putc ('w', file);
8852               return;
8853
8854             case 4:
8855               if (GET_MODE (x) == SFmode)
8856                 {
8857                   putc ('s', file);
8858                   return;
8859                 }
8860               else
8861                 putc ('l', file);
8862               return;
8863
8864             case 12:
8865             case 16:
8866               putc ('t', file);
8867               return;
8868
8869             case 8:
8870               if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
8871                 {
8872 #ifdef GAS_MNEMONICS
8873                   putc ('q', file);
8874 #else
8875                   putc ('l', file);
8876                   putc ('l', file);
8877 #endif
8878                 }
8879               else
8880                 putc ('l', file);
8881               return;
8882
8883             default:
8884               gcc_unreachable ();
8885             }
8886
8887         case 'b':
8888         case 'w':
8889         case 'k':
8890         case 'q':
8891         case 'h':
8892         case 'y':
8893         case 'X':
8894         case 'P':
8895           break;
8896
8897         case 's':
8898           if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
8899             {
8900               PRINT_OPERAND (file, x, 0);
8901               putc (',', file);
8902             }
8903           return;
8904
8905         case 'D':
8906           /* Little bit of braindamage here.  The SSE compare instructions
8907              does use completely different names for the comparisons that the
8908              fp conditional moves.  */
8909           switch (GET_CODE (x))
8910             {
8911             case EQ:
8912             case UNEQ:
8913               fputs ("eq", file);
8914               break;
8915             case LT:
8916             case UNLT:
8917               fputs ("lt", file);
8918               break;
8919             case LE:
8920             case UNLE:
8921               fputs ("le", file);
8922               break;
8923             case UNORDERED:
8924               fputs ("unord", file);
8925               break;
8926             case NE:
8927             case LTGT:
8928               fputs ("neq", file);
8929               break;
8930             case UNGE:
8931             case GE:
8932               fputs ("nlt", file);
8933               break;
8934             case UNGT:
8935             case GT:
8936               fputs ("nle", file);
8937               break;
8938             case ORDERED:
8939               fputs ("ord", file);
8940               break;
8941             default:
8942               gcc_unreachable ();
8943             }
8944           return;
8945         case 'O':
8946 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8947           if (ASSEMBLER_DIALECT == ASM_ATT)
8948             {
8949               switch (GET_MODE (x))
8950                 {
8951                 case HImode: putc ('w', file); break;
8952                 case SImode:
8953                 case SFmode: putc ('l', file); break;
8954                 case DImode:
8955                 case DFmode: putc ('q', file); break;
8956                 default: gcc_unreachable ();
8957                 }
8958               putc ('.', file);
8959             }
8960 #endif
8961           return;
8962         case 'C':
8963           put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
8964           return;
8965         case 'F':
8966 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8967           if (ASSEMBLER_DIALECT == ASM_ATT)
8968             putc ('.', file);
8969 #endif
8970           put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
8971           return;
8972
8973           /* Like above, but reverse condition */
8974         case 'c':
8975           /* Check to see if argument to %c is really a constant
8976              and not a condition code which needs to be reversed.  */
8977           if (!COMPARISON_P (x))
8978           {
8979             output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
8980              return;
8981           }
8982           put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
8983           return;
8984         case 'f':
8985 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8986           if (ASSEMBLER_DIALECT == ASM_ATT)
8987             putc ('.', file);
8988 #endif
8989           put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
8990           return;
8991
8992         case 'H':
8993           /* It doesn't actually matter what mode we use here, as we're
8994              only going to use this for printing.  */
8995           x = adjust_address_nv (x, DImode, 8);
8996           break;
8997
8998         case '+':
8999           {
9000             rtx x;
9001
9002             if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
9003               return;
9004
9005             x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
9006             if (x)
9007               {
9008                 int pred_val = INTVAL (XEXP (x, 0));
9009
9010                 if (pred_val < REG_BR_PROB_BASE * 45 / 100
9011                     || pred_val > REG_BR_PROB_BASE * 55 / 100)
9012                   {
9013                     int taken = pred_val > REG_BR_PROB_BASE / 2;
9014                     int cputaken = final_forward_branch_p (current_output_insn) == 0;
9015
9016                     /* Emit hints only in the case default branch prediction
9017                        heuristics would fail.  */
9018                     if (taken != cputaken)
9019                       {
9020                         /* We use 3e (DS) prefix for taken branches and
9021                            2e (CS) prefix for not taken branches.  */
9022                         if (taken)
9023                           fputs ("ds ; ", file);
9024                         else
9025                           fputs ("cs ; ", file);
9026                       }
9027                   }
9028               }
9029             return;
9030           }
9031
9032         case 'Y':
9033           switch (GET_CODE (x))
9034             {
9035             case NE:
9036               fputs ("neq", file);
9037               break;
9038             case EQ:
9039               fputs ("eq", file);
9040               break;
9041             case GE:
9042             case GEU:
9043               fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
9044               break;
9045             case GT:
9046             case GTU:
9047               fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
9048               break;
9049             case LE:
9050             case LEU:
9051               fputs ("le", file);
9052               break;
9053             case LT:
9054             case LTU:
9055               fputs ("lt", file);
9056               break;
9057             case UNORDERED:
9058               fputs ("unord", file);
9059               break;
9060             case ORDERED:
9061               fputs ("ord", file);
9062               break;
9063             case UNEQ:
9064               fputs ("ueq", file);
9065               break;
9066             case UNGE:
9067               fputs ("nlt", file);
9068               break;
9069             case UNGT:
9070               fputs ("nle", file);
9071               break;
9072             case UNLE:
9073               fputs ("ule", file);
9074               break;
9075             case UNLT:
9076               fputs ("ult", file);
9077               break;
9078             case LTGT:
9079               fputs ("une", file);
9080               break;
9081             default:
9082               gcc_unreachable ();
9083             }
9084           return;
9085
9086         case ';':
9087 #if TARGET_MACHO
9088           fputs (" ; ", file);
9089 #else
9090           fputc (' ', file);
9091 #endif
9092           return;
9093
9094         default:
9095             output_operand_lossage ("invalid operand code '%c'", code);
9096         }
9097     }
9098
9099   if (REG_P (x))
9100     print_reg (x, code, file);
9101
9102   else if (MEM_P (x))
9103     {
9104       /* No `byte ptr' prefix for call instructions or BLKmode operands.  */
9105       if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
9106           && GET_MODE (x) != BLKmode)
9107         {
9108           const char * size;
9109           switch (GET_MODE_SIZE (GET_MODE (x)))
9110             {
9111             case 1: size = "BYTE"; break;
9112             case 2: size = "WORD"; break;
9113             case 4: size = "DWORD"; break;
9114             case 8: size = "QWORD"; break;
9115             case 12: size = "XWORD"; break;
9116             case 16:
9117               if (GET_MODE (x) == XFmode)
9118                 size = "XWORD";
9119               else
9120                 size = "XMMWORD";
9121               break;
9122             default:
9123               gcc_unreachable ();
9124             }
9125
9126           /* Check for explicit size override (codes 'b', 'w' and 'k')  */
9127           if (code == 'b')
9128             size = "BYTE";
9129           else if (code == 'w')
9130             size = "WORD";
9131           else if (code == 'k')
9132             size = "DWORD";
9133
9134           fputs (size, file);
9135           fputs (" PTR ", file);
9136         }
9137
9138       x = XEXP (x, 0);
9139       /* Avoid (%rip) for call operands.  */
9140       if (CONSTANT_ADDRESS_P (x) && code == 'P'
9141           && !CONST_INT_P (x))
9142         output_addr_const (file, x);
9143       else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
9144         output_operand_lossage ("invalid constraints for operand");
9145       else
9146         output_address (x);
9147     }
9148
9149   else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
9150     {
9151       REAL_VALUE_TYPE r;
9152       long l;
9153
9154       REAL_VALUE_FROM_CONST_DOUBLE (r, x);
9155       REAL_VALUE_TO_TARGET_SINGLE (r, l);
9156
9157       if (ASSEMBLER_DIALECT == ASM_ATT)
9158         putc ('$', file);
9159       fprintf (file, "0x%08lx", l);
9160     }
9161
9162   /* These float cases don't actually occur as immediate operands.  */
9163   else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
9164     {
9165       char dstr[30];
9166
9167       real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
9168       fprintf (file, "%s", dstr);
9169     }
9170
9171   else if (GET_CODE (x) == CONST_DOUBLE
9172            && GET_MODE (x) == XFmode)
9173     {
9174       char dstr[30];
9175
9176       real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
9177       fprintf (file, "%s", dstr);
9178     }
9179
9180   else
9181     {
9182       /* We have patterns that allow zero sets of memory, for instance.
9183          In 64-bit mode, we should probably support all 8-byte vectors,
9184          since we can in fact encode that into an immediate.  */
9185       if (GET_CODE (x) == CONST_VECTOR)
9186         {
9187           gcc_assert (x == CONST0_RTX (GET_MODE (x)));
9188           x = const0_rtx;
9189         }
9190
9191       if (code != 'P')
9192         {
9193           if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
9194             {
9195               if (ASSEMBLER_DIALECT == ASM_ATT)
9196                 putc ('$', file);
9197             }
9198           else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
9199                    || GET_CODE (x) == LABEL_REF)
9200             {
9201               if (ASSEMBLER_DIALECT == ASM_ATT)
9202                 putc ('$', file);
9203               else
9204                 fputs ("OFFSET FLAT:", file);
9205             }
9206         }
9207       if (CONST_INT_P (x))
9208         fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
9209       else if (flag_pic)
9210         output_pic_addr_const (file, x, code);
9211       else
9212         output_addr_const (file, x);
9213     }
9214 }
9215 \f
9216 /* Print a memory operand whose address is ADDR.  */
9217
9218 void
9219 print_operand_address (FILE *file, rtx addr)
9220 {
9221   struct ix86_address parts;
9222   rtx base, index, disp;
9223   int scale;
9224   int ok = ix86_decompose_address (addr, &parts);
9225
9226   gcc_assert (ok);
9227
9228   base = parts.base;
9229   index = parts.index;
9230   disp = parts.disp;
9231   scale = parts.scale;
9232
9233   switch (parts.seg)
9234     {
9235     case SEG_DEFAULT:
9236       break;
9237     case SEG_FS:
9238     case SEG_GS:
9239       if (ASSEMBLER_DIALECT == ASM_ATT)
9240         putc ('%', file);
9241       fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
9242       break;
9243     default:
9244       gcc_unreachable ();
9245     }
9246
9247   /* Use one byte shorter RIP relative addressing for 64bit mode.  */
9248   if (TARGET_64BIT && !base && !index)
9249     {
9250       rtx symbol = disp;
9251
9252       if (GET_CODE (disp) == CONST
9253           && GET_CODE (XEXP (disp, 0)) == PLUS
9254           && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
9255         symbol = XEXP (XEXP (disp, 0), 0);
9256
9257       if (GET_CODE (symbol) == LABEL_REF
9258           || (GET_CODE (symbol) == SYMBOL_REF
9259               && SYMBOL_REF_TLS_MODEL (symbol) == 0))
9260         base = pc_rtx;
9261     }
9262   if (!base && !index)
9263     {
9264       /* Displacement only requires special attention.  */
9265
9266       if (CONST_INT_P (disp))
9267         {
9268           if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
9269             fputs ("ds:", file);
9270           fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
9271         }
9272       else if (flag_pic)
9273         output_pic_addr_const (file, disp, 0);
9274       else
9275         output_addr_const (file, disp);
9276     }
9277   else
9278     {
9279       if (ASSEMBLER_DIALECT == ASM_ATT)
9280         {
9281           if (disp)
9282             {
9283               if (flag_pic)
9284                 output_pic_addr_const (file, disp, 0);
9285               else if (GET_CODE (disp) == LABEL_REF)
9286                 output_asm_label (disp);
9287               else
9288                 output_addr_const (file, disp);
9289             }
9290
9291           putc ('(', file);
9292           if (base)
9293             print_reg (base, 0, file);
9294           if (index)
9295             {
9296               putc (',', file);
9297               print_reg (index, 0, file);
9298               if (scale != 1)
9299                 fprintf (file, ",%d", scale);
9300             }
9301           putc (')', file);
9302         }
9303       else
9304         {
9305           rtx offset = NULL_RTX;
9306
9307           if (disp)
9308             {
9309               /* Pull out the offset of a symbol; print any symbol itself.  */
9310               if (GET_CODE (disp) == CONST
9311                   && GET_CODE (XEXP (disp, 0)) == PLUS
9312                   && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
9313                 {
9314                   offset = XEXP (XEXP (disp, 0), 1);
9315                   disp = gen_rtx_CONST (VOIDmode,
9316                                         XEXP (XEXP (disp, 0), 0));
9317                 }
9318
9319               if (flag_pic)
9320                 output_pic_addr_const (file, disp, 0);
9321               else if (GET_CODE (disp) == LABEL_REF)
9322                 output_asm_label (disp);
9323               else if (CONST_INT_P (disp))
9324                 offset = disp;
9325               else
9326                 output_addr_const (file, disp);
9327             }
9328
9329           putc ('[', file);
9330           if (base)
9331             {
9332               print_reg (base, 0, file);
9333               if (offset)
9334                 {
9335                   if (INTVAL (offset) >= 0)
9336                     putc ('+', file);
9337                   fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
9338                 }
9339             }
9340           else if (offset)
9341             fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
9342           else
9343             putc ('0', file);
9344
9345           if (index)
9346             {
9347               putc ('+', file);
9348               print_reg (index, 0, file);
9349               if (scale != 1)
9350                 fprintf (file, "*%d", scale);
9351             }
9352           putc (']', file);
9353         }
9354     }
9355 }
9356
9357 bool
9358 output_addr_const_extra (FILE *file, rtx x)
9359 {
9360   rtx op;
9361
9362   if (GET_CODE (x) != UNSPEC)
9363     return false;
9364
9365   op = XVECEXP (x, 0, 0);
9366   switch (XINT (x, 1))
9367     {
9368     case UNSPEC_GOTTPOFF:
9369       output_addr_const (file, op);
9370       /* FIXME: This might be @TPOFF in Sun ld.  */
9371       fputs ("@GOTTPOFF", file);
9372       break;
9373     case UNSPEC_TPOFF:
9374       output_addr_const (file, op);
9375       fputs ("@TPOFF", file);
9376       break;
9377     case UNSPEC_NTPOFF:
9378       output_addr_const (file, op);
9379       if (TARGET_64BIT)
9380         fputs ("@TPOFF", file);
9381       else
9382         fputs ("@NTPOFF", file);
9383       break;
9384     case UNSPEC_DTPOFF:
9385       output_addr_const (file, op);
9386       fputs ("@DTPOFF", file);
9387       break;
9388     case UNSPEC_GOTNTPOFF:
9389       output_addr_const (file, op);
9390       if (TARGET_64BIT)
9391         fputs (ASSEMBLER_DIALECT == ASM_ATT ?
9392                "@GOTTPOFF(%rip)" : "@GOTTPOFF[rip]", file);
9393       else
9394         fputs ("@GOTNTPOFF", file);
9395       break;
9396     case UNSPEC_INDNTPOFF:
9397       output_addr_const (file, op);
9398       fputs ("@INDNTPOFF", file);
9399       break;
9400
9401     default:
9402       return false;
9403     }
9404
9405   return true;
9406 }
9407 \f
9408 /* Split one or more DImode RTL references into pairs of SImode
9409    references.  The RTL can be REG, offsettable MEM, integer constant, or
9410    CONST_DOUBLE.  "operands" is a pointer to an array of DImode RTL to
9411    split and "num" is its length.  lo_half and hi_half are output arrays
9412    that parallel "operands".  */
9413
9414 void
9415 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
9416 {
9417   while (num--)
9418     {
9419       rtx op = operands[num];
9420
9421       /* simplify_subreg refuse to split volatile memory addresses,
9422          but we still have to handle it.  */
9423       if (MEM_P (op))
9424         {
9425           lo_half[num] = adjust_address (op, SImode, 0);
9426           hi_half[num] = adjust_address (op, SImode, 4);
9427         }
9428       else
9429         {
9430           lo_half[num] = simplify_gen_subreg (SImode, op,
9431                                               GET_MODE (op) == VOIDmode
9432                                               ? DImode : GET_MODE (op), 0);
9433           hi_half[num] = simplify_gen_subreg (SImode, op,
9434                                               GET_MODE (op) == VOIDmode
9435                                               ? DImode : GET_MODE (op), 4);
9436         }
9437     }
9438 }
9439 /* Split one or more TImode RTL references into pairs of DImode
9440    references.  The RTL can be REG, offsettable MEM, integer constant, or
9441    CONST_DOUBLE.  "operands" is a pointer to an array of DImode RTL to
9442    split and "num" is its length.  lo_half and hi_half are output arrays
9443    that parallel "operands".  */
9444
9445 void
9446 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
9447 {
9448   while (num--)
9449     {
9450       rtx op = operands[num];
9451
9452       /* simplify_subreg refuse to split volatile memory addresses, but we
9453          still have to handle it.  */
9454       if (MEM_P (op))
9455         {
9456           lo_half[num] = adjust_address (op, DImode, 0);
9457           hi_half[num] = adjust_address (op, DImode, 8);
9458         }
9459       else
9460         {
9461           lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
9462           hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
9463         }
9464     }
9465 }
9466 \f
9467 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
9468    MINUS, MULT or DIV.  OPERANDS are the insn operands, where operands[3]
9469    is the expression of the binary operation.  The output may either be
9470    emitted here, or returned to the caller, like all output_* functions.
9471
9472    There is no guarantee that the operands are the same mode, as they
9473    might be within FLOAT or FLOAT_EXTEND expressions.  */
9474
9475 #ifndef SYSV386_COMPAT
9476 /* Set to 1 for compatibility with brain-damaged assemblers.  No-one
9477    wants to fix the assemblers because that causes incompatibility
9478    with gcc.  No-one wants to fix gcc because that causes
9479    incompatibility with assemblers...  You can use the option of
9480    -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way.  */
9481 #define SYSV386_COMPAT 1
9482 #endif
9483
9484 const char *
9485 output_387_binary_op (rtx insn, rtx *operands)
9486 {
9487   static char buf[30];
9488   const char *p;
9489   const char *ssep;
9490   int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
9491
9492 #ifdef ENABLE_CHECKING
9493   /* Even if we do not want to check the inputs, this documents input
9494      constraints.  Which helps in understanding the following code.  */
9495   if (STACK_REG_P (operands[0])
9496       && ((REG_P (operands[1])
9497            && REGNO (operands[0]) == REGNO (operands[1])
9498            && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
9499           || (REG_P (operands[2])
9500               && REGNO (operands[0]) == REGNO (operands[2])
9501               && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
9502       && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
9503     ; /* ok */
9504   else
9505     gcc_assert (is_sse);
9506 #endif
9507
9508   switch (GET_CODE (operands[3]))
9509     {
9510     case PLUS:
9511       if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9512           || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9513         p = "fiadd";
9514       else
9515         p = "fadd";
9516       ssep = "add";
9517       break;
9518
9519     case MINUS:
9520       if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9521           || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9522         p = "fisub";
9523       else
9524         p = "fsub";
9525       ssep = "sub";
9526       break;
9527
9528     case MULT:
9529       if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9530           || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9531         p = "fimul";
9532       else
9533         p = "fmul";
9534       ssep = "mul";
9535       break;
9536
9537     case DIV:
9538       if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9539           || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9540         p = "fidiv";
9541       else
9542         p = "fdiv";
9543       ssep = "div";
9544       break;
9545
9546     default:
9547       gcc_unreachable ();
9548     }
9549
9550   if (is_sse)
9551    {
9552       strcpy (buf, ssep);
9553       if (GET_MODE (operands[0]) == SFmode)
9554         strcat (buf, "ss\t{%2, %0|%0, %2}");
9555       else
9556         strcat (buf, "sd\t{%2, %0|%0, %2}");
9557       return buf;
9558    }
9559   strcpy (buf, p);
9560
9561   switch (GET_CODE (operands[3]))
9562     {
9563     case MULT:
9564     case PLUS:
9565       if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
9566         {
9567           rtx temp = operands[2];
9568           operands[2] = operands[1];
9569           operands[1] = temp;
9570         }
9571
9572       /* know operands[0] == operands[1].  */
9573
9574       if (MEM_P (operands[2]))
9575         {
9576           p = "%z2\t%2";
9577           break;
9578         }
9579
9580       if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
9581         {
9582           if (STACK_TOP_P (operands[0]))
9583             /* How is it that we are storing to a dead operand[2]?
9584                Well, presumably operands[1] is dead too.  We can't
9585                store the result to st(0) as st(0) gets popped on this
9586                instruction.  Instead store to operands[2] (which I
9587                think has to be st(1)).  st(1) will be popped later.
9588                gcc <= 2.8.1 didn't have this check and generated
9589                assembly code that the Unixware assembler rejected.  */
9590             p = "p\t{%0, %2|%2, %0}";   /* st(1) = st(0) op st(1); pop */
9591           else
9592             p = "p\t{%2, %0|%0, %2}";   /* st(r1) = st(r1) op st(0); pop */
9593           break;
9594         }
9595
9596       if (STACK_TOP_P (operands[0]))
9597         p = "\t{%y2, %0|%0, %y2}";      /* st(0) = st(0) op st(r2) */
9598       else
9599         p = "\t{%2, %0|%0, %2}";        /* st(r1) = st(r1) op st(0) */
9600       break;
9601
9602     case MINUS:
9603     case DIV:
9604       if (MEM_P (operands[1]))
9605         {
9606           p = "r%z1\t%1";
9607           break;
9608         }
9609
9610       if (MEM_P (operands[2]))
9611         {
9612           p = "%z2\t%2";
9613           break;
9614         }
9615
9616       if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
9617         {
9618 #if SYSV386_COMPAT
9619           /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
9620              derived assemblers, confusingly reverse the direction of
9621              the operation for fsub{r} and fdiv{r} when the
9622              destination register is not st(0).  The Intel assembler
9623              doesn't have this brain damage.  Read !SYSV386_COMPAT to
9624              figure out what the hardware really does.  */
9625           if (STACK_TOP_P (operands[0]))
9626             p = "{p\t%0, %2|rp\t%2, %0}";
9627           else
9628             p = "{rp\t%2, %0|p\t%0, %2}";
9629 #else
9630           if (STACK_TOP_P (operands[0]))
9631             /* As above for fmul/fadd, we can't store to st(0).  */
9632             p = "rp\t{%0, %2|%2, %0}";  /* st(1) = st(0) op st(1); pop */
9633           else
9634             p = "p\t{%2, %0|%0, %2}";   /* st(r1) = st(r1) op st(0); pop */
9635 #endif
9636           break;
9637         }
9638
9639       if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
9640         {
9641 #if SYSV386_COMPAT
9642           if (STACK_TOP_P (operands[0]))
9643             p = "{rp\t%0, %1|p\t%1, %0}";
9644           else
9645             p = "{p\t%1, %0|rp\t%0, %1}";
9646 #else
9647           if (STACK_TOP_P (operands[0]))
9648             p = "p\t{%0, %1|%1, %0}";   /* st(1) = st(1) op st(0); pop */
9649           else
9650             p = "rp\t{%1, %0|%0, %1}";  /* st(r2) = st(0) op st(r2); pop */
9651 #endif
9652           break;
9653         }
9654
9655       if (STACK_TOP_P (operands[0]))
9656         {
9657           if (STACK_TOP_P (operands[1]))
9658             p = "\t{%y2, %0|%0, %y2}";  /* st(0) = st(0) op st(r2) */
9659           else
9660             p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
9661           break;
9662         }
9663       else if (STACK_TOP_P (operands[1]))
9664         {
9665 #if SYSV386_COMPAT
9666           p = "{\t%1, %0|r\t%0, %1}";
9667 #else
9668           p = "r\t{%1, %0|%0, %1}";     /* st(r2) = st(0) op st(r2) */
9669 #endif
9670         }
9671       else
9672         {
9673 #if SYSV386_COMPAT
9674           p = "{r\t%2, %0|\t%0, %2}";
9675 #else
9676           p = "\t{%2, %0|%0, %2}";      /* st(r1) = st(r1) op st(0) */
9677 #endif
9678         }
9679       break;
9680
9681     default:
9682       gcc_unreachable ();
9683     }
9684
9685   strcat (buf, p);
9686   return buf;
9687 }
9688
9689 /* Return needed mode for entity in optimize_mode_switching pass.  */
9690
9691 int
9692 ix86_mode_needed (int entity, rtx insn)
9693 {
9694   enum attr_i387_cw mode;
9695
9696   /* The mode UNINITIALIZED is used to store control word after a
9697      function call or ASM pattern.  The mode ANY specify that function
9698      has no requirements on the control word and make no changes in the
9699      bits we are interested in.  */
9700
9701   if (CALL_P (insn)
9702       || (NONJUMP_INSN_P (insn)
9703           && (asm_noperands (PATTERN (insn)) >= 0
9704               || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
9705     return I387_CW_UNINITIALIZED;
9706
9707   if (recog_memoized (insn) < 0)
9708     return I387_CW_ANY;
9709
9710   mode = get_attr_i387_cw (insn);
9711
9712   switch (entity)
9713     {
9714     case I387_TRUNC:
9715       if (mode == I387_CW_TRUNC)
9716         return mode;
9717       break;
9718
9719     case I387_FLOOR:
9720       if (mode == I387_CW_FLOOR)
9721         return mode;
9722       break;
9723
9724     case I387_CEIL:
9725       if (mode == I387_CW_CEIL)
9726         return mode;
9727       break;
9728
9729     case I387_MASK_PM:
9730       if (mode == I387_CW_MASK_PM)
9731         return mode;
9732       break;
9733
9734     default:
9735       gcc_unreachable ();
9736     }
9737
9738   return I387_CW_ANY;
9739 }
9740
9741 /* Output code to initialize control word copies used by trunc?f?i and
9742    rounding patterns.  CURRENT_MODE is set to current control word,
9743    while NEW_MODE is set to new control word.  */
9744
9745 void
9746 emit_i387_cw_initialization (int mode)
9747 {
9748   rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
9749   rtx new_mode;
9750
9751   enum ix86_stack_slot slot;
9752
9753   rtx reg = gen_reg_rtx (HImode);
9754
9755   emit_insn (gen_x86_fnstcw_1 (stored_mode));
9756   emit_move_insn (reg, copy_rtx (stored_mode));
9757
9758   if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
9759     {
9760       switch (mode)
9761         {
9762         case I387_CW_TRUNC:
9763           /* round toward zero (truncate) */
9764           emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
9765           slot = SLOT_CW_TRUNC;
9766           break;
9767
9768         case I387_CW_FLOOR:
9769           /* round down toward -oo */
9770           emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
9771           emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
9772           slot = SLOT_CW_FLOOR;
9773           break;
9774
9775         case I387_CW_CEIL:
9776           /* round up toward +oo */
9777           emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
9778           emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
9779           slot = SLOT_CW_CEIL;
9780           break;
9781
9782         case I387_CW_MASK_PM:
9783           /* mask precision exception for nearbyint() */
9784           emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
9785           slot = SLOT_CW_MASK_PM;
9786           break;
9787
9788         default:
9789           gcc_unreachable ();
9790         }
9791     }
9792   else
9793     {
9794       switch (mode)
9795         {
9796         case I387_CW_TRUNC:
9797           /* round toward zero (truncate) */
9798           emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
9799           slot = SLOT_CW_TRUNC;
9800           break;
9801
9802         case I387_CW_FLOOR:
9803           /* round down toward -oo */
9804           emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
9805           slot = SLOT_CW_FLOOR;
9806           break;
9807
9808         case I387_CW_CEIL:
9809           /* round up toward +oo */
9810           emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
9811           slot = SLOT_CW_CEIL;
9812           break;
9813
9814         case I387_CW_MASK_PM:
9815           /* mask precision exception for nearbyint() */
9816           emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
9817           slot = SLOT_CW_MASK_PM;
9818           break;
9819
9820         default:
9821           gcc_unreachable ();
9822         }
9823     }
9824
9825   gcc_assert (slot < MAX_386_STACK_LOCALS);
9826
9827   new_mode = assign_386_stack_local (HImode, slot);
9828   emit_move_insn (new_mode, reg);
9829 }
9830
9831 /* Output code for INSN to convert a float to a signed int.  OPERANDS
9832    are the insn operands.  The output may be [HSD]Imode and the input
9833    operand may be [SDX]Fmode.  */
9834
9835 const char *
9836 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
9837 {
9838   int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
9839   int dimode_p = GET_MODE (operands[0]) == DImode;
9840   int round_mode = get_attr_i387_cw (insn);
9841
9842   /* Jump through a hoop or two for DImode, since the hardware has no
9843      non-popping instruction.  We used to do this a different way, but
9844      that was somewhat fragile and broke with post-reload splitters.  */
9845   if ((dimode_p || fisttp) && !stack_top_dies)
9846     output_asm_insn ("fld\t%y1", operands);
9847
9848   gcc_assert (STACK_TOP_P (operands[1]));
9849   gcc_assert (MEM_P (operands[0]));
9850   gcc_assert (GET_MODE (operands[1]) != TFmode);
9851
9852   if (fisttp)
9853       output_asm_insn ("fisttp%z0\t%0", operands);
9854   else
9855     {
9856       if (round_mode != I387_CW_ANY)
9857         output_asm_insn ("fldcw\t%3", operands);
9858       if (stack_top_dies || dimode_p)
9859         output_asm_insn ("fistp%z0\t%0", operands);
9860       else
9861         output_asm_insn ("fist%z0\t%0", operands);
9862       if (round_mode != I387_CW_ANY)
9863         output_asm_insn ("fldcw\t%2", operands);
9864     }
9865
9866   return "";
9867 }
9868
9869 /* Output code for x87 ffreep insn.  The OPNO argument, which may only
9870    have the values zero or one, indicates the ffreep insn's operand
9871    from the OPERANDS array.  */
9872
9873 static const char *
9874 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
9875 {
9876   if (TARGET_USE_FFREEP)
9877 #if HAVE_AS_IX86_FFREEP
9878     return opno ? "ffreep\t%y1" : "ffreep\t%y0";
9879 #else
9880     {
9881       static char retval[] = ".word\t0xc_df";
9882       int regno = REGNO (operands[opno]);
9883
9884       gcc_assert (FP_REGNO_P (regno));
9885
9886       retval[9] = '0' + (regno - FIRST_STACK_REG);
9887       return retval;
9888     }
9889 #endif
9890
9891   return opno ? "fstp\t%y1" : "fstp\t%y0";
9892 }
9893
9894
9895 /* Output code for INSN to compare OPERANDS.  EFLAGS_P is 1 when fcomi
9896    should be used.  UNORDERED_P is true when fucom should be used.  */
9897
9898 const char *
9899 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
9900 {
9901   int stack_top_dies;
9902   rtx cmp_op0, cmp_op1;
9903   int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
9904
9905   if (eflags_p)
9906     {
9907       cmp_op0 = operands[0];
9908       cmp_op1 = operands[1];
9909     }
9910   else
9911     {
9912       cmp_op0 = operands[1];
9913       cmp_op1 = operands[2];
9914     }
9915
9916   if (is_sse)
9917     {
9918       if (GET_MODE (operands[0]) == SFmode)
9919         if (unordered_p)
9920           return "ucomiss\t{%1, %0|%0, %1}";
9921         else
9922           return "comiss\t{%1, %0|%0, %1}";
9923       else
9924         if (unordered_p)
9925           return "ucomisd\t{%1, %0|%0, %1}";
9926         else
9927           return "comisd\t{%1, %0|%0, %1}";
9928     }
9929
9930   gcc_assert (STACK_TOP_P (cmp_op0));
9931
9932   stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
9933
9934   if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
9935     {
9936       if (stack_top_dies)
9937         {
9938           output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
9939           return output_387_ffreep (operands, 1);
9940         }
9941       else
9942         return "ftst\n\tfnstsw\t%0";
9943     }
9944
9945   if (STACK_REG_P (cmp_op1)
9946       && stack_top_dies
9947       && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
9948       && REGNO (cmp_op1) != FIRST_STACK_REG)
9949     {
9950       /* If both the top of the 387 stack dies, and the other operand
9951          is also a stack register that dies, then this must be a
9952          `fcompp' float compare */
9953
9954       if (eflags_p)
9955         {
9956           /* There is no double popping fcomi variant.  Fortunately,
9957              eflags is immune from the fstp's cc clobbering.  */
9958           if (unordered_p)
9959             output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
9960           else
9961             output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
9962           return output_387_ffreep (operands, 0);
9963         }
9964       else
9965         {
9966           if (unordered_p)
9967             return "fucompp\n\tfnstsw\t%0";
9968           else
9969             return "fcompp\n\tfnstsw\t%0";
9970         }
9971     }
9972   else
9973     {
9974       /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies.  */
9975
9976       static const char * const alt[16] =
9977       {
9978         "fcom%z2\t%y2\n\tfnstsw\t%0",
9979         "fcomp%z2\t%y2\n\tfnstsw\t%0",
9980         "fucom%z2\t%y2\n\tfnstsw\t%0",
9981         "fucomp%z2\t%y2\n\tfnstsw\t%0",
9982
9983         "ficom%z2\t%y2\n\tfnstsw\t%0",
9984         "ficomp%z2\t%y2\n\tfnstsw\t%0",
9985         NULL,
9986         NULL,
9987
9988         "fcomi\t{%y1, %0|%0, %y1}",
9989         "fcomip\t{%y1, %0|%0, %y1}",
9990         "fucomi\t{%y1, %0|%0, %y1}",
9991         "fucomip\t{%y1, %0|%0, %y1}",
9992
9993         NULL,
9994         NULL,
9995         NULL,
9996         NULL
9997       };
9998
9999       int mask;
10000       const char *ret;
10001
10002       mask  = eflags_p << 3;
10003       mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
10004       mask |= unordered_p << 1;
10005       mask |= stack_top_dies;
10006
10007       gcc_assert (mask < 16);
10008       ret = alt[mask];
10009       gcc_assert (ret);
10010
10011       return ret;
10012     }
10013 }
10014
10015 void
10016 ix86_output_addr_vec_elt (FILE *file, int value)
10017 {
10018   const char *directive = ASM_LONG;
10019
10020 #ifdef ASM_QUAD
10021   if (TARGET_64BIT)
10022     directive = ASM_QUAD;
10023 #else
10024   gcc_assert (!TARGET_64BIT);
10025 #endif
10026
10027   fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
10028 }
10029
10030 void
10031 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
10032 {
10033   const char *directive = ASM_LONG;
10034
10035 #ifdef ASM_QUAD
10036   if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
10037     directive = ASM_QUAD;
10038 #else
10039   gcc_assert (!TARGET_64BIT);
10040 #endif
10041   /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand.  */
10042   if (TARGET_64BIT || TARGET_VXWORKS_RTP)
10043     fprintf (file, "%s%s%d-%s%d\n",
10044              directive, LPREFIX, value, LPREFIX, rel);
10045   else if (HAVE_AS_GOTOFF_IN_DATA)
10046     fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
10047 #if TARGET_MACHO
10048   else if (TARGET_MACHO)
10049     {
10050       fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
10051       machopic_output_function_base_name (file);
10052       fprintf(file, "\n");
10053     }
10054 #endif
10055   else
10056     asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
10057                  ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
10058 }
10059 \f
10060 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
10061    for the target.  */
10062
10063 void
10064 ix86_expand_clear (rtx dest)
10065 {
10066   rtx tmp;
10067
10068   /* We play register width games, which are only valid after reload.  */
10069   gcc_assert (reload_completed);
10070
10071   /* Avoid HImode and its attendant prefix byte.  */
10072   if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
10073     dest = gen_rtx_REG (SImode, REGNO (dest));
10074   tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
10075
10076   /* This predicate should match that for movsi_xor and movdi_xor_rex64.  */
10077   if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
10078     {
10079       rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10080       tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
10081     }
10082
10083   emit_insn (tmp);
10084 }
10085
10086 /* X is an unchanging MEM.  If it is a constant pool reference, return
10087    the constant pool rtx, else NULL.  */
10088
10089 rtx
10090 maybe_get_pool_constant (rtx x)
10091 {
10092   x = ix86_delegitimize_address (XEXP (x, 0));
10093
10094   if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
10095     return get_pool_constant (x);
10096
10097   return NULL_RTX;
10098 }
10099
10100 void
10101 ix86_expand_move (enum machine_mode mode, rtx operands[])
10102 {
10103   rtx op0, op1;
10104   enum tls_model model;
10105
10106   op0 = operands[0];
10107   op1 = operands[1];
10108
10109   if (GET_CODE (op1) == SYMBOL_REF)
10110     {
10111       model = SYMBOL_REF_TLS_MODEL (op1);
10112       if (model)
10113         {
10114           op1 = legitimize_tls_address (op1, model, true);
10115           op1 = force_operand (op1, op0);
10116           if (op1 == op0)
10117             return;
10118         }
10119       else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
10120                && SYMBOL_REF_DLLIMPORT_P (op1))
10121         op1 = legitimize_dllimport_symbol (op1, false);
10122     }
10123   else if (GET_CODE (op1) == CONST
10124            && GET_CODE (XEXP (op1, 0)) == PLUS
10125            && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
10126     {
10127       rtx addend = XEXP (XEXP (op1, 0), 1);
10128       rtx symbol = XEXP (XEXP (op1, 0), 0);
10129       rtx tmp = NULL;
10130
10131       model = SYMBOL_REF_TLS_MODEL (symbol);
10132       if (model)
10133         tmp = legitimize_tls_address (symbol, model, true);
10134       else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
10135                && SYMBOL_REF_DLLIMPORT_P (symbol))
10136         tmp = legitimize_dllimport_symbol (symbol, true);
10137
10138       if (tmp)
10139         {
10140           tmp = force_operand (tmp, NULL);
10141           tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
10142                                      op0, 1, OPTAB_DIRECT);
10143           if (tmp == op0)
10144             return;
10145         }
10146     }
10147
10148   if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
10149     {
10150       if (TARGET_MACHO && !TARGET_64BIT)
10151         {
10152 #if TARGET_MACHO
10153           if (MACHOPIC_PURE)
10154             {
10155               rtx temp = ((reload_in_progress
10156                            || ((op0 && REG_P (op0))
10157                                && mode == Pmode))
10158                           ? op0 : gen_reg_rtx (Pmode));
10159               op1 = machopic_indirect_data_reference (op1, temp);
10160               op1 = machopic_legitimize_pic_address (op1, mode,
10161                                                      temp == op1 ? 0 : temp);
10162             }
10163           else if (MACHOPIC_INDIRECT)
10164             op1 = machopic_indirect_data_reference (op1, 0);
10165           if (op0 == op1)
10166             return;
10167 #endif
10168         }
10169       else
10170         {
10171           if (MEM_P (op0))
10172             op1 = force_reg (Pmode, op1);
10173           else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
10174             {
10175               rtx reg = !can_create_pseudo_p () ? op0 : NULL_RTX;
10176               op1 = legitimize_pic_address (op1, reg);
10177               if (op0 == op1)
10178                 return;
10179             }
10180         }
10181     }
10182   else
10183     {
10184       if (MEM_P (op0)
10185           && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
10186               || !push_operand (op0, mode))
10187           && MEM_P (op1))
10188         op1 = force_reg (mode, op1);
10189
10190       if (push_operand (op0, mode)
10191           && ! general_no_elim_operand (op1, mode))
10192         op1 = copy_to_mode_reg (mode, op1);
10193
10194       /* Force large constants in 64bit compilation into register
10195          to get them CSEed.  */
10196       if (can_create_pseudo_p ()
10197           && (mode == DImode) && TARGET_64BIT
10198           && immediate_operand (op1, mode)
10199           && !x86_64_zext_immediate_operand (op1, VOIDmode)
10200           && !register_operand (op0, mode)
10201           && optimize)
10202         op1 = copy_to_mode_reg (mode, op1);
10203
10204       if (can_create_pseudo_p ()
10205           && FLOAT_MODE_P (mode)
10206           && GET_CODE (op1) == CONST_DOUBLE)
10207         {
10208           /* If we are loading a floating point constant to a register,
10209              force the value to memory now, since we'll get better code
10210              out the back end.  */
10211
10212           op1 = validize_mem (force_const_mem (mode, op1));
10213           if (!register_operand (op0, mode))
10214             {
10215               rtx temp = gen_reg_rtx (mode);
10216               emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
10217               emit_move_insn (op0, temp);
10218               return;
10219             }
10220         }
10221     }
10222
10223   emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
10224 }
10225
10226 void
10227 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
10228 {
10229   rtx op0 = operands[0], op1 = operands[1];
10230   unsigned int align = GET_MODE_ALIGNMENT (mode);
10231
10232   /* Force constants other than zero into memory.  We do not know how
10233      the instructions used to build constants modify the upper 64 bits
10234      of the register, once we have that information we may be able
10235      to handle some of them more efficiently.  */
10236   if (can_create_pseudo_p ()
10237       && register_operand (op0, mode)
10238       && (CONSTANT_P (op1)
10239           || (GET_CODE (op1) == SUBREG
10240               && CONSTANT_P (SUBREG_REG (op1))))
10241       && standard_sse_constant_p (op1) <= 0)
10242     op1 = validize_mem (force_const_mem (mode, op1));
10243
10244   /* TDmode values are passed as TImode on the stack.  TImode values
10245      are moved via xmm registers, and moving them to stack can result in
10246      unaligned memory access.  Use ix86_expand_vector_move_misalign()
10247      if memory operand is not aligned correctly.  */
10248   if (can_create_pseudo_p ()
10249       && (mode == TImode) && !TARGET_64BIT
10250       && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
10251           || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
10252     {
10253       rtx tmp[2];
10254
10255       /* ix86_expand_vector_move_misalign() does not like constants ... */
10256       if (CONSTANT_P (op1)
10257           || (GET_CODE (op1) == SUBREG
10258               && CONSTANT_P (SUBREG_REG (op1))))
10259         op1 = validize_mem (force_const_mem (mode, op1));
10260
10261       /* ... nor both arguments in memory.  */
10262       if (!register_operand (op0, mode)
10263           && !register_operand (op1, mode))
10264         op1 = force_reg (mode, op1);
10265
10266       tmp[0] = op0; tmp[1] = op1;
10267       ix86_expand_vector_move_misalign (mode, tmp);
10268       return;
10269     }
10270
10271   /* Make operand1 a register if it isn't already.  */
10272   if (can_create_pseudo_p ()
10273       && !register_operand (op0, mode)
10274       && !register_operand (op1, mode))
10275     {
10276       emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
10277       return;
10278     }
10279
10280   emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
10281 }
10282
10283 /* Implement the movmisalign patterns for SSE.  Non-SSE modes go
10284    straight to ix86_expand_vector_move.  */
10285 /* Code generation for scalar reg-reg moves of single and double precision data:
10286      if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
10287        movaps reg, reg
10288      else
10289        movss reg, reg
10290      if (x86_sse_partial_reg_dependency == true)
10291        movapd reg, reg
10292      else
10293        movsd reg, reg
10294
10295    Code generation for scalar loads of double precision data:
10296      if (x86_sse_split_regs == true)
10297        movlpd mem, reg      (gas syntax)
10298      else
10299        movsd mem, reg
10300
10301    Code generation for unaligned packed loads of single precision data
10302    (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
10303      if (x86_sse_unaligned_move_optimal)
10304        movups mem, reg
10305
10306      if (x86_sse_partial_reg_dependency == true)
10307        {
10308          xorps  reg, reg
10309          movlps mem, reg
10310          movhps mem+8, reg
10311        }
10312      else
10313        {
10314          movlps mem, reg
10315          movhps mem+8, reg
10316        }
10317
10318    Code generation for unaligned packed loads of double precision data
10319    (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
10320      if (x86_sse_unaligned_move_optimal)
10321        movupd mem, reg
10322
10323      if (x86_sse_split_regs == true)
10324        {
10325          movlpd mem, reg
10326          movhpd mem+8, reg
10327        }
10328      else
10329        {
10330          movsd  mem, reg
10331          movhpd mem+8, reg
10332        }
10333  */
10334
10335 void
10336 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
10337 {
10338   rtx op0, op1, m;
10339
10340   op0 = operands[0];
10341   op1 = operands[1];
10342
10343   if (MEM_P (op1))
10344     {
10345       /* If we're optimizing for size, movups is the smallest.  */
10346       if (optimize_size)
10347         {
10348           op0 = gen_lowpart (V4SFmode, op0);
10349           op1 = gen_lowpart (V4SFmode, op1);
10350           emit_insn (gen_sse_movups (op0, op1));
10351           return;
10352         }
10353
10354       /* ??? If we have typed data, then it would appear that using
10355          movdqu is the only way to get unaligned data loaded with
10356          integer type.  */
10357       if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
10358         {
10359           op0 = gen_lowpart (V16QImode, op0);
10360           op1 = gen_lowpart (V16QImode, op1);
10361           emit_insn (gen_sse2_movdqu (op0, op1));
10362           return;
10363         }
10364
10365       if (TARGET_SSE2 && mode == V2DFmode)
10366         {
10367           rtx zero;
10368
10369           if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
10370             {
10371               op0 = gen_lowpart (V2DFmode, op0);
10372               op1 = gen_lowpart (V2DFmode, op1);
10373               emit_insn (gen_sse2_movupd (op0, op1));
10374               return;
10375             }
10376
10377           /* When SSE registers are split into halves, we can avoid
10378              writing to the top half twice.  */
10379           if (TARGET_SSE_SPLIT_REGS)
10380             {
10381               emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
10382               zero = op0;
10383             }
10384           else
10385             {
10386               /* ??? Not sure about the best option for the Intel chips.
10387                  The following would seem to satisfy; the register is
10388                  entirely cleared, breaking the dependency chain.  We
10389                  then store to the upper half, with a dependency depth
10390                  of one.  A rumor has it that Intel recommends two movsd
10391                  followed by an unpacklpd, but this is unconfirmed.  And
10392                  given that the dependency depth of the unpacklpd would
10393                  still be one, I'm not sure why this would be better.  */
10394               zero = CONST0_RTX (V2DFmode);
10395             }
10396
10397           m = adjust_address (op1, DFmode, 0);
10398           emit_insn (gen_sse2_loadlpd (op0, zero, m));
10399           m = adjust_address (op1, DFmode, 8);
10400           emit_insn (gen_sse2_loadhpd (op0, op0, m));
10401         }
10402       else
10403         {
10404           if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
10405             {
10406               op0 = gen_lowpart (V4SFmode, op0);
10407               op1 = gen_lowpart (V4SFmode, op1);
10408               emit_insn (gen_sse_movups (op0, op1));
10409               return;
10410             }
10411
10412           if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
10413             emit_move_insn (op0, CONST0_RTX (mode));
10414           else
10415             emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
10416
10417           if (mode != V4SFmode)
10418             op0 = gen_lowpart (V4SFmode, op0);
10419           m = adjust_address (op1, V2SFmode, 0);
10420           emit_insn (gen_sse_loadlps (op0, op0, m));
10421           m = adjust_address (op1, V2SFmode, 8);
10422           emit_insn (gen_sse_loadhps (op0, op0, m));
10423         }
10424     }
10425   else if (MEM_P (op0))
10426     {
10427       /* If we're optimizing for size, movups is the smallest.  */
10428       if (optimize_size)
10429         {
10430           op0 = gen_lowpart (V4SFmode, op0);
10431           op1 = gen_lowpart (V4SFmode, op1);
10432           emit_insn (gen_sse_movups (op0, op1));
10433           return;
10434         }
10435
10436       /* ??? Similar to above, only less clear because of quote
10437          typeless stores unquote.  */
10438       if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
10439           && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
10440         {
10441           op0 = gen_lowpart (V16QImode, op0);
10442           op1 = gen_lowpart (V16QImode, op1);
10443           emit_insn (gen_sse2_movdqu (op0, op1));
10444           return;
10445         }
10446
10447       if (TARGET_SSE2 && mode == V2DFmode)
10448         {
10449           m = adjust_address (op0, DFmode, 0);
10450           emit_insn (gen_sse2_storelpd (m, op1));
10451           m = adjust_address (op0, DFmode, 8);
10452           emit_insn (gen_sse2_storehpd (m, op1));
10453         }
10454       else
10455         {
10456           if (mode != V4SFmode)
10457             op1 = gen_lowpart (V4SFmode, op1);
10458           m = adjust_address (op0, V2SFmode, 0);
10459           emit_insn (gen_sse_storelps (m, op1));
10460           m = adjust_address (op0, V2SFmode, 8);
10461           emit_insn (gen_sse_storehps (m, op1));
10462         }
10463     }
10464   else
10465     gcc_unreachable ();
10466 }
10467
10468 /* Expand a push in MODE.  This is some mode for which we do not support
10469    proper push instructions, at least from the registers that we expect
10470    the value to live in.  */
10471
10472 void
10473 ix86_expand_push (enum machine_mode mode, rtx x)
10474 {
10475   rtx tmp;
10476
10477   tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
10478                              GEN_INT (-GET_MODE_SIZE (mode)),
10479                              stack_pointer_rtx, 1, OPTAB_DIRECT);
10480   if (tmp != stack_pointer_rtx)
10481     emit_move_insn (stack_pointer_rtx, tmp);
10482
10483   tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
10484   emit_move_insn (tmp, x);
10485 }
10486
10487 /* Helper function of ix86_fixup_binary_operands to canonicalize
10488    operand order.  Returns true if the operands should be swapped.  */
10489
10490 static bool
10491 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
10492                              rtx operands[])
10493 {
10494   rtx dst = operands[0];
10495   rtx src1 = operands[1];
10496   rtx src2 = operands[2];
10497
10498   /* If the operation is not commutative, we can't do anything.  */
10499   if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
10500     return false;
10501
10502   /* Highest priority is that src1 should match dst.  */
10503   if (rtx_equal_p (dst, src1))
10504     return false;
10505   if (rtx_equal_p (dst, src2))
10506     return true;
10507
10508   /* Next highest priority is that immediate constants come second.  */
10509   if (immediate_operand (src2, mode))
10510     return false;
10511   if (immediate_operand (src1, mode))
10512     return true;
10513
10514   /* Lowest priority is that memory references should come second.  */
10515   if (MEM_P (src2))
10516     return false;
10517   if (MEM_P (src1))
10518     return true;
10519
10520   return false;
10521 }
10522
10523
10524 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok.  Return the
10525    destination to use for the operation.  If different from the true
10526    destination in operands[0], a copy operation will be required.  */
10527
10528 rtx
10529 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
10530                             rtx operands[])
10531 {
10532   rtx dst = operands[0];
10533   rtx src1 = operands[1];
10534   rtx src2 = operands[2];
10535
10536   /* Canonicalize operand order.  */
10537   if (ix86_swap_binary_operands_p (code, mode, operands))
10538     {
10539       rtx temp = src1;
10540       src1 = src2;
10541       src2 = temp;
10542     }
10543
10544   /* Both source operands cannot be in memory.  */
10545   if (MEM_P (src1) && MEM_P (src2))
10546     {
10547       /* Optimization: Only read from memory once.  */
10548       if (rtx_equal_p (src1, src2))
10549         {
10550           src2 = force_reg (mode, src2);
10551           src1 = src2;
10552         }
10553       else
10554         src2 = force_reg (mode, src2);
10555     }
10556
10557   /* If the destination is memory, and we do not have matching source
10558      operands, do things in registers.  */
10559   if (MEM_P (dst) && !rtx_equal_p (dst, src1))
10560     dst = gen_reg_rtx (mode);
10561
10562   /* Source 1 cannot be a constant.  */
10563   if (CONSTANT_P (src1))
10564     src1 = force_reg (mode, src1);
10565
10566   /* Source 1 cannot be a non-matching memory.  */
10567   if (MEM_P (src1) && !rtx_equal_p (dst, src1))
10568     src1 = force_reg (mode, src1);
10569
10570   operands[1] = src1;
10571   operands[2] = src2;
10572   return dst;
10573 }
10574
10575 /* Similarly, but assume that the destination has already been
10576    set up properly.  */
10577
10578 void
10579 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
10580                                     enum machine_mode mode, rtx operands[])
10581 {
10582   rtx dst = ix86_fixup_binary_operands (code, mode, operands);
10583   gcc_assert (dst == operands[0]);
10584 }
10585
10586 /* Attempt to expand a binary operator.  Make the expansion closer to the
10587    actual machine, then just general_operand, which will allow 3 separate
10588    memory references (one output, two input) in a single insn.  */
10589
10590 void
10591 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
10592                              rtx operands[])
10593 {
10594   rtx src1, src2, dst, op, clob;
10595
10596   dst = ix86_fixup_binary_operands (code, mode, operands);
10597   src1 = operands[1];
10598   src2 = operands[2];
10599
10600  /* Emit the instruction.  */
10601
10602   op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
10603   if (reload_in_progress)
10604     {
10605       /* Reload doesn't know about the flags register, and doesn't know that
10606          it doesn't want to clobber it.  We can only do this with PLUS.  */
10607       gcc_assert (code == PLUS);
10608       emit_insn (op);
10609     }
10610   else
10611     {
10612       clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10613       emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
10614     }
10615
10616   /* Fix up the destination if needed.  */
10617   if (dst != operands[0])
10618     emit_move_insn (operands[0], dst);
10619 }
10620
10621 /* Return TRUE or FALSE depending on whether the binary operator meets the
10622    appropriate constraints.  */
10623
10624 int
10625 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
10626                          rtx operands[3])
10627 {
10628   rtx dst = operands[0];
10629   rtx src1 = operands[1];
10630   rtx src2 = operands[2];
10631
10632   /* Both source operands cannot be in memory.  */
10633   if (MEM_P (src1) && MEM_P (src2))
10634     return 0;
10635
10636   /* Canonicalize operand order for commutative operators.  */
10637   if (ix86_swap_binary_operands_p (code, mode, operands))
10638     {
10639       rtx temp = src1;
10640       src1 = src2;
10641       src2 = temp;
10642     }
10643
10644   /* If the destination is memory, we must have a matching source operand.  */
10645   if (MEM_P (dst) && !rtx_equal_p (dst, src1))
10646       return 0;
10647
10648   /* Source 1 cannot be a constant.  */
10649   if (CONSTANT_P (src1))
10650     return 0;
10651
10652   /* Source 1 cannot be a non-matching memory.  */
10653   if (MEM_P (src1) && !rtx_equal_p (dst, src1))
10654     return 0;
10655
10656   return 1;
10657 }
10658
10659 /* Attempt to expand a unary operator.  Make the expansion closer to the
10660    actual machine, then just general_operand, which will allow 2 separate
10661    memory references (one output, one input) in a single insn.  */
10662
10663 void
10664 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
10665                             rtx operands[])
10666 {
10667   int matching_memory;
10668   rtx src, dst, op, clob;
10669
10670   dst = operands[0];
10671   src = operands[1];
10672
10673   /* If the destination is memory, and we do not have matching source
10674      operands, do things in registers.  */
10675   matching_memory = 0;
10676   if (MEM_P (dst))
10677     {
10678       if (rtx_equal_p (dst, src))
10679         matching_memory = 1;
10680       else
10681         dst = gen_reg_rtx (mode);
10682     }
10683
10684   /* When source operand is memory, destination must match.  */
10685   if (MEM_P (src) && !matching_memory)
10686     src = force_reg (mode, src);
10687
10688   /* Emit the instruction.  */
10689
10690   op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
10691   if (reload_in_progress || code == NOT)
10692     {
10693       /* Reload doesn't know about the flags register, and doesn't know that
10694          it doesn't want to clobber it.  */
10695       gcc_assert (code == NOT);
10696       emit_insn (op);
10697     }
10698   else
10699     {
10700       clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10701       emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
10702     }
10703
10704   /* Fix up the destination if needed.  */
10705   if (dst != operands[0])
10706     emit_move_insn (operands[0], dst);
10707 }
10708
10709 /* Return TRUE or FALSE depending on whether the unary operator meets the
10710    appropriate constraints.  */
10711
10712 int
10713 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
10714                         enum machine_mode mode ATTRIBUTE_UNUSED,
10715                         rtx operands[2] ATTRIBUTE_UNUSED)
10716 {
10717   /* If one of operands is memory, source and destination must match.  */
10718   if ((MEM_P (operands[0])
10719        || MEM_P (operands[1]))
10720       && ! rtx_equal_p (operands[0], operands[1]))
10721     return FALSE;
10722   return TRUE;
10723 }
10724
10725 /* Post-reload splitter for converting an SF or DFmode value in an
10726    SSE register into an unsigned SImode.  */
10727
10728 void
10729 ix86_split_convert_uns_si_sse (rtx operands[])
10730 {
10731   enum machine_mode vecmode;
10732   rtx value, large, zero_or_two31, input, two31, x;
10733
10734   large = operands[1];
10735   zero_or_two31 = operands[2];
10736   input = operands[3];
10737   two31 = operands[4];
10738   vecmode = GET_MODE (large);
10739   value = gen_rtx_REG (vecmode, REGNO (operands[0]));
10740
10741   /* Load up the value into the low element.  We must ensure that the other
10742      elements are valid floats -- zero is the easiest such value.  */
10743   if (MEM_P (input))
10744     {
10745       if (vecmode == V4SFmode)
10746         emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
10747       else
10748         emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
10749     }
10750   else
10751     {
10752       input = gen_rtx_REG (vecmode, REGNO (input));
10753       emit_move_insn (value, CONST0_RTX (vecmode));
10754       if (vecmode == V4SFmode)
10755         emit_insn (gen_sse_movss (value, value, input));
10756       else
10757         emit_insn (gen_sse2_movsd (value, value, input));
10758     }
10759
10760   emit_move_insn (large, two31);
10761   emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
10762
10763   x = gen_rtx_fmt_ee (LE, vecmode, large, value);
10764   emit_insn (gen_rtx_SET (VOIDmode, large, x));
10765
10766   x = gen_rtx_AND (vecmode, zero_or_two31, large);
10767   emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
10768
10769   x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
10770   emit_insn (gen_rtx_SET (VOIDmode, value, x));
10771
10772   large = gen_rtx_REG (V4SImode, REGNO (large));
10773   emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
10774
10775   x = gen_rtx_REG (V4SImode, REGNO (value));
10776   if (vecmode == V4SFmode)
10777     emit_insn (gen_sse2_cvttps2dq (x, value));
10778   else
10779     emit_insn (gen_sse2_cvttpd2dq (x, value));
10780   value = x;
10781
10782   emit_insn (gen_xorv4si3 (value, value, large));
10783 }
10784
10785 /* Convert an unsigned DImode value into a DFmode, using only SSE.
10786    Expects the 64-bit DImode to be supplied in a pair of integral
10787    registers.  Requires SSE2; will use SSE3 if available.  For x86_32,
10788    -mfpmath=sse, !optimize_size only.  */
10789
10790 void
10791 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
10792 {
10793   REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
10794   rtx int_xmm, fp_xmm;
10795   rtx biases, exponents;
10796   rtx x;
10797
10798   int_xmm = gen_reg_rtx (V4SImode);
10799   if (TARGET_INTER_UNIT_MOVES)
10800     emit_insn (gen_movdi_to_sse (int_xmm, input));
10801   else if (TARGET_SSE_SPLIT_REGS)
10802     {
10803       emit_insn (gen_rtx_CLOBBER (VOIDmode, int_xmm));
10804       emit_move_insn (gen_lowpart (DImode, int_xmm), input);
10805     }
10806   else
10807     {
10808       x = gen_reg_rtx (V2DImode);
10809       ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
10810       emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
10811     }
10812
10813   x = gen_rtx_CONST_VECTOR (V4SImode,
10814                             gen_rtvec (4, GEN_INT (0x43300000UL),
10815                                        GEN_INT (0x45300000UL),
10816                                        const0_rtx, const0_rtx));
10817   exponents = validize_mem (force_const_mem (V4SImode, x));
10818
10819   /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
10820   emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents));
10821
10822   /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
10823      yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
10824      Similarly (0x45300000UL ## fp_value_hi_xmm) yields
10825      (0x1.0p84 + double(fp_value_hi_xmm)).
10826      Note these exponents differ by 32.  */
10827
10828   fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
10829
10830   /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
10831      in [0,2**32-1] and [0]+[2**32,2**64-1] respectively.  */
10832   real_ldexp (&bias_lo_rvt, &dconst1, 52);
10833   real_ldexp (&bias_hi_rvt, &dconst1, 84);
10834   biases = const_double_from_real_value (bias_lo_rvt, DFmode);
10835   x = const_double_from_real_value (bias_hi_rvt, DFmode);
10836   biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
10837   biases = validize_mem (force_const_mem (V2DFmode, biases));
10838   emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
10839
10840   /* Add the upper and lower DFmode values together.  */
10841   if (TARGET_SSE3)
10842     emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
10843   else
10844     {
10845       x = copy_to_mode_reg (V2DFmode, fp_xmm);
10846       emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm));
10847       emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
10848     }
10849
10850   ix86_expand_vector_extract (false, target, fp_xmm, 0);
10851 }
10852
10853 /* Convert an unsigned SImode value into a DFmode.  Only currently used
10854    for SSE, but applicable anywhere.  */
10855
10856 void
10857 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
10858 {
10859   REAL_VALUE_TYPE TWO31r;
10860   rtx x, fp;
10861
10862   x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
10863                            NULL, 1, OPTAB_DIRECT);
10864
10865   fp = gen_reg_rtx (DFmode);
10866   emit_insn (gen_floatsidf2 (fp, x));
10867
10868   real_ldexp (&TWO31r, &dconst1, 31);
10869   x = const_double_from_real_value (TWO31r, DFmode);
10870
10871   x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
10872   if (x != target)
10873     emit_move_insn (target, x);
10874 }
10875
10876 /* Convert a signed DImode value into a DFmode.  Only used for SSE in
10877    32-bit mode; otherwise we have a direct convert instruction.  */
10878
10879 void
10880 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
10881 {
10882   REAL_VALUE_TYPE TWO32r;
10883   rtx fp_lo, fp_hi, x;
10884
10885   fp_lo = gen_reg_rtx (DFmode);
10886   fp_hi = gen_reg_rtx (DFmode);
10887
10888   emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
10889
10890   real_ldexp (&TWO32r, &dconst1, 32);
10891   x = const_double_from_real_value (TWO32r, DFmode);
10892   fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
10893
10894   ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
10895
10896   x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
10897                            0, OPTAB_DIRECT);
10898   if (x != target)
10899     emit_move_insn (target, x);
10900 }
10901
10902 /* Convert an unsigned SImode value into a SFmode, using only SSE.
10903    For x86_32, -mfpmath=sse, !optimize_size only.  */
10904 void
10905 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
10906 {
10907   REAL_VALUE_TYPE ONE16r;
10908   rtx fp_hi, fp_lo, int_hi, int_lo, x;
10909
10910   real_ldexp (&ONE16r, &dconst1, 16);
10911   x = const_double_from_real_value (ONE16r, SFmode);
10912   int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
10913                                       NULL, 0, OPTAB_DIRECT);
10914   int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
10915                                       NULL, 0, OPTAB_DIRECT);
10916   fp_hi = gen_reg_rtx (SFmode);
10917   fp_lo = gen_reg_rtx (SFmode);
10918   emit_insn (gen_floatsisf2 (fp_hi, int_hi));
10919   emit_insn (gen_floatsisf2 (fp_lo, int_lo));
10920   fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
10921                                0, OPTAB_DIRECT);
10922   fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
10923                                0, OPTAB_DIRECT);
10924   if (!rtx_equal_p (target, fp_hi))
10925     emit_move_insn (target, fp_hi);
10926 }
10927
10928 /* A subroutine of ix86_build_signbit_mask_vector.  If VECT is true,
10929    then replicate the value for all elements of the vector
10930    register.  */
10931
10932 rtx
10933 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
10934 {
10935   rtvec v;
10936   switch (mode)
10937     {
10938     case SImode:
10939       gcc_assert (vect);
10940       v = gen_rtvec (4, value, value, value, value);
10941       return gen_rtx_CONST_VECTOR (V4SImode, v);
10942
10943     case DImode:
10944       gcc_assert (vect);
10945       v = gen_rtvec (2, value, value);
10946       return gen_rtx_CONST_VECTOR (V2DImode, v);
10947
10948     case SFmode:
10949       if (vect)
10950         v = gen_rtvec (4, value, value, value, value);
10951       else
10952         v = gen_rtvec (4, value, CONST0_RTX (SFmode),
10953                        CONST0_RTX (SFmode), CONST0_RTX (SFmode));
10954       return gen_rtx_CONST_VECTOR (V4SFmode, v);
10955
10956     case DFmode:
10957       if (vect)
10958         v = gen_rtvec (2, value, value);
10959       else
10960         v = gen_rtvec (2, value, CONST0_RTX (DFmode));
10961       return gen_rtx_CONST_VECTOR (V2DFmode, v);
10962
10963     default:
10964       gcc_unreachable ();
10965     }
10966 }
10967
10968 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
10969    and ix86_expand_int_vcond.  Create a mask for the sign bit in MODE
10970    for an SSE register.  If VECT is true, then replicate the mask for
10971    all elements of the vector register.  If INVERT is true, then create
10972    a mask excluding the sign bit.  */
10973
10974 rtx
10975 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
10976 {
10977   enum machine_mode vec_mode, imode;
10978   HOST_WIDE_INT hi, lo;
10979   int shift = 63;
10980   rtx v;
10981   rtx mask;
10982
10983   /* Find the sign bit, sign extended to 2*HWI.  */
10984   switch (mode)
10985     {
10986     case SImode:
10987     case SFmode:
10988       imode = SImode;
10989       vec_mode = (mode == SImode) ? V4SImode : V4SFmode;
10990       lo = 0x80000000, hi = lo < 0;
10991       break;
10992
10993     case DImode:
10994     case DFmode:
10995       imode = DImode;
10996       vec_mode = (mode == DImode) ? V2DImode : V2DFmode;
10997       if (HOST_BITS_PER_WIDE_INT >= 64)
10998         lo = (HOST_WIDE_INT)1 << shift, hi = -1;
10999       else
11000         lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
11001       break;
11002
11003     case TImode:
11004     case TFmode:
11005       imode = TImode;
11006       vec_mode = VOIDmode;
11007       gcc_assert (HOST_BITS_PER_WIDE_INT >= 64);
11008       lo = 0, hi = (HOST_WIDE_INT)1 << shift;
11009      break;
11010
11011     default:
11012       gcc_unreachable ();
11013     }
11014
11015   if (invert)
11016     lo = ~lo, hi = ~hi;
11017
11018   /* Force this value into the low part of a fp vector constant.  */
11019   mask = immed_double_const (lo, hi, imode);
11020   mask = gen_lowpart (mode, mask);
11021
11022   if (vec_mode == VOIDmode)
11023     return force_reg (mode, mask);
11024
11025   v = ix86_build_const_vector (mode, vect, mask);
11026   return force_reg (vec_mode, v);
11027 }
11028
11029 /* Generate code for floating point ABS or NEG.  */
11030
11031 void
11032 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
11033                                 rtx operands[])
11034 {
11035   rtx mask, set, use, clob, dst, src;
11036   bool use_sse = false;
11037   bool vector_mode = VECTOR_MODE_P (mode);
11038   enum machine_mode elt_mode = mode;
11039
11040   if (vector_mode)
11041     {
11042       elt_mode = GET_MODE_INNER (mode);
11043       use_sse = true;
11044     }
11045   else if (mode == TFmode)
11046     use_sse = true;
11047   else if (TARGET_SSE_MATH)
11048     use_sse = SSE_FLOAT_MODE_P (mode);
11049
11050   /* NEG and ABS performed with SSE use bitwise mask operations.
11051      Create the appropriate mask now.  */
11052   if (use_sse)
11053     mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
11054   else
11055     mask = NULL_RTX;
11056
11057   dst = operands[0];
11058   src = operands[1];
11059
11060   if (vector_mode)
11061     {
11062       set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
11063       set = gen_rtx_SET (VOIDmode, dst, set);
11064       emit_insn (set);
11065     }
11066   else
11067     {
11068       set = gen_rtx_fmt_e (code, mode, src);
11069       set = gen_rtx_SET (VOIDmode, dst, set);
11070       if (mask)
11071         {
11072           use = gen_rtx_USE (VOIDmode, mask);
11073           clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
11074           emit_insn (gen_rtx_PARALLEL (VOIDmode,
11075                                        gen_rtvec (3, set, use, clob)));
11076         }
11077       else
11078         emit_insn (set);
11079     }
11080 }
11081
11082 /* Expand a copysign operation.  Special case operand 0 being a constant.  */
11083
11084 void
11085 ix86_expand_copysign (rtx operands[])
11086 {
11087   enum machine_mode mode, vmode;
11088   rtx dest, op0, op1, mask, nmask;
11089
11090   dest = operands[0];
11091   op0 = operands[1];
11092   op1 = operands[2];
11093
11094   mode = GET_MODE (dest);
11095   vmode = mode == SFmode ? V4SFmode : V2DFmode;
11096
11097   if (GET_CODE (op0) == CONST_DOUBLE)
11098     {
11099       rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
11100
11101       if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
11102         op0 = simplify_unary_operation (ABS, mode, op0, mode);
11103
11104       if (mode == SFmode || mode == DFmode)
11105         {
11106           if (op0 == CONST0_RTX (mode))
11107             op0 = CONST0_RTX (vmode);
11108           else
11109             {
11110               rtvec v;
11111
11112               if (mode == SFmode)
11113                 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
11114                                CONST0_RTX (SFmode), CONST0_RTX (SFmode));
11115               else
11116                 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
11117               op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
11118             }
11119         }
11120
11121       mask = ix86_build_signbit_mask (mode, 0, 0);
11122
11123       if (mode == SFmode)
11124         copysign_insn = gen_copysignsf3_const;
11125       else if (mode == DFmode)
11126         copysign_insn = gen_copysigndf3_const;
11127       else
11128         copysign_insn = gen_copysigntf3_const;
11129
11130         emit_insn (copysign_insn (dest, op0, op1, mask));
11131     }
11132   else
11133     {
11134       rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
11135
11136       nmask = ix86_build_signbit_mask (mode, 0, 1);
11137       mask = ix86_build_signbit_mask (mode, 0, 0);
11138
11139       if (mode == SFmode)
11140         copysign_insn = gen_copysignsf3_var;
11141       else if (mode == DFmode)
11142         copysign_insn = gen_copysigndf3_var;
11143       else
11144         copysign_insn = gen_copysigntf3_var;
11145
11146       emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
11147     }
11148 }
11149
11150 /* Deconstruct a copysign operation into bit masks.  Operand 0 is known to
11151    be a constant, and so has already been expanded into a vector constant.  */
11152
11153 void
11154 ix86_split_copysign_const (rtx operands[])
11155 {
11156   enum machine_mode mode, vmode;
11157   rtx dest, op0, op1, mask, x;
11158
11159   dest = operands[0];
11160   op0 = operands[1];
11161   op1 = operands[2];
11162   mask = operands[3];
11163
11164   mode = GET_MODE (dest);
11165   vmode = GET_MODE (mask);
11166
11167   dest = simplify_gen_subreg (vmode, dest, mode, 0);
11168   x = gen_rtx_AND (vmode, dest, mask);
11169   emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11170
11171   if (op0 != CONST0_RTX (vmode))
11172     {
11173       x = gen_rtx_IOR (vmode, dest, op0);
11174       emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11175     }
11176 }
11177
11178 /* Deconstruct a copysign operation into bit masks.  Operand 0 is variable,
11179    so we have to do two masks.  */
11180
11181 void
11182 ix86_split_copysign_var (rtx operands[])
11183 {
11184   enum machine_mode mode, vmode;
11185   rtx dest, scratch, op0, op1, mask, nmask, x;
11186
11187   dest = operands[0];
11188   scratch = operands[1];
11189   op0 = operands[2];
11190   op1 = operands[3];
11191   nmask = operands[4];
11192   mask = operands[5];
11193
11194   mode = GET_MODE (dest);
11195   vmode = GET_MODE (mask);
11196
11197   if (rtx_equal_p (op0, op1))
11198     {
11199       /* Shouldn't happen often (it's useless, obviously), but when it does
11200          we'd generate incorrect code if we continue below.  */
11201       emit_move_insn (dest, op0);
11202       return;
11203     }
11204
11205   if (REG_P (mask) && REGNO (dest) == REGNO (mask))     /* alternative 0 */
11206     {
11207       gcc_assert (REGNO (op1) == REGNO (scratch));
11208
11209       x = gen_rtx_AND (vmode, scratch, mask);
11210       emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
11211
11212       dest = mask;
11213       op0 = simplify_gen_subreg (vmode, op0, mode, 0);
11214       x = gen_rtx_NOT (vmode, dest);
11215       x = gen_rtx_AND (vmode, x, op0);
11216       emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11217     }
11218   else
11219     {
11220       if (REGNO (op1) == REGNO (scratch))               /* alternative 1,3 */
11221         {
11222           x = gen_rtx_AND (vmode, scratch, mask);
11223         }
11224       else                                              /* alternative 2,4 */
11225         {
11226           gcc_assert (REGNO (mask) == REGNO (scratch));
11227           op1 = simplify_gen_subreg (vmode, op1, mode, 0);
11228           x = gen_rtx_AND (vmode, scratch, op1);
11229         }
11230       emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
11231
11232       if (REGNO (op0) == REGNO (dest))                  /* alternative 1,2 */
11233         {
11234           dest = simplify_gen_subreg (vmode, op0, mode, 0);
11235           x = gen_rtx_AND (vmode, dest, nmask);
11236         }
11237       else                                              /* alternative 3,4 */
11238         {
11239           gcc_assert (REGNO (nmask) == REGNO (dest));
11240           dest = nmask;
11241           op0 = simplify_gen_subreg (vmode, op0, mode, 0);
11242           x = gen_rtx_AND (vmode, dest, op0);
11243         }
11244       emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11245     }
11246
11247   x = gen_rtx_IOR (vmode, dest, scratch);
11248   emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11249 }
11250
11251 /* Return TRUE or FALSE depending on whether the first SET in INSN
11252    has source and destination with matching CC modes, and that the
11253    CC mode is at least as constrained as REQ_MODE.  */
11254
11255 int
11256 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
11257 {
11258   rtx set;
11259   enum machine_mode set_mode;
11260
11261   set = PATTERN (insn);
11262   if (GET_CODE (set) == PARALLEL)
11263     set = XVECEXP (set, 0, 0);
11264   gcc_assert (GET_CODE (set) == SET);
11265   gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
11266
11267   set_mode = GET_MODE (SET_DEST (set));
11268   switch (set_mode)
11269     {
11270     case CCNOmode:
11271       if (req_mode != CCNOmode
11272           && (req_mode != CCmode
11273               || XEXP (SET_SRC (set), 1) != const0_rtx))
11274         return 0;
11275       break;
11276     case CCmode:
11277       if (req_mode == CCGCmode)
11278         return 0;
11279       /* FALLTHRU */
11280     case CCGCmode:
11281       if (req_mode == CCGOCmode || req_mode == CCNOmode)
11282         return 0;
11283       /* FALLTHRU */
11284     case CCGOCmode:
11285       if (req_mode == CCZmode)
11286         return 0;
11287       /* FALLTHRU */
11288     case CCZmode:
11289       break;
11290
11291     default:
11292       gcc_unreachable ();
11293     }
11294
11295   return (GET_MODE (SET_SRC (set)) == set_mode);
11296 }
11297
11298 /* Generate insn patterns to do an integer compare of OPERANDS.

         Emit a SET of the flags register (FLAGS_REG) to
         (compare OP0 OP1), using the CC mode that SELECT_CC_MODE picks
         for CODE and the operands.  Return the rtx (CODE flags 0), which
         the caller places in the instruction that consumes the flags.  */
11299
11300 static rtx
11301 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
11302 {
11303   enum machine_mode cmpmode;
11304   rtx tmp, flags;
11305
11306   cmpmode = SELECT_CC_MODE (code, op0, op1);
11307   flags = gen_rtx_REG (cmpmode, FLAGS_REG);
11308
11309   /* This is very simple, but making the interface the same as in the
11310      FP case makes the rest of the code easier.  */
11311   tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
11312   emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
11313
11314   /* Return the test that should be put into the flags user, i.e.
11315      the bcc, scc, or cmov instruction.  */
11316   return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
11317 }
11318
11319 /* Figure out whether to use ordered or unordered fp comparisons.
11320    Return the appropriate mode to use.  CODE is currently ignored:
         the result is CCFPUmode when TARGET_IEEE_FP is set and CCFPmode
         otherwise.  */
11321
11322 enum machine_mode
11323 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
11324 {
11325   /* ??? In order to make all comparisons reversible, we do all comparisons
11326      non-trapping when compiling for IEEE.  Once gcc is able to distinguish
11327      all forms trapping and nontrapping comparisons, we can make inequality
11328      comparisons trapping again, since it results in better code when using
11329      FCOM based compares.  */
11330   return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
11331 }
11332
/* Return the condition-code mode to use when comparing OP0 with OP1
   using comparison CODE.  Scalar floating point operands (decimal
   float is asserted not to occur) get their mode from
   ix86_fp_compare_mode.  For everything else the mode is chosen from
   the flags that CODE needs, as noted on each case below.  Any CODE
   not listed in the switch is a bug (gcc_unreachable).  */
11333 enum machine_mode
11334 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
11335 {
11336   enum machine_mode mode = GET_MODE (op0);
11337
11338   if (SCALAR_FLOAT_MODE_P (mode))
11339     {
11340       gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
11341       return ix86_fp_compare_mode (code);
11342     }
11343
11344   switch (code)
11345     {
11346       /* Only zero flag is needed.  */
11347     case EQ:                    /* ZF=1 */
11348     case NE:                    /* ZF=0 */
11349       return CCZmode;
11350       /* Codes needing carry flag.  */
11351     case GEU:                   /* CF=0 */
11352     case LTU:                   /* CF=1 */
11353       /* Detect overflow checks.  They need just the carry flag.  */
11354       if (GET_CODE (op0) == PLUS
11355           && rtx_equal_p (op1, XEXP (op0, 0)))
11356         return CCCmode;
11357       else
11358         return CCmode;
11359     case GTU:                   /* CF=0 & ZF=0 */
11360     case LEU:                   /* CF=1 | ZF=1 */
11361       /* Detect overflow checks.  They need just the carry flag.  */
11362       if (GET_CODE (op0) == MINUS
11363           && rtx_equal_p (op1, XEXP (op0, 0)))
11364         return CCCmode;
11365       else
11366         return CCmode;
11367       /* Codes possibly doable only with sign flag when
11368          comparing against zero.  */
11369     case GE:                    /* SF=OF   or   SF=0 */
11370     case LT:                    /* SF<>OF  or   SF=1 */
11371       if (op1 == const0_rtx)
11372         return CCGOCmode;
11373       else
11374         /* For other cases Carry flag is not required.  */
11375         return CCGCmode;
11376       /* Codes doable only with sign flag when comparing
11377          against zero, but we miss jump instruction for it
11378          so we need to use relational tests against overflow
11379          that thus needs to be zero.  */
11380     case GT:                    /* ZF=0 & SF=OF */
11381     case LE:                    /* ZF=1 | SF<>OF */
11382       if (op1 == const0_rtx)
11383         return CCNOmode;
11384       else
11385         return CCGCmode;
11386       /* The strcmp pattern does (use flags), and combine may ask us
11387          for the proper mode.  */
11388     case USE:
11389       return CCmode;
11390     default:
11391       gcc_unreachable ();
11392     }
11393 }
11394
11395 /* Return the fixed registers used for condition codes.  Stores
         FLAGS_REG in *P1 and FPSR_REG in *P2, and always returns true.  */
11396
11397 static bool
11398 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
11399 {
11400   *p1 = FLAGS_REG;
11401   *p2 = FPSR_REG;
11402   return true;
11403 }
11404
11405 /* If two condition code modes are compatible, return a condition code
11406    mode which is compatible with both.  Otherwise, return
11407    VOIDmode.

         Identical modes are returned unchanged.  If either mode is not of
         class MODE_CC the result is VOIDmode.  The pair CCGCmode and
         CCGOCmode (in either order) yields CCGCmode.  Any other pair
         drawn from the integer flag modes listed in the switch yields
         CCmode.  CCFPmode and CCFPUmode are compatible only with
         themselves.  An M1 outside the listed modes is treated as a bug
         (gcc_unreachable); an M2 outside them yields VOIDmode.  */
11408
11409 static enum machine_mode
11410 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
11411 {
11412   if (m1 == m2)
11413     return m1;
11414
11415   if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
11416     return VOIDmode;
11417
11418   if ((m1 == CCGCmode && m2 == CCGOCmode)
11419       || (m1 == CCGOCmode && m2 == CCGCmode))
11420     return CCGCmode;
11421
11422   switch (m1)
11423     {
11424     default:
11425       gcc_unreachable ();
11426
11427     case CCmode:
11428     case CCGCmode:
11429     case CCGOCmode:
11430     case CCNOmode:
11431     case CCAmode:
11432     case CCCmode:
11433     case CCOmode:
11434     case CCSmode:
11435     case CCZmode:
            /* Every path through the inner switch returns, so control never
               reaches the floating point cases below from here.  */
11436       switch (m2)
11437         {
11438         default:
11439           return VOIDmode;
11440
11441         case CCmode:
11442         case CCGCmode:
11443         case CCGOCmode:
11444         case CCNOmode:
11445         case CCAmode:
11446         case CCCmode:
11447         case CCOmode:
11448         case CCSmode:
11449         case CCZmode:
11450           return CCmode;
11451         }
11452
11453     case CCFPmode:
11454     case CCFPUmode:
11455       /* These are only compatible with themselves, which we already
11456          checked above.  */
11457       return VOIDmode;
11458     }
11459 }
11460
11461 /* Split comparison code CODE into comparisons we can do using branch
11462    instructions.  BYPASS_CODE is the comparison code for a branch that
11463    will branch around FIRST_CODE and SECOND_CODE.  If one of the
11464    branches is not required, its code is set to UNKNOWN.
11465    We never require more than two branches.

         When TARGET_IEEE_FP is not set, BYPASS_CODE and SECOND_CODE are
         always UNKNOWN on return and only FIRST_CODE is meaningful.  */
11466
11467 void
11468 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
11469                           enum rtx_code *first_code,
11470                           enum rtx_code *second_code)
11471 {
11472   *first_code = code;
11473   *bypass_code = UNKNOWN;
11474   *second_code = UNKNOWN;
11475
11476   /* The fcomi comparison sets flags as follows:
11477
11478      cmp    ZF PF CF
11479      >      0  0  0
11480      <      0  0  1
11481      =      1  0  0
11482      un     1  1  1 */
11483
11484   switch (code)
11485     {
          /* These codes map directly onto a single flag test.  */
11486     case GT:                    /* GTU - CF=0 & ZF=0 */
11487     case GE:                    /* GEU - CF=0 */
11488     case ORDERED:               /* PF=0 */
11489     case UNORDERED:             /* PF=1 */
11490     case UNEQ:                  /* EQ - ZF=1 */
11491     case UNLT:                  /* LTU - CF=1 */
11492     case UNLE:                  /* LEU - CF=1 | ZF=1 */
11493     case LTGT:                  /* NE - ZF=0 */
11494       break;
11495     case LT:                    /* LTU - CF=1 - fails on unordered */
11496       *first_code = UNLT;
11497       *bypass_code = UNORDERED;
11498       break;
11499     case LE:                    /* LEU - CF=1 | ZF=1 - fails on unordered */
11500       *first_code = UNLE;
11501       *bypass_code = UNORDERED;
11502       break;
11503     case EQ:                    /* EQ - ZF=1 - fails on unordered */
11504       *first_code = UNEQ;
11505       *bypass_code = UNORDERED;
11506       break;
11507     case NE:                    /* NE - ZF=0 - fails on unordered */
11508       *first_code = LTGT;
11509       *second_code = UNORDERED;
11510       break;
11511     case UNGE:                  /* GEU - CF=0 - fails on unordered */
11512       *first_code = GE;
11513       *second_code = UNORDERED;
11514       break;
11515     case UNGT:                  /* GTU - CF=0 & ZF=0 - fails on unordered */
11516       *first_code = GT;
11517       *second_code = UNORDERED;
11518       break;
11519     default:
11520       gcc_unreachable ();
11521     }
        /* Without IEEE conformance the extra unordered test is dropped,
           leaving only FIRST_CODE as rewritten above.  */
11522   if (!TARGET_IEEE_FP)
11523     {
11524       *second_code = UNKNOWN;
11525       *bypass_code = UNKNOWN;
11526     }
11527 }
11528
11529 /* Return the cost of a comparison done with fcom plus arithmetic
11530    operations on AX.  All of the following cost functions use the number
11531    of instructions as the cost metric.  In future this should be tweaked
11532    to compute bytes for optimize_size and to take into account the
         performance of various instructions on various CPUs.

         Without TARGET_IEEE_FP the cost is 4 for every CODE; with it the
         cost is 4, 5 or 6 depending on CODE, and a CODE not listed in the
         switch is a bug (gcc_unreachable).  */
11533 static int
11534 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
11535 {
11536   if (!TARGET_IEEE_FP)
11537     return 4;
11538   /* The cost of code output by ix86_expand_fp_compare.  */
11539   switch (code)
11540     {
11541     case UNLE:
11542     case UNLT:
11543     case LTGT:
11544     case GT:
11545     case GE:
11546     case UNORDERED:
11547     case ORDERED:
11548     case UNEQ:
11549       return 4;
11550       break;
11551     case LT:
11552     case NE:
11553     case EQ:
11554     case UNGE:
11555       return 5;
11556       break;
11557     case LE:
11558     case UNGT:
11559       return 6;
11560       break;
11561     default:
11562       gcc_unreachable ();
11563     }
11564 }
11565
11566 /* Return cost of comparison done using fcomi operation.
11567    See ix86_fp_comparison_arithmetics_cost for the metrics.
         The cost is 2, plus 1 when ix86_fp_comparison_codes reports that
         CODE needs a bypass or second test.  */
11568 static int
11569 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
11570 {
11571   enum rtx_code bypass_code, first_code, second_code;
11572   /* Return arbitrarily high cost when instruction is not supported - this
11573      prevents gcc from using it.  */
11574   if (!TARGET_CMOVE)
11575     return 1024;
11576   ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11577   return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
11578 }
11579
11580 /* Return cost of comparison done using sahf operation.
11581    See ix86_fp_comparison_arithmetics_cost for the metrics.
         The cost is 3, plus 1 when ix86_fp_comparison_codes reports that
         CODE needs a bypass or second test.  */
11582 static int
11583 ix86_fp_comparison_sahf_cost (enum rtx_code code)
11584 {
11585   enum rtx_code bypass_code, first_code, second_code;
11586   /* Return arbitrarily high cost when instruction is not preferred - this
11587      prevents gcc from using it.  */
11588   if (!(TARGET_SAHF && (TARGET_USE_SAHF || optimize_size)))
11589     return 1024;
11590   ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11591   return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
11592 }
11593
11594 /* Compute cost of the comparison done using any method.
11595    See ix86_fp_comparison_arithmetics_cost for the metrics.
         The result is the minimum of the arithmetics, sahf and fcomi
         costs for CODE.  */
11596 static int
11597 ix86_fp_comparison_cost (enum rtx_code code)
11598 {
11599   int fcomi_cost, sahf_cost;
11600   int min;
11601
11602   fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
11603   sahf_cost = ix86_fp_comparison_sahf_cost (code);
11604
        /* Start from the arithmetics sequence, then take any cheaper
           alternative.  */
11605   min = ix86_fp_comparison_arithmetics_cost (code);
11606   if (min > sahf_cost)
11607     min = sahf_cost;
11608   if (min > fcomi_cost)
11609     min = fcomi_cost;
11610   return min;
11611 }
11612
11613 /* Return true if we should use an FCOMI instruction for this
11614    fp comparison.  That is the case when the fcomi cost equals the
         cheapest available cost either for CODE itself or for CODE with
         its operands swapped.  */
11615
11616 int
11617 ix86_use_fcomi_compare (enum rtx_code code)
11618 {
11619   enum rtx_code swapped_code = swap_condition (code);
11620
11621   return ((ix86_fp_comparison_cost (code)
11622            == ix86_fp_comparison_fcomi_cost (code))
11623           || (ix86_fp_comparison_cost (swapped_code)
11624               == ix86_fp_comparison_fcomi_cost (swapped_code)));
11625 }
11626
11627 /* Swap, force into registers, or otherwise massage the two operands
11628    to a fp comparison.  The operands are updated in place; the new
11629    comparison code is returned.

         CODE is the requested comparison; *POP0 and *POP1 are its operands.
         Whenever the operands are exchanged, CODE is replaced by
         swap_condition (CODE), so the returned code always matches the
         operand order stored back through POP0 and POP1.  */
11630
11631 static enum rtx_code
11632 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
11633 {
11634   enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
11635   rtx op0 = *pop0, op1 = *pop1;
11636   enum machine_mode op_mode = GET_MODE (op0);
11637   int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
11638
11639   /* All of the unordered compare instructions only work on registers.
11640      The same is true of the fcomi compare instructions.  The XFmode
11641      compare instructions require registers except when comparing
11642      against zero or when converting operand 1 from fixed point to
11643      floating point.  */
11644
11645   if (!is_sse
11646       && (fpcmp_mode == CCFPUmode
11647           || (op_mode == XFmode
11648               && ! (standard_80387_constant_p (op0) == 1
11649                     || standard_80387_constant_p (op1) == 1)
11650               && GET_CODE (op1) != FLOAT)
11651           || ix86_use_fcomi_compare (code)))
11652     {
11653       op0 = force_reg (op_mode, op0);
11654       op1 = force_reg (op_mode, op1);
11655     }
11656   else
11657     {
11658       /* %%% We only allow op1 in memory; op0 must be st(0).  So swap
11659          things around if they appear profitable, otherwise force op0
11660          into a register.  */
11661
11662       if (standard_80387_constant_p (op0) == 0
11663           || (MEM_P (op0)
11664               && ! (standard_80387_constant_p (op1) == 0
11665                     || MEM_P (op1))))
11666         {
11667           rtx tmp;
11668           tmp = op0, op0 = op1, op1 = tmp;
11669           code = swap_condition (code);
11670         }
11671
11672       if (!REG_P (op0))
11673         op0 = force_reg (op_mode, op0);
11674
            /* NOTE(review): the meaning of the standard_80387_constant_p
               return values is not visible here.  Presumably 0 means "not a
               constant the 387 can load directly" and 1 is the zero constant
               mentioned above -- confirm against its definition.  */
11675       if (CONSTANT_P (op1))
11676         {
11677           int tmp = standard_80387_constant_p (op1);
11678           if (tmp == 0)
11679             op1 = validize_mem (force_const_mem (op_mode, op1));
11680           else if (tmp == 1)
11681             {
11682               if (TARGET_CMOVE)
11683                 op1 = force_reg (op_mode, op1);
11684             }
11685           else
11686             op1 = force_reg (op_mode, op1);
11687         }
11688     }
11689
11690   /* Try to rearrange the comparison to make it cheaper.  */
11691   if (ix86_fp_comparison_cost (code)
11692       > ix86_fp_comparison_cost (swap_condition (code))
11693       && (REG_P (op1) || can_create_pseudo_p ()))
11694     {
11695       rtx tmp;
11696       tmp = op0, op0 = op1, op1 = tmp;
11697       code = swap_condition (code);
            /* After the swap the new op0 may be a non-register, which is
               only reachable here when pseudos can still be created.  */
11698       if (!REG_P (op0))
11699         op0 = force_reg (op_mode, op0);
11700     }
11701
11702   *pop0 = op0;
11703   *pop1 = op1;
11704   return code;
11705 }
11706
11707 /* Convert the comparison codes we use to represent FP comparisons to the
11708    integer code that will result in the proper branch.  Return UNKNOWN if
11709    no such code is available.  ORDERED and UNORDERED are returned
         unchanged.  */
11710
11711 enum rtx_code
11712 ix86_fp_compare_code_to_integer (enum rtx_code code)
11713 {
11714   switch (code)
11715     {
11716     case GT:
11717       return GTU;
11718     case GE:
11719       return GEU;
11720     case ORDERED:
11721     case UNORDERED:
11722       return code;
11723       break;
11724     case UNEQ:
11725       return EQ;
11726       break;
11727     case UNLT:
11728       return LTU;
11729       break;
11730     case UNLE:
11731       return LEU;
11732       break;
11733     case LTGT:
11734       return NE;
11735       break;
11736     default:
11737       return UNKNOWN;
11738     }
11739 }
11740
11741 /* Generate insn patterns to do a floating point compare of OPERANDS.

         CODE is the comparison and OP0/OP1 its operands; they are first
         massaged by ix86_prepare_fp_compare_args, which may swap them and
         adjust CODE.  SCRATCH, if non-null, is used as the scratch register
         where one is needed; otherwise a new HImode pseudo is created.
         SECOND_TEST and BYPASS_TEST, when non-null, are set to NULL_RTX and
         then, on the fcomi/sahf path only, to the extra flag tests that
         ix86_fp_comparison_codes asks for.  That path is taken only if it is
         cheaper than the arithmetics sequence and the caller supplied a
         pointer for every extra test that is required.

         The return value is the comparison rtx (against const0_rtx) of the
         flags register that the flags consumer should use.  */
11742
11743 static rtx
11744 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
11745                         rtx *second_test, rtx *bypass_test)
11746 {
11747   enum machine_mode fpcmp_mode, intcmp_mode;
11748   rtx tmp, tmp2;
        /* Note that the cost is computed for CODE as passed in, before
           ix86_prepare_fp_compare_args has had a chance to change it.  */
11749   int cost = ix86_fp_comparison_cost (code);
11750   enum rtx_code bypass_code, first_code, second_code;
11751
11752   fpcmp_mode = ix86_fp_compare_mode (code);
11753   code = ix86_prepare_fp_compare_args (code, &op0, &op1);
11754
11755   if (second_test)
11756     *second_test = NULL_RTX;
11757   if (bypass_test)
11758     *bypass_test = NULL_RTX;
11759
11760   ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11761
11762   /* Do fcomi/sahf based test when profitable.  */
11763   if (ix86_fp_comparison_arithmetics_cost (code) > cost
11764       && (bypass_code == UNKNOWN || bypass_test)
11765       && (second_code == UNKNOWN || second_test))
11766     {
11767       tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
11768       tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
11769                          tmp);
11770       if (TARGET_CMOVE)
11771         emit_insn (tmp);
11772       else
11773         {
11774           gcc_assert (TARGET_SAHF);
11775
                /* The non-fcomi form also clobbers an HImode scratch.  */
11776           if (!scratch)
11777             scratch = gen_reg_rtx (HImode);
11778           tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
11779
11780           emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
11781         }
11782
11783       /* The FP codes work out to act like unsigned.  */
11784       intcmp_mode = fpcmp_mode;
11785       code = first_code;
            /* The guard above ensures the pointers dereferenced here are
               non-null whenever the corresponding code is needed.  */
11786       if (bypass_code != UNKNOWN)
11787         *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
11788                                        gen_rtx_REG (intcmp_mode, FLAGS_REG),
11789                                        const0_rtx);
11790       if (second_code != UNKNOWN)
11791         *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
11792                                        gen_rtx_REG (intcmp_mode, FLAGS_REG),
11793                                        const0_rtx);
11794     }
11795   else
11796     {
11797       /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first.  */
11798       tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
11799       tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
11800       if (!scratch)
11801         scratch = gen_reg_rtx (HImode);
11802       emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
11803
11804       /* In the unordered case, we have to check C2 for NaN's, which
11805          doesn't happen to work out to anything nice combination-wise.
11806          So do some bit twiddling on the value we've got in AH to come
11807          up with an appropriate set of condition codes.  */
11808
            /* Each case emits the bit test on SCRATCH and rewrites CODE to
               the integer condition to apply to the resulting flags;
               INTCMP_MODE is CCNOmode unless a case overrides it.
               NOTE(review): 0x04 is the mask used for the UNORDERED/ORDERED
               tests below, so it is the C2 bit; 0x01 and 0x40 are presumably
               C0 and C3 -- confirm against the x87 status word layout.  */
11809       intcmp_mode = CCNOmode;
11810       switch (code)
11811         {
11812         case GT:
11813         case UNGT:
11814           if (code == GT || !TARGET_IEEE_FP)
11815             {
11816               emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
11817               code = EQ;
11818             }
11819           else
11820             {
11821               emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11822               emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
11823               emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
11824               intcmp_mode = CCmode;
11825               code = GEU;
11826             }
11827           break;
11828         case LT:
11829         case UNLT:
11830           if (code == LT && TARGET_IEEE_FP)
11831             {
11832               emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11833               emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
11834               intcmp_mode = CCmode;
11835               code = EQ;
11836             }
11837           else
11838             {
11839               emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
11840               code = NE;
11841             }
11842           break;
11843         case GE:
11844         case UNGE:
11845           if (code == GE || !TARGET_IEEE_FP)
11846             {
11847               emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
11848               code = EQ;
11849             }
11850           else
11851             {
11852               emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11853               emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
11854                                              GEN_INT (0x01)));
11855               code = NE;
11856             }
11857           break;
11858         case LE:
11859         case UNLE:
11860           if (code == LE && TARGET_IEEE_FP)
11861             {
11862               emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11863               emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
11864               emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
11865               intcmp_mode = CCmode;
11866               code = LTU;
11867             }
11868           else
11869             {
11870               emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
11871               code = NE;
11872             }
11873           break;
11874         case EQ:
11875         case UNEQ:
11876           if (code == EQ && TARGET_IEEE_FP)
11877             {
11878               emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11879               emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
11880               intcmp_mode = CCmode;
11881               code = EQ;
11882             }
11883           else
11884             {
11885               emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
11886               code = NE;
                    /* This break is redundant with the one that follows the
                       if/else.  */
11887               break;
11888             }
11889           break;
11890         case NE:
11891         case LTGT:
11892           if (code == NE && TARGET_IEEE_FP)
11893             {
11894               emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11895               emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
11896                                              GEN_INT (0x40)));
11897               code = NE;
11898             }
11899           else
11900             {
11901               emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
11902               code = EQ;
11903             }
11904           break;
11905
11906         case UNORDERED:
11907           emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
11908           code = NE;
11909           break;
11910         case ORDERED:
11911           emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
11912           code = EQ;
11913           break;
11914
11915         default:
11916           gcc_unreachable ();
11917         }
11918     }
11919
11920   /* Return the test that should be put into the flags user, i.e.
11921      the bcc, scc, or cmov instruction.  */
11922   return gen_rtx_fmt_ee (code, VOIDmode,
11923                          gen_rtx_REG (intcmp_mode, FLAGS_REG),
11924                          const0_rtx);
11925 }
11926
/* Expand the comparison CODE of the global operands ix86_compare_op0
   and ix86_compare_op1 and return the test rtx for the flags consumer.
   SECOND_TEST and BYPASS_TEST, when non-null, are reset to NULL_RTX and
   may then be filled in by ix86_expand_fp_compare.

   If ix86_compare_emitted is set, no compare is emitted: the result
   tests that rtx against const0_rtx, and ix86_compare_emitted is
   cleared.  Otherwise scalar floating point operands (decimal float is
   asserted not to occur) go through ix86_expand_fp_compare and
   everything else through ix86_expand_int_compare.  */
11927 rtx
11928 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
11929 {
11930   rtx op0, op1, ret;
11931   op0 = ix86_compare_op0;
11932   op1 = ix86_compare_op1;
11933
11934   if (second_test)
11935     *second_test = NULL_RTX;
11936   if (bypass_test)
11937     *bypass_test = NULL_RTX;
11938
11939   if (ix86_compare_emitted)
11940     {
11941       ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
11942       ix86_compare_emitted = NULL_RTX;
11943     }
11944   else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
11945     {
11946       gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
11947       ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
11948                                     second_test, bypass_test);
11949     }
11950   else
11951     ret = ix86_expand_int_compare (code, op0, op1);
11952
11953   return ret;
11954 }
11955
11956 /* Return true if the CODE will result in nontrivial jump sequence.
         That is always the case without TARGET_CMOVE; otherwise it is the
         case when ix86_fp_comparison_codes reports that CODE needs a
         bypass or second test.  */
11957 bool
11958 ix86_fp_jump_nontrivial_p (enum rtx_code code)
11959 {
11960   enum rtx_code bypass_code, first_code, second_code;
11961   if (!TARGET_CMOVE)
11962     return true;
11963   ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11964   return bypass_code != UNKNOWN || second_code != UNKNOWN;
11965 }
11966
/* Emit a conditional jump to LABEL that is taken when comparison CODE
   holds for the global operands ix86_compare_op0 and ix86_compare_op1,
   or for the already emitted compare recorded in ix86_compare_emitted.

   QImode, HImode and SImode operands (and DImode on TARGET_64BIT) are
   expanded as one compare and one jump.  SFmode, DFmode and XFmode
   operands are expanded immediately when a single jump suffices and are
   otherwise emitted as a compound jump insn to be split later.
   Double-word operands (DImode without TARGET_64BIT, and TImode) are
   split into word-sized halves and expanded recursively as one to three
   compare+branch pairs.  This function overwrites ix86_compare_op0 and
   ix86_compare_op1 in the process.  Any other operand mode is a bug
   (gcc_unreachable).  */
11967 void
11968 ix86_expand_branch (enum rtx_code code, rtx label)
11969 {
11970   rtx tmp;
11971
11972   /* If we have emitted a compare insn, go straight to simple.
11973      ix86_expand_compare won't emit anything if ix86_compare_emitted
11974      is non NULL.  */
11975   if (ix86_compare_emitted)
11976     goto simple;
11977
11978   switch (GET_MODE (ix86_compare_op0))
11979     {
11980     case QImode:
11981     case HImode:
11982     case SImode:
11983       simple:
11984       tmp = ix86_expand_compare (code, NULL, NULL);
11985       tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11986                                   gen_rtx_LABEL_REF (VOIDmode, label),
11987                                   pc_rtx);
11988       emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11989       return;
11990
11991     case SFmode:
11992     case DFmode:
11993     case XFmode:
11994       {
11995         rtvec vec;
11996         int use_fcomi;
11997         enum rtx_code bypass_code, first_code, second_code;
11998
11999         code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
12000                                              &ix86_compare_op1);
12001
12002         ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
12003
12004         /* Check whether we will use the natural sequence with one jump.  If
12005            so, we can expand jump early.  Otherwise delay expansion by
12006            creating compound insn to not confuse optimizers.  */
12007         if (bypass_code == UNKNOWN && second_code == UNKNOWN)
12008           {
12009             ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
12010                                   gen_rtx_LABEL_REF (VOIDmode, label),
12011                                   pc_rtx, NULL_RTX, NULL_RTX);
12012           }
12013         else
12014           {
12015             tmp = gen_rtx_fmt_ee (code, VOIDmode,
12016                                   ix86_compare_op0, ix86_compare_op1);
12017             tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
12018                                         gen_rtx_LABEL_REF (VOIDmode, label),
12019                                         pc_rtx);
12020             tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
12021
                  /* The compound insn clobbers FPSR and FLAGS, plus an
                     HImode scratch when fcomi is not used.  */
12022             use_fcomi = ix86_use_fcomi_compare (code);
12023             vec = rtvec_alloc (3 + !use_fcomi);
12024             RTVEC_ELT (vec, 0) = tmp;
12025             RTVEC_ELT (vec, 1)
12026               = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FPSR_REG));
12027             RTVEC_ELT (vec, 2)
12028               = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FLAGS_REG));
12029             if (! use_fcomi)
12030               RTVEC_ELT (vec, 3)
12031                 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
12032
12033             emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
12034           }
12035         return;
12036       }
12037
12038     case DImode:
12039       if (TARGET_64BIT)
12040         goto simple;
            /* FALLTHRU */
12041     case TImode:
12042       /* Expand DImode branch into multiple compare+branch.  */
12043       {
12044         rtx lo[2], hi[2], label2;
12045         enum rtx_code code1, code2, code3;
12046         enum machine_mode submode;
12047
              /* Prefer the constant, if there is exactly one, as the second
                 operand.  */
12048         if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
12049           {
12050             tmp = ix86_compare_op0;
12051             ix86_compare_op0 = ix86_compare_op1;
12052             ix86_compare_op1 = tmp;
12053             code = swap_condition (code);
12054           }
12055         if (GET_MODE (ix86_compare_op0) == DImode)
12056           {
12057             split_di (&ix86_compare_op0, 1, lo+0, hi+0);
12058             split_di (&ix86_compare_op1, 1, lo+1, hi+1);
12059             submode = SImode;
12060           }
12061         else
12062           {
12063             split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
12064             split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
12065             submode = DImode;
12066           }
12067
12068         /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
12069            avoid two branches.  This costs one extra insn, so disable when
12070            optimizing for size.  */
12071
12072         if ((code == EQ || code == NE)
12073             && (!optimize_size
12074                 || hi[1] == const0_rtx || lo[1] == const0_rtx))
12075           {
12076             rtx xor0, xor1;
12077
12078             xor1 = hi[0];
12079             if (hi[1] != const0_rtx)
12080               xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
12081                                    NULL_RTX, 0, OPTAB_WIDEN);
12082
12083             xor0 = lo[0];
12084             if (lo[1] != const0_rtx)
12085               xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
12086                                    NULL_RTX, 0, OPTAB_WIDEN);
12087
12088             tmp = expand_binop (submode, ior_optab, xor1, xor0,
12089                                 NULL_RTX, 0, OPTAB_WIDEN);
12090
12091             ix86_compare_op0 = tmp;
12092             ix86_compare_op1 = const0_rtx;
12093             ix86_expand_branch (code, label);
12094             return;
12095           }
12096
12097         /* Otherwise, if we are doing less-than or greater-or-equal-than,
12098            op1 is a constant and the low word is zero, then we can just
12099            examine the high word.  Similarly for low word -1 and
12100            less-or-equal-than or greater-than.  */
12101
12102         if (CONST_INT_P (hi[1]))
12103           switch (code)
12104             {
12105             case LT: case LTU: case GE: case GEU:
12106               if (lo[1] == const0_rtx)
12107                 {
12108                   ix86_compare_op0 = hi[0];
12109                   ix86_compare_op1 = hi[1];
12110                   ix86_expand_branch (code, label);
12111                   return;
12112                 }
                    /* Do not fall through: a low word of -1 permits the
                       high-word shortcut only for LE/LEU/GT/GTU.  */
                    break;
12113             case LE: case LEU: case GT: case GTU:
12114               if (lo[1] == constm1_rtx)
12115                 {
12116                   ix86_compare_op0 = hi[0];
12117                   ix86_compare_op1 = hi[1];
12118                   ix86_expand_branch (code, label);
12119                   return;
12120                 }
                    break;
12121             default:
12122               break;
12123             }
12124
12125         /* Otherwise, we need two or three jumps.  */
12126
12127         label2 = gen_label_rtx ();
12128
              /* CODE1 branches to LABEL on the high words, CODE2 branches
                 around the low-word test to LABEL2 on the high words, and
                 CODE3 is the unsigned test applied to the low words.  */
12129         code1 = code;
12130         code2 = swap_condition (code);
12131         code3 = unsigned_condition (code);
12132
12133         switch (code)
12134           {
12135           case LT: case GT: case LTU: case GTU:
12136             break;
12137
12138           case LE:   code1 = LT;  code2 = GT;  break;
12139           case GE:   code1 = GT;  code2 = LT;  break;
12140           case LEU:  code1 = LTU; code2 = GTU; break;
12141           case GEU:  code1 = GTU; code2 = LTU; break;
12142
12143           case EQ:   code1 = UNKNOWN; code2 = NE;  break;
12144           case NE:   code2 = UNKNOWN; break;
12145
12146           default:
12147             gcc_unreachable ();
12148           }
12149
12150         /*
12151          * a < b =>
12152          *    if (hi(a) < hi(b)) goto true;
12153          *    if (hi(a) > hi(b)) goto false;
12154          *    if (lo(a) < lo(b)) goto true;
12155          *  false:
12156          */
12157
12158         ix86_compare_op0 = hi[0];
12159         ix86_compare_op1 = hi[1];
12160
12161         if (code1 != UNKNOWN)
12162           ix86_expand_branch (code1, label);
12163         if (code2 != UNKNOWN)
12164           ix86_expand_branch (code2, label2);
12165
12166         ix86_compare_op0 = lo[0];
12167         ix86_compare_op1 = lo[1];
12168         ix86_expand_branch (code3, label);
12169
12170         if (code2 != UNKNOWN)
12171           emit_label (label2);
12172         return;
12173       }
12174
12175     default:
12176       gcc_unreachable ();
12177     }
12178 }
12179
12180 /* Split branch based on floating point condition.  */
12181 void
12182 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
12183                       rtx target1, rtx target2, rtx tmp, rtx pushed)
12184 {
12185   rtx second, bypass;
12186   rtx label = NULL_RTX;
12187   rtx condition;
12188   int bypass_probability = -1, second_probability = -1, probability = -1;
12189   rtx i;
12190
12191   if (target2 != pc_rtx)
12192     {
12193       rtx tmp = target2;
12194       code = reverse_condition_maybe_unordered (code);
12195       target2 = target1;
12196       target1 = tmp;
12197     }
12198
12199   condition = ix86_expand_fp_compare (code, op1, op2,
12200                                       tmp, &second, &bypass);
12201
12202   /* Remove pushed operand from stack.  */
12203   if (pushed)
12204     ix86_free_from_memory (GET_MODE (pushed));
12205
12206   if (split_branch_probability >= 0)
12207     {
12208       /* Distribute the probabilities across the jumps.
12209          Assume the BYPASS and SECOND to be always test
12210          for UNORDERED.  */
12211       probability = split_branch_probability;
12212
12213       /* Value of 1 is low enough to make no need for probability
12214          to be updated.  Later we may run some experiments and see
12215          if unordered values are more frequent in practice.  */
12216       if (bypass)
12217         bypass_probability = 1;
12218       if (second)
12219         second_probability = 1;
12220     }
12221   if (bypass != NULL_RTX)
12222     {
12223       label = gen_label_rtx ();
12224       i = emit_jump_insn (gen_rtx_SET
12225                           (VOIDmode, pc_rtx,
12226                            gen_rtx_IF_THEN_ELSE (VOIDmode,
12227                                                  bypass,
12228                                                  gen_rtx_LABEL_REF (VOIDmode,
12229                                                                     label),
12230                                                  pc_rtx)));
12231       if (bypass_probability >= 0)
12232         REG_NOTES (i)
12233           = gen_rtx_EXPR_LIST (REG_BR_PROB,
12234                                GEN_INT (bypass_probability),
12235                                REG_NOTES (i));
12236     }
12237   i = emit_jump_insn (gen_rtx_SET
12238                       (VOIDmode, pc_rtx,
12239                        gen_rtx_IF_THEN_ELSE (VOIDmode,
12240                                              condition, target1, target2)));
12241   if (probability >= 0)
12242     REG_NOTES (i)
12243       = gen_rtx_EXPR_LIST (REG_BR_PROB,
12244                            GEN_INT (probability),
12245                            REG_NOTES (i));
12246   if (second != NULL_RTX)
12247     {
12248       i = emit_jump_insn (gen_rtx_SET
12249                           (VOIDmode, pc_rtx,
12250                            gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
12251                                                  target2)));
12252       if (second_probability >= 0)
12253         REG_NOTES (i)
12254           = gen_rtx_EXPR_LIST (REG_BR_PROB,
12255                                GEN_INT (second_probability),
12256                                REG_NOTES (i));
12257     }
12258   if (label != NULL_RTX)
12259     emit_label (label);
12260 }
12261
12262 int
12263 ix86_expand_setcc (enum rtx_code code, rtx dest)
12264 {
12265   rtx ret, tmp, tmpreg, equiv;
12266   rtx second_test, bypass_test;
12267
12268   if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
12269     return 0; /* FAIL */
12270
12271   gcc_assert (GET_MODE (dest) == QImode);
12272
12273   ret = ix86_expand_compare (code, &second_test, &bypass_test);
12274   PUT_MODE (ret, QImode);
12275
12276   tmp = dest;
12277   tmpreg = dest;
12278
12279   emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
12280   if (bypass_test || second_test)
12281     {
12282       rtx test = second_test;
12283       int bypass = 0;
12284       rtx tmp2 = gen_reg_rtx (QImode);
12285       if (bypass_test)
12286         {
12287           gcc_assert (!second_test);
12288           test = bypass_test;
12289           bypass = 1;
12290           PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
12291         }
12292       PUT_MODE (test, QImode);
12293       emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
12294
12295       if (bypass)
12296         emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
12297       else
12298         emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
12299     }
12300
12301   /* Attach a REG_EQUAL note describing the comparison result.  */
12302   if (ix86_compare_op0 && ix86_compare_op1)
12303     {
12304       equiv = simplify_gen_relational (code, QImode,
12305                                        GET_MODE (ix86_compare_op0),
12306                                        ix86_compare_op0, ix86_compare_op1);
12307       set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
12308     }
12309
12310   return 1; /* DONE */
12311 }
12312
12313 /* Expand comparison setting or clearing carry flag.  Return true when
12314    successful and set pop for the operation.  */
12315 static bool
12316 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
12317 {
12318   enum machine_mode mode =
12319     GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
12320
12321   /* Do not handle DImode compares that go through special path.  */
12322   if (mode == (TARGET_64BIT ? TImode : DImode))
12323     return false;
12324
12325   if (SCALAR_FLOAT_MODE_P (mode))
12326     {
12327       rtx second_test = NULL, bypass_test = NULL;
12328       rtx compare_op, compare_seq;
12329
12330       gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
12331
12332       /* Shortcut:  following common codes never translate
12333          into carry flag compares.  */
12334       if (code == EQ || code == NE || code == UNEQ || code == LTGT
12335           || code == ORDERED || code == UNORDERED)
12336         return false;
12337
12338       /* These comparisons require zero flag; swap operands so they won't.  */
12339       if ((code == GT || code == UNLE || code == LE || code == UNGT)
12340           && !TARGET_IEEE_FP)
12341         {
12342           rtx tmp = op0;
12343           op0 = op1;
12344           op1 = tmp;
12345           code = swap_condition (code);
12346         }
12347
12348       /* Try to expand the comparison and verify that we end up with
12349          carry flag based comparison.  This fails to be true only when
12350          we decide to expand comparison using arithmetic that is not
12351          too common scenario.  */
12352       start_sequence ();
12353       compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
12354                                            &second_test, &bypass_test);
12355       compare_seq = get_insns ();
12356       end_sequence ();
12357
12358       if (second_test || bypass_test)
12359         return false;
12360
12361       if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
12362           || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
12363         code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
12364       else
12365         code = GET_CODE (compare_op);
12366
12367       if (code != LTU && code != GEU)
12368         return false;
12369
12370       emit_insn (compare_seq);
12371       *pop = compare_op;
12372       return true;
12373     }
12374
12375   if (!INTEGRAL_MODE_P (mode))
12376     return false;
12377
12378   switch (code)
12379     {
12380     case LTU:
12381     case GEU:
12382       break;
12383
12384     /* Convert a==0 into (unsigned)a<1.  */
12385     case EQ:
12386     case NE:
12387       if (op1 != const0_rtx)
12388         return false;
12389       op1 = const1_rtx;
12390       code = (code == EQ ? LTU : GEU);
12391       break;
12392
12393     /* Convert a>b into b<a or a>=b-1.  */
12394     case GTU:
12395     case LEU:
12396       if (CONST_INT_P (op1))
12397         {
12398           op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
12399           /* Bail out on overflow.  We still can swap operands but that
12400              would force loading of the constant into register.  */
12401           if (op1 == const0_rtx
12402               || !x86_64_immediate_operand (op1, GET_MODE (op1)))
12403             return false;
12404           code = (code == GTU ? GEU : LTU);
12405         }
12406       else
12407         {
12408           rtx tmp = op1;
12409           op1 = op0;
12410           op0 = tmp;
12411           code = (code == GTU ? LTU : GEU);
12412         }
12413       break;
12414
12415     /* Convert a>=0 into (unsigned)a<0x80000000.  */
12416     case LT:
12417     case GE:
12418       if (mode == DImode || op1 != const0_rtx)
12419         return false;
12420       op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
12421       code = (code == LT ? GEU : LTU);
12422       break;
12423     case LE:
12424     case GT:
12425       if (mode == DImode || op1 != constm1_rtx)
12426         return false;
12427       op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
12428       code = (code == LE ? GEU : LTU);
12429       break;
12430
12431     default:
12432       return false;
12433     }
12434   /* Swapping operands may cause constant to appear as first operand.  */
12435   if (!nonimmediate_operand (op0, VOIDmode))
12436     {
12437       if (!can_create_pseudo_p ())
12438         return false;
12439       op0 = force_reg (mode, op0);
12440     }
12441   ix86_compare_op0 = op0;
12442   ix86_compare_op1 = op1;
12443   *pop = ix86_expand_compare (code, NULL, NULL);
12444   gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
12445   return true;
12446 }
12447
12448 int
12449 ix86_expand_int_movcc (rtx operands[])
12450 {
12451   enum rtx_code code = GET_CODE (operands[1]), compare_code;
12452   rtx compare_seq, compare_op;
12453   rtx second_test, bypass_test;
12454   enum machine_mode mode = GET_MODE (operands[0]);
12455   bool sign_bit_compare_p = false;;
12456
12457   start_sequence ();
12458   compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
12459   compare_seq = get_insns ();
12460   end_sequence ();
12461
12462   compare_code = GET_CODE (compare_op);
12463
12464   if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
12465       || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
12466     sign_bit_compare_p = true;
12467
12468   /* Don't attempt mode expansion here -- if we had to expand 5 or 6
12469      HImode insns, we'd be swallowed in word prefix ops.  */
12470
12471   if ((mode != HImode || TARGET_FAST_PREFIX)
12472       && (mode != (TARGET_64BIT ? TImode : DImode))
12473       && CONST_INT_P (operands[2])
12474       && CONST_INT_P (operands[3]))
12475     {
12476       rtx out = operands[0];
12477       HOST_WIDE_INT ct = INTVAL (operands[2]);
12478       HOST_WIDE_INT cf = INTVAL (operands[3]);
12479       HOST_WIDE_INT diff;
12480
12481       diff = ct - cf;
12482       /*  Sign bit compares are better done using shifts than we do by using
12483           sbb.  */
12484       if (sign_bit_compare_p
12485           || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
12486                                              ix86_compare_op1, &compare_op))
12487         {
12488           /* Detect overlap between destination and compare sources.  */
12489           rtx tmp = out;
12490
12491           if (!sign_bit_compare_p)
12492             {
12493               bool fpcmp = false;
12494
12495               compare_code = GET_CODE (compare_op);
12496
12497               if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
12498                   || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
12499                 {
12500                   fpcmp = true;
12501                   compare_code = ix86_fp_compare_code_to_integer (compare_code);
12502                 }
12503
12504               /* To simplify rest of code, restrict to the GEU case.  */
12505               if (compare_code == LTU)
12506                 {
12507                   HOST_WIDE_INT tmp = ct;
12508                   ct = cf;
12509                   cf = tmp;
12510                   compare_code = reverse_condition (compare_code);
12511                   code = reverse_condition (code);
12512                 }
12513               else
12514                 {
12515                   if (fpcmp)
12516                     PUT_CODE (compare_op,
12517                               reverse_condition_maybe_unordered
12518                                 (GET_CODE (compare_op)));
12519                   else
12520                     PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
12521                 }
12522               diff = ct - cf;
12523
12524               if (reg_overlap_mentioned_p (out, ix86_compare_op0)
12525                   || reg_overlap_mentioned_p (out, ix86_compare_op1))
12526                 tmp = gen_reg_rtx (mode);
12527
12528               if (mode == DImode)
12529                 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
12530               else
12531                 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
12532             }
12533           else
12534             {
12535               if (code == GT || code == GE)
12536                 code = reverse_condition (code);
12537               else
12538                 {
12539                   HOST_WIDE_INT tmp = ct;
12540                   ct = cf;
12541                   cf = tmp;
12542                   diff = ct - cf;
12543                 }
12544               tmp = emit_store_flag (tmp, code, ix86_compare_op0,
12545                                      ix86_compare_op1, VOIDmode, 0, -1);
12546             }
12547
12548           if (diff == 1)
12549             {
12550               /*
12551                * cmpl op0,op1
12552                * sbbl dest,dest
12553                * [addl dest, ct]
12554                *
12555                * Size 5 - 8.
12556                */
12557               if (ct)
12558                 tmp = expand_simple_binop (mode, PLUS,
12559                                            tmp, GEN_INT (ct),
12560                                            copy_rtx (tmp), 1, OPTAB_DIRECT);
12561             }
12562           else if (cf == -1)
12563             {
12564               /*
12565                * cmpl op0,op1
12566                * sbbl dest,dest
12567                * orl $ct, dest
12568                *
12569                * Size 8.
12570                */
12571               tmp = expand_simple_binop (mode, IOR,
12572                                          tmp, GEN_INT (ct),
12573                                          copy_rtx (tmp), 1, OPTAB_DIRECT);
12574             }
12575           else if (diff == -1 && ct)
12576             {
12577               /*
12578                * cmpl op0,op1
12579                * sbbl dest,dest
12580                * notl dest
12581                * [addl dest, cf]
12582                *
12583                * Size 8 - 11.
12584                */
12585               tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
12586               if (cf)
12587                 tmp = expand_simple_binop (mode, PLUS,
12588                                            copy_rtx (tmp), GEN_INT (cf),
12589                                            copy_rtx (tmp), 1, OPTAB_DIRECT);
12590             }
12591           else
12592             {
12593               /*
12594                * cmpl op0,op1
12595                * sbbl dest,dest
12596                * [notl dest]
12597                * andl cf - ct, dest
12598                * [addl dest, ct]
12599                *
12600                * Size 8 - 11.
12601                */
12602
12603               if (cf == 0)
12604                 {
12605                   cf = ct;
12606                   ct = 0;
12607                   tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
12608                 }
12609
12610               tmp = expand_simple_binop (mode, AND,
12611                                          copy_rtx (tmp),
12612                                          gen_int_mode (cf - ct, mode),
12613                                          copy_rtx (tmp), 1, OPTAB_DIRECT);
12614               if (ct)
12615                 tmp = expand_simple_binop (mode, PLUS,
12616                                            copy_rtx (tmp), GEN_INT (ct),
12617                                            copy_rtx (tmp), 1, OPTAB_DIRECT);
12618             }
12619
12620           if (!rtx_equal_p (tmp, out))
12621             emit_move_insn (copy_rtx (out), copy_rtx (tmp));
12622
12623           return 1; /* DONE */
12624         }
12625
12626       if (diff < 0)
12627         {
12628           enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
12629
12630           HOST_WIDE_INT tmp;
12631           tmp = ct, ct = cf, cf = tmp;
12632           diff = -diff;
12633
12634           if (SCALAR_FLOAT_MODE_P (cmp_mode))
12635             {
12636               gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
12637
12638               /* We may be reversing unordered compare to normal compare, that
12639                  is not valid in general (we may convert non-trapping condition
12640                  to trapping one), however on i386 we currently emit all
12641                  comparisons unordered.  */
12642               compare_code = reverse_condition_maybe_unordered (compare_code);
12643               code = reverse_condition_maybe_unordered (code);
12644             }
12645           else
12646             {
12647               compare_code = reverse_condition (compare_code);
12648               code = reverse_condition (code);
12649             }
12650         }
12651
12652       compare_code = UNKNOWN;
12653       if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
12654           && CONST_INT_P (ix86_compare_op1))
12655         {
12656           if (ix86_compare_op1 == const0_rtx
12657               && (code == LT || code == GE))
12658             compare_code = code;
12659           else if (ix86_compare_op1 == constm1_rtx)
12660             {
12661               if (code == LE)
12662                 compare_code = LT;
12663               else if (code == GT)
12664                 compare_code = GE;
12665             }
12666         }
12667
12668       /* Optimize dest = (op0 < 0) ? -1 : cf.  */
12669       if (compare_code != UNKNOWN
12670           && GET_MODE (ix86_compare_op0) == GET_MODE (out)
12671           && (cf == -1 || ct == -1))
12672         {
12673           /* If lea code below could be used, only optimize
12674              if it results in a 2 insn sequence.  */
12675
12676           if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
12677                  || diff == 3 || diff == 5 || diff == 9)
12678               || (compare_code == LT && ct == -1)
12679               || (compare_code == GE && cf == -1))
12680             {
12681               /*
12682                * notl op1       (if necessary)
12683                * sarl $31, op1
12684                * orl cf, op1
12685                */
12686               if (ct != -1)
12687                 {
12688                   cf = ct;
12689                   ct = -1;
12690                   code = reverse_condition (code);
12691                 }
12692
12693               out = emit_store_flag (out, code, ix86_compare_op0,
12694                                      ix86_compare_op1, VOIDmode, 0, -1);
12695
12696               out = expand_simple_binop (mode, IOR,
12697                                          out, GEN_INT (cf),
12698                                          out, 1, OPTAB_DIRECT);
12699               if (out != operands[0])
12700                 emit_move_insn (operands[0], out);
12701
12702               return 1; /* DONE */
12703             }
12704         }
12705
12706
12707       if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
12708            || diff == 3 || diff == 5 || diff == 9)
12709           && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
12710           && (mode != DImode
12711               || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
12712         {
12713           /*
12714            * xorl dest,dest
12715            * cmpl op1,op2
12716            * setcc dest
12717            * lea cf(dest*(ct-cf)),dest
12718            *
12719            * Size 14.
12720            *
12721            * This also catches the degenerate setcc-only case.
12722            */
12723
12724           rtx tmp;
12725           int nops;
12726
12727           out = emit_store_flag (out, code, ix86_compare_op0,
12728                                  ix86_compare_op1, VOIDmode, 0, 1);
12729
12730           nops = 0;
12731           /* On x86_64 the lea instruction operates on Pmode, so we need
12732              to get arithmetics done in proper mode to match.  */
12733           if (diff == 1)
12734             tmp = copy_rtx (out);
12735           else
12736             {
12737               rtx out1;
12738               out1 = copy_rtx (out);
12739               tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
12740               nops++;
12741               if (diff & 1)
12742                 {
12743                   tmp = gen_rtx_PLUS (mode, tmp, out1);
12744                   nops++;
12745                 }
12746             }
12747           if (cf != 0)
12748             {
12749               tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
12750               nops++;
12751             }
12752           if (!rtx_equal_p (tmp, out))
12753             {
12754               if (nops == 1)
12755                 out = force_operand (tmp, copy_rtx (out));
12756               else
12757                 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
12758             }
12759           if (!rtx_equal_p (out, operands[0]))
12760             emit_move_insn (operands[0], copy_rtx (out));
12761
12762           return 1; /* DONE */
12763         }
12764
12765       /*
12766        * General case:                  Jumpful:
12767        *   xorl dest,dest               cmpl op1, op2
12768        *   cmpl op1, op2                movl ct, dest
12769        *   setcc dest                   jcc 1f
12770        *   decl dest                    movl cf, dest
12771        *   andl (cf-ct),dest            1:
12772        *   addl ct,dest
12773        *
12774        * Size 20.                       Size 14.
12775        *
12776        * This is reasonably steep, but branch mispredict costs are
12777        * high on modern cpus, so consider failing only if optimizing
12778        * for space.
12779        */
12780
12781       if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
12782           && BRANCH_COST >= 2)
12783         {
12784           if (cf == 0)
12785             {
12786               enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
12787
12788               cf = ct;
12789               ct = 0;
12790
12791               if (SCALAR_FLOAT_MODE_P (cmp_mode))
12792                 {
12793                   gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
12794
12795                   /* We may be reversing unordered compare to normal compare,
12796                      that is not valid in general (we may convert non-trapping
12797                      condition to trapping one), however on i386 we currently
12798                      emit all comparisons unordered.  */
12799                   code = reverse_condition_maybe_unordered (code);
12800                 }
12801               else
12802                 {
12803                   code = reverse_condition (code);
12804                   if (compare_code != UNKNOWN)
12805                     compare_code = reverse_condition (compare_code);
12806                 }
12807             }
12808
12809           if (compare_code != UNKNOWN)
12810             {
12811               /* notl op1       (if needed)
12812                  sarl $31, op1
12813                  andl (cf-ct), op1
12814                  addl ct, op1
12815
12816                  For x < 0 (resp. x <= -1) there will be no notl,
12817                  so if possible swap the constants to get rid of the
12818                  complement.
12819                  True/false will be -1/0 while code below (store flag
12820                  followed by decrement) is 0/-1, so the constants need
12821                  to be exchanged once more.  */
12822
12823               if (compare_code == GE || !cf)
12824                 {
12825                   code = reverse_condition (code);
12826                   compare_code = LT;
12827                 }
12828               else
12829                 {
12830                   HOST_WIDE_INT tmp = cf;
12831                   cf = ct;
12832                   ct = tmp;
12833                 }
12834
12835               out = emit_store_flag (out, code, ix86_compare_op0,
12836                                      ix86_compare_op1, VOIDmode, 0, -1);
12837             }
12838           else
12839             {
12840               out = emit_store_flag (out, code, ix86_compare_op0,
12841                                      ix86_compare_op1, VOIDmode, 0, 1);
12842
12843               out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
12844                                          copy_rtx (out), 1, OPTAB_DIRECT);
12845             }
12846
12847           out = expand_simple_binop (mode, AND, copy_rtx (out),
12848                                      gen_int_mode (cf - ct, mode),
12849                                      copy_rtx (out), 1, OPTAB_DIRECT);
12850           if (ct)
12851             out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
12852                                        copy_rtx (out), 1, OPTAB_DIRECT);
12853           if (!rtx_equal_p (out, operands[0]))
12854             emit_move_insn (operands[0], copy_rtx (out));
12855
12856           return 1; /* DONE */
12857         }
12858     }
12859
12860   if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
12861     {
12862       /* Try a few things more with specific constants and a variable.  */
12863
12864       optab op;
12865       rtx var, orig_out, out, tmp;
12866
12867       if (BRANCH_COST <= 2)
12868         return 0; /* FAIL */
12869
12870       /* If one of the two operands is an interesting constant, load a
12871          constant with the above and mask it in with a logical operation.  */
12872
12873       if (CONST_INT_P (operands[2]))
12874         {
12875           var = operands[3];
12876           if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
12877             operands[3] = constm1_rtx, op = and_optab;
12878           else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
12879             operands[3] = const0_rtx, op = ior_optab;
12880           else
12881             return 0; /* FAIL */
12882         }
12883       else if (CONST_INT_P (operands[3]))
12884         {
12885           var = operands[2];
12886           if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
12887             operands[2] = constm1_rtx, op = and_optab;
12888           else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
12889             operands[2] = const0_rtx, op = ior_optab;
12890           else
12891             return 0; /* FAIL */
12892         }
12893       else
12894         return 0; /* FAIL */
12895
12896       orig_out = operands[0];
12897       tmp = gen_reg_rtx (mode);
12898       operands[0] = tmp;
12899
12900       /* Recurse to get the constant loaded.  */
12901       if (ix86_expand_int_movcc (operands) == 0)
12902         return 0; /* FAIL */
12903
12904       /* Mask in the interesting variable.  */
12905       out = expand_binop (mode, op, var, tmp, orig_out, 0,
12906                           OPTAB_WIDEN);
12907       if (!rtx_equal_p (out, orig_out))
12908         emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
12909
12910       return 1; /* DONE */
12911     }
12912
12913   /*
12914    * For comparison with above,
12915    *
12916    * movl cf,dest
12917    * movl ct,tmp
12918    * cmpl op1,op2
12919    * cmovcc tmp,dest
12920    *
12921    * Size 15.
12922    */
12923
12924   if (! nonimmediate_operand (operands[2], mode))
12925     operands[2] = force_reg (mode, operands[2]);
12926   if (! nonimmediate_operand (operands[3], mode))
12927     operands[3] = force_reg (mode, operands[3]);
12928
12929   if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
12930     {
12931       rtx tmp = gen_reg_rtx (mode);
12932       emit_move_insn (tmp, operands[3]);
12933       operands[3] = tmp;
12934     }
12935   if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
12936     {
12937       rtx tmp = gen_reg_rtx (mode);
12938       emit_move_insn (tmp, operands[2]);
12939       operands[2] = tmp;
12940     }
12941
12942   if (! register_operand (operands[2], VOIDmode)
12943       && (mode == QImode
12944           || ! register_operand (operands[3], VOIDmode)))
12945     operands[2] = force_reg (mode, operands[2]);
12946
12947   if (mode == QImode
12948       && ! register_operand (operands[3], VOIDmode))
12949     operands[3] = force_reg (mode, operands[3]);
12950
12951   emit_insn (compare_seq);
12952   emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12953                           gen_rtx_IF_THEN_ELSE (mode,
12954                                                 compare_op, operands[2],
12955                                                 operands[3])));
12956   if (bypass_test)
12957     emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
12958                             gen_rtx_IF_THEN_ELSE (mode,
12959                                   bypass_test,
12960                                   copy_rtx (operands[3]),
12961                                   copy_rtx (operands[0]))));
12962   if (second_test)
12963     emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
12964                             gen_rtx_IF_THEN_ELSE (mode,
12965                                   second_test,
12966                                   copy_rtx (operands[2]),
12967                                   copy_rtx (operands[0]))));
12968
12969   return 1; /* DONE */
12970 }
12971
12972 /* Swap, force into registers, or otherwise massage the two operands
12973    to an sse comparison with a mask result.  Thus we differ a bit from
12974    ix86_prepare_fp_compare_args which expects to produce a flags result.
12975
12976    The DEST operand exists to help determine whether to commute commutative
12977    operators.  The POP0/POP1 operands are updated in place.  The new
12978    comparison code is returned, or UNKNOWN if not implementable.  */
12979
12980 static enum rtx_code
12981 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
12982                                   rtx *pop0, rtx *pop1)
12983 {
12984   rtx tmp;
12985
12986   switch (code)
12987     {
12988     case LTGT:
12989     case UNEQ:
12990       /* We have no LTGT as an operator.  We could implement it with
12991          NE & ORDERED, but this requires an extra temporary.  It's
12992          not clear that it's worth it.  */
12993       return UNKNOWN;
12994
12995     case LT:
12996     case LE:
12997     case UNGT:
12998     case UNGE:
12999       /* These are supported directly.  */
13000       break;
13001
13002     case EQ:
13003     case NE:
13004     case UNORDERED:
13005     case ORDERED:
13006       /* For commutative operators, try to canonicalize the destination
13007          operand to be first in the comparison - this helps reload to
13008          avoid extra moves.  */
13009       if (!dest || !rtx_equal_p (dest, *pop1))
13010         break;
13011       /* FALLTHRU */
13012
13013     case GE:
13014     case GT:
13015     case UNLE:
13016     case UNLT:
13017       /* These are not supported directly.  Swap the comparison operands
13018          to transform into something that is supported.  */
13019       tmp = *pop0;
13020       *pop0 = *pop1;
13021       *pop1 = tmp;
13022       code = swap_condition (code);
13023       break;
13024
13025     default:
13026       gcc_unreachable ();
13027     }
13028
13029   return code;
13030 }
13031
13032 /* Detect conditional moves that exactly match min/max operational
13033    semantics.  Note that this is IEEE safe, as long as we don't
13034    interchange the operands.
13035
         CODE is the comparison of CMP_OP0 with CMP_OP1 that selects between
         IF_TRUE and IF_FALSE.  Only LT is matched directly; UNGE is matched
         by exchanging IF_TRUE and IF_FALSE first.  The two arms must be the
         comparison operands themselves (as decided by rtx_equal_p), in
         either order.  The result is stored in DEST, whose mode is used for
         the emitted operation.

13036    Returns FALSE if this conditional move doesn't match a MIN/MAX,
13037    and TRUE if the operation is successful and instructions are emitted.  */
13038
13039 static bool
13040 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
13041                            rtx cmp_op1, rtx if_true, rtx if_false)
13042 {
13043   enum machine_mode mode;
13044   bool is_min;
13045   rtx tmp;
13046
13047   if (code == LT)
13048     ;
13049   else if (code == UNGE)
13050     {
            /* Exchange the arms so that the rest of the function only has
               to reason about the LT form.  */
13051       tmp = if_true;
13052       if_true = if_false;
13053       if_false = tmp;
13054     }
13055   else
13056     return false;
13057
        /* (op0 < op1) ? op0 : op1 is a minimum; with the arms the other way
           round it is a maximum.  Anything else is not a min/max.  */
13058   if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
13059     is_min = true;
13060   else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
13061     is_min = false;
13062   else
13063     return false;
13064
13065   mode = GET_MODE (dest);
13066
13067   /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
13068      but MODE may be a vector mode and thus not appropriate.  */
13069   if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
13070     {
            /* At least one of the two flags is off: use the
               UNSPEC_IEEE_MIN/UNSPEC_IEEE_MAX form, with the operands kept
               in the order IF_TRUE, IF_FALSE.  IF_TRUE is forced into a
               register first.  */
13071       int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
13072       rtvec v;
13073
13074       if_true = force_reg (mode, if_true);
13075       v = gen_rtvec (2, if_true, if_false);
13076       tmp = gen_rtx_UNSPEC (mode, v, u);
13077     }
13078   else
13079     {
            /* Both flags are on: a plain SMIN/SMAX rtx is used.  */
13080       code = is_min ? SMIN : SMAX;
13081       tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
13082     }
13083
13084   emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
13085   return true;
13086 }
13087
13088 /* Expand an sse vector comparison.  Return the register with the result.

         Emits a SET whose source is (CODE CMP_OP0 CMP_OP1), built in the
         mode of DEST.  OP_TRUE and OP_FALSE are not part of the comparison;
         they are only checked for overlap with DEST.  The returned register
         is DEST itself, or a new pseudo when optimizing or when DEST
         overlaps OP_TRUE or OP_FALSE.  */
13089
13090 static rtx
13091 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
13092                      rtx op_true, rtx op_false)
13093 {
13094   enum machine_mode mode = GET_MODE (dest);
13095   rtx x;
13096
        /* The first operand always goes into a register; the second is only
           forced into one when it is not a nonimmediate_operand.  */
13097   cmp_op0 = force_reg (mode, cmp_op0);
13098   if (!nonimmediate_operand (cmp_op1, mode))
13099     cmp_op1 = force_reg (mode, cmp_op1);
13100
        /* Do not write the mask over OP_TRUE/OP_FALSE: if DEST overlaps
           either of them (or whenever optimizing), use a fresh pseudo for
           the comparison result instead.  */
13101   if (optimize
13102       || reg_overlap_mentioned_p (dest, op_true)
13103       || reg_overlap_mentioned_p (dest, op_false))
13104     dest = gen_reg_rtx (mode);
13105
13106   x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
13107   emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13108
13109   return dest;
13110 }
13111
13112 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
13113    operations.  This is used for both scalar and vector conditional moves.  */
13114
13115 static void
13116 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
13117 {
13118   enum machine_mode mode = GET_MODE (dest);
13119   rtx t2, t3, x;
13120
13121   if (TARGET_SSE5)
13122     {
13123       rtx pcmov = gen_rtx_SET (mode, dest,
13124                                gen_rtx_IF_THEN_ELSE (mode, cmp,
13125                                                      op_true,
13126                                                      op_false));
13127       emit_insn (pcmov);
13128     }
13129   else if (op_false == CONST0_RTX (mode))
13130     {
13131       op_true = force_reg (mode, op_true);
13132       x = gen_rtx_AND (mode, cmp, op_true);
13133       emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13134     }
13135   else if (op_true == CONST0_RTX (mode))
13136     {
13137       op_false = force_reg (mode, op_false);
13138       x = gen_rtx_NOT (mode, cmp);
13139       x = gen_rtx_AND (mode, x, op_false);
13140       emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13141     }
13142   else
13143     {
13144       op_true = force_reg (mode, op_true);
13145       op_false = force_reg (mode, op_false);
13146
13147       t2 = gen_reg_rtx (mode);
13148       if (optimize)
13149         t3 = gen_reg_rtx (mode);
13150       else
13151         t3 = dest;
13152
13153       x = gen_rtx_AND (mode, op_true, cmp);
13154       emit_insn (gen_rtx_SET (VOIDmode, t2, x));
13155
13156       x = gen_rtx_NOT (mode, cmp);
13157       x = gen_rtx_AND (mode, x, op_false);
13158       emit_insn (gen_rtx_SET (VOIDmode, t3, x));
13159
13160       x = gen_rtx_IOR (mode, t3, t2);
13161       emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13162     }
13163 }
13164
13165 /* Expand a floating-point conditional move.  Return true if successful.

         OPERANDS[0] is the destination, OPERANDS[1] supplies the comparison
         code (the compared values are taken from ix86_compare_op0 and
         ix86_compare_op1), and OPERANDS[2]/OPERANDS[3] are the values for
         the true/false case.  Returns 1 once insns have been emitted and 0
         when the move is not expanded here.  */
13166
13167 int
13168 ix86_expand_fp_movcc (rtx operands[])
13169 {
13170   enum machine_mode mode = GET_MODE (operands[0]);
13171   enum rtx_code code = GET_CODE (operands[1]);
13172   rtx tmp, compare_op, second_test, bypass_test;
13173
13174   if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
13175     {
13176       enum machine_mode cmode;
13177
13178       /* Since we've no cmove for sse registers, don't force bad register
13179          allocation just to gain access to it.  Deny movcc when the
13180          comparison mode doesn't match the move mode.  */
13181       cmode = GET_MODE (ix86_compare_op0);
13182       if (cmode == VOIDmode)
13183         cmode = GET_MODE (ix86_compare_op1);
13184       if (cmode != mode)
13185         return 0;
13186
            /* This may swap ix86_compare_op0/ix86_compare_op1 in place.  */
13187       code = ix86_prepare_sse_fp_compare_args (operands[0], code,
13188                                                &ix86_compare_op0,
13189                                                &ix86_compare_op1);
13190       if (code == UNKNOWN)
13191         return 0;
13192
            /* Try a min/max first; otherwise emit a mask comparison followed
               by the logical-operation select.  */
13193       if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
13194                                      ix86_compare_op1, operands[2],
13195                                      operands[3]))
13196         return 1;
13197
13198       tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
13199                                  ix86_compare_op1, operands[2], operands[3]);
13200       ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
13201       return 1;
13202     }
13203
13204   /* Otherwise emit an IF_THEN_ELSE conditional move on the comparison
13205      returned by ix86_expand_compare.  */
13206
13207   compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
13208
13209   /* The floating point conditional move instructions don't directly
13210      support signed integer comparisons.  */
13211
13212   if (!fcmov_comparison_operator (compare_op, VOIDmode))
13213     {
            /* Materialize the condition in a QImode register with
               ix86_expand_setcc and test that register against zero with NE
               instead.  */
13214       gcc_assert (!second_test && !bypass_test);
13215       tmp = gen_reg_rtx (QImode);
13216       ix86_expand_setcc (code, tmp);
13217       code = NE;
13218       ix86_compare_op0 = tmp;
13219       ix86_compare_op1 = const0_rtx;
13220       compare_op = ix86_expand_compare (code,  &second_test, &bypass_test);
13221     }
        /* The follow-up moves below read OPERANDS[3] (bypass test) or
           OPERANDS[2] (second test) after OPERANDS[0] has already been
           written, so copy the operand to a new register if it overlaps
           the destination.  */
13222   if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
13223     {
13224       tmp = gen_reg_rtx (mode);
13225       emit_move_insn (tmp, operands[3]);
13226       operands[3] = tmp;
13227     }
13228   if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
13229     {
13230       tmp = gen_reg_rtx (mode);
13231       emit_move_insn (tmp, operands[2]);
13232       operands[2] = tmp;
13233     }
13234
13235   emit_insn (gen_rtx_SET (VOIDmode, operands[0],
13236                           gen_rtx_IF_THEN_ELSE (mode, compare_op,
13237                                                 operands[2], operands[3])));
        /* If the bypass test holds, the result becomes OPERANDS[3].  */
13238   if (bypass_test)
13239     emit_insn (gen_rtx_SET (VOIDmode, operands[0],
13240                             gen_rtx_IF_THEN_ELSE (mode, bypass_test,
13241                                                   operands[3], operands[0])));
        /* If the second test holds, the result becomes OPERANDS[2].  */
13242   if (second_test)
13243     emit_insn (gen_rtx_SET (VOIDmode, operands[0],
13244                             gen_rtx_IF_THEN_ELSE (mode, second_test,
13245                                                   operands[2], operands[0])));
13246
13247   return 1;
13248 }
13249
13250 /* Expand a floating-point vector conditional move; a vcond operation
13251    rather than a movcc operation.  */
13252
13253 bool
13254 ix86_expand_fp_vcond (rtx operands[])
13255 {
13256   enum rtx_code code = GET_CODE (operands[3]);
13257   rtx cmp;
13258
13259   code = ix86_prepare_sse_fp_compare_args (operands[0], code,
13260                                            &operands[4], &operands[5]);
13261   if (code == UNKNOWN)
13262     return false;
13263
13264   if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
13265                                  operands[5], operands[1], operands[2]))
13266     return true;
13267
13268   cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
13269                              operands[1], operands[2]);
13270   ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
13271   return true;
13272 }
13273
13274 /* Expand a signed/unsigned integral vector conditional move.  */
13275
13276 bool
13277 ix86_expand_int_vcond (rtx operands[])
13278 {
13279   enum machine_mode mode = GET_MODE (operands[0]);
13280   enum rtx_code code = GET_CODE (operands[3]);
13281   bool negate = false;
13282   rtx x, cop0, cop1;
13283
13284   cop0 = operands[4];
13285   cop1 = operands[5];
13286
13287   /* Canonicalize the comparison to EQ, GT, GTU.  */
13288   switch (code)
13289     {
13290     case EQ:
13291     case GT:
13292     case GTU:
13293       break;
13294
13295     case NE:
13296     case LE:
13297     case LEU:
13298       code = reverse_condition (code);
13299       negate = true;
13300       break;
13301
13302     case GE:
13303     case GEU:
13304       code = reverse_condition (code);
13305       negate = true;
13306       /* FALLTHRU */
13307
13308     case LT:
13309     case LTU:
13310       code = swap_condition (code);
13311       x = cop0, cop0 = cop1, cop1 = x;
13312       break;
13313
13314     default:
13315       gcc_unreachable ();
13316     }
13317
13318   /* Only SSE4.1/SSE4.2 supports V2DImode.  */
13319   if (mode == V2DImode)
13320     {
13321       switch (code)
13322         {
13323         case EQ:
13324           /* SSE4.1 supports EQ.  */
13325           if (!TARGET_SSE4_1)
13326             return false;
13327           break;
13328
13329         case GT:
13330         case GTU:
13331           /* SSE4.2 supports GT/GTU.  */
13332           if (!TARGET_SSE4_2)
13333             return false;
13334           break;
13335
13336         default:
13337           gcc_unreachable ();
13338         }
13339     }
13340
13341   /* Unsigned parallel compare is not supported by the hardware.  Play some
13342      tricks to turn this into a signed comparison against 0.  */
13343   if (code == GTU)
13344     {
13345       cop0 = force_reg (mode, cop0);
13346
13347       switch (mode)
13348         {
13349         case V4SImode:
13350         case V2DImode:
13351           {
13352             rtx t1, t2, mask;
13353
13354             /* Perform a parallel modulo subtraction.  */
13355             t1 = gen_reg_rtx (mode);
13356             emit_insn ((mode == V4SImode
13357                         ? gen_subv4si3
13358                         : gen_subv2di3) (t1, cop0, cop1));
13359
13360             /* Extract the original sign bit of op0.  */
13361             mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
13362                                             true, false);
13363             t2 = gen_reg_rtx (mode);
13364             emit_insn ((mode == V4SImode
13365                         ? gen_andv4si3
13366                         : gen_andv2di3) (t2, cop0, mask));
13367
13368             /* XOR it back into the result of the subtraction.  This results
13369                in the sign bit set iff we saw unsigned underflow.  */
13370             x = gen_reg_rtx (mode);
13371             emit_insn ((mode == V4SImode
13372                         ? gen_xorv4si3
13373                         : gen_xorv2di3) (x, t1, t2));
13374
13375             code = GT;
13376           }
13377           break;
13378
13379         case V16QImode:
13380         case V8HImode:
13381           /* Perform a parallel unsigned saturating subtraction.  */
13382           x = gen_reg_rtx (mode);
13383           emit_insn (gen_rtx_SET (VOIDmode, x,
13384                                   gen_rtx_US_MINUS (mode, cop0, cop1)));
13385
13386           code = EQ;
13387           negate = !negate;
13388           break;
13389
13390         default:
13391           gcc_unreachable ();
13392         }
13393
13394       cop0 = x;
13395       cop1 = CONST0_RTX (mode);
13396     }
13397
13398   x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
13399                            operands[1+negate], operands[2-negate]);
13400
13401   ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
13402                          operands[2-negate]);
13403   return true;
13404 }
13405
13406 /* Unpack OP[1] into the next wider integer vector type.  UNSIGNED_P is
13407    true if we should do zero extension, else sign extension.  HIGH_P is
13408    true if we want the N/2 high elements, else the low elements.

         The result is written to OPERANDS[0].  The widening is done by
         interleaving OPERANDS[1] with a second vector SE that supplies the
         extension: all zeros for zero extension, or the result of the
         comparison (0 > OPERANDS[1]) for sign extension.  Only V16QImode,
         V8HImode and V4SImode inputs are handled.  */
13409
13410 void
13411 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
13412 {
13413   enum machine_mode imode = GET_MODE (operands[1]);
13414   rtx (*unpack)(rtx, rtx, rtx);
13415   rtx se, dest;
13416
        /* Pick the high or low interleave generator for the input mode.  */
13417   switch (imode)
13418     {
13419     case V16QImode:
13420       if (high_p)
13421         unpack = gen_vec_interleave_highv16qi;
13422       else
13423         unpack = gen_vec_interleave_lowv16qi;
13424       break;
13425     case V8HImode:
13426       if (high_p)
13427         unpack = gen_vec_interleave_highv8hi;
13428       else
13429         unpack = gen_vec_interleave_lowv8hi;
13430       break;
13431     case V4SImode:
13432       if (high_p)
13433         unpack = gen_vec_interleave_highv4si;
13434       else
13435         unpack = gen_vec_interleave_lowv4si;
13436       break;
13437     default:
13438       gcc_unreachable ();
13439     }
13440
        /* The interleave is emitted in the input mode, so refer to the
           destination in that mode too.  */
13441   dest = gen_lowpart (imode, operands[0]);
13442
        /* pc_rtx is passed for the OP_TRUE/OP_FALSE arguments of
           ix86_expand_sse_cmp, which uses them only for an overlap check
           against its destination.  */
13443   if (unsigned_p)
13444     se = force_reg (imode, CONST0_RTX (imode));
13445   else
13446     se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
13447                               operands[1], pc_rtx, pc_rtx);
13448
13449   emit_insn (unpack (dest, operands[1], se));
13450 }
13451
13452 /* This function performs the same task as ix86_expand_sse_unpack,
13453    but with SSE4.1 instructions.

         OPERANDS[1] is extended into OPERANDS[0] with one of the
         gen_sse4_1_extend* / gen_sse4_1_zero_extend* generators, chosen by
         the input mode and UNSIGNED_P.  For HIGH_P the source is first
         shifted right by 64 bits so that its upper half ends up in the
         lower half.  */
13454
13455 void
13456 ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
13457 {
13458   enum machine_mode imode = GET_MODE (operands[1]);
13459   rtx (*unpack)(rtx, rtx);
13460   rtx src, dest;
13461
13462   switch (imode)
13463     {
13464     case V16QImode:
13465       if (unsigned_p)
13466         unpack = gen_sse4_1_zero_extendv8qiv8hi2;
13467       else
13468         unpack = gen_sse4_1_extendv8qiv8hi2;
13469       break;
13470     case V8HImode:
13471       if (unsigned_p)
13472         unpack = gen_sse4_1_zero_extendv4hiv4si2;
13473       else
13474         unpack = gen_sse4_1_extendv4hiv4si2;
13475       break;
13476     case V4SImode:
13477       if (unsigned_p)
13478         unpack = gen_sse4_1_zero_extendv2siv2di2;
13479       else
13480         unpack = gen_sse4_1_extendv2siv2di2;
13481       break;
13482     default:
13483       gcc_unreachable ();
13484     }
13485
13486   dest = operands[0];
13487   if (high_p)
13488     {
13489       /* Shift higher 8 bytes to lower 8 bytes.  */
            /* The shift is done on TImode views of a new pseudo and of
               OPERANDS[1], so OPERANDS[1] itself is left unchanged.  */
13490       src = gen_reg_rtx (imode);
13491       emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, src),
13492                                    gen_lowpart (TImode, operands[1]),
13493                                    GEN_INT (64)));
13494     }
13495   else
13496     src = operands[1];
13497
13498   emit_insn (unpack (dest, src));
13499 }
13500
13501 /* This function performs the same task as ix86_expand_sse_unpack,
13502    but with amdfam15 instructions (the sse5_pperm_* patterns).

         It builds a 16-entry table of selector bytes, one per byte of the
         result, loads it into a V16QImode register, and passes it to the
         pattern together with a PARALLEL listing the indices of the source
         elements being extended.  */
13503
      /* Selector byte encoding used for the tables below: one of the
         following operation values, combined with PPERM_SRC1 or PPERM_SRC2
         and a byte index in the low bits.  */
13504 #define PPERM_SRC       0x00            /* copy source */
13505 #define PPERM_INVERT    0x20            /* invert source */
13506 #define PPERM_REVERSE   0x40            /* bit reverse source */
13507 #define PPERM_REV_INV   0x60            /* bit reverse & invert src */
13508 #define PPERM_ZERO      0x80            /* all 0's */
13509 #define PPERM_ONES      0xa0            /* all 1's */
13510 #define PPERM_SIGN      0xc0            /* propagate sign bit */
13511 #define PPERM_INV_SIGN  0xe0            /* invert & propagate sign */
13512
13513 #define PPERM_SRC1      0x00            /* use first source byte */
13514 #define PPERM_SRC2      0x10            /* use second source byte */
13515
13516 void
13517 ix86_expand_sse5_unpack (rtx operands[2], bool unsigned_p, bool high_p)
13518 {
13519   enum machine_mode imode = GET_MODE (operands[1]);
13520   int pperm_bytes[16];
13521   int i;
        /* H is the byte offset of the half being unpacked; H2 (set per
           mode below) is the matching element offset.  */
13522   int h = (high_p) ? 8 : 0;
13523   int h2;
13524   int sign_extend;
13525   rtvec v = rtvec_alloc (16);
13526   rtvec vs;
13527   rtx x, p;
13528   rtx op0 = operands[0], op1 = operands[1];
13529
13530   switch (imode)
13531     {
13532     case V16QImode:
            /* Each source byte I+H yields two result bytes: the byte
               itself, then either zero or its propagated sign.  */
13533       vs = rtvec_alloc (8);
13534       h2 = (high_p) ? 8 : 0;
13535       for (i = 0; i < 8; i++)
13536         {
13537           pperm_bytes[2*i+0] = PPERM_SRC | PPERM_SRC2 | i | h;
13538           pperm_bytes[2*i+1] = ((unsigned_p)
13539                                 ? PPERM_ZERO
13540                                 : PPERM_SIGN | PPERM_SRC2 | i | h);
13541         }
13542
13543       for (i = 0; i < 16; i++)
13544         RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13545
13546       for (i = 0; i < 8; i++)
13547         RTVEC_ELT (vs, i) = GEN_INT (i + h2);
13548
13549       p = gen_rtx_PARALLEL (VOIDmode, vs);
13550       x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13551       if (unsigned_p)
13552         emit_insn (gen_sse5_pperm_zero_v16qi_v8hi (op0, op1, p, x));
13553       else
13554         emit_insn (gen_sse5_pperm_sign_v16qi_v8hi (op0, op1, p, x));
13555       break;
13556
13557     case V8HImode:
            /* Each 2-byte source element yields four result bytes: its two
               bytes, then two extension bytes.  The sign is propagated from
               byte 2*I+1+H, the second byte of the element.  */
13558       vs = rtvec_alloc (4);
13559       h2 = (high_p) ? 4 : 0;
13560       for (i = 0; i < 4; i++)
13561         {
13562           sign_extend = ((unsigned_p)
13563                          ? PPERM_ZERO
13564                          : PPERM_SIGN | PPERM_SRC2 | ((2*i) + 1 + h));
13565           pperm_bytes[4*i+0] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 0 + h);
13566           pperm_bytes[4*i+1] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 1 + h);
13567           pperm_bytes[4*i+2] = sign_extend;
13568           pperm_bytes[4*i+3] = sign_extend;
13569         }
13570
13571       for (i = 0; i < 16; i++)
13572         RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13573
13574       for (i = 0; i < 4; i++)
13575         RTVEC_ELT (vs, i) = GEN_INT (i + h2);
13576
13577       p = gen_rtx_PARALLEL (VOIDmode, vs);
13578       x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13579       if (unsigned_p)
13580         emit_insn (gen_sse5_pperm_zero_v8hi_v4si (op0, op1, p, x));
13581       else
13582         emit_insn (gen_sse5_pperm_sign_v8hi_v4si (op0, op1, p, x));
13583       break;
13584
13585     case V4SImode:
            /* Each 4-byte source element yields eight result bytes: its
               four bytes, then four extension bytes.  The sign is propagated
               from byte 4*I+3+H, the last byte of the element.  */
13586       vs = rtvec_alloc (2);
13587       h2 = (high_p) ? 2 : 0;
13588       for (i = 0; i < 2; i++)
13589         {
13590           sign_extend = ((unsigned_p)
13591                          ? PPERM_ZERO
13592                          : PPERM_SIGN | PPERM_SRC2 | ((4*i) + 3 + h));
13593           pperm_bytes[8*i+0] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 0 + h);
13594           pperm_bytes[8*i+1] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 1 + h);
13595           pperm_bytes[8*i+2] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 2 + h);
13596           pperm_bytes[8*i+3] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 3 + h);
13597           pperm_bytes[8*i+4] = sign_extend;
13598           pperm_bytes[8*i+5] = sign_extend;
13599           pperm_bytes[8*i+6] = sign_extend;
13600           pperm_bytes[8*i+7] = sign_extend;
13601         }
13602
13603       for (i = 0; i < 16; i++)
13604         RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13605
13606       for (i = 0; i < 2; i++)
13607         RTVEC_ELT (vs, i) = GEN_INT (i + h2);
13608
13609       p = gen_rtx_PARALLEL (VOIDmode, vs);
13610       x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13611       if (unsigned_p)
13612         emit_insn (gen_sse5_pperm_zero_v4si_v2di (op0, op1, p, x));
13613       else
13614         emit_insn (gen_sse5_pperm_sign_v4si_v2di (op0, op1, p, x));
13615       break;
13616
13617     default:
13618       gcc_unreachable ();
13619     }
13620
13621   return;
13622 }
13623
13624 /* Pack the high bits from OPERANDS[1] and low bits from OPERANDS[2] into the
13625    next narrower integer vector type.

         The mode of OPERANDS[0] selects the case.  A 16-entry table of pperm
         selector bytes is built so that result bytes 0-7 copy bytes from the
         first source and result bytes 8-15 copy bytes from the second; for
         every wide source element only its first half (the bytes at the
         lower indices) is copied.
         NOTE(review): how this relates to the "high bits"/"low bits"
         wording above is not evident from the tables - confirm.  */
13626 void
13627 ix86_expand_sse5_pack (rtx operands[3])
13628 {
13629   enum machine_mode imode = GET_MODE (operands[0]);
13630   int pperm_bytes[16];
13631   int i;
13632   rtvec v = rtvec_alloc (16);
13633   rtx x;
13634   rtx op0 = operands[0];
13635   rtx op1 = operands[1];
13636   rtx op2 = operands[2];
13637
13638   switch (imode)
13639     {
13640     case V16QImode:
            /* 2-byte source elements: take byte 2*I of each source.  */
13641       for (i = 0; i < 8; i++)
13642         {
13643           pperm_bytes[i+0] = PPERM_SRC | PPERM_SRC1 | (i*2);
13644           pperm_bytes[i+8] = PPERM_SRC | PPERM_SRC2 | (i*2);
13645         }
13646
13647       for (i = 0; i < 16; i++)
13648         RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13649
13650       x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13651       emit_insn (gen_sse5_pperm_pack_v8hi_v16qi (op0, op1, op2, x));
13652       break;
13653
13654     case V8HImode:
            /* 4-byte source elements: take bytes 4*I and 4*I+1.  */
13655       for (i = 0; i < 4; i++)
13656         {
13657           pperm_bytes[(2*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 0);
13658           pperm_bytes[(2*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 1);
13659           pperm_bytes[(2*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 0);
13660           pperm_bytes[(2*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 1);
13661         }
13662
13663       for (i = 0; i < 16; i++)
13664         RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13665
13666       x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13667       emit_insn (gen_sse5_pperm_pack_v4si_v8hi (op0, op1, op2, x));
13668       break;
13669
13670     case V4SImode:
            /* 8-byte source elements: take bytes 8*I .. 8*I+3.  */
13671       for (i = 0; i < 2; i++)
13672         {
13673           pperm_bytes[(4*i)+0]  = PPERM_SRC | PPERM_SRC1 | ((i*8) + 0);
13674           pperm_bytes[(4*i)+1]  = PPERM_SRC | PPERM_SRC1 | ((i*8) + 1);
13675           pperm_bytes[(4*i)+2]  = PPERM_SRC | PPERM_SRC1 | ((i*8) + 2);
13676           pperm_bytes[(4*i)+3]  = PPERM_SRC | PPERM_SRC1 | ((i*8) + 3);
13677           pperm_bytes[(4*i)+8]  = PPERM_SRC | PPERM_SRC2 | ((i*8) + 0);
13678           pperm_bytes[(4*i)+9]  = PPERM_SRC | PPERM_SRC2 | ((i*8) + 1);
13679           pperm_bytes[(4*i)+10] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 2);
13680           pperm_bytes[(4*i)+11] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 3);
13681         }
13682
13683       for (i = 0; i < 16; i++)
13684         RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13685
13686       x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13687       emit_insn (gen_sse5_pperm_pack_v2di_v4si (op0, op1, op2, x));
13688       break;
13689
13690     default:
13691       gcc_unreachable ();
13692     }
13693
13694   return;
13695 }
13696
13697 /* Expand conditional increment or decrement using adc/sbb instructions.
13698    The default case using setcc followed by the conditional move can be
13699    done by generic code.

         OPERANDS[0] is the destination, OPERANDS[1] supplies the comparison
         code (the compared values come from ix86_compare_op0 and
         ix86_compare_op1), OPERANDS[2] is the value being adjusted and
         OPERANDS[3] must be const1_rtx or constm1_rtx.  Returns 1 when the
         insn has been emitted, and 0 when OPERANDS[3] is something else or
         ix86_expand_carry_flag_compare fails.  */
13700 int
13701 ix86_expand_int_addcc (rtx operands[])
13702 {
13703   enum rtx_code code = GET_CODE (operands[1]);
13704   rtx compare_op;
13705   rtx val = const0_rtx;
13706   bool fpcmp = false;
13707   enum machine_mode mode = GET_MODE (operands[0]);
13708
13709   if (operands[3] != const1_rtx
13710       && operands[3] != constm1_rtx)
13711     return 0;
13712   if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
13713                                        ix86_compare_op1, &compare_op))
13714      return 0;
13715   code = GET_CODE (compare_op);
13716
        /* For a floating-point flags mode, translate the code to its
           integer counterpart before testing it against LTU below.  */
13717   if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
13718       || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
13719     {
13720       fpcmp = true;
13721       code = ix86_fp_compare_code_to_integer (code);
13722     }
13723
        /* If the condition is not LTU, reverse the comparison in place and
           use -1 instead of 0 as the constant operand of the carry insn.
           Note that CODE itself keeps its pre-reversal value.  */
13724   if (code != LTU)
13725     {
13726       val = constm1_rtx;
13727       if (fpcmp)
13728         PUT_CODE (compare_op,
13729                   reverse_condition_maybe_unordered
13730                     (GET_CODE (compare_op)));
13731       else
13732         PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
13733     }
13734   PUT_MODE (compare_op, mode);
13735
13736   /* Construct either adc or sbb insn.  */
        /* Subtract-with-carry is used when CODE is LTU and OPERANDS[3] is
           -1, or when CODE is not LTU and OPERANDS[3] is 1; add-with-carry
           otherwise.  */
13737   if ((code == LTU) == (operands[3] == constm1_rtx))
13738     {
13739       switch (GET_MODE (operands[0]))
13740         {
13741           case QImode:
13742             emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
13743             break;
13744           case HImode:
13745             emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
13746             break;
13747           case SImode:
13748             emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
13749             break;
13750           case DImode:
13751             emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
13752             break;
13753           default:
13754             gcc_unreachable ();
13755         }
13756     }
13757   else
13758     {
13759       switch (GET_MODE (operands[0]))
13760         {
13761           case QImode:
13762             emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
13763             break;
13764           case HImode:
13765             emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
13766             break;
13767           case SImode:
13768             emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
13769             break;
13770           case DImode:
13771             emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
13772             break;
13773           default:
13774             gcc_unreachable ();
13775         }
13776     }
13777   return 1; /* DONE */
13778 }
13779
13780
13781 /* Split operands 0 and 1 into SImode parts.  Similar to split_di, but
13782    works for floating point parameters and non-offsettable memories.
13783    For pushes, it returns just stack offsets; the values will be saved
13784    in the right order.  Maximally three parts are generated.

         OPERAND is the value to split and MODE its mode; the pieces are
         stored in PARTS[0], PARTS[1] and, when three parts are needed,
         PARTS[2].  The return value is the number of parts (2 or 3).  On
         64-bit targets the parts are DImode, except that the upper part of
         an XFmode value is SImode.  */
13785
13786 static int
13787 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
13788 {
13789   int size;
13790
        /* Number of parts: 4-byte pieces on 32-bit targets (always three
           for XFmode), 8-byte pieces on 64-bit targets.  */
13791   if (!TARGET_64BIT)
13792     size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
13793   else
13794     size = (GET_MODE_SIZE (mode) + 4) / 8;
13795
13796   gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
13797   gcc_assert (size >= 2 && size <= 3);
13798
13799   /* Optimize constant pool reference to immediates.  This is used by fp
13800      moves, that force all constants to memory to allow combining.  */
13801   if (MEM_P (operand) && MEM_READONLY_P (operand))
13802     {
13803       rtx tmp = maybe_get_pool_constant (operand);
13804       if (tmp)
13805         operand = tmp;
13806     }
13807
13808   if (MEM_P (operand) && !offsettable_memref_p (operand))
13809     {
13810       /* The only non-offsetable memories we handle are pushes.  */
13811       int ok = push_operand (operand, VOIDmode);
13812
13813       gcc_assert (ok);
13814
            /* Every part is the same Pmode copy of the push operand.  */
13815       operand = copy_rtx (operand);
13816       PUT_MODE (operand, Pmode);
13817       parts[0] = parts[1] = parts[2] = operand;
13818       return size;
13819     }
13820
13821   if (GET_CODE (operand) == CONST_VECTOR)
13822     {
13823       enum machine_mode imode = int_mode_for_mode (mode);
13824       /* Caution: if we looked through a constant pool memory above,
13825          the operand may actually have a different mode now.  That's
13826          ok, since we want to pun this all the way back to an integer.  */
13827       operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
13828       gcc_assert (operand != NULL);
13829       mode = imode;
13830     }
13831
13832   if (!TARGET_64BIT)
13833     {
13834       if (mode == DImode)
13835         split_di (&operand, 1, &parts[0], &parts[1]);
13836       else
13837         {
13838           if (REG_P (operand))
13839             {
                    /* Consecutive hard registers, one per part.  */
13840               gcc_assert (reload_completed);
13841               parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
13842               parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
13843               if (size == 3)
13844                 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
13845             }
13846           else if (offsettable_memref_p (operand))
13847             {
                    /* SImode references at byte offsets 0, 4 and 8.  */
13848               operand = adjust_address (operand, SImode, 0);
13849               parts[0] = operand;
13850               parts[1] = adjust_address (operand, SImode, 4);
13851               if (size == 3)
13852                 parts[2] = adjust_address (operand, SImode, 8);
13853             }
13854           else if (GET_CODE (operand) == CONST_DOUBLE)
13855             {
13856               REAL_VALUE_TYPE r;
13857               long l[4];
13858
                    /* Convert the constant to its target representation and
                       turn each word into an SImode immediate.  */
13859               REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
13860               switch (mode)
13861                 {
13862                 case XFmode:
13863                   REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
13864                   parts[2] = gen_int_mode (l[2], SImode);
13865                   break;
13866                 case DFmode:
13867                   REAL_VALUE_TO_TARGET_DOUBLE (r, l);
13868                   break;
13869                 default:
13870                   gcc_unreachable ();
13871                 }
13872               parts[1] = gen_int_mode (l[1], SImode);
13873               parts[0] = gen_int_mode (l[0], SImode);
13874             }
13875           else
13876             gcc_unreachable ();
13877         }
13878     }
13879   else
13880     {
13881       if (mode == TImode)
13882         split_ti (&operand, 1, &parts[0], &parts[1]);
13883       if (mode == XFmode || mode == TFmode)
13884         {
                /* The lower part is always DImode; the upper part is SImode
                   for XFmode and DImode for TFmode.  */
13885           enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
13886           if (REG_P (operand))
13887             {
13888               gcc_assert (reload_completed);
13889               parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
13890               parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
13891             }
13892           else if (offsettable_memref_p (operand))
13893             {
13894               operand = adjust_address (operand, DImode, 0);
13895               parts[0] = operand;
13896               parts[1] = adjust_address (operand, upper_mode, 8);
13897             }
13898           else if (GET_CODE (operand) == CONST_DOUBLE)
13899             {
13900               REAL_VALUE_TYPE r;
13901               long l[4];
13902
13903               REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
13904               real_to_target (l, &r, mode);
13905
                    /* Combine l[0] (low 32 bits) and l[1] (high 32 bits)
                       into one DImode constant.  */
13906               /* Do not use shift by 32 to avoid warning on 32bit systems.  */
13907               if (HOST_BITS_PER_WIDE_INT >= 64)
13908                 parts[0]
13909                   = gen_int_mode
13910                       ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
13911                        + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
13912                        DImode);
13913               else
13914                 parts[0] = immed_double_const (l[0], l[1], DImode);
13915
                    /* Likewise for the upper part, from l[2] (and l[3] when
                       the upper part is DImode).  */
13916               if (upper_mode == SImode)
13917                 parts[1] = gen_int_mode (l[2], SImode);
13918               else if (HOST_BITS_PER_WIDE_INT >= 64)
13919                 parts[1]
13920                   = gen_int_mode
13921                       ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
13922                        + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
13923                        DImode);
13924               else
13925                 parts[1] = immed_double_const (l[2], l[3], DImode);
13926             }
13927           else
13928             gcc_unreachable ();
13929         }
13930     }
13931
13932   return size;
13933 }
13934
/* Emit insns to perform a move or push of DI, DF, and XF values.
   All required insns are emitted here; nothing is returned.  On the
   non-push path operands 2-4 are set to the destination parts in the
   order in which they are written, and operands 5-7 to the matching
   source parts.  */
13939
13940 void
13941 ix86_split_long_move (rtx operands[])
13942 {
13943   rtx part[2][3];
13944   int nparts;
13945   int push = 0;
13946   int collisions = 0;
13947   enum machine_mode mode = GET_MODE (operands[0]);
13948
13949   /* The DFmode expanders may ask us to move double.
13950      For 64bit target this is single move.  By hiding the fact
13951      here we simplify i386.md splitters.  */
13952   if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
13953     {
13954       /* Optimize constant pool reference to immediates.  This is used by
13955          fp moves, that force all constants to memory to allow combining.  */
13956
13957       if (MEM_P (operands[1])
13958           && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
13959           && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
13960         operands[1] = get_pool_constant (XEXP (operands[1], 0));
13961       if (push_operand (operands[0], VOIDmode))
13962         {
13963           operands[0] = copy_rtx (operands[0]);
13964           PUT_MODE (operands[0], Pmode);
13965         }
13966       else
13967         operands[0] = gen_lowpart (DImode, operands[0]);
13968       operands[1] = gen_lowpart (DImode, operands[1]);
13969       emit_move_insn (operands[0], operands[1]);
13970       return;
13971     }
13972
13973   /* The only non-offsettable memory we handle is push.  */
13974   if (push_operand (operands[0], VOIDmode))
13975     push = 1;
13976   else
13977     gcc_assert (!MEM_P (operands[0])
13978                 || offsettable_memref_p (operands[0]));
13979
13980   nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
13981   ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
13982
13983   /* When emitting push, take care for source operands on the stack.  */
13984   if (push && MEM_P (operands[1])
13985       && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
13986     {
13987       if (nparts == 3)
13988         part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
13989                                      XEXP (part[1][2], 0));
13990       part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
13991                                    XEXP (part[1][1], 0));
13992     }
13993
13994   /* We need to do copy in the right order in case an address register
13995      of the source overlaps the destination.  */
13996   if (REG_P (part[0][0]) && MEM_P (part[1][0]))
13997     {
13998       if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
13999         collisions++;
14000       if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
14001         collisions++;
14002       if (nparts == 3
14003           && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
14004         collisions++;
14005
14006       /* Collision in the middle part can be handled by reordering.  */
14007       if (collisions == 1 && nparts == 3
14008           && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
14009         {
14010           rtx tmp;
14011           tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
14012           tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
14013         }
14014
14015       /* If there are more collisions, we can't handle it by reordering.
14016          Do an lea to the last part and use only one colliding move.  */
14017       else if (collisions > 1)
14018         {
14019           rtx base;
14020
14021           collisions = 1;
14022
14023           base = part[0][nparts - 1];
14024
14025           /* Handle the case when the last part isn't valid for lea.
14026              Happens in 64-bit mode storing the 12-byte XFmode.  */
14027           if (GET_MODE (base) != Pmode)
14028             base = gen_rtx_REG (Pmode, REGNO (base));
14029
14030           emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
14031           part[1][0] = replace_equiv_address (part[1][0], base);
14032           part[1][1] = replace_equiv_address (part[1][1],
14033                                       plus_constant (base, UNITS_PER_WORD));
14034           if (nparts == 3)
14035             part[1][2] = replace_equiv_address (part[1][2],
14036                                       plus_constant (base, 8));
14037         }
14038     }
14039
14040   if (push)
14041     {
14042       if (!TARGET_64BIT)
14043         {
14044           if (nparts == 3)
14045             {
14046               if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
14047                 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
14048               emit_move_insn (part[0][2], part[1][2]);
14049             }
14050         }
14051       else
14052         {
14053           /* In 64bit mode we don't have 32bit push available.  In case this is
14054              register, it is OK - we will just use larger counterpart.  We also
14055              retype memory - these comes from attempt to avoid REX prefix on
14056              moving of second half of TFmode value.  */
14057           if (GET_MODE (part[1][1]) == SImode)
14058             {
14059               switch (GET_CODE (part[1][1]))
14060                 {
14061                 case MEM:
14062                   part[1][1] = adjust_address (part[1][1], DImode, 0);
14063                   break;
14064
14065                 case REG:
14066                   part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
14067                   break;
14068
14069                 default:
14070                   gcc_unreachable ();
14071                 }
14072
14073               if (GET_MODE (part[1][0]) == SImode)
14074                 part[1][0] = part[1][1];
14075             }
14076         }
14077       emit_move_insn (part[0][1], part[1][1]);
14078       emit_move_insn (part[0][0], part[1][0]);
14079       return;
14080     }
14081
14082   /* Choose correct order to not overwrite the source before it is copied.  */
14083   if ((REG_P (part[0][0])
14084        && REG_P (part[1][1])
14085        && (REGNO (part[0][0]) == REGNO (part[1][1])
14086            || (nparts == 3
14087                && REGNO (part[0][0]) == REGNO (part[1][2]))))
14088       || (collisions > 0
14089           && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
14090     {
14091       if (nparts == 3)
14092         {
14093           operands[2] = part[0][2];
14094           operands[3] = part[0][1];
14095           operands[4] = part[0][0];
14096           operands[5] = part[1][2];
14097           operands[6] = part[1][1];
14098           operands[7] = part[1][0];
14099         }
14100       else
14101         {
14102           operands[2] = part[0][1];
14103           operands[3] = part[0][0];
14104           operands[5] = part[1][1];
14105           operands[6] = part[1][0];
14106         }
14107     }
14108   else
14109     {
14110       if (nparts == 3)
14111         {
14112           operands[2] = part[0][0];
14113           operands[3] = part[0][1];
14114           operands[4] = part[0][2];
14115           operands[5] = part[1][0];
14116           operands[6] = part[1][1];
14117           operands[7] = part[1][2];
14118         }
14119       else
14120         {
14121           operands[2] = part[0][0];
14122           operands[3] = part[0][1];
14123           operands[5] = part[1][0];
14124           operands[6] = part[1][1];
14125         }
14126     }
14127
14128   /* If optimizing for size, attempt to locally unCSE nonzero constants.  */
14129   if (optimize_size)
14130     {
14131       if (CONST_INT_P (operands[5])
14132           && operands[5] != const0_rtx
14133           && REG_P (operands[2]))
14134         {
14135           if (CONST_INT_P (operands[6])
14136               && INTVAL (operands[6]) == INTVAL (operands[5]))
14137             operands[6] = operands[2];
14138
14139           if (nparts == 3
14140               && CONST_INT_P (operands[7])
14141               && INTVAL (operands[7]) == INTVAL (operands[5]))
14142             operands[7] = operands[2];
14143         }
14144
14145       if (nparts == 3
14146           && CONST_INT_P (operands[6])
14147           && operands[6] != const0_rtx
14148           && REG_P (operands[3])
14149           && CONST_INT_P (operands[7])
14150           && INTVAL (operands[7]) == INTVAL (operands[6]))
14151         operands[7] = operands[3];
14152     }
14153
14154   emit_move_insn (operands[2], operands[5]);
14155   emit_move_insn (operands[3], operands[6]);
14156   if (nparts == 3)
14157     emit_move_insn (operands[4], operands[7]);
14158
14159   return;
14160 }
14161
14162 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
14163    left shift by a constant, either using a single shift or
14164    a sequence of add instructions.  */
14165
14166 static void
14167 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
14168 {
14169   if (count == 1)
14170     {
14171       emit_insn ((mode == DImode
14172                   ? gen_addsi3
14173                   : gen_adddi3) (operand, operand, operand));
14174     }
14175   else if (!optimize_size
14176            && count * ix86_cost->add <= ix86_cost->shift_const)
14177     {
14178       int i;
14179       for (i=0; i<count; i++)
14180         {
14181           emit_insn ((mode == DImode
14182                       ? gen_addsi3
14183                       : gen_adddi3) (operand, operand, operand));
14184         }
14185     }
14186   else
14187     emit_insn ((mode == DImode
14188                 ? gen_ashlsi3
14189                 : gen_ashldi3) (operand, operand, GEN_INT (count)));
14190 }
14191
14192 void
14193 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
14194 {
14195   rtx low[2], high[2];
14196   int count;
14197   const int single_width = mode == DImode ? 32 : 64;
14198
14199   if (CONST_INT_P (operands[2]))
14200     {
14201       (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
14202       count = INTVAL (operands[2]) & (single_width * 2 - 1);
14203
14204       if (count >= single_width)
14205         {
14206           emit_move_insn (high[0], low[1]);
14207           emit_move_insn (low[0], const0_rtx);
14208
14209           if (count > single_width)
14210             ix86_expand_ashl_const (high[0], count - single_width, mode);
14211         }
14212       else
14213         {
14214           if (!rtx_equal_p (operands[0], operands[1]))
14215             emit_move_insn (operands[0], operands[1]);
14216           emit_insn ((mode == DImode
14217                      ? gen_x86_shld_1
14218                      : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
14219           ix86_expand_ashl_const (low[0], count, mode);
14220         }
14221       return;
14222     }
14223
14224   (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
14225
14226   if (operands[1] == const1_rtx)
14227     {
14228       /* Assuming we've chosen a QImode capable registers, then 1 << N
14229          can be done with two 32/64-bit shifts, no branches, no cmoves.  */
14230       if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
14231         {
14232           rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
14233
14234           ix86_expand_clear (low[0]);
14235           ix86_expand_clear (high[0]);
14236           emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
14237
14238           d = gen_lowpart (QImode, low[0]);
14239           d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
14240           s = gen_rtx_EQ (QImode, flags, const0_rtx);
14241           emit_insn (gen_rtx_SET (VOIDmode, d, s));
14242
14243           d = gen_lowpart (QImode, high[0]);
14244           d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
14245           s = gen_rtx_NE (QImode, flags, const0_rtx);
14246           emit_insn (gen_rtx_SET (VOIDmode, d, s));
14247         }
14248
14249       /* Otherwise, we can get the same results by manually performing
14250          a bit extract operation on bit 5/6, and then performing the two
14251          shifts.  The two methods of getting 0/1 into low/high are exactly
14252          the same size.  Avoiding the shift in the bit extract case helps
14253          pentium4 a bit; no one else seems to care much either way.  */
14254       else
14255         {
14256           rtx x;
14257
14258           if (TARGET_PARTIAL_REG_STALL && !optimize_size)
14259             x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
14260           else
14261             x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
14262           emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
14263
14264           emit_insn ((mode == DImode
14265                       ? gen_lshrsi3
14266                       : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
14267           emit_insn ((mode == DImode
14268                       ? gen_andsi3
14269                       : gen_anddi3) (high[0], high[0], GEN_INT (1)));
14270           emit_move_insn (low[0], high[0]);
14271           emit_insn ((mode == DImode
14272                       ? gen_xorsi3
14273                       : gen_xordi3) (low[0], low[0], GEN_INT (1)));
14274         }
14275
14276       emit_insn ((mode == DImode
14277                     ? gen_ashlsi3
14278                     : gen_ashldi3) (low[0], low[0], operands[2]));
14279       emit_insn ((mode == DImode
14280                     ? gen_ashlsi3
14281                     : gen_ashldi3) (high[0], high[0], operands[2]));
14282       return;
14283     }
14284
14285   if (operands[1] == constm1_rtx)
14286     {
14287       /* For -1 << N, we can avoid the shld instruction, because we
14288          know that we're shifting 0...31/63 ones into a -1.  */
14289       emit_move_insn (low[0], constm1_rtx);
14290       if (optimize_size)
14291         emit_move_insn (high[0], low[0]);
14292       else
14293         emit_move_insn (high[0], constm1_rtx);
14294     }
14295   else
14296     {
14297       if (!rtx_equal_p (operands[0], operands[1]))
14298         emit_move_insn (operands[0], operands[1]);
14299
14300       (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
14301       emit_insn ((mode == DImode
14302                   ? gen_x86_shld_1
14303                   : gen_x86_64_shld) (high[0], low[0], operands[2]));
14304     }
14305
14306   emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
14307
14308   if (TARGET_CMOVE && scratch)
14309     {
14310       ix86_expand_clear (scratch);
14311       emit_insn ((mode == DImode
14312                   ? gen_x86_shift_adj_1
14313                   : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
14314     }
14315   else
14316     emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
14317 }
14318
14319 void
14320 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
14321 {
14322   rtx low[2], high[2];
14323   int count;
14324   const int single_width = mode == DImode ? 32 : 64;
14325
14326   if (CONST_INT_P (operands[2]))
14327     {
14328       (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
14329       count = INTVAL (operands[2]) & (single_width * 2 - 1);
14330
14331       if (count == single_width * 2 - 1)
14332         {
14333           emit_move_insn (high[0], high[1]);
14334           emit_insn ((mode == DImode
14335                       ? gen_ashrsi3
14336                       : gen_ashrdi3) (high[0], high[0],
14337                                       GEN_INT (single_width - 1)));
14338           emit_move_insn (low[0], high[0]);
14339
14340         }
14341       else if (count >= single_width)
14342         {
14343           emit_move_insn (low[0], high[1]);
14344           emit_move_insn (high[0], low[0]);
14345           emit_insn ((mode == DImode
14346                       ? gen_ashrsi3
14347                       : gen_ashrdi3) (high[0], high[0],
14348                                       GEN_INT (single_width - 1)));
14349           if (count > single_width)
14350             emit_insn ((mode == DImode
14351                         ? gen_ashrsi3
14352                         : gen_ashrdi3) (low[0], low[0],
14353                                         GEN_INT (count - single_width)));
14354         }
14355       else
14356         {
14357           if (!rtx_equal_p (operands[0], operands[1]))
14358             emit_move_insn (operands[0], operands[1]);
14359           emit_insn ((mode == DImode
14360                       ? gen_x86_shrd_1
14361                       : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
14362           emit_insn ((mode == DImode
14363                       ? gen_ashrsi3
14364                       : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
14365         }
14366     }
14367   else
14368     {
14369       if (!rtx_equal_p (operands[0], operands[1]))
14370         emit_move_insn (operands[0], operands[1]);
14371
14372       (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
14373
14374       emit_insn ((mode == DImode
14375                   ? gen_x86_shrd_1
14376                   : gen_x86_64_shrd) (low[0], high[0], operands[2]));
14377       emit_insn ((mode == DImode
14378                   ? gen_ashrsi3
14379                   : gen_ashrdi3)  (high[0], high[0], operands[2]));
14380
14381       if (TARGET_CMOVE && scratch)
14382         {
14383           emit_move_insn (scratch, high[0]);
14384           emit_insn ((mode == DImode
14385                       ? gen_ashrsi3
14386                       : gen_ashrdi3) (scratch, scratch,
14387                                       GEN_INT (single_width - 1)));
14388           emit_insn ((mode == DImode
14389                       ? gen_x86_shift_adj_1
14390                       : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
14391                                          scratch));
14392         }
14393       else
14394         emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
14395     }
14396 }
14397
14398 void
14399 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
14400 {
14401   rtx low[2], high[2];
14402   int count;
14403   const int single_width = mode == DImode ? 32 : 64;
14404
14405   if (CONST_INT_P (operands[2]))
14406     {
14407       (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
14408       count = INTVAL (operands[2]) & (single_width * 2 - 1);
14409
14410       if (count >= single_width)
14411         {
14412           emit_move_insn (low[0], high[1]);
14413           ix86_expand_clear (high[0]);
14414
14415           if (count > single_width)
14416             emit_insn ((mode == DImode
14417                         ? gen_lshrsi3
14418                         : gen_lshrdi3) (low[0], low[0],
14419                                         GEN_INT (count - single_width)));
14420         }
14421       else
14422         {
14423           if (!rtx_equal_p (operands[0], operands[1]))
14424             emit_move_insn (operands[0], operands[1]);
14425           emit_insn ((mode == DImode
14426                       ? gen_x86_shrd_1
14427                       : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
14428           emit_insn ((mode == DImode
14429                       ? gen_lshrsi3
14430                       : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
14431         }
14432     }
14433   else
14434     {
14435       if (!rtx_equal_p (operands[0], operands[1]))
14436         emit_move_insn (operands[0], operands[1]);
14437
14438       (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
14439
14440       emit_insn ((mode == DImode
14441                   ? gen_x86_shrd_1
14442                   : gen_x86_64_shrd) (low[0], high[0], operands[2]));
14443       emit_insn ((mode == DImode
14444                   ? gen_lshrsi3
14445                   : gen_lshrdi3) (high[0], high[0], operands[2]));
14446
14447       /* Heh.  By reversing the arguments, we can reuse this pattern.  */
14448       if (TARGET_CMOVE && scratch)
14449         {
14450           ix86_expand_clear (scratch);
14451           emit_insn ((mode == DImode
14452                       ? gen_x86_shift_adj_1
14453                       : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
14454                                                scratch));
14455         }
14456       else
14457         emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
14458     }
14459 }
14460
14461 /* Predict just emitted jump instruction to be taken with probability PROB.  */
14462 static void
14463 predict_jump (int prob)
14464 {
14465   rtx insn = get_last_insn ();
14466   gcc_assert (JUMP_P (insn));
14467   REG_NOTES (insn)
14468     = gen_rtx_EXPR_LIST (REG_BR_PROB,
14469                          GEN_INT (prob),
14470                          REG_NOTES (insn));
14471 }
14472
/* Helper function for the string operations below.  Test whether VARIABLE
   has any of the bits in VALUE set.  If none are set, jump to a newly
   created label, which is returned.  */
14475 static rtx
14476 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
14477 {
14478   rtx label = gen_label_rtx ();
14479   rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
14480   if (GET_MODE (variable) == DImode)
14481     emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
14482   else
14483     emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
14484   emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
14485                            1, label);
14486   if (epilogue)
14487     predict_jump (REG_BR_PROB_BASE * 50 / 100);
14488   else
14489     predict_jump (REG_BR_PROB_BASE * 90 / 100);
14490   return label;
14491 }
14492
/* Decrease COUNTREG by VALUE.  */
14494 static void
14495 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
14496 {
14497   if (GET_MODE (countreg) == DImode)
14498     emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
14499   else
14500     emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
14501 }
14502
14503 /* Zero extend possibly SImode EXP to Pmode register.  */
14504 rtx
14505 ix86_zero_extend_to_Pmode (rtx exp)
14506 {
14507   rtx r;
14508   if (GET_MODE (exp) == VOIDmode)
14509     return force_reg (Pmode, exp);
14510   if (GET_MODE (exp) == Pmode)
14511     return copy_to_mode_reg (Pmode, exp);
14512   r = gen_reg_rtx (Pmode);
14513   emit_insn (gen_zero_extendsidi2 (r, exp));
14514   return r;
14515 }
14516
14517 /* Divide COUNTREG by SCALE.  */
14518 static rtx
14519 scale_counter (rtx countreg, int scale)
14520 {
14521   rtx sc;
14522   rtx piece_size_mask;
14523
14524   if (scale == 1)
14525     return countreg;
14526   if (CONST_INT_P (countreg))
14527     return GEN_INT (INTVAL (countreg) / scale);
14528   gcc_assert (REG_P (countreg));
14529
14530   piece_size_mask = GEN_INT (scale - 1);
14531   sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
14532                             GEN_INT (exact_log2 (scale)),
14533                             NULL, 1, OPTAB_DIRECT);
14534   return sc;
14535 }
14536
14537 /* Return mode for the memcpy/memset loop counter.  Prefer SImode over
14538    DImode for constant loop counts.  */
14539
14540 static enum machine_mode
14541 counter_mode (rtx count_exp)
14542 {
14543   if (GET_MODE (count_exp) != VOIDmode)
14544     return GET_MODE (count_exp);
14545   if (GET_CODE (count_exp) != CONST_INT)
14546     return Pmode;
14547   if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
14548     return DImode;
14549   return SImode;
14550 }
14551
14552 /* When SRCPTR is non-NULL, output simple loop to move memory
14553    pointer to SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times,
14554    overall size is COUNT specified in bytes.  When SRCPTR is NULL, output the
14555    equivalent loop to set memory by VALUE (supposed to be in MODE).
14556
14557    The size is rounded down to whole number of chunk size moved at once.
14558    SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info.  */
14559
14560
14561 static void
14562 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
14563                                rtx destptr, rtx srcptr, rtx value,
14564                                rtx count, enum machine_mode mode, int unroll,
14565                                int expected_size)
14566 {
14567   rtx out_label, top_label, iter, tmp;
14568   enum machine_mode iter_mode = counter_mode (count);
14569   rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
14570   rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
14571   rtx size;
14572   rtx x_addr;
14573   rtx y_addr;
14574   int i;
14575
14576   top_label = gen_label_rtx ();
14577   out_label = gen_label_rtx ();
14578   iter = gen_reg_rtx (iter_mode);
14579
14580   size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
14581                               NULL, 1, OPTAB_DIRECT);
14582   /* Those two should combine.  */
14583   if (piece_size == const1_rtx)
14584     {
14585       emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
14586                                true, out_label);
14587       predict_jump (REG_BR_PROB_BASE * 10 / 100);
14588     }
14589   emit_move_insn (iter, const0_rtx);
14590
14591   emit_label (top_label);
14592
14593   tmp = convert_modes (Pmode, iter_mode, iter, true);
14594   x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
14595   destmem = change_address (destmem, mode, x_addr);
14596
14597   if (srcmem)
14598     {
14599       y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
14600       srcmem = change_address (srcmem, mode, y_addr);
14601
14602       /* When unrolling for chips that reorder memory reads and writes,
14603          we can save registers by using single temporary.
14604          Also using 4 temporaries is overkill in 32bit mode.  */
14605       if (!TARGET_64BIT && 0)
14606         {
14607           for (i = 0; i < unroll; i++)
14608             {
14609               if (i)
14610                 {
14611                   destmem =
14612                     adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
14613                   srcmem =
14614                     adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
14615                 }
14616               emit_move_insn (destmem, srcmem);
14617             }
14618         }
14619       else
14620         {
14621           rtx tmpreg[4];
14622           gcc_assert (unroll <= 4);
14623           for (i = 0; i < unroll; i++)
14624             {
14625               tmpreg[i] = gen_reg_rtx (mode);
14626               if (i)
14627                 {
14628                   srcmem =
14629                     adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
14630                 }
14631               emit_move_insn (tmpreg[i], srcmem);
14632             }
14633           for (i = 0; i < unroll; i++)
14634             {
14635               if (i)
14636                 {
14637                   destmem =
14638                     adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
14639                 }
14640               emit_move_insn (destmem, tmpreg[i]);
14641             }
14642         }
14643     }
14644   else
14645     for (i = 0; i < unroll; i++)
14646       {
14647         if (i)
14648           destmem =
14649             adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
14650         emit_move_insn (destmem, value);
14651       }
14652
14653   tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
14654                              true, OPTAB_LIB_WIDEN);
14655   if (tmp != iter)
14656     emit_move_insn (iter, tmp);
14657
14658   emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
14659                            true, top_label);
14660   if (expected_size != -1)
14661     {
14662       expected_size /= GET_MODE_SIZE (mode) * unroll;
14663       if (expected_size == 0)
14664         predict_jump (0);
14665       else if (expected_size > REG_BR_PROB_BASE)
14666         predict_jump (REG_BR_PROB_BASE - 1);
14667       else
14668         predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
14669     }
14670   else
14671     predict_jump (REG_BR_PROB_BASE * 80 / 100);
14672   iter = ix86_zero_extend_to_Pmode (iter);
14673   tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
14674                              true, OPTAB_LIB_WIDEN);
14675   if (tmp != destptr)
14676     emit_move_insn (destptr, tmp);
14677   if (srcptr)
14678     {
14679       tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
14680                                  true, OPTAB_LIB_WIDEN);
14681       if (tmp != srcptr)
14682         emit_move_insn (srcptr, tmp);
14683     }
14684   emit_label (out_label);
14685 }
14686
14687 /* Output "rep; mov" instruction.
14688    Arguments have same meaning as for previous function */
14689 static void
14690 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
14691                            rtx destptr, rtx srcptr,
14692                            rtx count,
14693                            enum machine_mode mode)
14694 {
14695   rtx destexp;
14696   rtx srcexp;
14697   rtx countreg;
14698
14699   /* If the size is known, it is shorter to use rep movs.  */
14700   if (mode == QImode && CONST_INT_P (count)
14701       && !(INTVAL (count) & 3))
14702     mode = SImode;
14703
14704   if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
14705     destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
14706   if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
14707     srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
14708   countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
14709   if (mode != QImode)
14710     {
14711       destexp = gen_rtx_ASHIFT (Pmode, countreg,
14712                                 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
14713       destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
14714       srcexp = gen_rtx_ASHIFT (Pmode, countreg,
14715                                GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
14716       srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
14717     }
14718   else
14719     {
14720       destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
14721       srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
14722     }
14723   emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
14724                           destexp, srcexp));
14725 }
14726
14727 /* Output "rep; stos" instruction.
14728    Arguments have same meaning as for previous function */
14729 static void
14730 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
14731                             rtx count,
14732                             enum machine_mode mode)
14733 {
14734   rtx destexp;
14735   rtx countreg;
14736
14737   if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
14738     destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
14739   value = force_reg (mode, gen_lowpart (mode, value));
14740   countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
14741   if (mode != QImode)
14742     {
14743       destexp = gen_rtx_ASHIFT (Pmode, countreg,
14744                                 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
14745       destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
14746     }
14747   else
14748     destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
14749   emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
14750 }
14751
14752 static void
14753 emit_strmov (rtx destmem, rtx srcmem,
14754              rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
14755 {
14756   rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
14757   rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
14758   emit_insn (gen_strmov (destptr, dest, srcptr, src));
14759 }
14760
14761 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST.  */
14762 static void
14763 expand_movmem_epilogue (rtx destmem, rtx srcmem,
14764                         rtx destptr, rtx srcptr, rtx count, int max_size)
14765 {
14766   rtx src, dest;
14767   if (CONST_INT_P (count))
14768     {
14769       HOST_WIDE_INT countval = INTVAL (count);
14770       int offset = 0;
14771
14772       if ((countval & 0x10) && max_size > 16)
14773         {
14774           if (TARGET_64BIT)
14775             {
14776               emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
14777               emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
14778             }
14779           else
14780             gcc_unreachable ();
14781           offset += 16;
14782         }
14783       if ((countval & 0x08) && max_size > 8)
14784         {
14785           if (TARGET_64BIT)
14786             emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
14787           else
14788             {
14789               emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
14790               emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
14791             }
14792           offset += 8;
14793         }
14794       if ((countval & 0x04) && max_size > 4)
14795         {
14796           emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
14797           offset += 4;
14798         }
14799       if ((countval & 0x02) && max_size > 2)
14800         {
14801           emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
14802           offset += 2;
14803         }
14804       if ((countval & 0x01) && max_size > 1)
14805         {
14806           emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
14807           offset += 1;
14808         }
14809       return;
14810     }
14811   if (max_size > 8)
14812     {
14813       count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
14814                                     count, 1, OPTAB_DIRECT);
14815       expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
14816                                      count, QImode, 1, 4);
14817       return;
14818     }
14819
14820   /* When there are stringops, we can cheaply increase dest and src pointers.
14821      Otherwise we save code size by maintaining offset (zero is readily
14822      available from preceding rep operation) and using x86 addressing modes.
14823    */
14824   if (TARGET_SINGLE_STRINGOP)
14825     {
14826       if (max_size > 4)
14827         {
14828           rtx label = ix86_expand_aligntest (count, 4, true);
14829           src = change_address (srcmem, SImode, srcptr);
14830           dest = change_address (destmem, SImode, destptr);
14831           emit_insn (gen_strmov (destptr, dest, srcptr, src));
14832           emit_label (label);
14833           LABEL_NUSES (label) = 1;
14834         }
14835       if (max_size > 2)
14836         {
14837           rtx label = ix86_expand_aligntest (count, 2, true);
14838           src = change_address (srcmem, HImode, srcptr);
14839           dest = change_address (destmem, HImode, destptr);
14840           emit_insn (gen_strmov (destptr, dest, srcptr, src));
14841           emit_label (label);
14842           LABEL_NUSES (label) = 1;
14843         }
14844       if (max_size > 1)
14845         {
14846           rtx label = ix86_expand_aligntest (count, 1, true);
14847           src = change_address (srcmem, QImode, srcptr);
14848           dest = change_address (destmem, QImode, destptr);
14849           emit_insn (gen_strmov (destptr, dest, srcptr, src));
14850           emit_label (label);
14851           LABEL_NUSES (label) = 1;
14852         }
14853     }
14854   else
14855     {
14856       rtx offset = force_reg (Pmode, const0_rtx);
14857       rtx tmp;
14858
14859       if (max_size > 4)
14860         {
14861           rtx label = ix86_expand_aligntest (count, 4, true);
14862           src = change_address (srcmem, SImode, srcptr);
14863           dest = change_address (destmem, SImode, destptr);
14864           emit_move_insn (dest, src);
14865           tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
14866                                      true, OPTAB_LIB_WIDEN);
14867           if (tmp != offset)
14868             emit_move_insn (offset, tmp);
14869           emit_label (label);
14870           LABEL_NUSES (label) = 1;
14871         }
14872       if (max_size > 2)
14873         {
14874           rtx label = ix86_expand_aligntest (count, 2, true);
14875           tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
14876           src = change_address (srcmem, HImode, tmp);
14877           tmp = gen_rtx_PLUS (Pmode, destptr, offset);
14878           dest = change_address (destmem, HImode, tmp);
14879           emit_move_insn (dest, src);
14880           tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
14881                                      true, OPTAB_LIB_WIDEN);
14882           if (tmp != offset)
14883             emit_move_insn (offset, tmp);
14884           emit_label (label);
14885           LABEL_NUSES (label) = 1;
14886         }
14887       if (max_size > 1)
14888         {
14889           rtx label = ix86_expand_aligntest (count, 1, true);
14890           tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
14891           src = change_address (srcmem, QImode, tmp);
14892           tmp = gen_rtx_PLUS (Pmode, destptr, offset);
14893           dest = change_address (destmem, QImode, tmp);
14894           emit_move_insn (dest, src);
14895           emit_label (label);
14896           LABEL_NUSES (label) = 1;
14897         }
14898     }
14899 }
14900
14901 /* Output code to set at most count & (max_size - 1) bytes starting by DEST.  */
14902 static void
14903 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
14904                                  rtx count, int max_size)
14905 {
14906   count =
14907     expand_simple_binop (counter_mode (count), AND, count,
14908                          GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
14909   expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
14910                                  gen_lowpart (QImode, value), count, QImode,
14911                                  1, max_size / 2);
14912 }
14913
14914 /* Output code to set at most count & (max_size - 1) bytes starting by DEST.  */
14915 static void
14916 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
14917 {
14918   rtx dest;
14919
14920   if (CONST_INT_P (count))
14921     {
14922       HOST_WIDE_INT countval = INTVAL (count);
14923       int offset = 0;
14924
14925       if ((countval & 0x10) && max_size > 16)
14926         {
14927           if (TARGET_64BIT)
14928             {
14929               dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
14930               emit_insn (gen_strset (destptr, dest, value));
14931               dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
14932               emit_insn (gen_strset (destptr, dest, value));
14933             }
14934           else
14935             gcc_unreachable ();
14936           offset += 16;
14937         }
14938       if ((countval & 0x08) && max_size > 8)
14939         {
14940           if (TARGET_64BIT)
14941             {
14942               dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
14943               emit_insn (gen_strset (destptr, dest, value));
14944             }
14945           else
14946             {
14947               dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
14948               emit_insn (gen_strset (destptr, dest, value));
14949               dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
14950               emit_insn (gen_strset (destptr, dest, value));
14951             }
14952           offset += 8;
14953         }
14954       if ((countval & 0x04) && max_size > 4)
14955         {
14956           dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
14957           emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
14958           offset += 4;
14959         }
14960       if ((countval & 0x02) && max_size > 2)
14961         {
14962           dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
14963           emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
14964           offset += 2;
14965         }
14966       if ((countval & 0x01) && max_size > 1)
14967         {
14968           dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
14969           emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
14970           offset += 1;
14971         }
14972       return;
14973     }
14974   if (max_size > 32)
14975     {
14976       expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
14977       return;
14978     }
14979   if (max_size > 16)
14980     {
14981       rtx label = ix86_expand_aligntest (count, 16, true);
14982       if (TARGET_64BIT)
14983         {
14984           dest = change_address (destmem, DImode, destptr);
14985           emit_insn (gen_strset (destptr, dest, value));
14986           emit_insn (gen_strset (destptr, dest, value));
14987         }
14988       else
14989         {
14990           dest = change_address (destmem, SImode, destptr);
14991           emit_insn (gen_strset (destptr, dest, value));
14992           emit_insn (gen_strset (destptr, dest, value));
14993           emit_insn (gen_strset (destptr, dest, value));
14994           emit_insn (gen_strset (destptr, dest, value));
14995         }
14996       emit_label (label);
14997       LABEL_NUSES (label) = 1;
14998     }
14999   if (max_size > 8)
15000     {
15001       rtx label = ix86_expand_aligntest (count, 8, true);
15002       if (TARGET_64BIT)
15003         {
15004           dest = change_address (destmem, DImode, destptr);
15005           emit_insn (gen_strset (destptr, dest, value));
15006         }
15007       else
15008         {
15009           dest = change_address (destmem, SImode, destptr);
15010           emit_insn (gen_strset (destptr, dest, value));
15011           emit_insn (gen_strset (destptr, dest, value));
15012         }
15013       emit_label (label);
15014       LABEL_NUSES (label) = 1;
15015     }
15016   if (max_size > 4)
15017     {
15018       rtx label = ix86_expand_aligntest (count, 4, true);
15019       dest = change_address (destmem, SImode, destptr);
15020       emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
15021       emit_label (label);
15022       LABEL_NUSES (label) = 1;
15023     }
15024   if (max_size > 2)
15025     {
15026       rtx label = ix86_expand_aligntest (count, 2, true);
15027       dest = change_address (destmem, HImode, destptr);
15028       emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
15029       emit_label (label);
15030       LABEL_NUSES (label) = 1;
15031     }
15032   if (max_size > 1)
15033     {
15034       rtx label = ix86_expand_aligntest (count, 1, true);
15035       dest = change_address (destmem, QImode, destptr);
15036       emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
15037       emit_label (label);
15038       LABEL_NUSES (label) = 1;
15039     }
15040 }
15041
15042 /* Copy enough from DEST to SRC to align DEST known to by aligned by ALIGN to
15043    DESIRED_ALIGNMENT.  */
15044 static void
15045 expand_movmem_prologue (rtx destmem, rtx srcmem,
15046                         rtx destptr, rtx srcptr, rtx count,
15047                         int align, int desired_alignment)
15048 {
15049   if (align <= 1 && desired_alignment > 1)
15050     {
15051       rtx label = ix86_expand_aligntest (destptr, 1, false);
15052       srcmem = change_address (srcmem, QImode, srcptr);
15053       destmem = change_address (destmem, QImode, destptr);
15054       emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
15055       ix86_adjust_counter (count, 1);
15056       emit_label (label);
15057       LABEL_NUSES (label) = 1;
15058     }
15059   if (align <= 2 && desired_alignment > 2)
15060     {
15061       rtx label = ix86_expand_aligntest (destptr, 2, false);
15062       srcmem = change_address (srcmem, HImode, srcptr);
15063       destmem = change_address (destmem, HImode, destptr);
15064       emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
15065       ix86_adjust_counter (count, 2);
15066       emit_label (label);
15067       LABEL_NUSES (label) = 1;
15068     }
15069   if (align <= 4 && desired_alignment > 4)
15070     {
15071       rtx label = ix86_expand_aligntest (destptr, 4, false);
15072       srcmem = change_address (srcmem, SImode, srcptr);
15073       destmem = change_address (destmem, SImode, destptr);
15074       emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
15075       ix86_adjust_counter (count, 4);
15076       emit_label (label);
15077       LABEL_NUSES (label) = 1;
15078     }
15079   gcc_assert (desired_alignment <= 8);
15080 }
15081
15082 /* Set enough from DEST to align DEST known to by aligned by ALIGN to
15083    DESIRED_ALIGNMENT.  */
15084 static void
15085 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
15086                         int align, int desired_alignment)
15087 {
15088   if (align <= 1 && desired_alignment > 1)
15089     {
15090       rtx label = ix86_expand_aligntest (destptr, 1, false);
15091       destmem = change_address (destmem, QImode, destptr);
15092       emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
15093       ix86_adjust_counter (count, 1);
15094       emit_label (label);
15095       LABEL_NUSES (label) = 1;
15096     }
15097   if (align <= 2 && desired_alignment > 2)
15098     {
15099       rtx label = ix86_expand_aligntest (destptr, 2, false);
15100       destmem = change_address (destmem, HImode, destptr);
15101       emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
15102       ix86_adjust_counter (count, 2);
15103       emit_label (label);
15104       LABEL_NUSES (label) = 1;
15105     }
15106   if (align <= 4 && desired_alignment > 4)
15107     {
15108       rtx label = ix86_expand_aligntest (destptr, 4, false);
15109       destmem = change_address (destmem, SImode, destptr);
15110       emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
15111       ix86_adjust_counter (count, 4);
15112       emit_label (label);
15113       LABEL_NUSES (label) = 1;
15114     }
15115   gcc_assert (desired_alignment <= 8);
15116 }
15117
15118 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation.  */
15119 static enum stringop_alg
15120 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
15121             int *dynamic_check)
15122 {
15123   const struct stringop_algs * algs;
15124   /* Algorithms using the rep prefix want at least edi and ecx;
15125      additionally, memset wants eax and memcpy wants esi.  Don't
15126      consider such algorithms if the user has appropriated those
15127      registers for their own purposes.  */
15128   bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
15129                              || (memset
15130                                  ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
15131
15132 #define ALG_USABLE_P(alg) (rep_prefix_usable                    \
15133                            || (alg != rep_prefix_1_byte         \
15134                                && alg != rep_prefix_4_byte      \
15135                                && alg != rep_prefix_8_byte))
15136
15137   *dynamic_check = -1;
15138   if (memset)
15139     algs = &ix86_cost->memset[TARGET_64BIT != 0];
15140   else
15141     algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
15142   if (stringop_alg != no_stringop && ALG_USABLE_P (stringop_alg))
15143     return stringop_alg;
15144   /* rep; movq or rep; movl is the smallest variant.  */
15145   else if (optimize_size)
15146     {
15147       if (!count || (count & 3))
15148         return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
15149       else
15150         return rep_prefix_usable ? rep_prefix_4_byte : loop;
15151     }
15152   /* Very tiny blocks are best handled via the loop, REP is expensive to setup.
15153    */
15154   else if (expected_size != -1 && expected_size < 4)
15155     return loop_1_byte;
15156   else if (expected_size != -1)
15157     {
15158       unsigned int i;
15159       enum stringop_alg alg = libcall;
15160       for (i = 0; i < NAX_STRINGOP_ALGS; i++)
15161         {
15162           /* We get here if the algorithms that were not libcall-based
15163              were rep-prefix based and we are unable to use rep prefixes
15164              based on global register usage.  Break out of the loop and
15165              use the heuristic below.  */
15166           if (algs->size[i].max == 0)
15167             break;
15168           if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
15169             {
15170               enum stringop_alg candidate = algs->size[i].alg;
15171
15172               if (candidate != libcall && ALG_USABLE_P (candidate))
15173                 alg = candidate;
15174               /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
15175                  last non-libcall inline algorithm.  */
15176               if (TARGET_INLINE_ALL_STRINGOPS)
15177                 {
15178                   /* When the current size is best to be copied by a libcall,
15179                      but we are still forced to inline, run the heuristic below
15180                      that will pick code for medium sized blocks.  */
15181                   if (alg != libcall)
15182                     return alg;
15183                   break;
15184                 }
15185               else if (ALG_USABLE_P (candidate))
15186                 return candidate;
15187             }
15188         }
15189       gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
15190     }
15191   /* When asked to inline the call anyway, try to pick meaningful choice.
15192      We look for maximal size of block that is faster to copy by hand and
15193      take blocks of at most of that size guessing that average size will
15194      be roughly half of the block.
15195
15196      If this turns out to be bad, we might simply specify the preferred
15197      choice in ix86_costs.  */
15198   if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
15199       && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
15200     {
15201       int max = -1;
15202       enum stringop_alg alg;
15203       int i;
15204       bool any_alg_usable_p = true;
15205
15206       for (i = 0; i < NAX_STRINGOP_ALGS; i++)
15207         {
15208           enum stringop_alg candidate = algs->size[i].alg;
15209           any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
15210
15211           if (candidate != libcall && candidate
15212               && ALG_USABLE_P (candidate))
15213               max = algs->size[i].max;
15214         }
15215       /* If there aren't any usable algorithms, then recursing on
15216          smaller sizes isn't going to find anything.  Just return the
15217          simple byte-at-a-time copy loop.  */
15218       if (!any_alg_usable_p)
15219         {
15220           /* Pick something reasonable.  */
15221           if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
15222             *dynamic_check = 128;
15223           return loop_1_byte;
15224         }
15225       if (max == -1)
15226         max = 4096;
15227       alg = decide_alg (count, max / 2, memset, dynamic_check);
15228       gcc_assert (*dynamic_check == -1);
15229       gcc_assert (alg != libcall);
15230       if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
15231         *dynamic_check = max;
15232       return alg;
15233     }
15234   return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
15235 #undef ALG_USABLE_P
15236 }
15237
15238 /* Decide on alignment.  We know that the operand is already aligned to ALIGN
15239    (ALIGN can be based on profile feedback and thus it is not 100% guaranteed).  */
15240 static int
15241 decide_alignment (int align,
15242                   enum stringop_alg alg,
15243                   int expected_size)
15244 {
15245   int desired_align = 0;
15246   switch (alg)
15247     {
15248       case no_stringop:
15249         gcc_unreachable ();
15250       case loop:
15251       case unrolled_loop:
15252         desired_align = GET_MODE_SIZE (Pmode);
15253         break;
15254       case rep_prefix_8_byte:
15255         desired_align = 8;
15256         break;
15257       case rep_prefix_4_byte:
15258         /* PentiumPro has special logic triggering for 8 byte aligned blocks.
15259            copying whole cacheline at once.  */
15260         if (TARGET_PENTIUMPRO)
15261           desired_align = 8;
15262         else
15263           desired_align = 4;
15264         break;
15265       case rep_prefix_1_byte:
15266         /* PentiumPro has special logic triggering for 8 byte aligned blocks.
15267            copying whole cacheline at once.  */
15268         if (TARGET_PENTIUMPRO)
15269           desired_align = 8;
15270         else
15271           desired_align = 1;
15272         break;
15273       case loop_1_byte:
15274         desired_align = 1;
15275         break;
15276       case libcall:
15277         return 0;
15278     }
15279
15280   if (optimize_size)
15281     desired_align = 1;
15282   if (desired_align < align)
15283     desired_align = align;
15284   if (expected_size != -1 && expected_size < 4)
15285     desired_align = align;
15286   return desired_align;
15287 }
15288
/* Return the smallest power of 2 greater than VAL.  The result is strictly
   greater than VAL, so a VAL that is itself a power of 2 yields the next
   one; any VAL below 1 yields 1.  */
static int
smallest_pow2_greater_than (int val)
{
  int pow2;

  for (pow2 = 1; pow2 <= val; pow2 *= 2)
    continue;
  return pow2;
}
15298
15299 /* Expand string move (memcpy) operation.  Use i386 string operations when
15300    profitable.  expand_setmem contains similar code.  The code depends upon
15301    architecture, block size and alignment, but always has the same
15302    overall structure:
15303
15304    1) Prologue guard: Conditional that jumps up to epilogues for small
15305       blocks that can be handled by epilogue alone.  This is faster but
15306       also needed for correctness, since prologue assume the block is larger
15307       than the desired alignment.
15308
15309       Optional dynamic check for size and libcall for large
15310       blocks is emitted here too, with -minline-stringops-dynamically.
15311
15312    2) Prologue: copy first few bytes in order to get destination aligned
15313       to DESIRED_ALIGN.  It is emitted only when ALIGN is less than
15314       DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be copied.
15315       We emit either a jump tree on power of two sized blocks, or a byte loop.
15316
15317    3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
15318       with specified algorithm.
15319
15320    4) Epilogue: code copying tail of the block that is too small to be
15321       handled by main body (or up to size guarded by prologue guard).  */
15322
15323 int
15324 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
15325                     rtx expected_align_exp, rtx expected_size_exp)
15326 {
15327   rtx destreg;
15328   rtx srcreg;
15329   rtx label = NULL;
15330   rtx tmp;
15331   rtx jump_around_label = NULL;
15332   HOST_WIDE_INT align = 1;
15333   unsigned HOST_WIDE_INT count = 0;
15334   HOST_WIDE_INT expected_size = -1;
15335   int size_needed = 0, epilogue_size_needed;
15336   int desired_align = 0;
15337   enum stringop_alg alg;
15338   int dynamic_check;
15339
15340   if (CONST_INT_P (align_exp))
15341     align = INTVAL (align_exp);
15342   /* i386 can do misaligned access on reasonably increased cost.  */
15343   if (CONST_INT_P (expected_align_exp)
15344       && INTVAL (expected_align_exp) > align)
15345     align = INTVAL (expected_align_exp);
15346   if (CONST_INT_P (count_exp))
15347     count = expected_size = INTVAL (count_exp);
15348   if (CONST_INT_P (expected_size_exp) && count == 0)
15349     expected_size = INTVAL (expected_size_exp);
15350
15351   /* Make sure we don't need to care about overflow later on.  */
15352   if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
15353     return 0;
15354
15355   /* Step 0: Decide on preferred algorithm, desired alignment and
15356      size of chunks to be copied by main loop.  */
15357
15358   alg = decide_alg (count, expected_size, false, &dynamic_check);
15359   desired_align = decide_alignment (align, alg, expected_size);
15360
15361   if (!TARGET_ALIGN_STRINGOPS)
15362     align = desired_align;
15363
15364   if (alg == libcall)
15365     return 0;
15366   gcc_assert (alg != no_stringop);
15367   if (!count)
15368     count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
15369   destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
15370   srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
15371   switch (alg)
15372     {
15373     case libcall:
15374     case no_stringop:
15375       gcc_unreachable ();
15376     case loop:
15377       size_needed = GET_MODE_SIZE (Pmode);
15378       break;
15379     case unrolled_loop:
15380       size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
15381       break;
15382     case rep_prefix_8_byte:
15383       size_needed = 8;
15384       break;
15385     case rep_prefix_4_byte:
15386       size_needed = 4;
15387       break;
15388     case rep_prefix_1_byte:
15389     case loop_1_byte:
15390       size_needed = 1;
15391       break;
15392     }
15393
15394   epilogue_size_needed = size_needed;
15395
15396   /* Step 1: Prologue guard.  */
15397
15398   /* Alignment code needs count to be in register.  */
15399   if (CONST_INT_P (count_exp) && desired_align > align)
15400     count_exp = force_reg (counter_mode (count_exp), count_exp);
15401   gcc_assert (desired_align >= 1 && align >= 1);
15402
15403   /* Ensure that alignment prologue won't copy past end of block.  */
15404   if (size_needed > 1 || (desired_align > 1 && desired_align > align))
15405     {
15406       epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
15407       /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
15408          Make sure it is power of 2.  */
15409       epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
15410
15411       if (CONST_INT_P (count_exp))
15412         {
15413           if (UINTVAL (count_exp) < (unsigned HOST_WIDE_INT)epilogue_size_needed)
15414             goto epilogue;
15415         }
15416       else
15417         {
15418           label = gen_label_rtx ();
15419           emit_cmp_and_jump_insns (count_exp,
15420                                    GEN_INT (epilogue_size_needed),
15421                                    LTU, 0, counter_mode (count_exp), 1, label);
15422           if (expected_size == -1 || expected_size < epilogue_size_needed)
15423             predict_jump (REG_BR_PROB_BASE * 60 / 100);
15424           else
15425             predict_jump (REG_BR_PROB_BASE * 20 / 100);
15426         }
15427     }
15428
15429   /* Emit code to decide on runtime whether library call or inline should be
15430      used.  */
15431   if (dynamic_check != -1)
15432     {
15433       if (CONST_INT_P (count_exp))
15434         {
15435           if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
15436             {
15437               emit_block_move_via_libcall (dst, src, count_exp, false);
15438               count_exp = const0_rtx;
15439               goto epilogue;
15440             }
15441         }
15442       else
15443         {
15444           rtx hot_label = gen_label_rtx ();
15445           jump_around_label = gen_label_rtx ();
15446           emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
15447                                    LEU, 0, GET_MODE (count_exp), 1, hot_label);
15448           predict_jump (REG_BR_PROB_BASE * 90 / 100);
15449           emit_block_move_via_libcall (dst, src, count_exp, false);
15450           emit_jump (jump_around_label);
15451           emit_label (hot_label);
15452         }
15453     }
15454
15455   /* Step 2: Alignment prologue.  */
15456
15457   if (desired_align > align)
15458     {
15459       /* Except for the first move in epilogue, we no longer know
15460          constant offset in aliasing info.  It don't seems to worth
15461          the pain to maintain it for the first move, so throw away
15462          the info early.  */
15463       src = change_address (src, BLKmode, srcreg);
15464       dst = change_address (dst, BLKmode, destreg);
15465       expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
15466                               desired_align);
15467     }
15468   if (label && size_needed == 1)
15469     {
15470       emit_label (label);
15471       LABEL_NUSES (label) = 1;
15472       label = NULL;
15473     }
15474
15475   /* Step 3: Main loop.  */
15476
15477   switch (alg)
15478     {
15479     case libcall:
15480     case no_stringop:
15481       gcc_unreachable ();
15482     case loop_1_byte:
15483       expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
15484                                      count_exp, QImode, 1, expected_size);
15485       break;
15486     case loop:
15487       expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
15488                                      count_exp, Pmode, 1, expected_size);
15489       break;
15490     case unrolled_loop:
15491       /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
15492          registers for 4 temporaries anyway.  */
15493       expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
15494                                      count_exp, Pmode, TARGET_64BIT ? 4 : 2,
15495                                      expected_size);
15496       break;
15497     case rep_prefix_8_byte:
15498       expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
15499                                  DImode);
15500       break;
15501     case rep_prefix_4_byte:
15502       expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
15503                                  SImode);
15504       break;
15505     case rep_prefix_1_byte:
15506       expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
15507                                  QImode);
15508       break;
15509     }
15510   /* Adjust properly the offset of src and dest memory for aliasing.  */
15511   if (CONST_INT_P (count_exp))
15512     {
15513       src = adjust_automodify_address_nv (src, BLKmode, srcreg,
15514                                           (count / size_needed) * size_needed);
15515       dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
15516                                           (count / size_needed) * size_needed);
15517     }
15518   else
15519     {
15520       src = change_address (src, BLKmode, srcreg);
15521       dst = change_address (dst, BLKmode, destreg);
15522     }
15523
15524   /* Step 4: Epilogue to copy the remaining bytes.  */
15525  epilogue:
15526   if (label)
15527     {
15528       /* When the main loop is done, COUNT_EXP might hold original count,
15529          while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
15530          Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
15531          bytes. Compensate if needed.  */
15532
15533       if (size_needed < epilogue_size_needed)
15534         {
15535           tmp =
15536             expand_simple_binop (counter_mode (count_exp), AND, count_exp,
15537                                  GEN_INT (size_needed - 1), count_exp, 1,
15538                                  OPTAB_DIRECT);
15539           if (tmp != count_exp)
15540             emit_move_insn (count_exp, tmp);
15541         }
15542       emit_label (label);
15543       LABEL_NUSES (label) = 1;
15544     }
15545
15546   if (count_exp != const0_rtx && epilogue_size_needed > 1)
15547     expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
15548                             epilogue_size_needed);
15549   if (jump_around_label)
15550     emit_label (jump_around_label);
15551   return 1;
15552 }
15553
15554 /* Helper function for memcpy.  For QImode value 0xXY produce
15555    0xXYXYXYXY of wide specified by MODE.  This is essentially
15556    a * 0x10101010, but we can do slightly better than
15557    synth_mult by unwinding the sequence by hand on CPUs with
15558    slow multiply.  */
15559 static rtx
15560 promote_duplicated_reg (enum machine_mode mode, rtx val)
15561 {
15562   enum machine_mode valmode = GET_MODE (val);
15563   rtx tmp;
15564   int nops = mode == DImode ? 3 : 2;
15565
15566   gcc_assert (mode == SImode || mode == DImode);
15567   if (val == const0_rtx)
15568     return copy_to_mode_reg (mode, const0_rtx);
15569   if (CONST_INT_P (val))
15570     {
15571       HOST_WIDE_INT v = INTVAL (val) & 255;
15572
15573       v |= v << 8;
15574       v |= v << 16;
15575       if (mode == DImode)
15576         v |= (v << 16) << 16;
15577       return copy_to_mode_reg (mode, gen_int_mode (v, mode));
15578     }
15579
15580   if (valmode == VOIDmode)
15581     valmode = QImode;
15582   if (valmode != QImode)
15583     val = gen_lowpart (QImode, val);
15584   if (mode == QImode)
15585     return val;
15586   if (!TARGET_PARTIAL_REG_STALL)
15587     nops--;
15588   if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
15589       + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
15590       <= (ix86_cost->shift_const + ix86_cost->add) * nops
15591           + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
15592     {
15593       rtx reg = convert_modes (mode, QImode, val, true);
15594       tmp = promote_duplicated_reg (mode, const1_rtx);
15595       return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
15596                                   OPTAB_DIRECT);
15597     }
15598   else
15599     {
15600       rtx reg = convert_modes (mode, QImode, val, true);
15601
15602       if (!TARGET_PARTIAL_REG_STALL)
15603         if (mode == SImode)
15604           emit_insn (gen_movsi_insv_1 (reg, reg));
15605         else
15606           emit_insn (gen_movdi_insv_1_rex64 (reg, reg));
15607       else
15608         {
15609           tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
15610                                      NULL, 1, OPTAB_DIRECT);
15611           reg =
15612             expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
15613         }
15614       tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
15615                                  NULL, 1, OPTAB_DIRECT);
15616       reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
15617       if (mode == SImode)
15618         return reg;
15619       tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
15620                                  NULL, 1, OPTAB_DIRECT);
15621       reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
15622       return reg;
15623     }
15624 }
15625
15626 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
15627    be needed by main loop copying SIZE_NEEDED chunks and prologue getting
15628    alignment from ALIGN to DESIRED_ALIGN.  */
15629 static rtx
15630 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
15631 {
15632   rtx promoted_val;
15633
15634   if (TARGET_64BIT
15635       && (size_needed > 4 || (desired_align > align && desired_align > 4)))
15636     promoted_val = promote_duplicated_reg (DImode, val);
15637   else if (size_needed > 2 || (desired_align > align && desired_align > 2))
15638     promoted_val = promote_duplicated_reg (SImode, val);
15639   else if (size_needed > 1 || (desired_align > align && desired_align > 1))
15640     promoted_val = promote_duplicated_reg (HImode, val);
15641   else
15642     promoted_val = val;
15643
15644   return promoted_val;
15645 }
15646
15647 /* Expand string clear operation (bzero).  Use i386 string operations when
15648    profitable.  See expand_movmem comment for explanation of individual
15649    steps performed.  */
15650 int
15651 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
15652                     rtx expected_align_exp, rtx expected_size_exp)
15653 {
15654   rtx destreg;
15655   rtx label = NULL;
15656   rtx tmp;
15657   rtx jump_around_label = NULL;
15658   HOST_WIDE_INT align = 1;
15659   unsigned HOST_WIDE_INT count = 0;
15660   HOST_WIDE_INT expected_size = -1;
15661   int size_needed = 0, epilogue_size_needed;
15662   int desired_align = 0;
15663   enum stringop_alg alg;
15664   rtx promoted_val = NULL;
15665   bool force_loopy_epilogue = false;
15666   int dynamic_check;
15667
15668   if (CONST_INT_P (align_exp))
15669     align = INTVAL (align_exp);
15670   /* i386 can do misaligned access on reasonably increased cost.  */
15671   if (CONST_INT_P (expected_align_exp)
15672       && INTVAL (expected_align_exp) > align)
15673     align = INTVAL (expected_align_exp);
15674   if (CONST_INT_P (count_exp))
15675     count = expected_size = INTVAL (count_exp);
15676   if (CONST_INT_P (expected_size_exp) && count == 0)
15677     expected_size = INTVAL (expected_size_exp);
15678
15679   /* Make sure we don't need to care about overflow later on.  */
15680   if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
15681     return 0;
15682
15683   /* Step 0: Decide on preferred algorithm, desired alignment and
15684      size of chunks to be copied by main loop.  */
15685
15686   alg = decide_alg (count, expected_size, true, &dynamic_check);
15687   desired_align = decide_alignment (align, alg, expected_size);
15688
15689   if (!TARGET_ALIGN_STRINGOPS)
15690     align = desired_align;
15691
15692   if (alg == libcall)
15693     return 0;
15694   gcc_assert (alg != no_stringop);
15695   if (!count)
15696     count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
15697   destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
15698   switch (alg)
15699     {
15700     case libcall:
15701     case no_stringop:
15702       gcc_unreachable ();
15703     case loop:
15704       size_needed = GET_MODE_SIZE (Pmode);
15705       break;
15706     case unrolled_loop:
15707       size_needed = GET_MODE_SIZE (Pmode) * 4;
15708       break;
15709     case rep_prefix_8_byte:
15710       size_needed = 8;
15711       break;
15712     case rep_prefix_4_byte:
15713       size_needed = 4;
15714       break;
15715     case rep_prefix_1_byte:
15716     case loop_1_byte:
15717       size_needed = 1;
15718       break;
15719     }
15720   epilogue_size_needed = size_needed;
15721
15722   /* Step 1: Prologue guard.  */
15723
15724   /* Alignment code needs count to be in register.  */
15725   if (CONST_INT_P (count_exp) && desired_align > align)
15726     {
15727       enum machine_mode mode = SImode;
15728       if (TARGET_64BIT && (count & ~0xffffffff))
15729         mode = DImode;
15730       count_exp = force_reg (mode, count_exp);
15731     }
15732   /* Do the cheap promotion to allow better CSE across the
15733      main loop and epilogue (ie one load of the big constant in the
15734      front of all code.  */
15735   if (CONST_INT_P (val_exp))
15736     promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
15737                                                    desired_align, align);
15738   /* Ensure that alignment prologue won't copy past end of block.  */
15739   if (size_needed > 1 || (desired_align > 1 && desired_align > align))
15740     {
15741       epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
15742       /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
15743          Make sure it is power of 2.  */
15744       epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
15745
15746       /* To improve performance of small blocks, we jump around the VAL
15747          promoting mode.  This mean that if the promoted VAL is not constant,
15748          we might not use it in the epilogue and have to use byte
15749          loop variant.  */
15750       if (epilogue_size_needed > 2 && !promoted_val)
15751         force_loopy_epilogue = true;
15752       label = gen_label_rtx ();
15753       emit_cmp_and_jump_insns (count_exp,
15754                                GEN_INT (epilogue_size_needed),
15755                                LTU, 0, counter_mode (count_exp), 1, label);
15756       if (GET_CODE (count_exp) == CONST_INT)
15757         ;
15758       else if (expected_size == -1 || expected_size <= epilogue_size_needed)
15759         predict_jump (REG_BR_PROB_BASE * 60 / 100);
15760       else
15761         predict_jump (REG_BR_PROB_BASE * 20 / 100);
15762     }
15763   if (dynamic_check != -1)
15764     {
15765       rtx hot_label = gen_label_rtx ();
15766       jump_around_label = gen_label_rtx ();
15767       emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
15768                                LEU, 0, counter_mode (count_exp), 1, hot_label);
15769       predict_jump (REG_BR_PROB_BASE * 90 / 100);
15770       set_storage_via_libcall (dst, count_exp, val_exp, false);
15771       emit_jump (jump_around_label);
15772       emit_label (hot_label);
15773     }
15774
15775   /* Step 2: Alignment prologue.  */
15776
15777   /* Do the expensive promotion once we branched off the small blocks.  */
15778   if (!promoted_val)
15779     promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
15780                                                    desired_align, align);
15781   gcc_assert (desired_align >= 1 && align >= 1);
15782
15783   if (desired_align > align)
15784     {
15785       /* Except for the first move in epilogue, we no longer know
15786          constant offset in aliasing info.  It don't seems to worth
15787          the pain to maintain it for the first move, so throw away
15788          the info early.  */
15789       dst = change_address (dst, BLKmode, destreg);
15790       expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
15791                               desired_align);
15792     }
15793   if (label && size_needed == 1)
15794     {
15795       emit_label (label);
15796       LABEL_NUSES (label) = 1;
15797       label = NULL;
15798     }
15799
15800   /* Step 3: Main loop.  */
15801
15802   switch (alg)
15803     {
15804     case libcall:
15805     case no_stringop:
15806       gcc_unreachable ();
15807     case loop_1_byte:
15808       expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
15809                                      count_exp, QImode, 1, expected_size);
15810       break;
15811     case loop:
15812       expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
15813                                      count_exp, Pmode, 1, expected_size);
15814       break;
15815     case unrolled_loop:
15816       expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
15817                                      count_exp, Pmode, 4, expected_size);
15818       break;
15819     case rep_prefix_8_byte:
15820       expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
15821                                   DImode);
15822       break;
15823     case rep_prefix_4_byte:
15824       expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
15825                                   SImode);
15826       break;
15827     case rep_prefix_1_byte:
15828       expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
15829                                   QImode);
15830       break;
15831     }
15832   /* Adjust properly the offset of src and dest memory for aliasing.  */
15833   if (CONST_INT_P (count_exp))
15834     dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
15835                                         (count / size_needed) * size_needed);
15836   else
15837     dst = change_address (dst, BLKmode, destreg);
15838
15839   /* Step 4: Epilogue to copy the remaining bytes.  */
15840
15841   if (label)
15842     {
15843       /* When the main loop is done, COUNT_EXP might hold original count,
15844          while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
15845          Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
15846          bytes. Compensate if needed.  */
15847
15848       if (size_needed < desired_align - align)
15849         {
15850           tmp =
15851             expand_simple_binop (counter_mode (count_exp), AND, count_exp,
15852                                  GEN_INT (size_needed - 1), count_exp, 1,
15853                                  OPTAB_DIRECT);
15854           size_needed = desired_align - align + 1;
15855           if (tmp != count_exp)
15856             emit_move_insn (count_exp, tmp);
15857         }
15858       emit_label (label);
15859       LABEL_NUSES (label) = 1;
15860     }
15861   if (count_exp != const0_rtx && epilogue_size_needed > 1)
15862     {
15863       if (force_loopy_epilogue)
15864         expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
15865                                          size_needed);
15866       else
15867         expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
15868                                 size_needed);
15869     }
15870   if (jump_around_label)
15871     emit_label (jump_around_label);
15872   return 1;
15873 }
15874
15875 /* Expand the appropriate insns for doing strlen if not just doing
15876    repnz; scasb
15877
15878    out = result, initialized with the start address
15879    align_rtx = alignment of the address.
15880    scratch = scratch register, initialized with the startaddress when
15881         not aligned, otherwise undefined
15882
15883    This is just the body. It needs the initializations mentioned above and
15884    some address computing at the end.  These things are done in i386.md.  */
15885
15886 static void
15887 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
15888 {
15889   int align;
15890   rtx tmp;
15891   rtx align_2_label = NULL_RTX;
15892   rtx align_3_label = NULL_RTX;
15893   rtx align_4_label = gen_label_rtx ();
15894   rtx end_0_label = gen_label_rtx ();
15895   rtx mem;
15896   rtx tmpreg = gen_reg_rtx (SImode);
15897   rtx scratch = gen_reg_rtx (SImode);
15898   rtx cmp;
15899
15900   align = 0;
15901   if (CONST_INT_P (align_rtx))
15902     align = INTVAL (align_rtx);
15903
15904   /* Loop to check 1..3 bytes for null to get an aligned pointer.  */
15905
15906   /* Is there a known alignment and is it less than 4?  */
15907   if (align < 4)
15908     {
15909       rtx scratch1 = gen_reg_rtx (Pmode);
15910       emit_move_insn (scratch1, out);
15911       /* Is there a known alignment and is it not 2? */
15912       if (align != 2)
15913         {
15914           align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
15915           align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
15916
15917           /* Leave just the 3 lower bits.  */
15918           align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
15919                                     NULL_RTX, 0, OPTAB_WIDEN);
15920
15921           emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
15922                                    Pmode, 1, align_4_label);
15923           emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
15924                                    Pmode, 1, align_2_label);
15925           emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
15926                                    Pmode, 1, align_3_label);
15927         }
15928       else
15929         {
15930           /* Since the alignment is 2, we have to check 2 or 0 bytes;
15931              check if is aligned to 4 - byte.  */
15932
15933           align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
15934                                     NULL_RTX, 0, OPTAB_WIDEN);
15935
15936           emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
15937                                    Pmode, 1, align_4_label);
15938         }
15939
15940       mem = change_address (src, QImode, out);
15941
15942       /* Now compare the bytes.  */
15943
15944       /* Compare the first n unaligned byte on a byte per byte basis.  */
15945       emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
15946                                QImode, 1, end_0_label);
15947
15948       /* Increment the address.  */
15949       if (TARGET_64BIT)
15950         emit_insn (gen_adddi3 (out, out, const1_rtx));
15951       else
15952         emit_insn (gen_addsi3 (out, out, const1_rtx));
15953
15954       /* Not needed with an alignment of 2 */
15955       if (align != 2)
15956         {
15957           emit_label (align_2_label);
15958
15959           emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
15960                                    end_0_label);
15961
15962           if (TARGET_64BIT)
15963             emit_insn (gen_adddi3 (out, out, const1_rtx));
15964           else
15965             emit_insn (gen_addsi3 (out, out, const1_rtx));
15966
15967           emit_label (align_3_label);
15968         }
15969
15970       emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
15971                                end_0_label);
15972
15973       if (TARGET_64BIT)
15974         emit_insn (gen_adddi3 (out, out, const1_rtx));
15975       else
15976         emit_insn (gen_addsi3 (out, out, const1_rtx));
15977     }
15978
15979   /* Generate loop to check 4 bytes at a time.  It is not a good idea to
15980      align this loop.  It gives only huge programs, but does not help to
15981      speed up.  */
15982   emit_label (align_4_label);
15983
15984   mem = change_address (src, SImode, out);
15985   emit_move_insn (scratch, mem);
15986   if (TARGET_64BIT)
15987     emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
15988   else
15989     emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
15990
15991   /* This formula yields a nonzero result iff one of the bytes is zero.
15992      This saves three branches inside loop and many cycles.  */
15993
15994   emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
15995   emit_insn (gen_one_cmplsi2 (scratch, scratch));
15996   emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
15997   emit_insn (gen_andsi3 (tmpreg, tmpreg,
15998                          gen_int_mode (0x80808080, SImode)));
15999   emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
16000                            align_4_label);
16001
16002   if (TARGET_CMOVE)
16003     {
16004        rtx reg = gen_reg_rtx (SImode);
16005        rtx reg2 = gen_reg_rtx (Pmode);
16006        emit_move_insn (reg, tmpreg);
16007        emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
16008
16009        /* If zero is not in the first two bytes, move two bytes forward.  */
16010        emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
16011        tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
16012        tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
16013        emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
16014                                gen_rtx_IF_THEN_ELSE (SImode, tmp,
16015                                                      reg,
16016                                                      tmpreg)));
16017        /* Emit lea manually to avoid clobbering of flags.  */
16018        emit_insn (gen_rtx_SET (SImode, reg2,
16019                                gen_rtx_PLUS (Pmode, out, const2_rtx)));
16020
16021        tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
16022        tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
16023        emit_insn (gen_rtx_SET (VOIDmode, out,
16024                                gen_rtx_IF_THEN_ELSE (Pmode, tmp,
16025                                                      reg2,
16026                                                      out)));
16027
16028     }
16029   else
16030     {
16031        rtx end_2_label = gen_label_rtx ();
16032        /* Is zero in the first two bytes? */
16033
16034        emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
16035        tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
16036        tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
16037        tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
16038                             gen_rtx_LABEL_REF (VOIDmode, end_2_label),
16039                             pc_rtx);
16040        tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
16041        JUMP_LABEL (tmp) = end_2_label;
16042
16043        /* Not in the first two.  Move two bytes forward.  */
16044        emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
16045        if (TARGET_64BIT)
16046          emit_insn (gen_adddi3 (out, out, const2_rtx));
16047        else
16048          emit_insn (gen_addsi3 (out, out, const2_rtx));
16049
16050        emit_label (end_2_label);
16051
16052     }
16053
16054   /* Avoid branch in fixing the byte.  */
16055   tmpreg = gen_lowpart (QImode, tmpreg);
16056   emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
16057   cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
16058   if (TARGET_64BIT)
16059     emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
16060   else
16061     emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
16062
16063   emit_label (end_0_label);
16064 }
16065
16066 /* Expand strlen.  */
16067
16068 int
16069 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
16070 {
16071   rtx addr, scratch1, scratch2, scratch3, scratch4;
16072
16073   /* The generic case of strlen expander is long.  Avoid it's
16074      expanding unless TARGET_INLINE_ALL_STRINGOPS.  */
16075
16076   if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
16077       && !TARGET_INLINE_ALL_STRINGOPS
16078       && !optimize_size
16079       && (!CONST_INT_P (align) || INTVAL (align) < 4))
16080     return 0;
16081
16082   addr = force_reg (Pmode, XEXP (src, 0));
16083   scratch1 = gen_reg_rtx (Pmode);
16084
16085   if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
16086       && !optimize_size)
16087     {
16088       /* Well it seems that some optimizer does not combine a call like
16089          foo(strlen(bar), strlen(bar));
16090          when the move and the subtraction is done here.  It does calculate
16091          the length just once when these instructions are done inside of
16092          output_strlen_unroll().  But I think since &bar[strlen(bar)] is
16093          often used and I use one fewer register for the lifetime of
16094          output_strlen_unroll() this is better.  */
16095
16096       emit_move_insn (out, addr);
16097
16098       ix86_expand_strlensi_unroll_1 (out, src, align);
16099
16100       /* strlensi_unroll_1 returns the address of the zero at the end of
16101          the string, like memchr(), so compute the length by subtracting
16102          the start address.  */
16103       if (TARGET_64BIT)
16104         emit_insn (gen_subdi3 (out, out, addr));
16105       else
16106         emit_insn (gen_subsi3 (out, out, addr));
16107     }
16108   else
16109     {
16110       rtx unspec;
16111
16112       /* Can't use this if the user has appropriated eax, ecx, or edi.  */
16113       if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
16114         return false;
16115
16116       scratch2 = gen_reg_rtx (Pmode);
16117       scratch3 = gen_reg_rtx (Pmode);
16118       scratch4 = force_reg (Pmode, constm1_rtx);
16119
16120       emit_move_insn (scratch3, addr);
16121       eoschar = force_reg (QImode, eoschar);
16122
16123       src = replace_equiv_address_nv (src, scratch3);
16124
16125       /* If .md starts supporting :P, this can be done in .md.  */
16126       unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
16127                                                  scratch4), UNSPEC_SCAS);
16128       emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
16129       if (TARGET_64BIT)
16130         {
16131           emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
16132           emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
16133         }
16134       else
16135         {
16136           emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
16137           emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
16138         }
16139     }
16140   return 1;
16141 }
16142
16143 /* For given symbol (function) construct code to compute address of it's PLT
16144    entry in large x86-64 PIC model.  */
16145 rtx
16146 construct_plt_address (rtx symbol)
16147 {
16148   rtx tmp = gen_reg_rtx (Pmode);
16149   rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
16150
16151   gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
16152   gcc_assert (ix86_cmodel == CM_LARGE_PIC);
16153
16154   emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
16155   emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
16156   return tmp;
16157 }
16158
16159 void
16160 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
16161                   rtx callarg2 ATTRIBUTE_UNUSED,
16162                   rtx pop, int sibcall)
16163 {
16164   rtx use = NULL, call;
16165
16166   if (pop == const0_rtx)
16167     pop = NULL;
16168   gcc_assert (!TARGET_64BIT || !pop);
16169
16170   if (TARGET_MACHO && !TARGET_64BIT)
16171     {
16172 #if TARGET_MACHO
16173       if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
16174         fnaddr = machopic_indirect_call_target (fnaddr);
16175 #endif
16176     }
16177   else
16178     {
16179       /* Static functions and indirect calls don't need the pic register.  */
16180       if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
16181           && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
16182           && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
16183         use_reg (&use, pic_offset_table_rtx);
16184     }
16185
16186   if (TARGET_64BIT && INTVAL (callarg2) >= 0)
16187     {
16188       rtx al = gen_rtx_REG (QImode, AX_REG);
16189       emit_move_insn (al, callarg2);
16190       use_reg (&use, al);
16191     }
16192
16193   if (ix86_cmodel == CM_LARGE_PIC
16194       && GET_CODE (fnaddr) == MEM
16195       && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
16196       && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
16197     fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
16198   else if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
16199     {
16200       fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
16201       fnaddr = gen_rtx_MEM (QImode, fnaddr);
16202     }
16203   if (sibcall && TARGET_64BIT
16204       && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
16205     {
16206       rtx addr;
16207       addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
16208       fnaddr = gen_rtx_REG (Pmode, R11_REG);
16209       emit_move_insn (fnaddr, addr);
16210       fnaddr = gen_rtx_MEM (QImode, fnaddr);
16211     }
16212
16213   call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
16214   if (retval)
16215     call = gen_rtx_SET (VOIDmode, retval, call);
16216   if (pop)
16217     {
16218       pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
16219       pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
16220       call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
16221     }
16222
16223   call = emit_call_insn (call);
16224   if (use)
16225     CALL_INSN_FUNCTION_USAGE (call) = use;
16226 }
16227
16228 \f
16229 /* Clear stack slot assignments remembered from previous functions.
16230    This is called from INIT_EXPANDERS once before RTL is emitted for each
16231    function.  */
16232
16233 static struct machine_function *
16234 ix86_init_machine_status (void)
16235 {
16236   struct machine_function *f;
16237
16238   f = GGC_CNEW (struct machine_function);
16239   f->use_fast_prologue_epilogue_nregs = -1;
16240   f->tls_descriptor_call_expanded_p = 0;
16241
16242   return f;
16243 }
16244
16245 /* Return a MEM corresponding to a stack slot with mode MODE.
16246    Allocate a new slot if necessary.
16247
16248    The RTL for a function can have several slots available: N is
16249    which slot to use.  */
16250
16251 rtx
16252 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
16253 {
16254   struct stack_local_entry *s;
16255
16256   gcc_assert (n < MAX_386_STACK_LOCALS);
16257
16258   /* Virtual slot is valid only before vregs are instantiated.  */
16259   gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
16260
16261   for (s = ix86_stack_locals; s; s = s->next)
16262     if (s->mode == mode && s->n == n)
16263       return copy_rtx (s->rtl);
16264
16265   s = (struct stack_local_entry *)
16266     ggc_alloc (sizeof (struct stack_local_entry));
16267   s->n = n;
16268   s->mode = mode;
16269   s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
16270
16271   s->next = ix86_stack_locals;
16272   ix86_stack_locals = s;
16273   return s->rtl;
16274 }
16275
16276 /* Construct the SYMBOL_REF for the tls_get_addr function.  */
16277
16278 static GTY(()) rtx ix86_tls_symbol;
16279 rtx
16280 ix86_tls_get_addr (void)
16281 {
16282
16283   if (!ix86_tls_symbol)
16284     {
16285       ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
16286                                             (TARGET_ANY_GNU_TLS
16287                                              && !TARGET_64BIT)
16288                                             ? "___tls_get_addr"
16289                                             : "__tls_get_addr");
16290     }
16291
16292   return ix86_tls_symbol;
16293 }
16294
16295 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol.  */
16296
16297 static GTY(()) rtx ix86_tls_module_base_symbol;
16298 rtx
16299 ix86_tls_module_base (void)
16300 {
16301
16302   if (!ix86_tls_module_base_symbol)
16303     {
16304       ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
16305                                                         "_TLS_MODULE_BASE_");
16306       SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
16307         |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
16308     }
16309
16310   return ix86_tls_module_base_symbol;
16311 }
16312 \f
16313 /* Calculate the length of the memory address in the instruction
16314    encoding.  Does not include the one-byte modrm, opcode, or prefix.  */
16315
16316 int
16317 memory_address_length (rtx addr)
16318 {
16319   struct ix86_address parts;
16320   rtx base, index, disp;
16321   int len;
16322   int ok;
16323
16324   if (GET_CODE (addr) == PRE_DEC
16325       || GET_CODE (addr) == POST_INC
16326       || GET_CODE (addr) == PRE_MODIFY
16327       || GET_CODE (addr) == POST_MODIFY)
16328     return 0;
16329
16330   ok = ix86_decompose_address (addr, &parts);
16331   gcc_assert (ok);
16332
16333   if (parts.base && GET_CODE (parts.base) == SUBREG)
16334     parts.base = SUBREG_REG (parts.base);
16335   if (parts.index && GET_CODE (parts.index) == SUBREG)
16336     parts.index = SUBREG_REG (parts.index);
16337
16338   base = parts.base;
16339   index = parts.index;
16340   disp = parts.disp;
16341   len = 0;
16342
16343   /* Rule of thumb:
16344        - esp as the base always wants an index,
16345        - ebp as the base always wants a displacement.  */
16346
16347   /* Register Indirect.  */
16348   if (base && !index && !disp)
16349     {
16350       /* esp (for its index) and ebp (for its displacement) need
16351          the two-byte modrm form.  */
16352       if (addr == stack_pointer_rtx
16353           || addr == arg_pointer_rtx
16354           || addr == frame_pointer_rtx
16355           || addr == hard_frame_pointer_rtx)
16356         len = 1;
16357     }
16358
16359   /* Direct Addressing.  */
16360   else if (disp && !base && !index)
16361     len = 4;
16362
16363   else
16364     {
16365       /* Find the length of the displacement constant.  */
16366       if (disp)
16367         {
16368           if (base && satisfies_constraint_K (disp))
16369             len = 1;
16370           else
16371             len = 4;
16372         }
16373       /* ebp always wants a displacement.  */
16374       else if (base == hard_frame_pointer_rtx)
16375         len = 1;
16376
16377       /* An index requires the two-byte modrm form....  */
16378       if (index
16379           /* ...like esp, which always wants an index.  */
16380           || base == stack_pointer_rtx
16381           || base == arg_pointer_rtx
16382           || base == frame_pointer_rtx)
16383         len += 1;
16384     }
16385
16386   return len;
16387 }
16388
16389 /* Compute default value for "length_immediate" attribute.  When SHORTFORM
16390    is set, expect that insn have 8bit immediate alternative.  */
16391 int
16392 ix86_attr_length_immediate_default (rtx insn, int shortform)
16393 {
16394   int len = 0;
16395   int i;
16396   extract_insn_cached (insn);
16397   for (i = recog_data.n_operands - 1; i >= 0; --i)
16398     if (CONSTANT_P (recog_data.operand[i]))
16399       {
16400         gcc_assert (!len);
16401         if (shortform && satisfies_constraint_K (recog_data.operand[i]))
16402           len = 1;
16403         else
16404           {
16405             switch (get_attr_mode (insn))
16406               {
16407                 case MODE_QI:
16408                   len+=1;
16409                   break;
16410                 case MODE_HI:
16411                   len+=2;
16412                   break;
16413                 case MODE_SI:
16414                   len+=4;
16415                   break;
16416                 /* Immediates for DImode instructions are encoded as 32bit sign extended values.  */
16417                 case MODE_DI:
16418                   len+=4;
16419                   break;
16420                 default:
16421                   fatal_insn ("unknown insn mode", insn);
16422               }
16423           }
16424       }
16425   return len;
16426 }
16427 /* Compute default value for "length_address" attribute.  */
16428 int
16429 ix86_attr_length_address_default (rtx insn)
16430 {
16431   int i;
16432
16433   if (get_attr_type (insn) == TYPE_LEA)
16434     {
16435       rtx set = PATTERN (insn);
16436
16437       if (GET_CODE (set) == PARALLEL)
16438         set = XVECEXP (set, 0, 0);
16439
16440       gcc_assert (GET_CODE (set) == SET);
16441
16442       return memory_address_length (SET_SRC (set));
16443     }
16444
16445   extract_insn_cached (insn);
16446   for (i = recog_data.n_operands - 1; i >= 0; --i)
16447     if (MEM_P (recog_data.operand[i]))
16448       {
16449         return memory_address_length (XEXP (recog_data.operand[i], 0));
16450         break;
16451       }
16452   return 0;
16453 }
16454 \f
16455 /* Return the maximum number of instructions a cpu can issue.  */
16456
16457 static int
16458 ix86_issue_rate (void)
16459 {
16460   switch (ix86_tune)
16461     {
16462     case PROCESSOR_PENTIUM:
16463     case PROCESSOR_K6:
16464       return 2;
16465
16466     case PROCESSOR_PENTIUMPRO:
16467     case PROCESSOR_PENTIUM4:
16468     case PROCESSOR_ATHLON:
16469     case PROCESSOR_K8:
16470     case PROCESSOR_AMDFAM10:
16471     case PROCESSOR_NOCONA:
16472     case PROCESSOR_GENERIC32:
16473     case PROCESSOR_GENERIC64:
16474       return 3;
16475
16476     case PROCESSOR_CORE2:
16477       return 4;
16478
16479     default:
16480       return 1;
16481     }
16482 }
16483
16484 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
16485    by DEP_INSN and nothing set by DEP_INSN.  */
16486
16487 static int
16488 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
16489 {
16490   rtx set, set2;
16491
16492   /* Simplify the test for uninteresting insns.  */
16493   if (insn_type != TYPE_SETCC
16494       && insn_type != TYPE_ICMOV
16495       && insn_type != TYPE_FCMOV
16496       && insn_type != TYPE_IBR)
16497     return 0;
16498
16499   if ((set = single_set (dep_insn)) != 0)
16500     {
16501       set = SET_DEST (set);
16502       set2 = NULL_RTX;
16503     }
16504   else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
16505            && XVECLEN (PATTERN (dep_insn), 0) == 2
16506            && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
16507            && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
16508     {
16509       set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
16510       set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
16511     }
16512   else
16513     return 0;
16514
16515   if (!REG_P (set) || REGNO (set) != FLAGS_REG)
16516     return 0;
16517
16518   /* This test is true if the dependent insn reads the flags but
16519      not any other potentially set register.  */
16520   if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
16521     return 0;
16522
16523   if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
16524     return 0;
16525
16526   return 1;
16527 }
16528
16529 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
16530    address with operands set by DEP_INSN.  */
16531
16532 static int
16533 ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
16534 {
16535   rtx addr;
16536
16537   if (insn_type == TYPE_LEA
16538       && TARGET_PENTIUM)
16539     {
16540       addr = PATTERN (insn);
16541
16542       if (GET_CODE (addr) == PARALLEL)
16543         addr = XVECEXP (addr, 0, 0);
16544
16545       gcc_assert (GET_CODE (addr) == SET);
16546
16547       addr = SET_SRC (addr);
16548     }
16549   else
16550     {
16551       int i;
16552       extract_insn_cached (insn);
16553       for (i = recog_data.n_operands - 1; i >= 0; --i)
16554         if (MEM_P (recog_data.operand[i]))
16555           {
16556             addr = XEXP (recog_data.operand[i], 0);
16557             goto found;
16558           }
16559       return 0;
16560     found:;
16561     }
16562
16563   return modified_in_p (addr, dep_insn);
16564 }
16565
16566 static int
16567 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
16568 {
16569   enum attr_type insn_type, dep_insn_type;
16570   enum attr_memory memory;
16571   rtx set, set2;
16572   int dep_insn_code_number;
16573
16574   /* Anti and output dependencies have zero cost on all CPUs.  */
16575   if (REG_NOTE_KIND (link) != 0)
16576     return 0;
16577
16578   dep_insn_code_number = recog_memoized (dep_insn);
16579
16580   /* If we can't recognize the insns, we can't really do anything.  */
16581   if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
16582     return cost;
16583
16584   insn_type = get_attr_type (insn);
16585   dep_insn_type = get_attr_type (dep_insn);
16586
16587   switch (ix86_tune)
16588     {
16589     case PROCESSOR_PENTIUM:
16590       /* Address Generation Interlock adds a cycle of latency.  */
16591       if (ix86_agi_dependent (insn, dep_insn, insn_type))
16592         cost += 1;
16593
16594       /* ??? Compares pair with jump/setcc.  */
16595       if (ix86_flags_dependent (insn, dep_insn, insn_type))
16596         cost = 0;
16597
16598       /* Floating point stores require value to be ready one cycle earlier.  */
16599       if (insn_type == TYPE_FMOV
16600           && get_attr_memory (insn) == MEMORY_STORE
16601           && !ix86_agi_dependent (insn, dep_insn, insn_type))
16602         cost += 1;
16603       break;
16604
16605     case PROCESSOR_PENTIUMPRO:
16606       memory = get_attr_memory (insn);
16607
16608       /* INT->FP conversion is expensive.  */
16609       if (get_attr_fp_int_src (dep_insn))
16610         cost += 5;
16611
16612       /* There is one cycle extra latency between an FP op and a store.  */
16613       if (insn_type == TYPE_FMOV
16614           && (set = single_set (dep_insn)) != NULL_RTX
16615           && (set2 = single_set (insn)) != NULL_RTX
16616           && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
16617           && MEM_P (SET_DEST (set2)))
16618         cost += 1;
16619
16620       /* Show ability of reorder buffer to hide latency of load by executing
16621          in parallel with previous instruction in case
16622          previous instruction is not needed to compute the address.  */
16623       if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
16624           && !ix86_agi_dependent (insn, dep_insn, insn_type))
16625         {
16626           /* Claim moves to take one cycle, as core can issue one load
16627              at time and the next load can start cycle later.  */
16628           if (dep_insn_type == TYPE_IMOV
16629               || dep_insn_type == TYPE_FMOV)
16630             cost = 1;
16631           else if (cost > 1)
16632             cost--;
16633         }
16634       break;
16635
16636     case PROCESSOR_K6:
16637       memory = get_attr_memory (insn);
16638
16639       /* The esp dependency is resolved before the instruction is really
16640          finished.  */
16641       if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
16642           && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
16643         return 1;
16644
16645       /* INT->FP conversion is expensive.  */
16646       if (get_attr_fp_int_src (dep_insn))
16647         cost += 5;
16648
16649       /* Show ability of reorder buffer to hide latency of load by executing
16650          in parallel with previous instruction in case
16651          previous instruction is not needed to compute the address.  */
16652       if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
16653           && !ix86_agi_dependent (insn, dep_insn, insn_type))
16654         {
16655           /* Claim moves to take one cycle, as core can issue one load
16656              at time and the next load can start cycle later.  */
16657           if (dep_insn_type == TYPE_IMOV
16658               || dep_insn_type == TYPE_FMOV)
16659             cost = 1;
16660           else if (cost > 2)
16661             cost -= 2;
16662           else
16663             cost = 1;
16664         }
16665       break;
16666
16667     case PROCESSOR_ATHLON:
16668     case PROCESSOR_K8:
16669     case PROCESSOR_AMDFAM10:
16670     case PROCESSOR_GENERIC32:
16671     case PROCESSOR_GENERIC64:
16672       memory = get_attr_memory (insn);
16673
16674       /* Show ability of reorder buffer to hide latency of load by executing
16675          in parallel with previous instruction in case
16676          previous instruction is not needed to compute the address.  */
16677       if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
16678           && !ix86_agi_dependent (insn, dep_insn, insn_type))
16679         {
16680           enum attr_unit unit = get_attr_unit (insn);
16681           int loadcost = 3;
16682
16683           /* Because of the difference between the length of integer and
16684              floating unit pipeline preparation stages, the memory operands
16685              for floating point are cheaper.
16686
16687              ??? For Athlon it the difference is most probably 2.  */
16688           if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
16689             loadcost = 3;
16690           else
16691             loadcost = TARGET_ATHLON ? 2 : 0;
16692
16693           if (cost >= loadcost)
16694             cost -= loadcost;
16695           else
16696             cost = 0;
16697         }
16698
16699     default:
16700       break;
16701     }
16702
16703   return cost;
16704 }
16705
16706 /* How many alternative schedules to try.  This should be as wide as the
16707    scheduling freedom in the DFA, but no wider.  Making this value too
16708    large results extra work for the scheduler.  */
16709
16710 static int
16711 ia32_multipass_dfa_lookahead (void)
16712 {
16713   switch (ix86_tune)
16714     {
16715     case PROCESSOR_PENTIUM:
16716       return 2;
16717
16718     case PROCESSOR_PENTIUMPRO:
16719     case PROCESSOR_K6:
16720       return 1;
16721
16722     default:
16723       return 0;
16724     }
16725 }
16726
16727 \f
16728 /* Compute the alignment given to a constant that is being placed in memory.
16729    EXP is the constant and ALIGN is the alignment that the object would
16730    ordinarily have.
16731    The value of this function is used instead of that alignment to align
16732    the object.  */
16733
16734 int
16735 ix86_constant_alignment (tree exp, int align)
16736 {
16737   if (TREE_CODE (exp) == REAL_CST)
16738     {
16739       if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
16740         return 64;
16741       else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
16742         return 128;
16743     }
16744   else if (!optimize_size && TREE_CODE (exp) == STRING_CST
16745            && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
16746     return BITS_PER_WORD;
16747
16748   return align;
16749 }
16750
16751 /* Compute the alignment for a static variable.
16752    TYPE is the data type, and ALIGN is the alignment that
16753    the object would ordinarily have.  The value of this function is used
16754    instead of that alignment to align the object.  */
16755
16756 int
16757 ix86_data_alignment (tree type, int align)
16758 {
16759   int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
16760
16761   if (AGGREGATE_TYPE_P (type)
16762       && TYPE_SIZE (type)
16763       && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
16764       && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
16765           || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
16766       && align < max_align)
16767     align = max_align;
16768
16769   /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
16770      to 16byte boundary.  */
16771   if (TARGET_64BIT)
16772     {
16773       if (AGGREGATE_TYPE_P (type)
16774            && TYPE_SIZE (type)
16775            && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
16776            && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
16777                || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
16778         return 128;
16779     }
16780
16781   if (TREE_CODE (type) == ARRAY_TYPE)
16782     {
16783       if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
16784         return 64;
16785       if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
16786         return 128;
16787     }
16788   else if (TREE_CODE (type) == COMPLEX_TYPE)
16789     {
16790
16791       if (TYPE_MODE (type) == DCmode && align < 64)
16792         return 64;
16793       if (TYPE_MODE (type) == XCmode && align < 128)
16794         return 128;
16795     }
16796   else if ((TREE_CODE (type) == RECORD_TYPE
16797             || TREE_CODE (type) == UNION_TYPE
16798             || TREE_CODE (type) == QUAL_UNION_TYPE)
16799            && TYPE_FIELDS (type))
16800     {
16801       if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
16802         return 64;
16803       if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
16804         return 128;
16805     }
16806   else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
16807            || TREE_CODE (type) == INTEGER_TYPE)
16808     {
16809       if (TYPE_MODE (type) == DFmode && align < 64)
16810         return 64;
16811       if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
16812         return 128;
16813     }
16814
16815   return align;
16816 }
16817
16818 /* Compute the alignment for a local variable.
16819    TYPE is the data type, and ALIGN is the alignment that
16820    the object would ordinarily have.  The value of this macro is used
16821    instead of that alignment to align the object.  */
16822
16823 int
16824 ix86_local_alignment (tree type, int align)
16825 {
16826   /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
16827      to 16byte boundary.  */
16828   if (TARGET_64BIT)
16829     {
16830       if (AGGREGATE_TYPE_P (type)
16831            && TYPE_SIZE (type)
16832            && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
16833            && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
16834                || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
16835         return 128;
16836     }
16837   if (TREE_CODE (type) == ARRAY_TYPE)
16838     {
16839       if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
16840         return 64;
16841       if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
16842         return 128;
16843     }
16844   else if (TREE_CODE (type) == COMPLEX_TYPE)
16845     {
16846       if (TYPE_MODE (type) == DCmode && align < 64)
16847         return 64;
16848       if (TYPE_MODE (type) == XCmode && align < 128)
16849         return 128;
16850     }
16851   else if ((TREE_CODE (type) == RECORD_TYPE
16852             || TREE_CODE (type) == UNION_TYPE
16853             || TREE_CODE (type) == QUAL_UNION_TYPE)
16854            && TYPE_FIELDS (type))
16855     {
16856       if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
16857         return 64;
16858       if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
16859         return 128;
16860     }
16861   else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
16862            || TREE_CODE (type) == INTEGER_TYPE)
16863     {
16864
16865       if (TYPE_MODE (type) == DFmode && align < 64)
16866         return 64;
16867       if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
16868         return 128;
16869     }
16870   return align;
16871 }
16872 \f
16873 /* Emit RTL insns to initialize the variable parts of a trampoline.
16874    FNADDR is an RTX for the address of the function's pure code.
16875    CXT is an RTX for the static chain value for the function.  */
16876 void
16877 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
16878 {
16879   if (!TARGET_64BIT)
16880     {
16881       /* Compute offset from the end of the jmp to the target function.  */
16882       rtx disp = expand_binop (SImode, sub_optab, fnaddr,
16883                                plus_constant (tramp, 10),
16884                                NULL_RTX, 1, OPTAB_DIRECT);
16885       emit_move_insn (gen_rtx_MEM (QImode, tramp),
16886                       gen_int_mode (0xb9, QImode));
16887       emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
16888       emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
16889                       gen_int_mode (0xe9, QImode));
16890       emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
16891     }
16892   else
16893     {
16894       int offset = 0;
16895       /* Try to load address using shorter movl instead of movabs.
16896          We may want to support movq for kernel mode, but kernel does not use
16897          trampolines at the moment.  */
16898       if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
16899         {
16900           fnaddr = copy_to_mode_reg (DImode, fnaddr);
16901           emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
16902                           gen_int_mode (0xbb41, HImode));
16903           emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
16904                           gen_lowpart (SImode, fnaddr));
16905           offset += 6;
16906         }
16907       else
16908         {
16909           emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
16910                           gen_int_mode (0xbb49, HImode));
16911           emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
16912                           fnaddr);
16913           offset += 10;
16914         }
16915       /* Load static chain using movabs to r10.  */
16916       emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
16917                       gen_int_mode (0xba49, HImode));
16918       emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
16919                       cxt);
16920       offset += 10;
16921       /* Jump to the r11 */
16922       emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
16923                       gen_int_mode (0xff49, HImode));
16924       emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
16925                       gen_int_mode (0xe3, QImode));
16926       offset += 3;
16927       gcc_assert (offset <= TRAMPOLINE_SIZE);
16928     }
16929
16930 #ifdef ENABLE_EXECUTE_STACK
16931   emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
16932                      LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
16933 #endif
16934 }
16935 \f
16936 /* Codes for all the SSE/MMX builtins.  */
16937 enum ix86_builtins
16938 {
16939   IX86_BUILTIN_ADDPS,
16940   IX86_BUILTIN_ADDSS,
16941   IX86_BUILTIN_DIVPS,
16942   IX86_BUILTIN_DIVSS,
16943   IX86_BUILTIN_MULPS,
16944   IX86_BUILTIN_MULSS,
16945   IX86_BUILTIN_SUBPS,
16946   IX86_BUILTIN_SUBSS,
16947
16948   IX86_BUILTIN_CMPEQPS,
16949   IX86_BUILTIN_CMPLTPS,
16950   IX86_BUILTIN_CMPLEPS,
16951   IX86_BUILTIN_CMPGTPS,
16952   IX86_BUILTIN_CMPGEPS,
16953   IX86_BUILTIN_CMPNEQPS,
16954   IX86_BUILTIN_CMPNLTPS,
16955   IX86_BUILTIN_CMPNLEPS,
16956   IX86_BUILTIN_CMPNGTPS,
16957   IX86_BUILTIN_CMPNGEPS,
16958   IX86_BUILTIN_CMPORDPS,
16959   IX86_BUILTIN_CMPUNORDPS,
16960   IX86_BUILTIN_CMPEQSS,
16961   IX86_BUILTIN_CMPLTSS,
16962   IX86_BUILTIN_CMPLESS,
16963   IX86_BUILTIN_CMPNEQSS,
16964   IX86_BUILTIN_CMPNLTSS,
16965   IX86_BUILTIN_CMPNLESS,
16966   IX86_BUILTIN_CMPNGTSS,
16967   IX86_BUILTIN_CMPNGESS,
16968   IX86_BUILTIN_CMPORDSS,
16969   IX86_BUILTIN_CMPUNORDSS,
16970
16971   IX86_BUILTIN_COMIEQSS,
16972   IX86_BUILTIN_COMILTSS,
16973   IX86_BUILTIN_COMILESS,
16974   IX86_BUILTIN_COMIGTSS,
16975   IX86_BUILTIN_COMIGESS,
16976   IX86_BUILTIN_COMINEQSS,
16977   IX86_BUILTIN_UCOMIEQSS,
16978   IX86_BUILTIN_UCOMILTSS,
16979   IX86_BUILTIN_UCOMILESS,
16980   IX86_BUILTIN_UCOMIGTSS,
16981   IX86_BUILTIN_UCOMIGESS,
16982   IX86_BUILTIN_UCOMINEQSS,
16983
16984   IX86_BUILTIN_CVTPI2PS,
16985   IX86_BUILTIN_CVTPS2PI,
16986   IX86_BUILTIN_CVTSI2SS,
16987   IX86_BUILTIN_CVTSI642SS,
16988   IX86_BUILTIN_CVTSS2SI,
16989   IX86_BUILTIN_CVTSS2SI64,
16990   IX86_BUILTIN_CVTTPS2PI,
16991   IX86_BUILTIN_CVTTSS2SI,
16992   IX86_BUILTIN_CVTTSS2SI64,
16993
16994   IX86_BUILTIN_MAXPS,
16995   IX86_BUILTIN_MAXSS,
16996   IX86_BUILTIN_MINPS,
16997   IX86_BUILTIN_MINSS,
16998
16999   IX86_BUILTIN_LOADUPS,
17000   IX86_BUILTIN_STOREUPS,
17001   IX86_BUILTIN_MOVSS,
17002
17003   IX86_BUILTIN_MOVHLPS,
17004   IX86_BUILTIN_MOVLHPS,
17005   IX86_BUILTIN_LOADHPS,
17006   IX86_BUILTIN_LOADLPS,
17007   IX86_BUILTIN_STOREHPS,
17008   IX86_BUILTIN_STORELPS,
17009
17010   IX86_BUILTIN_MASKMOVQ,
17011   IX86_BUILTIN_MOVMSKPS,
17012   IX86_BUILTIN_PMOVMSKB,
17013
17014   IX86_BUILTIN_MOVNTPS,
17015   IX86_BUILTIN_MOVNTQ,
17016
17017   IX86_BUILTIN_LOADDQU,
17018   IX86_BUILTIN_STOREDQU,
17019
17020   IX86_BUILTIN_PACKSSWB,
17021   IX86_BUILTIN_PACKSSDW,
17022   IX86_BUILTIN_PACKUSWB,
17023
17024   IX86_BUILTIN_PADDB,
17025   IX86_BUILTIN_PADDW,
17026   IX86_BUILTIN_PADDD,
17027   IX86_BUILTIN_PADDQ,
17028   IX86_BUILTIN_PADDSB,
17029   IX86_BUILTIN_PADDSW,
17030   IX86_BUILTIN_PADDUSB,
17031   IX86_BUILTIN_PADDUSW,
17032   IX86_BUILTIN_PSUBB,
17033   IX86_BUILTIN_PSUBW,
17034   IX86_BUILTIN_PSUBD,
17035   IX86_BUILTIN_PSUBQ,
17036   IX86_BUILTIN_PSUBSB,
17037   IX86_BUILTIN_PSUBSW,
17038   IX86_BUILTIN_PSUBUSB,
17039   IX86_BUILTIN_PSUBUSW,
17040
17041   IX86_BUILTIN_PAND,
17042   IX86_BUILTIN_PANDN,
17043   IX86_BUILTIN_POR,
17044   IX86_BUILTIN_PXOR,
17045
17046   IX86_BUILTIN_PAVGB,
17047   IX86_BUILTIN_PAVGW,
17048
17049   IX86_BUILTIN_PCMPEQB,
17050   IX86_BUILTIN_PCMPEQW,
17051   IX86_BUILTIN_PCMPEQD,
17052   IX86_BUILTIN_PCMPGTB,
17053   IX86_BUILTIN_PCMPGTW,
17054   IX86_BUILTIN_PCMPGTD,
17055
17056   IX86_BUILTIN_PMADDWD,
17057
17058   IX86_BUILTIN_PMAXSW,
17059   IX86_BUILTIN_PMAXUB,
17060   IX86_BUILTIN_PMINSW,
17061   IX86_BUILTIN_PMINUB,
17062
17063   IX86_BUILTIN_PMULHUW,
17064   IX86_BUILTIN_PMULHW,
17065   IX86_BUILTIN_PMULLW,
17066
17067   IX86_BUILTIN_PSADBW,
17068   IX86_BUILTIN_PSHUFW,
17069
17070   IX86_BUILTIN_PSLLW,
17071   IX86_BUILTIN_PSLLD,
17072   IX86_BUILTIN_PSLLQ,
17073   IX86_BUILTIN_PSRAW,
17074   IX86_BUILTIN_PSRAD,
17075   IX86_BUILTIN_PSRLW,
17076   IX86_BUILTIN_PSRLD,
17077   IX86_BUILTIN_PSRLQ,
17078   IX86_BUILTIN_PSLLWI,
17079   IX86_BUILTIN_PSLLDI,
17080   IX86_BUILTIN_PSLLQI,
17081   IX86_BUILTIN_PSRAWI,
17082   IX86_BUILTIN_PSRADI,
17083   IX86_BUILTIN_PSRLWI,
17084   IX86_BUILTIN_PSRLDI,
17085   IX86_BUILTIN_PSRLQI,
17086
17087   IX86_BUILTIN_PUNPCKHBW,
17088   IX86_BUILTIN_PUNPCKHWD,
17089   IX86_BUILTIN_PUNPCKHDQ,
17090   IX86_BUILTIN_PUNPCKLBW,
17091   IX86_BUILTIN_PUNPCKLWD,
17092   IX86_BUILTIN_PUNPCKLDQ,
17093
17094   IX86_BUILTIN_SHUFPS,
17095
17096   IX86_BUILTIN_RCPPS,
17097   IX86_BUILTIN_RCPSS,
17098   IX86_BUILTIN_RSQRTPS,
17099   IX86_BUILTIN_RSQRTPS_NR,
17100   IX86_BUILTIN_RSQRTSS,
17101   IX86_BUILTIN_RSQRTF,
17102   IX86_BUILTIN_SQRTPS,
17103   IX86_BUILTIN_SQRTPS_NR,
17104   IX86_BUILTIN_SQRTSS,
17105
17106   IX86_BUILTIN_UNPCKHPS,
17107   IX86_BUILTIN_UNPCKLPS,
17108
17109   IX86_BUILTIN_ANDPS,
17110   IX86_BUILTIN_ANDNPS,
17111   IX86_BUILTIN_ORPS,
17112   IX86_BUILTIN_XORPS,
17113
17114   IX86_BUILTIN_EMMS,
17115   IX86_BUILTIN_LDMXCSR,
17116   IX86_BUILTIN_STMXCSR,
17117   IX86_BUILTIN_SFENCE,
17118
17119   /* 3DNow! Original */
17120   IX86_BUILTIN_FEMMS,
17121   IX86_BUILTIN_PAVGUSB,
17122   IX86_BUILTIN_PF2ID,
17123   IX86_BUILTIN_PFACC,
17124   IX86_BUILTIN_PFADD,
17125   IX86_BUILTIN_PFCMPEQ,
17126   IX86_BUILTIN_PFCMPGE,
17127   IX86_BUILTIN_PFCMPGT,
17128   IX86_BUILTIN_PFMAX,
17129   IX86_BUILTIN_PFMIN,
17130   IX86_BUILTIN_PFMUL,
17131   IX86_BUILTIN_PFRCP,
17132   IX86_BUILTIN_PFRCPIT1,
17133   IX86_BUILTIN_PFRCPIT2,
17134   IX86_BUILTIN_PFRSQIT1,
17135   IX86_BUILTIN_PFRSQRT,
17136   IX86_BUILTIN_PFSUB,
17137   IX86_BUILTIN_PFSUBR,
17138   IX86_BUILTIN_PI2FD,
17139   IX86_BUILTIN_PMULHRW,
17140
17141   /* 3DNow! Athlon Extensions */
17142   IX86_BUILTIN_PF2IW,
17143   IX86_BUILTIN_PFNACC,
17144   IX86_BUILTIN_PFPNACC,
17145   IX86_BUILTIN_PI2FW,
17146   IX86_BUILTIN_PSWAPDSI,
17147   IX86_BUILTIN_PSWAPDSF,
17148
17149   /* SSE2 */
17150   IX86_BUILTIN_ADDPD,
17151   IX86_BUILTIN_ADDSD,
17152   IX86_BUILTIN_DIVPD,
17153   IX86_BUILTIN_DIVSD,
17154   IX86_BUILTIN_MULPD,
17155   IX86_BUILTIN_MULSD,
17156   IX86_BUILTIN_SUBPD,
17157   IX86_BUILTIN_SUBSD,
17158
17159   IX86_BUILTIN_CMPEQPD,
17160   IX86_BUILTIN_CMPLTPD,
17161   IX86_BUILTIN_CMPLEPD,
17162   IX86_BUILTIN_CMPGTPD,
17163   IX86_BUILTIN_CMPGEPD,
17164   IX86_BUILTIN_CMPNEQPD,
17165   IX86_BUILTIN_CMPNLTPD,
17166   IX86_BUILTIN_CMPNLEPD,
17167   IX86_BUILTIN_CMPNGTPD,
17168   IX86_BUILTIN_CMPNGEPD,
17169   IX86_BUILTIN_CMPORDPD,
17170   IX86_BUILTIN_CMPUNORDPD,
17171   IX86_BUILTIN_CMPEQSD,
17172   IX86_BUILTIN_CMPLTSD,
17173   IX86_BUILTIN_CMPLESD,
17174   IX86_BUILTIN_CMPNEQSD,
17175   IX86_BUILTIN_CMPNLTSD,
17176   IX86_BUILTIN_CMPNLESD,
17177   IX86_BUILTIN_CMPORDSD,
17178   IX86_BUILTIN_CMPUNORDSD,
17179
17180   IX86_BUILTIN_COMIEQSD,
17181   IX86_BUILTIN_COMILTSD,
17182   IX86_BUILTIN_COMILESD,
17183   IX86_BUILTIN_COMIGTSD,
17184   IX86_BUILTIN_COMIGESD,
17185   IX86_BUILTIN_COMINEQSD,
17186   IX86_BUILTIN_UCOMIEQSD,
17187   IX86_BUILTIN_UCOMILTSD,
17188   IX86_BUILTIN_UCOMILESD,
17189   IX86_BUILTIN_UCOMIGTSD,
17190   IX86_BUILTIN_UCOMIGESD,
17191   IX86_BUILTIN_UCOMINEQSD,
17192
17193   IX86_BUILTIN_MAXPD,
17194   IX86_BUILTIN_MAXSD,
17195   IX86_BUILTIN_MINPD,
17196   IX86_BUILTIN_MINSD,
17197
17198   IX86_BUILTIN_ANDPD,
17199   IX86_BUILTIN_ANDNPD,
17200   IX86_BUILTIN_ORPD,
17201   IX86_BUILTIN_XORPD,
17202
17203   IX86_BUILTIN_SQRTPD,
17204   IX86_BUILTIN_SQRTSD,
17205
17206   IX86_BUILTIN_UNPCKHPD,
17207   IX86_BUILTIN_UNPCKLPD,
17208
17209   IX86_BUILTIN_SHUFPD,
17210
17211   IX86_BUILTIN_LOADUPD,
17212   IX86_BUILTIN_STOREUPD,
17213   IX86_BUILTIN_MOVSD,
17214
17215   IX86_BUILTIN_LOADHPD,
17216   IX86_BUILTIN_LOADLPD,
17217
17218   IX86_BUILTIN_CVTDQ2PD,
17219   IX86_BUILTIN_CVTDQ2PS,
17220
17221   IX86_BUILTIN_CVTPD2DQ,
17222   IX86_BUILTIN_CVTPD2PI,
17223   IX86_BUILTIN_CVTPD2PS,
17224   IX86_BUILTIN_CVTTPD2DQ,
17225   IX86_BUILTIN_CVTTPD2PI,
17226
17227   IX86_BUILTIN_CVTPI2PD,
17228   IX86_BUILTIN_CVTSI2SD,
17229   IX86_BUILTIN_CVTSI642SD,
17230
17231   IX86_BUILTIN_CVTSD2SI,
17232   IX86_BUILTIN_CVTSD2SI64,
17233   IX86_BUILTIN_CVTSD2SS,
17234   IX86_BUILTIN_CVTSS2SD,
17235   IX86_BUILTIN_CVTTSD2SI,
17236   IX86_BUILTIN_CVTTSD2SI64,
17237
17238   IX86_BUILTIN_CVTPS2DQ,
17239   IX86_BUILTIN_CVTPS2PD,
17240   IX86_BUILTIN_CVTTPS2DQ,
17241
17242   IX86_BUILTIN_MOVNTI,
17243   IX86_BUILTIN_MOVNTPD,
17244   IX86_BUILTIN_MOVNTDQ,
17245
17246   /* SSE2 MMX */
17247   IX86_BUILTIN_MASKMOVDQU,
17248   IX86_BUILTIN_MOVMSKPD,
17249   IX86_BUILTIN_PMOVMSKB128,
17250
17251   IX86_BUILTIN_PACKSSWB128,
17252   IX86_BUILTIN_PACKSSDW128,
17253   IX86_BUILTIN_PACKUSWB128,
17254
17255   IX86_BUILTIN_PADDB128,
17256   IX86_BUILTIN_PADDW128,
17257   IX86_BUILTIN_PADDD128,
17258   IX86_BUILTIN_PADDQ128,
17259   IX86_BUILTIN_PADDSB128,
17260   IX86_BUILTIN_PADDSW128,
17261   IX86_BUILTIN_PADDUSB128,
17262   IX86_BUILTIN_PADDUSW128,
17263   IX86_BUILTIN_PSUBB128,
17264   IX86_BUILTIN_PSUBW128,
17265   IX86_BUILTIN_PSUBD128,
17266   IX86_BUILTIN_PSUBQ128,
17267   IX86_BUILTIN_PSUBSB128,
17268   IX86_BUILTIN_PSUBSW128,
17269   IX86_BUILTIN_PSUBUSB128,
17270   IX86_BUILTIN_PSUBUSW128,
17271
17272   IX86_BUILTIN_PAND128,
17273   IX86_BUILTIN_PANDN128,
17274   IX86_BUILTIN_POR128,
17275   IX86_BUILTIN_PXOR128,
17276
17277   IX86_BUILTIN_PAVGB128,
17278   IX86_BUILTIN_PAVGW128,
17279
17280   IX86_BUILTIN_PCMPEQB128,
17281   IX86_BUILTIN_PCMPEQW128,
17282   IX86_BUILTIN_PCMPEQD128,
17283   IX86_BUILTIN_PCMPGTB128,
17284   IX86_BUILTIN_PCMPGTW128,
17285   IX86_BUILTIN_PCMPGTD128,
17286
17287   IX86_BUILTIN_PMADDWD128,
17288
17289   IX86_BUILTIN_PMAXSW128,
17290   IX86_BUILTIN_PMAXUB128,
17291   IX86_BUILTIN_PMINSW128,
17292   IX86_BUILTIN_PMINUB128,
17293
17294   IX86_BUILTIN_PMULUDQ,
17295   IX86_BUILTIN_PMULUDQ128,
17296   IX86_BUILTIN_PMULHUW128,
17297   IX86_BUILTIN_PMULHW128,
17298   IX86_BUILTIN_PMULLW128,
17299
17300   IX86_BUILTIN_PSADBW128,
17301   IX86_BUILTIN_PSHUFHW,
17302   IX86_BUILTIN_PSHUFLW,
17303   IX86_BUILTIN_PSHUFD,
17304
17305   IX86_BUILTIN_PSLLDQI128,
17306   IX86_BUILTIN_PSLLWI128,
17307   IX86_BUILTIN_PSLLDI128,
17308   IX86_BUILTIN_PSLLQI128,
17309   IX86_BUILTIN_PSRAWI128,
17310   IX86_BUILTIN_PSRADI128,
17311   IX86_BUILTIN_PSRLDQI128,
17312   IX86_BUILTIN_PSRLWI128,
17313   IX86_BUILTIN_PSRLDI128,
17314   IX86_BUILTIN_PSRLQI128,
17315
17316   IX86_BUILTIN_PSLLDQ128,
17317   IX86_BUILTIN_PSLLW128,
17318   IX86_BUILTIN_PSLLD128,
17319   IX86_BUILTIN_PSLLQ128,
17320   IX86_BUILTIN_PSRAW128,
17321   IX86_BUILTIN_PSRAD128,
17322   IX86_BUILTIN_PSRLW128,
17323   IX86_BUILTIN_PSRLD128,
17324   IX86_BUILTIN_PSRLQ128,
17325
17326   IX86_BUILTIN_PUNPCKHBW128,
17327   IX86_BUILTIN_PUNPCKHWD128,
17328   IX86_BUILTIN_PUNPCKHDQ128,
17329   IX86_BUILTIN_PUNPCKHQDQ128,
17330   IX86_BUILTIN_PUNPCKLBW128,
17331   IX86_BUILTIN_PUNPCKLWD128,
17332   IX86_BUILTIN_PUNPCKLDQ128,
17333   IX86_BUILTIN_PUNPCKLQDQ128,
17334
17335   IX86_BUILTIN_CLFLUSH,
17336   IX86_BUILTIN_MFENCE,
17337   IX86_BUILTIN_LFENCE,
17338
17339   /* Prescott New Instructions.  */
17340   IX86_BUILTIN_ADDSUBPS,
17341   IX86_BUILTIN_HADDPS,
17342   IX86_BUILTIN_HSUBPS,
17343   IX86_BUILTIN_MOVSHDUP,
17344   IX86_BUILTIN_MOVSLDUP,
17345   IX86_BUILTIN_ADDSUBPD,
17346   IX86_BUILTIN_HADDPD,
17347   IX86_BUILTIN_HSUBPD,
17348   IX86_BUILTIN_LDDQU,
17349
17350   IX86_BUILTIN_MONITOR,
17351   IX86_BUILTIN_MWAIT,
17352
17353   /* SSSE3.  */
17354   IX86_BUILTIN_PHADDW,
17355   IX86_BUILTIN_PHADDD,
17356   IX86_BUILTIN_PHADDSW,
17357   IX86_BUILTIN_PHSUBW,
17358   IX86_BUILTIN_PHSUBD,
17359   IX86_BUILTIN_PHSUBSW,
17360   IX86_BUILTIN_PMADDUBSW,
17361   IX86_BUILTIN_PMULHRSW,
17362   IX86_BUILTIN_PSHUFB,
17363   IX86_BUILTIN_PSIGNB,
17364   IX86_BUILTIN_PSIGNW,
17365   IX86_BUILTIN_PSIGND,
17366   IX86_BUILTIN_PALIGNR,
17367   IX86_BUILTIN_PABSB,
17368   IX86_BUILTIN_PABSW,
17369   IX86_BUILTIN_PABSD,
17370
17371   IX86_BUILTIN_PHADDW128,
17372   IX86_BUILTIN_PHADDD128,
17373   IX86_BUILTIN_PHADDSW128,
17374   IX86_BUILTIN_PHSUBW128,
17375   IX86_BUILTIN_PHSUBD128,
17376   IX86_BUILTIN_PHSUBSW128,
17377   IX86_BUILTIN_PMADDUBSW128,
17378   IX86_BUILTIN_PMULHRSW128,
17379   IX86_BUILTIN_PSHUFB128,
17380   IX86_BUILTIN_PSIGNB128,
17381   IX86_BUILTIN_PSIGNW128,
17382   IX86_BUILTIN_PSIGND128,
17383   IX86_BUILTIN_PALIGNR128,
17384   IX86_BUILTIN_PABSB128,
17385   IX86_BUILTIN_PABSW128,
17386   IX86_BUILTIN_PABSD128,
17387
17388   /* AMDFAM10 - SSE4A New Instructions.  */
17389   IX86_BUILTIN_MOVNTSD,
17390   IX86_BUILTIN_MOVNTSS,
17391   IX86_BUILTIN_EXTRQI,
17392   IX86_BUILTIN_EXTRQ,
17393   IX86_BUILTIN_INSERTQI,
17394   IX86_BUILTIN_INSERTQ,
17395
17396   /* SSE4.1.  */
17397   IX86_BUILTIN_BLENDPD,
17398   IX86_BUILTIN_BLENDPS,
17399   IX86_BUILTIN_BLENDVPD,
17400   IX86_BUILTIN_BLENDVPS,
17401   IX86_BUILTIN_PBLENDVB128,
17402   IX86_BUILTIN_PBLENDW128,
17403
17404   IX86_BUILTIN_DPPD,
17405   IX86_BUILTIN_DPPS,
17406
17407   IX86_BUILTIN_INSERTPS128,
17408
17409   IX86_BUILTIN_MOVNTDQA,
17410   IX86_BUILTIN_MPSADBW128,
17411   IX86_BUILTIN_PACKUSDW128,
17412   IX86_BUILTIN_PCMPEQQ,
17413   IX86_BUILTIN_PHMINPOSUW128,
17414
17415   IX86_BUILTIN_PMAXSB128,
17416   IX86_BUILTIN_PMAXSD128,
17417   IX86_BUILTIN_PMAXUD128,
17418   IX86_BUILTIN_PMAXUW128,
17419
17420   IX86_BUILTIN_PMINSB128,
17421   IX86_BUILTIN_PMINSD128,
17422   IX86_BUILTIN_PMINUD128,
17423   IX86_BUILTIN_PMINUW128,
17424
17425   IX86_BUILTIN_PMOVSXBW128,
17426   IX86_BUILTIN_PMOVSXBD128,
17427   IX86_BUILTIN_PMOVSXBQ128,
17428   IX86_BUILTIN_PMOVSXWD128,
17429   IX86_BUILTIN_PMOVSXWQ128,
17430   IX86_BUILTIN_PMOVSXDQ128,
17431
17432   IX86_BUILTIN_PMOVZXBW128,
17433   IX86_BUILTIN_PMOVZXBD128,
17434   IX86_BUILTIN_PMOVZXBQ128,
17435   IX86_BUILTIN_PMOVZXWD128,
17436   IX86_BUILTIN_PMOVZXWQ128,
17437   IX86_BUILTIN_PMOVZXDQ128,
17438
17439   IX86_BUILTIN_PMULDQ128,
17440   IX86_BUILTIN_PMULLD128,
17441
17442   IX86_BUILTIN_ROUNDPD,
17443   IX86_BUILTIN_ROUNDPS,
17444   IX86_BUILTIN_ROUNDSD,
17445   IX86_BUILTIN_ROUNDSS,
17446
17447   IX86_BUILTIN_PTESTZ,
17448   IX86_BUILTIN_PTESTC,
17449   IX86_BUILTIN_PTESTNZC,
17450
17451   IX86_BUILTIN_VEC_INIT_V2SI,
17452   IX86_BUILTIN_VEC_INIT_V4HI,
17453   IX86_BUILTIN_VEC_INIT_V8QI,
17454   IX86_BUILTIN_VEC_EXT_V2DF,
17455   IX86_BUILTIN_VEC_EXT_V2DI,
17456   IX86_BUILTIN_VEC_EXT_V4SF,
17457   IX86_BUILTIN_VEC_EXT_V4SI,
17458   IX86_BUILTIN_VEC_EXT_V8HI,
17459   IX86_BUILTIN_VEC_EXT_V2SI,
17460   IX86_BUILTIN_VEC_EXT_V4HI,
17461   IX86_BUILTIN_VEC_EXT_V16QI,
17462   IX86_BUILTIN_VEC_SET_V2DI,
17463   IX86_BUILTIN_VEC_SET_V4SF,
17464   IX86_BUILTIN_VEC_SET_V4SI,
17465   IX86_BUILTIN_VEC_SET_V8HI,
17466   IX86_BUILTIN_VEC_SET_V4HI,
17467   IX86_BUILTIN_VEC_SET_V16QI,
17468
17469   IX86_BUILTIN_VEC_PACK_SFIX,
17470
17471   /* SSE4.2.  */
17472   IX86_BUILTIN_CRC32QI,
17473   IX86_BUILTIN_CRC32HI,
17474   IX86_BUILTIN_CRC32SI,
17475   IX86_BUILTIN_CRC32DI,
17476
17477   IX86_BUILTIN_PCMPESTRI128,
17478   IX86_BUILTIN_PCMPESTRM128,
17479   IX86_BUILTIN_PCMPESTRA128,
17480   IX86_BUILTIN_PCMPESTRC128,
17481   IX86_BUILTIN_PCMPESTRO128,
17482   IX86_BUILTIN_PCMPESTRS128,
17483   IX86_BUILTIN_PCMPESTRZ128,
17484   IX86_BUILTIN_PCMPISTRI128,
17485   IX86_BUILTIN_PCMPISTRM128,
17486   IX86_BUILTIN_PCMPISTRA128,
17487   IX86_BUILTIN_PCMPISTRC128,
17488   IX86_BUILTIN_PCMPISTRO128,
17489   IX86_BUILTIN_PCMPISTRS128,
17490   IX86_BUILTIN_PCMPISTRZ128,
17491
17492   IX86_BUILTIN_PCMPGTQ,
17493
17494   /* TFmode support builtins.  */
17495   IX86_BUILTIN_INFQ,
17496   IX86_BUILTIN_FABSQ,
17497   IX86_BUILTIN_COPYSIGNQ,
17498
17499   /* SSE5 instructions */
17500   IX86_BUILTIN_FMADDSS,
17501   IX86_BUILTIN_FMADDSD,
17502   IX86_BUILTIN_FMADDPS,
17503   IX86_BUILTIN_FMADDPD,
17504   IX86_BUILTIN_FMSUBSS,
17505   IX86_BUILTIN_FMSUBSD,
17506   IX86_BUILTIN_FMSUBPS,
17507   IX86_BUILTIN_FMSUBPD,
17508   IX86_BUILTIN_FNMADDSS,
17509   IX86_BUILTIN_FNMADDSD,
17510   IX86_BUILTIN_FNMADDPS,
17511   IX86_BUILTIN_FNMADDPD,
17512   IX86_BUILTIN_FNMSUBSS,
17513   IX86_BUILTIN_FNMSUBSD,
17514   IX86_BUILTIN_FNMSUBPS,
17515   IX86_BUILTIN_FNMSUBPD,
17516   IX86_BUILTIN_PCMOV_V2DI,
17517   IX86_BUILTIN_PCMOV_V4SI,
17518   IX86_BUILTIN_PCMOV_V8HI,
17519   IX86_BUILTIN_PCMOV_V16QI,
17520   IX86_BUILTIN_PCMOV_V4SF,
17521   IX86_BUILTIN_PCMOV_V2DF,
17522   IX86_BUILTIN_PPERM,
17523   IX86_BUILTIN_PERMPS,
17524   IX86_BUILTIN_PERMPD,
17525   IX86_BUILTIN_PMACSSWW,
17526   IX86_BUILTIN_PMACSWW,
17527   IX86_BUILTIN_PMACSSWD,
17528   IX86_BUILTIN_PMACSWD,
17529   IX86_BUILTIN_PMACSSDD,
17530   IX86_BUILTIN_PMACSDD,
17531   IX86_BUILTIN_PMACSSDQL,
17532   IX86_BUILTIN_PMACSSDQH,
17533   IX86_BUILTIN_PMACSDQL,
17534   IX86_BUILTIN_PMACSDQH,
17535   IX86_BUILTIN_PMADCSSWD,
17536   IX86_BUILTIN_PMADCSWD,
17537   IX86_BUILTIN_PHADDBW,
17538   IX86_BUILTIN_PHADDBD,
17539   IX86_BUILTIN_PHADDBQ,
17540   IX86_BUILTIN_PHADDWD,
17541   IX86_BUILTIN_PHADDWQ,
17542   IX86_BUILTIN_PHADDDQ,
17543   IX86_BUILTIN_PHADDUBW,
17544   IX86_BUILTIN_PHADDUBD,
17545   IX86_BUILTIN_PHADDUBQ,
17546   IX86_BUILTIN_PHADDUWD,
17547   IX86_BUILTIN_PHADDUWQ,
17548   IX86_BUILTIN_PHADDUDQ,
17549   IX86_BUILTIN_PHSUBBW,
17550   IX86_BUILTIN_PHSUBWD,
17551   IX86_BUILTIN_PHSUBDQ,
17552   IX86_BUILTIN_PROTB,
17553   IX86_BUILTIN_PROTW,
17554   IX86_BUILTIN_PROTD,
17555   IX86_BUILTIN_PROTQ,
17556   IX86_BUILTIN_PROTB_IMM,
17557   IX86_BUILTIN_PROTW_IMM,
17558   IX86_BUILTIN_PROTD_IMM,
17559   IX86_BUILTIN_PROTQ_IMM,
17560   IX86_BUILTIN_PSHLB,
17561   IX86_BUILTIN_PSHLW,
17562   IX86_BUILTIN_PSHLD,
17563   IX86_BUILTIN_PSHLQ,
17564   IX86_BUILTIN_PSHAB,
17565   IX86_BUILTIN_PSHAW,
17566   IX86_BUILTIN_PSHAD,
17567   IX86_BUILTIN_PSHAQ,
17568   IX86_BUILTIN_FRCZSS,
17569   IX86_BUILTIN_FRCZSD,
17570   IX86_BUILTIN_FRCZPS,
17571   IX86_BUILTIN_FRCZPD,
17572   IX86_BUILTIN_CVTPH2PS,
17573   IX86_BUILTIN_CVTPS2PH,
17574
17575   IX86_BUILTIN_COMEQSS,
17576   IX86_BUILTIN_COMNESS,
17577   IX86_BUILTIN_COMLTSS,
17578   IX86_BUILTIN_COMLESS,
17579   IX86_BUILTIN_COMGTSS,
17580   IX86_BUILTIN_COMGESS,
17581   IX86_BUILTIN_COMUEQSS,
17582   IX86_BUILTIN_COMUNESS,
17583   IX86_BUILTIN_COMULTSS,
17584   IX86_BUILTIN_COMULESS,
17585   IX86_BUILTIN_COMUGTSS,
17586   IX86_BUILTIN_COMUGESS,
17587   IX86_BUILTIN_COMORDSS,
17588   IX86_BUILTIN_COMUNORDSS,
17589   IX86_BUILTIN_COMFALSESS,
17590   IX86_BUILTIN_COMTRUESS,
17591
17592   IX86_BUILTIN_COMEQSD,
17593   IX86_BUILTIN_COMNESD,
17594   IX86_BUILTIN_COMLTSD,
17595   IX86_BUILTIN_COMLESD,
17596   IX86_BUILTIN_COMGTSD,
17597   IX86_BUILTIN_COMGESD,
17598   IX86_BUILTIN_COMUEQSD,
17599   IX86_BUILTIN_COMUNESD,
17600   IX86_BUILTIN_COMULTSD,
17601   IX86_BUILTIN_COMULESD,
17602   IX86_BUILTIN_COMUGTSD,
17603   IX86_BUILTIN_COMUGESD,
17604   IX86_BUILTIN_COMORDSD,
17605   IX86_BUILTIN_COMUNORDSD,
17606   IX86_BUILTIN_COMFALSESD,
17607   IX86_BUILTIN_COMTRUESD,
17608
17609   IX86_BUILTIN_COMEQPS,
17610   IX86_BUILTIN_COMNEPS,
17611   IX86_BUILTIN_COMLTPS,
17612   IX86_BUILTIN_COMLEPS,
17613   IX86_BUILTIN_COMGTPS,
17614   IX86_BUILTIN_COMGEPS,
17615   IX86_BUILTIN_COMUEQPS,
17616   IX86_BUILTIN_COMUNEPS,
17617   IX86_BUILTIN_COMULTPS,
17618   IX86_BUILTIN_COMULEPS,
17619   IX86_BUILTIN_COMUGTPS,
17620   IX86_BUILTIN_COMUGEPS,
17621   IX86_BUILTIN_COMORDPS,
17622   IX86_BUILTIN_COMUNORDPS,
17623   IX86_BUILTIN_COMFALSEPS,
17624   IX86_BUILTIN_COMTRUEPS,
17625
17626   IX86_BUILTIN_COMEQPD,
17627   IX86_BUILTIN_COMNEPD,
17628   IX86_BUILTIN_COMLTPD,
17629   IX86_BUILTIN_COMLEPD,
17630   IX86_BUILTIN_COMGTPD,
17631   IX86_BUILTIN_COMGEPD,
17632   IX86_BUILTIN_COMUEQPD,
17633   IX86_BUILTIN_COMUNEPD,
17634   IX86_BUILTIN_COMULTPD,
17635   IX86_BUILTIN_COMULEPD,
17636   IX86_BUILTIN_COMUGTPD,
17637   IX86_BUILTIN_COMUGEPD,
17638   IX86_BUILTIN_COMORDPD,
17639   IX86_BUILTIN_COMUNORDPD,
17640   IX86_BUILTIN_COMFALSEPD,
17641   IX86_BUILTIN_COMTRUEPD,
17642
17643   IX86_BUILTIN_PCOMEQUB,
17644   IX86_BUILTIN_PCOMNEUB,
17645   IX86_BUILTIN_PCOMLTUB,
17646   IX86_BUILTIN_PCOMLEUB,
17647   IX86_BUILTIN_PCOMGTUB,
17648   IX86_BUILTIN_PCOMGEUB,
17649   IX86_BUILTIN_PCOMFALSEUB,
17650   IX86_BUILTIN_PCOMTRUEUB,
17651   IX86_BUILTIN_PCOMEQUW,
17652   IX86_BUILTIN_PCOMNEUW,
17653   IX86_BUILTIN_PCOMLTUW,
17654   IX86_BUILTIN_PCOMLEUW,
17655   IX86_BUILTIN_PCOMGTUW,
17656   IX86_BUILTIN_PCOMGEUW,
17657   IX86_BUILTIN_PCOMFALSEUW,
17658   IX86_BUILTIN_PCOMTRUEUW,
17659   IX86_BUILTIN_PCOMEQUD,
17660   IX86_BUILTIN_PCOMNEUD,
17661   IX86_BUILTIN_PCOMLTUD,
17662   IX86_BUILTIN_PCOMLEUD,
17663   IX86_BUILTIN_PCOMGTUD,
17664   IX86_BUILTIN_PCOMGEUD,
17665   IX86_BUILTIN_PCOMFALSEUD,
17666   IX86_BUILTIN_PCOMTRUEUD,
17667   IX86_BUILTIN_PCOMEQUQ,
17668   IX86_BUILTIN_PCOMNEUQ,
17669   IX86_BUILTIN_PCOMLTUQ,
17670   IX86_BUILTIN_PCOMLEUQ,
17671   IX86_BUILTIN_PCOMGTUQ,
17672   IX86_BUILTIN_PCOMGEUQ,
17673   IX86_BUILTIN_PCOMFALSEUQ,
17674   IX86_BUILTIN_PCOMTRUEUQ,
17675
17676   IX86_BUILTIN_PCOMEQB,
17677   IX86_BUILTIN_PCOMNEB,
17678   IX86_BUILTIN_PCOMLTB,
17679   IX86_BUILTIN_PCOMLEB,
17680   IX86_BUILTIN_PCOMGTB,
17681   IX86_BUILTIN_PCOMGEB,
17682   IX86_BUILTIN_PCOMFALSEB,
17683   IX86_BUILTIN_PCOMTRUEB,
17684   IX86_BUILTIN_PCOMEQW,
17685   IX86_BUILTIN_PCOMNEW,
17686   IX86_BUILTIN_PCOMLTW,
17687   IX86_BUILTIN_PCOMLEW,
17688   IX86_BUILTIN_PCOMGTW,
17689   IX86_BUILTIN_PCOMGEW,
17690   IX86_BUILTIN_PCOMFALSEW,
17691   IX86_BUILTIN_PCOMTRUEW,
17692   IX86_BUILTIN_PCOMEQD,
17693   IX86_BUILTIN_PCOMNED,
17694   IX86_BUILTIN_PCOMLTD,
17695   IX86_BUILTIN_PCOMLED,
17696   IX86_BUILTIN_PCOMGTD,
17697   IX86_BUILTIN_PCOMGED,
17698   IX86_BUILTIN_PCOMFALSED,
17699   IX86_BUILTIN_PCOMTRUED,
17700   IX86_BUILTIN_PCOMEQQ,
17701   IX86_BUILTIN_PCOMNEQ,
17702   IX86_BUILTIN_PCOMLTQ,
17703   IX86_BUILTIN_PCOMLEQ,
17704   IX86_BUILTIN_PCOMGTQ,
17705   IX86_BUILTIN_PCOMGEQ,
17706   IX86_BUILTIN_PCOMFALSEQ,
17707   IX86_BUILTIN_PCOMTRUEQ,
17708
17709   IX86_BUILTIN_MAX
17710 };
17711
17712 /* Table for the ix86 builtin decls, indexed by enum ix86_builtins code; def_builtin fills in the entries it registers.  */
17713 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
17714
17715 /* Register the ix86 target builtin NAME with function type TYPE and
17716    builtin code CODE, provided MASK shares a bit with ix86_isa_flags and,
17717    when MASK includes OPTION_MASK_ISA_64BIT, TARGET_64BIT holds.  On success
17718    record the decl in ix86_builtins[CODE] and return it; else NULL_TREE.  */
17719
17720 static inline tree
17721 def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
17722 {
17723   tree fndecl;
17724
17725   /* Nothing to do unless MASK is enabled and any 64-bit requirement is met.  */
17726   if (!(mask & ix86_isa_flags)
17727       || ((mask & OPTION_MASK_ISA_64BIT) && !TARGET_64BIT))
17728     return NULL_TREE;
17729
17730   fndecl = add_builtin_function (name, type, code, BUILT_IN_MD,
17731                                  NULL, NULL_TREE);
17732   ix86_builtins[(int) code] = fndecl;
17733   return fndecl;
17734 }
17735
17736 /* Same as def_builtin, but a decl that does get created is also flagged TREE_READONLY, i.e. "const".  */
17737
17738 static inline tree
17739 def_builtin_const (int mask, const char *name, tree type,
17740                    enum ix86_builtins code)
17741 {
17742   tree fndecl = def_builtin (mask, name, type, code);
17743   if (fndecl != NULL_TREE)
17744     TREE_READONLY (fndecl) = 1;
17745   return fndecl;
17746 }
17747
17748 /* Bits for builtin_description.flag.  (The pcmpestr/pcmpistr tables instead keep a CC mode cast to int there.)  */
17749
17750 /* Set when the comparison is not supported natively and the operands must be
17751    swapped to express it; e.g. __builtin_ia32_cmpgtps is listed as LT with this bit.  */
17752 #define BUILTIN_DESC_SWAP_OPERANDS      1
17753 /* One row of a bdesc_* table: what is recorded about a single builtin.  Rows are written as positional initializers, so field order matters.  */
17754 struct builtin_description
17755 {
17756   const unsigned int mask;            /* OPTION_MASK_ISA_* bits the builtin is tied to.  */
17757   const enum insn_code icode;         /* CODE_FOR_* insn pattern for the builtin.  */
17758   const char *const name;             /* "__builtin_ia32_..." name; 0 in some entries.  */
17759   const enum ix86_builtins code;      /* IX86_BUILTIN_* code identifying the builtin.  */
17760   const enum rtx_code comparison;     /* rtx comparison code; UNKNOWN when none is given.  */
17761   const int flag;                     /* BUILTIN_DESC_* bits, or a CC mode as int (pcmpestr/pcmpistr).  */
17762 };
17763 /* comi/ucomi compare builtins: SSE (..SS codes) then SSE2 (..SD codes).  NOTE(review): eq/lt/le rows carry UNEQ/UNLT/UNLE while gt/ge/neq carry GT/GE/LTGT; the reason lies with the code that consumes this table, not visible here.  */
17764 static const struct builtin_description bdesc_comi[] =
17765 {
17766   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
17767   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
17768   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
17769   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
17770   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
17771   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
17772   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
17773   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
17774   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
17775   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
17776   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
17777   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
17778   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
17779   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
17780   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
17781   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
17782   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
17783   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
17784   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
17785   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
17786   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
17787   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
17788   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
17789   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
17790 };
17791
17792 static const struct builtin_description bdesc_ptest[] =
17793 {
17794   /* SSE4.1.  All three rows share the sse4_1_ptest pattern and are keyed on OPTION_MASK_ISA_ROUND; the comparison code (EQ, LTU, GTU) is what tells the Z, C and NZC variants apart.  */
17795   { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, 0 },
17796   { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, 0 },
17797   { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, 0 },
17798 };
17799
17800 static const struct builtin_description bdesc_pcmpestr[] =
17801 {
17802   /* SSE4.2.  Every row uses the sse4_2_pcmpestr pattern.  FLAG is 0 for the pcmpestri/pcmpestrm builtins and a CC mode (cast to int) for the a/c/o/s/z variants.  */
17803   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
17804   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
17805   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
17806   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
17807   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
17808   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
17809   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
17810 };
17811
17812 static const struct builtin_description bdesc_pcmpistr[] =
17813 {
17814   /* SSE4.2.  Every row uses the sse4_2_pcmpistr pattern.  FLAG is 0 for the pcmpistri/pcmpistrm builtins and a CC mode (cast to int) for the a/c/o/s/z variants.  */
17815   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
17816   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
17817   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
17818   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
17819   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
17820   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
17821   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
17822 };
17823
17824 static const struct builtin_description bdesc_crc32[] =
17825 {
17826   /* SSE4.2.  NAME is 0 in every row.  Only the DImode form takes a 64-bit operand, so it alone carries OPTION_MASK_ISA_64BIT; the QI/HI/SI forms are not tied to a 64-bit target.  */
17827   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32qi, 0, IX86_BUILTIN_CRC32QI, UNKNOWN, 0 },
17828   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32hi, 0, IX86_BUILTIN_CRC32HI, UNKNOWN, 0 },
17829   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32si, 0, IX86_BUILTIN_CRC32SI, UNKNOWN, 0 },
17830   { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, 0, IX86_BUILTIN_CRC32DI, UNKNOWN, 0 },
17831 };
17832
17833 /* SSE builtins with 3 arguments whose last argument must be an immediate or xmm0.  The roundsd/roundss rows carry no name (NAME is 0) and are keyed on OPTION_MASK_ISA_ROUND instead of OPTION_MASK_ISA_SSE4_1.  */
17834 static const struct builtin_description bdesc_sse_3arg[] =
17835 {
17836   /* SSE4.1 */
17837   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, 0 },
17838   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, 0 },
17839   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, 0 },
17840   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, 0 },
17841   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, 0 },
17842   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, 0 },
17843   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, 0 },
17844   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, 0 },
17845   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, 0 },
17846   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, 0 },
17847   { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, 0, IX86_BUILTIN_ROUNDSD, UNKNOWN, 0 },
17848   { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, 0, IX86_BUILTIN_ROUNDSS, UNKNOWN, 0 },
17849 };
17850
17851 static const struct builtin_description bdesc_2arg[] =
17852 {
17853   /* SSE */
17854   { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, 0 },
17855   { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, 0 },
17856   { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, 0 },
17857   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, 0 },
17858   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3,  "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, 0 },
17859   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3,  "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, 0 },
17860   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3,  "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, 0 },
17861   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3,  "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, 0 },
17862
17863   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
17864   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
17865   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
17866   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, BUILTIN_DESC_SWAP_OPERANDS },
17867   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, BUILTIN_DESC_SWAP_OPERANDS },
17868   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
17869   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
17870   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
17871   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
17872   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, BUILTIN_DESC_SWAP_OPERANDS },
17873   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, BUILTIN_DESC_SWAP_OPERANDS },
17874   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
17875   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
17876   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
17877   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
17878   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
17879   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
17880   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
17881   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
17882   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, BUILTIN_DESC_SWAP_OPERANDS },
17883   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, BUILTIN_DESC_SWAP_OPERANDS },
17884   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, 0 },
17885
17886   { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, 0 },
17887   { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, 0 },
17888   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, 0 },
17889   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, 0 },
17890
17891   { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, 0 },
17892   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_nandv4sf3,  "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, 0 },
17893   { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, 0 },
17894   { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3,  "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, 0 },
17895
17896   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss,  "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, 0 },
17897   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps,  "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, 0 },
17898   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps,  "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, 0 },
17899   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, 0 },
17900   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, 0 },
17901
17902   /* MMX */
17903   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, 0 },
17904   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, 0 },
17905   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, 0 },
17906   { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, 0 },
17907   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, 0 },
17908   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, 0 },
17909   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, 0 },
17910   { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, 0 },
17911
17912   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, 0 },
17913   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, 0 },
17914   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, 0 },
17915   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, 0 },
17916   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, 0 },
17917   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, 0 },
17918   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, 0 },
17919   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, 0 },
17920
17921   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, 0 },
17922   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, 0 },
17923   { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, 0 },
17924
17925   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, 0 },
17926   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, 0 },
17927   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, 0 },
17928   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, 0 },
17929
17930   { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, 0 },
17931   { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, 0 },
17932
17933   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, 0 },
17934   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, 0 },
17935   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, 0 },
17936   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, 0 },
17937   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, 0 },
17938   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, 0 },
17939
17940   { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, 0 },
17941   { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, 0 },
17942   { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, 0 },
17943   { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, 0 },
17944
17945   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, 0 },
17946   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, 0 },
17947   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, 0 },
17948   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, 0 },
17949   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, 0 },
17950   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, 0 },
17951
17952   /* Special.  */
17953   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, UNKNOWN, 0 },
17954   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, UNKNOWN, 0 },
17955   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, UNKNOWN, 0 },
17956
17957   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, UNKNOWN, 0 },
17958   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, UNKNOWN, 0 },
17959   { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, UNKNOWN, 0 },
17960
17961   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, UNKNOWN, 0 },
17962   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, UNKNOWN, 0 },
17963   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, UNKNOWN, 0 },
17964   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, UNKNOWN, 0 },
17965   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, UNKNOWN, 0 },
17966   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, UNKNOWN, 0 },
17967
17968   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, UNKNOWN, 0 },
17969   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, UNKNOWN, 0 },
17970   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, UNKNOWN, 0 },
17971   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, UNKNOWN, 0 },
17972   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, UNKNOWN, 0 },
17973   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, UNKNOWN, 0 },
17974
17975   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, UNKNOWN, 0 },
17976   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, UNKNOWN, 0 },
17977   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, UNKNOWN, 0 },
17978   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, UNKNOWN, 0 },
17979
17980   { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, UNKNOWN, 0 },
17981   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, UNKNOWN, 0 },
17982
17983   /* SSE2 */
17984   { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, 0 },
17985   { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, 0 },
17986   { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, 0 },
17987   { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, 0 },
17988   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3,  "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, 0 },
17989   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3,  "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, 0 },
17990   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3,  "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, 0 },
17991   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3,  "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, 0 },
17992
17993   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
17994   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
17995   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
17996   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, BUILTIN_DESC_SWAP_OPERANDS },
17997   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, BUILTIN_DESC_SWAP_OPERANDS },
17998   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
17999   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
18000   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
18001   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
18002   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, BUILTIN_DESC_SWAP_OPERANDS },
18003   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, BUILTIN_DESC_SWAP_OPERANDS },
18004   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
18005   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
18006   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
18007   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
18008   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
18009   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
18010   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
18011   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
18012   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
18013
18014   { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, 0 },
18015   { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, 0 },
18016   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, 0 },
18017   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, 0 },
18018
18019   { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, 0 },
18020   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_nandv2df3,  "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, 0 },
18021   { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, 0 },
18022   { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3,  "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, 0 },
18023
18024   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd,  "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, 0 },
18025   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, 0 },
18026   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, 0 },
18027
18028   { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, 0 },
18029
18030   /* SSE2 MMX */
18031   { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, 0 },
18032   { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, 0 },
18033   { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, 0 },
18034   { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, 0 },
18035   { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, 0 },
18036   { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, 0 },
18037   { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, 0 },
18038   { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, 0 },
18039
18040   { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, 0 },
18041   { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, 0 },
18042   { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, 0 },
18043   { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, 0 },
18044   { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, 0 },
18045   { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, 0 },
18046   { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, 0 },
18047   { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, 0 },
18048
18049   { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, 0 },
18050   { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, 0 },
18051
18052   { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, 0 },
18053   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, 0 },
18054   { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, 0 },
18055   { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, 0 },
18056
18057   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, 0 },
18058   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, 0 },
18059
18060   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, 0 },
18061   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, 0 },
18062   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, 0 },
18063   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, 0 },
18064   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, 0 },
18065   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, 0 },
18066
18067   { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, 0 },
18068   { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, 0 },
18069   { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, 0 },
18070   { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, 0 },
18071
18072   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, 0 },
18073   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, 0 },
18074   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, 0 },
18075   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, 0 },
18076   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, 0 },
18077   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, 0 },
18078   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, 0 },
18079   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, 0 },
18080
18081   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, 0 },
18082   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, 0 },
18083   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, 0 },
18084
18085   { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, 0 },
18086   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, UNKNOWN, 0 },
18087
18088   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, UNKNOWN, 0 },
18089   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, UNKNOWN, 0 },
18090
18091   { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, UNKNOWN, 0 },
18092   { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, UNKNOWN, 0 },
18093   { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, UNKNOWN, 0 },
18094
18095   { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, UNKNOWN, 0 },
18096   { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, UNKNOWN, 0 },
18097   { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, UNKNOWN, 0 },
18098
18099   { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, UNKNOWN, 0 },
18100   { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, UNKNOWN, 0 },
18101
18102   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, UNKNOWN, 0 },
18103
18104   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, UNKNOWN, 0 },
18105   { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, UNKNOWN, 0 },
18106   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, UNKNOWN, 0 },
18107   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, UNKNOWN, 0 },
18108
18109   /* SSE3 */
18110   { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, 0 },
18111   { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, 0 },
18112   { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, 0 },
18113   { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, 0 },
18114   { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, 0 },
18115   { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, 0 },
18116
18117   /* SSSE3 */
18118   { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, 0 },
18119   { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, 0 },
18120   { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, 0 },
18121   { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, 0 },
18122   { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, 0 },
18123   { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, 0 },
18124   { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, 0 },
18125   { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, 0 },
18126   { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, 0 },
18127   { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, 0 },
18128   { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, 0 },
18129   { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, 0 },
18130   { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubswv8hi3, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, 0 },
18131   { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubswv4hi3, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, 0 },
18132   { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, 0 },
18133   { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, 0 },
18134   { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, 0 },
18135   { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, 0 },
18136   { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, 0 },
18137   { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, 0 },
18138   { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, 0 },
18139   { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, 0 },
18140   { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, 0 },
18141   { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, 0 },
18142
18143   /* SSE4.1 */
18144   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, 0 },
18145   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, 0 },
18146   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, 0 },
18147   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, 0 },
18148   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, 0 },
18149   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, 0 },
18150   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, 0 },
18151   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, 0 },
18152   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, 0 },
18153   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, 0 },
18154   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, 0, IX86_BUILTIN_PMULDQ128, UNKNOWN, 0 },
18155   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, 0 },
18156
18157   /* SSE4.2 */
18158   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, 0 },
18159 };
18160 /* Descriptors for one-operand builtins.  Columns, in order: OPTION_MASK_ISA_* bits, CODE_FOR_* pattern, builtin name string or 0, IX86_BUILTIN_* id, an rtx code (UNKNOWN in every row here) and a flag word (0 in every row here).  NOTE(review): rows whose name is 0 are presumably declared under explicit names elsewhere -- confirm in the builtin initialization code.  */
18161 static const struct builtin_description bdesc_1arg[] =
18162 {
18163   { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, UNKNOWN, 0 },
18164   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, UNKNOWN, 0 },
18165   /* SSE V4SF sqrt, rsqrt and rcp.  The *_NR ids use the generic sqrtv4sf2/rsqrtv4sf2 patterns rather than the sse_-prefixed ones.  */
18166   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, UNKNOWN, 0 },
18167   { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS_NR, UNKNOWN, 0 },
18168   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, UNKNOWN, 0 },
18169   { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, 0 },
18170   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, UNKNOWN, 0 },
18171   /* SSE cvt* conversions; the *SI64 ids additionally require OPTION_MASK_ISA_64BIT.  */
18172   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, UNKNOWN, 0 },
18173   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, UNKNOWN, 0 },
18174   { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, UNKNOWN, 0 },
18175   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, UNKNOWN, 0 },
18176   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, UNKNOWN, 0 },
18177   { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, 0 },
18178   /* SSE2 counterparts of the two movmsk rows at the top of the table.  */
18179   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, UNKNOWN, 0 },
18180   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, UNKNOWN, 0 },
18181   /* SSE2 V2DF square root.  */
18182   { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, UNKNOWN, 0 },
18183   /* SSE2 cvt* conversions, up to the SSE3 section; the *SI64 ids again add OPTION_MASK_ISA_64BIT.  */
18184   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, UNKNOWN, 0 },
18185   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, UNKNOWN, 0 },
18186
18187   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, UNKNOWN, 0 },
18188   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, UNKNOWN, 0 },
18189   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, UNKNOWN, 0 },
18190   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, 0 },
18191   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, UNKNOWN, 0 },
18192
18193   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, UNKNOWN, 0 },
18194
18195   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, UNKNOWN, 0 },
18196   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, UNKNOWN, 0 },
18197   { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, UNKNOWN, 0 },
18198   { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, 0 },
18199
18200   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, UNKNOWN, 0 },
18201   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, UNKNOWN, 0 },
18202   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, 0 },
18203
18204   /* SSE3 */
18205   { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, 0 },
18206   { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, 0 },
18207
18208   /* SSSE3: absolute value; each element width is listed first in its 128-bit mode (V16QI, V8HI, V4SI), then in its 64-bit mode (V8QI, V4HI, V2SI).  */
18209   { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, 0 },
18210   { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, 0 },
18211   { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, 0 },
18212   { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, 0 },
18213   { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, 0 },
18214   { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, 0 },
18215
18216   /* SSE4.1: the extend* (PMOVSX*) rows, then the zero_extend* (PMOVZX*) rows, then phminposuw.  */
18217   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv8qiv8hi2, 0, IX86_BUILTIN_PMOVSXBW128, UNKNOWN, 0 },
18218   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4qiv4si2, 0, IX86_BUILTIN_PMOVSXBD128, UNKNOWN, 0 },
18219   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2qiv2di2, 0, IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, 0 },
18220   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4hiv4si2, 0, IX86_BUILTIN_PMOVSXWD128, UNKNOWN, 0 },
18221   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2hiv2di2, 0, IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, 0 },
18222   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2siv2di2, 0, IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, 0 },
18223   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, 0, IX86_BUILTIN_PMOVZXBW128, UNKNOWN, 0 },
18224   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, 0, IX86_BUILTIN_PMOVZXBD128, UNKNOWN, 0 },
18225   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, 0, IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, 0 },
18226   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, 0, IX86_BUILTIN_PMOVZXWD128, UNKNOWN, 0 },
18227   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, 0, IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, 0 },
18228   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, 0, IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, 0 },
18229   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, 0 },
18230
18231   /* Fake 1 arg builtins with a constant smaller than 8 bits as the 2nd arg.  */
18232   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundpd, 0, IX86_BUILTIN_ROUNDPD, UNKNOWN, 0 },
18233   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundps, 0, IX86_BUILTIN_ROUNDPS, UNKNOWN, 0 },
18234 };
18235
18236 /* SSE5: tags stored, cast to int, in the last column of each bdesc_multi_arg row.  NOTE(review): the name pattern looks like MULTI_ARG_<operand count>_<element mode>[_<variant>] -- confirm against the code that consumes the tag.  */
18237 enum multi_arg_type {
18238   MULTI_ARG_UNKNOWN,		/* First enumerator, so its value is 0.  */
18239   MULTI_ARG_3_SF,		/* MULTI_ARG_3_*: used in bdesc_multi_arg by the fmadd/fmsub/fnmadd/fnmsub, pcmov, pperm, perm*, pmacs* and pmadcs* rows.  */
18240   MULTI_ARG_3_DF,
18241   MULTI_ARG_3_DI,
18242   MULTI_ARG_3_SI,
18243   MULTI_ARG_3_SI_DI,		/* Used by the pmacssdql/pmacssdqh/pmacsdql/pmacsdqh rows.  */
18244   MULTI_ARG_3_HI,
18245   MULTI_ARG_3_HI_SI,		/* Used by the pmacsswd/pmacswd and pmadcsswd/pmadcswd rows.  */
18246   MULTI_ARG_3_QI,
18247   MULTI_ARG_3_PERMPS,		/* Used by the __builtin_ia32_permps row.  */
18248   MULTI_ARG_3_PERMPD,		/* Used by the __builtin_ia32_permpd row.  */
18249   MULTI_ARG_2_SF,
18250   MULTI_ARG_2_DF,
18251   MULTI_ARG_2_DI,		/* MULTI_ARG_2_{DI,SI,HI,QI}: used by the protq/protd/protw/protb rows.  */
18252   MULTI_ARG_2_SI,
18253   MULTI_ARG_2_HI,
18254   MULTI_ARG_2_QI,
18255   MULTI_ARG_2_DI_IMM,		/* *_IMM: used by the protqi/protdi/protwi rows; presumably the second operand is an immediate -- TODO confirm.  */
18256   MULTI_ARG_2_SI_IMM,
18257   MULTI_ARG_2_HI_IMM,
18258   MULTI_ARG_2_QI_IMM,
18259   MULTI_ARG_2_SF_CMP,		/* *_CMP: presumably two-operand comparison builtins -- TODO confirm against bdesc_multi_arg.  */
18260   MULTI_ARG_2_DF_CMP,
18261   MULTI_ARG_2_DI_CMP,
18262   MULTI_ARG_2_SI_CMP,
18263   MULTI_ARG_2_HI_CMP,
18264   MULTI_ARG_2_QI_CMP,
18265   MULTI_ARG_2_DI_TF,		/* *_TF: NOTE(review): meaning not evident from the names alone -- confirm against bdesc_multi_arg and the expander.  */
18266   MULTI_ARG_2_SI_TF,
18267   MULTI_ARG_2_HI_TF,
18268   MULTI_ARG_2_QI_TF,
18269   MULTI_ARG_2_SF_TF,
18270   MULTI_ARG_2_DF_TF,
18271   MULTI_ARG_1_SF,		/* MULTI_ARG_1_*: presumably single-operand builtins -- TODO confirm.  */
18272   MULTI_ARG_1_DF,
18273   MULTI_ARG_1_DI,
18274   MULTI_ARG_1_SI,
18275   MULTI_ARG_1_HI,
18276   MULTI_ARG_1_QI,
18277   MULTI_ARG_1_SI_DI,		/* Two-mode MULTI_ARG_1_* tags: presumably source and result element modes differ -- TODO confirm.  */
18278   MULTI_ARG_1_HI_DI,
18279   MULTI_ARG_1_HI_SI,
18280   MULTI_ARG_1_QI_DI,
18281   MULTI_ARG_1_QI_SI,
18282   MULTI_ARG_1_QI_HI,
18283   MULTI_ARG_1_PH2PS,
18284   MULTI_ARG_1_PS2PH		/* Last enumerator; value 46.  */
18285 };
18286
18287 static const struct builtin_description bdesc_multi_arg[] =
18288 {
18289   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv4sf4,     "__builtin_ia32_fmaddss",    IX86_BUILTIN_FMADDSS,    0,            (int)MULTI_ARG_3_SF },
18290   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv2df4,     "__builtin_ia32_fmaddsd",    IX86_BUILTIN_FMADDSD,    0,            (int)MULTI_ARG_3_DF },
18291   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv4sf4,       "__builtin_ia32_fmaddps",    IX86_BUILTIN_FMADDPS,    0,            (int)MULTI_ARG_3_SF },
18292   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv2df4,       "__builtin_ia32_fmaddpd",    IX86_BUILTIN_FMADDPD,    0,            (int)MULTI_ARG_3_DF },
18293   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv4sf4,     "__builtin_ia32_fmsubss",    IX86_BUILTIN_FMSUBSS,    0,            (int)MULTI_ARG_3_SF },
18294   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv2df4,     "__builtin_ia32_fmsubsd",    IX86_BUILTIN_FMSUBSD,    0,            (int)MULTI_ARG_3_DF },
18295   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv4sf4,       "__builtin_ia32_fmsubps",    IX86_BUILTIN_FMSUBPS,    0,            (int)MULTI_ARG_3_SF },
18296   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv2df4,       "__builtin_ia32_fmsubpd",    IX86_BUILTIN_FMSUBPD,    0,            (int)MULTI_ARG_3_DF },
18297   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv4sf4,    "__builtin_ia32_fnmaddss",   IX86_BUILTIN_FNMADDSS,   0,            (int)MULTI_ARG_3_SF },
18298   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv2df4,    "__builtin_ia32_fnmaddsd",   IX86_BUILTIN_FNMADDSD,   0,            (int)MULTI_ARG_3_DF },
18299   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv4sf4,      "__builtin_ia32_fnmaddps",   IX86_BUILTIN_FNMADDPS,   0,            (int)MULTI_ARG_3_SF },
18300   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv2df4,      "__builtin_ia32_fnmaddpd",   IX86_BUILTIN_FNMADDPD,   0,            (int)MULTI_ARG_3_DF },
18301   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv4sf4,    "__builtin_ia32_fnmsubss",   IX86_BUILTIN_FNMSUBSS,   0,            (int)MULTI_ARG_3_SF },
18302   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv2df4,    "__builtin_ia32_fnmsubsd",   IX86_BUILTIN_FNMSUBSD,   0,            (int)MULTI_ARG_3_DF },
18303   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv4sf4,      "__builtin_ia32_fnmsubps",   IX86_BUILTIN_FNMSUBPS,   0,            (int)MULTI_ARG_3_SF },
18304   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv2df4,      "__builtin_ia32_fnmsubpd",   IX86_BUILTIN_FNMSUBPD,   0,            (int)MULTI_ARG_3_DF },
18305   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di,        "__builtin_ia32_pcmov",      IX86_BUILTIN_PCMOV_V2DI, 0,            (int)MULTI_ARG_3_DI },
18306   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di,        "__builtin_ia32_pcmov_v2di", IX86_BUILTIN_PCMOV_V2DI, 0,            (int)MULTI_ARG_3_DI },
18307   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4si,        "__builtin_ia32_pcmov_v4si", IX86_BUILTIN_PCMOV_V4SI, 0,            (int)MULTI_ARG_3_SI },
18308   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v8hi,        "__builtin_ia32_pcmov_v8hi", IX86_BUILTIN_PCMOV_V8HI, 0,            (int)MULTI_ARG_3_HI },
18309   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v16qi,       "__builtin_ia32_pcmov_v16qi",IX86_BUILTIN_PCMOV_V16QI,0,            (int)MULTI_ARG_3_QI },
18310   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2df,        "__builtin_ia32_pcmov_v2df", IX86_BUILTIN_PCMOV_V2DF, 0,            (int)MULTI_ARG_3_DF },
18311   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4sf,        "__builtin_ia32_pcmov_v4sf", IX86_BUILTIN_PCMOV_V4SF, 0,            (int)MULTI_ARG_3_SF },
18312   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pperm,             "__builtin_ia32_pperm",      IX86_BUILTIN_PPERM,      0,            (int)MULTI_ARG_3_QI },
18313   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv4sf,          "__builtin_ia32_permps",     IX86_BUILTIN_PERMPS,     0,            (int)MULTI_ARG_3_PERMPS },
18314   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv2df,          "__builtin_ia32_permpd",     IX86_BUILTIN_PERMPD,     0,            (int)MULTI_ARG_3_PERMPD },
18315   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssww,          "__builtin_ia32_pmacssww",   IX86_BUILTIN_PMACSSWW,   0,            (int)MULTI_ARG_3_HI },
18316   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsww,           "__builtin_ia32_pmacsww",    IX86_BUILTIN_PMACSWW,    0,            (int)MULTI_ARG_3_HI },
18317   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsswd,          "__builtin_ia32_pmacsswd",   IX86_BUILTIN_PMACSSWD,   0,            (int)MULTI_ARG_3_HI_SI },
18318   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacswd,           "__builtin_ia32_pmacswd",    IX86_BUILTIN_PMACSWD,    0,            (int)MULTI_ARG_3_HI_SI },
18319   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdd,          "__builtin_ia32_pmacssdd",   IX86_BUILTIN_PMACSSDD,   0,            (int)MULTI_ARG_3_SI },
18320   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdd,           "__builtin_ia32_pmacsdd",    IX86_BUILTIN_PMACSDD,    0,            (int)MULTI_ARG_3_SI },
18321   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdql,         "__builtin_ia32_pmacssdql",  IX86_BUILTIN_PMACSSDQL,  0,            (int)MULTI_ARG_3_SI_DI },
18322   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdqh,         "__builtin_ia32_pmacssdqh",  IX86_BUILTIN_PMACSSDQH,  0,            (int)MULTI_ARG_3_SI_DI },
18323   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdql,          "__builtin_ia32_pmacsdql",   IX86_BUILTIN_PMACSDQL,   0,            (int)MULTI_ARG_3_SI_DI },
18324   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdqh,          "__builtin_ia32_pmacsdqh",   IX86_BUILTIN_PMACSDQH,   0,            (int)MULTI_ARG_3_SI_DI },
18325   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcsswd,         "__builtin_ia32_pmadcsswd",  IX86_BUILTIN_PMADCSSWD,  0,            (int)MULTI_ARG_3_HI_SI },
18326   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcswd,          "__builtin_ia32_pmadcswd",   IX86_BUILTIN_PMADCSWD,   0,            (int)MULTI_ARG_3_HI_SI },
18327   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv2di3,         "__builtin_ia32_protq",      IX86_BUILTIN_PROTQ,      0,            (int)MULTI_ARG_2_DI },
18328   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv4si3,         "__builtin_ia32_protd",      IX86_BUILTIN_PROTD,      0,            (int)MULTI_ARG_2_SI },
18329   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv8hi3,         "__builtin_ia32_protw",      IX86_BUILTIN_PROTW,      0,            (int)MULTI_ARG_2_HI },
18330   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv16qi3,        "__builtin_ia32_protb",      IX86_BUILTIN_PROTB,      0,            (int)MULTI_ARG_2_QI },
18331   { OPTION_MASK_ISA_SSE5, CODE_FOR_rotlv2di3,              "__builtin_ia32_protqi",     IX86_BUILTIN_PROTQ_IMM,  0,            (int)MULTI_ARG_2_DI_IMM },
18332   { OPTION_MASK_ISA_SSE5, CODE_FOR_rotlv4si3,              "__builtin_ia32_protdi",     IX86_BUILTIN_PROTD_IMM,  0,            (int)MULTI_ARG_2_SI_IMM },
18333   { OPTION_MASK_ISA_SSE5, CODE_FOR_rotlv8hi3,              "__builtin_ia32_protwi",     IX86_BUILTIN_PROTW_IMM,  0,            (int)MULTI_ARG_2_HI_IMM },
18334   { OPTION_MASK_ISA_SSE5, CODE_FOR_rotlv16qi3,             "__builtin_ia32_protbi",     IX86_BUILTIN_PROTB_IMM,  0,            (int)MULTI_ARG_2_QI_IMM },
18335   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv2di3,         "__builtin_ia32_pshaq",      IX86_BUILTIN_PSHAQ,      0,            (int)MULTI_ARG_2_DI },
18336   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv4si3,         "__builtin_ia32_pshad",      IX86_BUILTIN_PSHAD,      0,            (int)MULTI_ARG_2_SI },
18337   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv8hi3,         "__builtin_ia32_pshaw",      IX86_BUILTIN_PSHAW,      0,            (int)MULTI_ARG_2_HI },
18338   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv16qi3,        "__builtin_ia32_pshab",      IX86_BUILTIN_PSHAB,      0,            (int)MULTI_ARG_2_QI },
18339   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv2di3,         "__builtin_ia32_pshlq",      IX86_BUILTIN_PSHLQ,      0,            (int)MULTI_ARG_2_DI },
18340   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv4si3,         "__builtin_ia32_pshld",      IX86_BUILTIN_PSHLD,      0,            (int)MULTI_ARG_2_SI },
18341   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv8hi3,         "__builtin_ia32_pshlw",      IX86_BUILTIN_PSHLW,      0,            (int)MULTI_ARG_2_HI },
18342   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv16qi3,        "__builtin_ia32_pshlb",      IX86_BUILTIN_PSHLB,      0,            (int)MULTI_ARG_2_QI },
18343   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv4sf2,       "__builtin_ia32_frczss",     IX86_BUILTIN_FRCZSS,     0,            (int)MULTI_ARG_2_SF },
18344   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv2df2,       "__builtin_ia32_frczsd",     IX86_BUILTIN_FRCZSD,     0,            (int)MULTI_ARG_2_DF },
18345   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv4sf2,         "__builtin_ia32_frczps",     IX86_BUILTIN_FRCZPS,     0,            (int)MULTI_ARG_1_SF },
18346   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv2df2,         "__builtin_ia32_frczpd",     IX86_BUILTIN_FRCZPD,     0,            (int)MULTI_ARG_1_DF },
18347   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtph2ps,          "__builtin_ia32_cvtph2ps",   IX86_BUILTIN_CVTPH2PS,   0,            (int)MULTI_ARG_1_PH2PS },
18348   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtps2ph,          "__builtin_ia32_cvtps2ph",   IX86_BUILTIN_CVTPS2PH,   0,            (int)MULTI_ARG_1_PS2PH },
18349   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbw,           "__builtin_ia32_phaddbw",    IX86_BUILTIN_PHADDBW,    0,            (int)MULTI_ARG_1_QI_HI },
18350   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbd,           "__builtin_ia32_phaddbd",    IX86_BUILTIN_PHADDBD,    0,            (int)MULTI_ARG_1_QI_SI },
18351   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbq,           "__builtin_ia32_phaddbq",    IX86_BUILTIN_PHADDBQ,    0,            (int)MULTI_ARG_1_QI_DI },
18352   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwd,           "__builtin_ia32_phaddwd",    IX86_BUILTIN_PHADDWD,    0,            (int)MULTI_ARG_1_HI_SI },
18353   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwq,           "__builtin_ia32_phaddwq",    IX86_BUILTIN_PHADDWQ,    0,            (int)MULTI_ARG_1_HI_DI },
18354   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadddq,           "__builtin_ia32_phadddq",    IX86_BUILTIN_PHADDDQ,    0,            (int)MULTI_ARG_1_SI_DI },
18355   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubw,          "__builtin_ia32_phaddubw",   IX86_BUILTIN_PHADDUBW,   0,            (int)MULTI_ARG_1_QI_HI },
18356   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubd,          "__builtin_ia32_phaddubd",   IX86_BUILTIN_PHADDUBD,   0,            (int)MULTI_ARG_1_QI_SI },
18357   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubq,          "__builtin_ia32_phaddubq",   IX86_BUILTIN_PHADDUBQ,   0,            (int)MULTI_ARG_1_QI_DI },
18358   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwd,          "__builtin_ia32_phadduwd",   IX86_BUILTIN_PHADDUWD,   0,            (int)MULTI_ARG_1_HI_SI },
18359   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwq,          "__builtin_ia32_phadduwq",   IX86_BUILTIN_PHADDUWQ,   0,            (int)MULTI_ARG_1_HI_DI },
18360   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddudq,          "__builtin_ia32_phaddudq",   IX86_BUILTIN_PHADDUDQ,   0,            (int)MULTI_ARG_1_SI_DI },
18361   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubbw,           "__builtin_ia32_phsubbw",    IX86_BUILTIN_PHSUBBW,    0,            (int)MULTI_ARG_1_QI_HI },
18362   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubwd,           "__builtin_ia32_phsubwd",    IX86_BUILTIN_PHSUBWD,    0,            (int)MULTI_ARG_1_HI_SI },
18363   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubdq,           "__builtin_ia32_phsubdq",    IX86_BUILTIN_PHSUBDQ,    0,            (int)MULTI_ARG_1_SI_DI },
18364
18365   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3,    "__builtin_ia32_comeqss",    IX86_BUILTIN_COMEQSS,    EQ,           (int)MULTI_ARG_2_SF_CMP },
18366   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3,    "__builtin_ia32_comness",    IX86_BUILTIN_COMNESS,    NE,           (int)MULTI_ARG_2_SF_CMP },
18367   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3,    "__builtin_ia32_comneqss",   IX86_BUILTIN_COMNESS,    NE,           (int)MULTI_ARG_2_SF_CMP },
18368   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3,    "__builtin_ia32_comltss",    IX86_BUILTIN_COMLTSS,    LT,           (int)MULTI_ARG_2_SF_CMP },
18369   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3,    "__builtin_ia32_comless",    IX86_BUILTIN_COMLESS,    LE,           (int)MULTI_ARG_2_SF_CMP },
18370   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3,    "__builtin_ia32_comgtss",    IX86_BUILTIN_COMGTSS,    GT,           (int)MULTI_ARG_2_SF_CMP },
18371   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3,    "__builtin_ia32_comgess",    IX86_BUILTIN_COMGESS,    GE,           (int)MULTI_ARG_2_SF_CMP },
18372   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3,    "__builtin_ia32_comueqss",   IX86_BUILTIN_COMUEQSS,   UNEQ,         (int)MULTI_ARG_2_SF_CMP },
18373   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3,    "__builtin_ia32_comuness",   IX86_BUILTIN_COMUNESS,   LTGT,         (int)MULTI_ARG_2_SF_CMP },
18374   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3,    "__builtin_ia32_comuneqss",  IX86_BUILTIN_COMUNESS,   LTGT,         (int)MULTI_ARG_2_SF_CMP },
18375   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3,    "__builtin_ia32_comunltss",  IX86_BUILTIN_COMULTSS,   UNLT,         (int)MULTI_ARG_2_SF_CMP },
18376   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3,    "__builtin_ia32_comunless",  IX86_BUILTIN_COMULESS,   UNLE,         (int)MULTI_ARG_2_SF_CMP },
18377   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3,    "__builtin_ia32_comungtss",  IX86_BUILTIN_COMUGTSS,   UNGT,         (int)MULTI_ARG_2_SF_CMP },
18378   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3,    "__builtin_ia32_comungess",  IX86_BUILTIN_COMUGESS,   UNGE,         (int)MULTI_ARG_2_SF_CMP },
18379   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3,    "__builtin_ia32_comordss",   IX86_BUILTIN_COMORDSS,   ORDERED,      (int)MULTI_ARG_2_SF_CMP },
18380   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3,    "__builtin_ia32_comunordss", IX86_BUILTIN_COMUNORDSS, UNORDERED,    (int)MULTI_ARG_2_SF_CMP },
18381
18382   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3,    "__builtin_ia32_comeqsd",    IX86_BUILTIN_COMEQSD,    EQ,           (int)MULTI_ARG_2_DF_CMP },
18383   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3,    "__builtin_ia32_comnesd",    IX86_BUILTIN_COMNESD,    NE,           (int)MULTI_ARG_2_DF_CMP },
18384   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3,    "__builtin_ia32_comneqsd",   IX86_BUILTIN_COMNESD,    NE,           (int)MULTI_ARG_2_DF_CMP },
18385   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3,    "__builtin_ia32_comltsd",    IX86_BUILTIN_COMLTSD,    LT,           (int)MULTI_ARG_2_DF_CMP },
18386   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3,    "__builtin_ia32_comlesd",    IX86_BUILTIN_COMLESD,    LE,           (int)MULTI_ARG_2_DF_CMP },
18387   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3,    "__builtin_ia32_comgtsd",    IX86_BUILTIN_COMGTSD,    GT,           (int)MULTI_ARG_2_DF_CMP },
18388   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3,    "__builtin_ia32_comgesd",    IX86_BUILTIN_COMGESD,    GE,           (int)MULTI_ARG_2_DF_CMP },
18389   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3,    "__builtin_ia32_comueqsd",   IX86_BUILTIN_COMUEQSD,   UNEQ,         (int)MULTI_ARG_2_DF_CMP },
18390   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3,    "__builtin_ia32_comunesd",   IX86_BUILTIN_COMUNESD,   LTGT,         (int)MULTI_ARG_2_DF_CMP },
18391   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3,    "__builtin_ia32_comuneqsd",  IX86_BUILTIN_COMUNESD,   LTGT,         (int)MULTI_ARG_2_DF_CMP },
18392   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3,    "__builtin_ia32_comunltsd",  IX86_BUILTIN_COMULTSD,   UNLT,         (int)MULTI_ARG_2_DF_CMP },
18393   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3,    "__builtin_ia32_comunlesd",  IX86_BUILTIN_COMULESD,   UNLE,         (int)MULTI_ARG_2_DF_CMP },
18394   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3,    "__builtin_ia32_comungtsd",  IX86_BUILTIN_COMUGTSD,   UNGT,         (int)MULTI_ARG_2_DF_CMP },
18395   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3,    "__builtin_ia32_comungesd",  IX86_BUILTIN_COMUGESD,   UNGE,         (int)MULTI_ARG_2_DF_CMP },
18396   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3,    "__builtin_ia32_comordsd",   IX86_BUILTIN_COMORDSD,   ORDERED,      (int)MULTI_ARG_2_DF_CMP },
18397   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3,    "__builtin_ia32_comunordsd", IX86_BUILTIN_COMUNORDSD, UNORDERED,    (int)MULTI_ARG_2_DF_CMP },
18398
18399   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3,      "__builtin_ia32_comeqps",    IX86_BUILTIN_COMEQPS,    EQ,           (int)MULTI_ARG_2_SF_CMP },
18400   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3,      "__builtin_ia32_comneps",    IX86_BUILTIN_COMNEPS,    NE,           (int)MULTI_ARG_2_SF_CMP },
18401   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3,      "__builtin_ia32_comneqps",   IX86_BUILTIN_COMNEPS,    NE,           (int)MULTI_ARG_2_SF_CMP },
18402   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3,      "__builtin_ia32_comltps",    IX86_BUILTIN_COMLTPS,    LT,           (int)MULTI_ARG_2_SF_CMP },
18403   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3,      "__builtin_ia32_comleps",    IX86_BUILTIN_COMLEPS,    LE,           (int)MULTI_ARG_2_SF_CMP },
18404   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3,      "__builtin_ia32_comgtps",    IX86_BUILTIN_COMGTPS,    GT,           (int)MULTI_ARG_2_SF_CMP },
18405   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3,      "__builtin_ia32_comgeps",    IX86_BUILTIN_COMGEPS,    GE,           (int)MULTI_ARG_2_SF_CMP },
18406   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3,      "__builtin_ia32_comueqps",   IX86_BUILTIN_COMUEQPS,   UNEQ,         (int)MULTI_ARG_2_SF_CMP },
18407   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3,      "__builtin_ia32_comuneps",   IX86_BUILTIN_COMUNEPS,   LTGT,         (int)MULTI_ARG_2_SF_CMP },
18408   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3,      "__builtin_ia32_comuneqps",  IX86_BUILTIN_COMUNEPS,   LTGT,         (int)MULTI_ARG_2_SF_CMP },
18409   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3,      "__builtin_ia32_comunltps",  IX86_BUILTIN_COMULTPS,   UNLT,         (int)MULTI_ARG_2_SF_CMP },
18410   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3,      "__builtin_ia32_comunleps",  IX86_BUILTIN_COMULEPS,   UNLE,         (int)MULTI_ARG_2_SF_CMP },
18411   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3,      "__builtin_ia32_comungtps",  IX86_BUILTIN_COMUGTPS,   UNGT,         (int)MULTI_ARG_2_SF_CMP },
18412   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3,      "__builtin_ia32_comungeps",  IX86_BUILTIN_COMUGEPS,   UNGE,         (int)MULTI_ARG_2_SF_CMP },
18413   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3,      "__builtin_ia32_comordps",   IX86_BUILTIN_COMORDPS,   ORDERED,      (int)MULTI_ARG_2_SF_CMP },
18414   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3,      "__builtin_ia32_comunordps", IX86_BUILTIN_COMUNORDPS, UNORDERED,    (int)MULTI_ARG_2_SF_CMP },
18415
18416   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3,      "__builtin_ia32_comeqpd",    IX86_BUILTIN_COMEQPD,    EQ,           (int)MULTI_ARG_2_DF_CMP },
18417   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3,      "__builtin_ia32_comnepd",    IX86_BUILTIN_COMNEPD,    NE,           (int)MULTI_ARG_2_DF_CMP },
18418   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3,      "__builtin_ia32_comneqpd",   IX86_BUILTIN_COMNEPD,    NE,           (int)MULTI_ARG_2_DF_CMP },
18419   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3,      "__builtin_ia32_comltpd",    IX86_BUILTIN_COMLTPD,    LT,           (int)MULTI_ARG_2_DF_CMP },
18420   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3,      "__builtin_ia32_comlepd",    IX86_BUILTIN_COMLEPD,    LE,           (int)MULTI_ARG_2_DF_CMP },
18421   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3,      "__builtin_ia32_comgtpd",    IX86_BUILTIN_COMGTPD,    GT,           (int)MULTI_ARG_2_DF_CMP },
18422   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3,      "__builtin_ia32_comgepd",    IX86_BUILTIN_COMGEPD,    GE,           (int)MULTI_ARG_2_DF_CMP },
18423   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3,      "__builtin_ia32_comueqpd",   IX86_BUILTIN_COMUEQPD,   UNEQ,         (int)MULTI_ARG_2_DF_CMP },
18424   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3,      "__builtin_ia32_comunepd",   IX86_BUILTIN_COMUNEPD,   LTGT,         (int)MULTI_ARG_2_DF_CMP },
18425   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3,      "__builtin_ia32_comuneqpd",  IX86_BUILTIN_COMUNEPD,   LTGT,         (int)MULTI_ARG_2_DF_CMP },
18426   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3,      "__builtin_ia32_comunltpd",  IX86_BUILTIN_COMULTPD,   UNLT,         (int)MULTI_ARG_2_DF_CMP },
18427   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3,      "__builtin_ia32_comunlepd",  IX86_BUILTIN_COMULEPD,   UNLE,         (int)MULTI_ARG_2_DF_CMP },
18428   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3,      "__builtin_ia32_comungtpd",  IX86_BUILTIN_COMUGTPD,   UNGT,         (int)MULTI_ARG_2_DF_CMP },
18429   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3,      "__builtin_ia32_comungepd",  IX86_BUILTIN_COMUGEPD,   UNGE,         (int)MULTI_ARG_2_DF_CMP },
18430   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3,      "__builtin_ia32_comordpd",   IX86_BUILTIN_COMORDPD,   ORDERED,      (int)MULTI_ARG_2_DF_CMP },
18431   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3,      "__builtin_ia32_comunordpd", IX86_BUILTIN_COMUNORDPD, UNORDERED,    (int)MULTI_ARG_2_DF_CMP },
18432
18433   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3,     "__builtin_ia32_pcomeqb",    IX86_BUILTIN_PCOMEQB,    EQ,           (int)MULTI_ARG_2_QI_CMP },
18434   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3,     "__builtin_ia32_pcomneb",    IX86_BUILTIN_PCOMNEB,    NE,           (int)MULTI_ARG_2_QI_CMP },
18435   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3,     "__builtin_ia32_pcomneqb",   IX86_BUILTIN_PCOMNEB,    NE,           (int)MULTI_ARG_2_QI_CMP },
18436   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3,     "__builtin_ia32_pcomltb",    IX86_BUILTIN_PCOMLTB,    LT,           (int)MULTI_ARG_2_QI_CMP },
18437   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3,     "__builtin_ia32_pcomleb",    IX86_BUILTIN_PCOMLEB,    LE,           (int)MULTI_ARG_2_QI_CMP },
18438   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3,     "__builtin_ia32_pcomgtb",    IX86_BUILTIN_PCOMGTB,    GT,           (int)MULTI_ARG_2_QI_CMP },
18439   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3,     "__builtin_ia32_pcomgeb",    IX86_BUILTIN_PCOMGEB,    GE,           (int)MULTI_ARG_2_QI_CMP },
18440
18441   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3,      "__builtin_ia32_pcomeqw",    IX86_BUILTIN_PCOMEQW,    EQ,           (int)MULTI_ARG_2_HI_CMP },
18442   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3,      "__builtin_ia32_pcomnew",    IX86_BUILTIN_PCOMNEW,    NE,           (int)MULTI_ARG_2_HI_CMP },
18443   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3,      "__builtin_ia32_pcomneqw",   IX86_BUILTIN_PCOMNEW,    NE,           (int)MULTI_ARG_2_HI_CMP },
18444   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3,      "__builtin_ia32_pcomltw",    IX86_BUILTIN_PCOMLTW,    LT,           (int)MULTI_ARG_2_HI_CMP },
18445   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3,      "__builtin_ia32_pcomlew",    IX86_BUILTIN_PCOMLEW,    LE,           (int)MULTI_ARG_2_HI_CMP },
18446   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3,      "__builtin_ia32_pcomgtw",    IX86_BUILTIN_PCOMGTW,    GT,           (int)MULTI_ARG_2_HI_CMP },
18447   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3,      "__builtin_ia32_pcomgew",    IX86_BUILTIN_PCOMGEW,    GE,           (int)MULTI_ARG_2_HI_CMP },
18448
18449   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3,      "__builtin_ia32_pcomeqd",    IX86_BUILTIN_PCOMEQD,    EQ,           (int)MULTI_ARG_2_SI_CMP },
18450   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3,      "__builtin_ia32_pcomned",    IX86_BUILTIN_PCOMNED,    NE,           (int)MULTI_ARG_2_SI_CMP },
18451   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3,      "__builtin_ia32_pcomneqd",   IX86_BUILTIN_PCOMNED,    NE,           (int)MULTI_ARG_2_SI_CMP },
18452   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3,      "__builtin_ia32_pcomltd",    IX86_BUILTIN_PCOMLTD,    LT,           (int)MULTI_ARG_2_SI_CMP },
18453   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3,      "__builtin_ia32_pcomled",    IX86_BUILTIN_PCOMLED,    LE,           (int)MULTI_ARG_2_SI_CMP },
18454   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3,      "__builtin_ia32_pcomgtd",    IX86_BUILTIN_PCOMGTD,    GT,           (int)MULTI_ARG_2_SI_CMP },
18455   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3,      "__builtin_ia32_pcomged",    IX86_BUILTIN_PCOMGED,    GE,           (int)MULTI_ARG_2_SI_CMP },
18456
18457   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3,      "__builtin_ia32_pcomeqq",    IX86_BUILTIN_PCOMEQQ,    EQ,           (int)MULTI_ARG_2_DI_CMP },
18458   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3,      "__builtin_ia32_pcomneq",    IX86_BUILTIN_PCOMNEQ,    NE,           (int)MULTI_ARG_2_DI_CMP },
18459   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3,      "__builtin_ia32_pcomneqq",   IX86_BUILTIN_PCOMNEQ,    NE,           (int)MULTI_ARG_2_DI_CMP },
18460   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3,      "__builtin_ia32_pcomltq",    IX86_BUILTIN_PCOMLTQ,    LT,           (int)MULTI_ARG_2_DI_CMP },
18461   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3,      "__builtin_ia32_pcomleq",    IX86_BUILTIN_PCOMLEQ,    LE,           (int)MULTI_ARG_2_DI_CMP },
18462   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3,      "__builtin_ia32_pcomgtq",    IX86_BUILTIN_PCOMGTQ,    GT,           (int)MULTI_ARG_2_DI_CMP },
18463   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3,      "__builtin_ia32_pcomgeq",    IX86_BUILTIN_PCOMGEQ,    GE,           (int)MULTI_ARG_2_DI_CMP },
18464
18465   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomequb",   IX86_BUILTIN_PCOMEQUB,   EQ,           (int)MULTI_ARG_2_QI_CMP },
18466   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomneub",   IX86_BUILTIN_PCOMNEUB,   NE,           (int)MULTI_ARG_2_QI_CMP },
18467   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomnequb",  IX86_BUILTIN_PCOMNEUB,   NE,           (int)MULTI_ARG_2_QI_CMP },
18468   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomltub",   IX86_BUILTIN_PCOMLTUB,   LTU,          (int)MULTI_ARG_2_QI_CMP },
18469   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomleub",   IX86_BUILTIN_PCOMLEUB,   LEU,          (int)MULTI_ARG_2_QI_CMP },
18470   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgtub",   IX86_BUILTIN_PCOMGTUB,   GTU,          (int)MULTI_ARG_2_QI_CMP },
18471   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgeub",   IX86_BUILTIN_PCOMGEUB,   GEU,          (int)MULTI_ARG_2_QI_CMP },
18472
18473   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomequw",   IX86_BUILTIN_PCOMEQUW,   EQ,           (int)MULTI_ARG_2_HI_CMP },
18474   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomneuw",   IX86_BUILTIN_PCOMNEUW,   NE,           (int)MULTI_ARG_2_HI_CMP },
18475   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomnequw",  IX86_BUILTIN_PCOMNEUW,   NE,           (int)MULTI_ARG_2_HI_CMP },
18476   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3,  "__builtin_ia32_pcomltuw",   IX86_BUILTIN_PCOMLTUW,   LTU,          (int)MULTI_ARG_2_HI_CMP },
18477   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3,  "__builtin_ia32_pcomleuw",   IX86_BUILTIN_PCOMLEUW,   LEU,          (int)MULTI_ARG_2_HI_CMP },
18478   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3,  "__builtin_ia32_pcomgtuw",   IX86_BUILTIN_PCOMGTUW,   GTU,          (int)MULTI_ARG_2_HI_CMP },
18479   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3,  "__builtin_ia32_pcomgeuw",   IX86_BUILTIN_PCOMGEUW,   GEU,          (int)MULTI_ARG_2_HI_CMP },
18480
18481   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomequd",   IX86_BUILTIN_PCOMEQUD,   EQ,           (int)MULTI_ARG_2_SI_CMP },
18482   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomneud",   IX86_BUILTIN_PCOMNEUD,   NE,           (int)MULTI_ARG_2_SI_CMP },
18483   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomnequd",  IX86_BUILTIN_PCOMNEUD,   NE,           (int)MULTI_ARG_2_SI_CMP },
18484   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3,  "__builtin_ia32_pcomltud",   IX86_BUILTIN_PCOMLTUD,   LTU,          (int)MULTI_ARG_2_SI_CMP },
18485   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3,  "__builtin_ia32_pcomleud",   IX86_BUILTIN_PCOMLEUD,   LEU,          (int)MULTI_ARG_2_SI_CMP },
18486   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3,  "__builtin_ia32_pcomgtud",   IX86_BUILTIN_PCOMGTUD,   GTU,          (int)MULTI_ARG_2_SI_CMP },
18487   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3,  "__builtin_ia32_pcomgeud",   IX86_BUILTIN_PCOMGEUD,   GEU,          (int)MULTI_ARG_2_SI_CMP },
18488
18489   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomequq",   IX86_BUILTIN_PCOMEQUQ,   EQ,           (int)MULTI_ARG_2_DI_CMP },
18490   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomneuq",   IX86_BUILTIN_PCOMNEUQ,   NE,           (int)MULTI_ARG_2_DI_CMP },
18491   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomnequq",  IX86_BUILTIN_PCOMNEUQ,   NE,           (int)MULTI_ARG_2_DI_CMP },
18492   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3,  "__builtin_ia32_pcomltuq",   IX86_BUILTIN_PCOMLTUQ,   LTU,          (int)MULTI_ARG_2_DI_CMP },
18493   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3,  "__builtin_ia32_pcomleuq",   IX86_BUILTIN_PCOMLEUQ,   LEU,          (int)MULTI_ARG_2_DI_CMP },
18494   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3,  "__builtin_ia32_pcomgtuq",   IX86_BUILTIN_PCOMGTUQ,   GTU,          (int)MULTI_ARG_2_DI_CMP },
18495   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3,  "__builtin_ia32_pcomgeuq",   IX86_BUILTIN_PCOMGEUQ,   GEU,          (int)MULTI_ARG_2_DI_CMP },
18496
18497   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3,       "__builtin_ia32_comfalsess", IX86_BUILTIN_COMFALSESS, COM_FALSE_S,  (int)MULTI_ARG_2_SF_TF },
18498   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3,       "__builtin_ia32_comtruess",  IX86_BUILTIN_COMTRUESS,  COM_TRUE_S,   (int)MULTI_ARG_2_SF_TF },
18499   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3,       "__builtin_ia32_comfalseps", IX86_BUILTIN_COMFALSEPS, COM_FALSE_P,  (int)MULTI_ARG_2_SF_TF },
18500   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3,       "__builtin_ia32_comtrueps",  IX86_BUILTIN_COMTRUEPS,  COM_TRUE_P,   (int)MULTI_ARG_2_SF_TF },
18501   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3,       "__builtin_ia32_comfalsesd", IX86_BUILTIN_COMFALSESD, COM_FALSE_S,  (int)MULTI_ARG_2_DF_TF },
18502   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3,       "__builtin_ia32_comtruesd",  IX86_BUILTIN_COMTRUESD,  COM_TRUE_S,   (int)MULTI_ARG_2_DF_TF },
18503   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3,       "__builtin_ia32_comfalsepd", IX86_BUILTIN_COMFALSEPD, COM_FALSE_P,  (int)MULTI_ARG_2_DF_TF },
18504   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3,       "__builtin_ia32_comtruepd",  IX86_BUILTIN_COMTRUEPD,  COM_TRUE_P,   (int)MULTI_ARG_2_DF_TF },
18505
18506   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3,     "__builtin_ia32_pcomfalseb", IX86_BUILTIN_PCOMFALSEB, PCOM_FALSE,   (int)MULTI_ARG_2_QI_TF },
18507   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3,      "__builtin_ia32_pcomfalsew", IX86_BUILTIN_PCOMFALSEW, PCOM_FALSE,   (int)MULTI_ARG_2_HI_TF },
18508   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3,      "__builtin_ia32_pcomfalsed", IX86_BUILTIN_PCOMFALSED, PCOM_FALSE,   (int)MULTI_ARG_2_SI_TF },
18509   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3,      "__builtin_ia32_pcomfalseq", IX86_BUILTIN_PCOMFALSEQ, PCOM_FALSE,   (int)MULTI_ARG_2_DI_TF },
18510   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3,     "__builtin_ia32_pcomfalseub",IX86_BUILTIN_PCOMFALSEUB,PCOM_FALSE,   (int)MULTI_ARG_2_QI_TF },
18511   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3,      "__builtin_ia32_pcomfalseuw",IX86_BUILTIN_PCOMFALSEUW,PCOM_FALSE,   (int)MULTI_ARG_2_HI_TF },
18512   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3,      "__builtin_ia32_pcomfalseud",IX86_BUILTIN_PCOMFALSEUD,PCOM_FALSE,   (int)MULTI_ARG_2_SI_TF },
18513   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3,      "__builtin_ia32_pcomfalseuq",IX86_BUILTIN_PCOMFALSEUQ,PCOM_FALSE,   (int)MULTI_ARG_2_DI_TF },
18514
18515   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3,     "__builtin_ia32_pcomtrueb",  IX86_BUILTIN_PCOMTRUEB,  PCOM_TRUE,    (int)MULTI_ARG_2_QI_TF },
18516   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3,      "__builtin_ia32_pcomtruew",  IX86_BUILTIN_PCOMTRUEW,  PCOM_TRUE,    (int)MULTI_ARG_2_HI_TF },
18517   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3,      "__builtin_ia32_pcomtrued",  IX86_BUILTIN_PCOMTRUED,  PCOM_TRUE,    (int)MULTI_ARG_2_SI_TF },
18518   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3,      "__builtin_ia32_pcomtrueq",  IX86_BUILTIN_PCOMTRUEQ,  PCOM_TRUE,    (int)MULTI_ARG_2_DI_TF },
18519   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3,     "__builtin_ia32_pcomtrueub", IX86_BUILTIN_PCOMTRUEUB, PCOM_TRUE,    (int)MULTI_ARG_2_QI_TF },
18520   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3,      "__builtin_ia32_pcomtrueuw", IX86_BUILTIN_PCOMTRUEUW, PCOM_TRUE,    (int)MULTI_ARG_2_HI_TF },
18521   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3,      "__builtin_ia32_pcomtrueud", IX86_BUILTIN_PCOMTRUEUD, PCOM_TRUE,    (int)MULTI_ARG_2_SI_TF },
18522   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3,      "__builtin_ia32_pcomtrueuq", IX86_BUILTIN_PCOMTRUEUQ, PCOM_TRUE,    (int)MULTI_ARG_2_DI_TF },
18523 };
18524
18525 /* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
18526    is zero.  Otherwise, if TARGET_SSE is not set, only expand the MMX
18527    builtins.  */
18528 static void
18529 ix86_init_mmx_sse_builtins (void)
18530 {
18531   const struct builtin_description * d;
18532   size_t i;
18533
18534   tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
18535   tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
18536   tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
18537   tree V2DI_type_node
18538     = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
18539   tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
18540   tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
18541   tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
18542   tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
18543   tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode);
18544   tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
18545
18546   tree pchar_type_node = build_pointer_type (char_type_node);
18547   tree pcchar_type_node = build_pointer_type (
18548                              build_type_variant (char_type_node, 1, 0));
18549   tree pfloat_type_node = build_pointer_type (float_type_node);
18550   tree pcfloat_type_node = build_pointer_type (
18551                              build_type_variant (float_type_node, 1, 0));
18552   tree pv2si_type_node = build_pointer_type (V2SI_type_node);
18553   tree pv2di_type_node = build_pointer_type (V2DI_type_node);
18554   tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
18555
18556   /* Comparisons.  */
18557   tree int_ftype_v4sf_v4sf
18558     = build_function_type_list (integer_type_node,
18559                                 V4SF_type_node, V4SF_type_node, NULL_TREE);
18560   tree v4si_ftype_v4sf_v4sf
18561     = build_function_type_list (V4SI_type_node,
18562                                 V4SF_type_node, V4SF_type_node, NULL_TREE);
18563   /* MMX/SSE/integer conversions.  */
18564   tree int_ftype_v4sf
18565     = build_function_type_list (integer_type_node,
18566                                 V4SF_type_node, NULL_TREE);
18567   tree int64_ftype_v4sf
18568     = build_function_type_list (long_long_integer_type_node,
18569                                 V4SF_type_node, NULL_TREE);
18570   tree int_ftype_v8qi
18571     = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
18572   tree v4sf_ftype_v4sf_int
18573     = build_function_type_list (V4SF_type_node,
18574                                 V4SF_type_node, integer_type_node, NULL_TREE);
18575   tree v4sf_ftype_v4sf_int64
18576     = build_function_type_list (V4SF_type_node,
18577                                 V4SF_type_node, long_long_integer_type_node,
18578                                 NULL_TREE);
18579   tree v4sf_ftype_v4sf_v2si
18580     = build_function_type_list (V4SF_type_node,
18581                                 V4SF_type_node, V2SI_type_node, NULL_TREE);
18582
18583   /* Miscellaneous.  */
18584   tree v8qi_ftype_v4hi_v4hi
18585     = build_function_type_list (V8QI_type_node,
18586                                 V4HI_type_node, V4HI_type_node, NULL_TREE);
18587   tree v4hi_ftype_v2si_v2si
18588     = build_function_type_list (V4HI_type_node,
18589                                 V2SI_type_node, V2SI_type_node, NULL_TREE);
18590   tree v4sf_ftype_v4sf_v4sf_int
18591     = build_function_type_list (V4SF_type_node,
18592                                 V4SF_type_node, V4SF_type_node,
18593                                 integer_type_node, NULL_TREE);
18594   tree v2si_ftype_v4hi_v4hi
18595     = build_function_type_list (V2SI_type_node,
18596                                 V4HI_type_node, V4HI_type_node, NULL_TREE);
18597   tree v4hi_ftype_v4hi_int
18598     = build_function_type_list (V4HI_type_node,
18599                                 V4HI_type_node, integer_type_node, NULL_TREE);
18600   tree v4hi_ftype_v4hi_di
18601     = build_function_type_list (V4HI_type_node,
18602                                 V4HI_type_node, long_long_unsigned_type_node,
18603                                 NULL_TREE);
18604   tree v2si_ftype_v2si_di
18605     = build_function_type_list (V2SI_type_node,
18606                                 V2SI_type_node, long_long_unsigned_type_node,
18607                                 NULL_TREE);
18608   tree void_ftype_void
18609     = build_function_type (void_type_node, void_list_node);
18610   tree void_ftype_unsigned
18611     = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
18612   tree void_ftype_unsigned_unsigned
18613     = build_function_type_list (void_type_node, unsigned_type_node,
18614                                 unsigned_type_node, NULL_TREE);
18615   tree void_ftype_pcvoid_unsigned_unsigned
18616     = build_function_type_list (void_type_node, const_ptr_type_node,
18617                                 unsigned_type_node, unsigned_type_node,
18618                                 NULL_TREE);
18619   tree unsigned_ftype_void
18620     = build_function_type (unsigned_type_node, void_list_node);
18621   tree v2si_ftype_v4sf
18622     = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
18623   /* Loads/stores.  */
18624   tree void_ftype_v8qi_v8qi_pchar
18625     = build_function_type_list (void_type_node,
18626                                 V8QI_type_node, V8QI_type_node,
18627                                 pchar_type_node, NULL_TREE);
18628   tree v4sf_ftype_pcfloat
18629     = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
18630   /* @@@ the type is bogus */
18631   tree v4sf_ftype_v4sf_pv2si
18632     = build_function_type_list (V4SF_type_node,
18633                                 V4SF_type_node, pv2si_type_node, NULL_TREE);
18634   tree void_ftype_pv2si_v4sf
18635     = build_function_type_list (void_type_node,
18636                                 pv2si_type_node, V4SF_type_node, NULL_TREE);
18637   tree void_ftype_pfloat_v4sf
18638     = build_function_type_list (void_type_node,
18639                                 pfloat_type_node, V4SF_type_node, NULL_TREE);
18640   tree void_ftype_pdi_di
18641     = build_function_type_list (void_type_node,
18642                                 pdi_type_node, long_long_unsigned_type_node,
18643                                 NULL_TREE);
18644   tree void_ftype_pv2di_v2di
18645     = build_function_type_list (void_type_node,
18646                                 pv2di_type_node, V2DI_type_node, NULL_TREE);
18647   /* Normal vector unops.  */
18648   tree v4sf_ftype_v4sf
18649     = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
18650   tree v16qi_ftype_v16qi
18651     = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
18652   tree v8hi_ftype_v8hi
18653     = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
18654   tree v4si_ftype_v4si
18655     = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
18656   tree v8qi_ftype_v8qi
18657     = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
18658   tree v4hi_ftype_v4hi
18659     = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
18660
18661   /* Normal vector binops.  */
18662   tree v4sf_ftype_v4sf_v4sf
18663     = build_function_type_list (V4SF_type_node,
18664                                 V4SF_type_node, V4SF_type_node, NULL_TREE);
18665   tree v8qi_ftype_v8qi_v8qi
18666     = build_function_type_list (V8QI_type_node,
18667                                 V8QI_type_node, V8QI_type_node, NULL_TREE);
18668   tree v4hi_ftype_v4hi_v4hi
18669     = build_function_type_list (V4HI_type_node,
18670                                 V4HI_type_node, V4HI_type_node, NULL_TREE);
18671   tree v2si_ftype_v2si_v2si
18672     = build_function_type_list (V2SI_type_node,
18673                                 V2SI_type_node, V2SI_type_node, NULL_TREE);
18674   tree di_ftype_di_di
18675     = build_function_type_list (long_long_unsigned_type_node,
18676                                 long_long_unsigned_type_node,
18677                                 long_long_unsigned_type_node, NULL_TREE);
18678
18679   tree di_ftype_di_di_int
18680     = build_function_type_list (long_long_unsigned_type_node,
18681                                 long_long_unsigned_type_node,
18682                                 long_long_unsigned_type_node,
18683                                 integer_type_node, NULL_TREE);
18684
18685   tree v2si_ftype_v2sf
18686     = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
18687   tree v2sf_ftype_v2si
18688     = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
18689   tree v2si_ftype_v2si
18690     = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
18691   tree v2sf_ftype_v2sf
18692     = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
18693   tree v2sf_ftype_v2sf_v2sf
18694     = build_function_type_list (V2SF_type_node,
18695                                 V2SF_type_node, V2SF_type_node, NULL_TREE);
18696   tree v2si_ftype_v2sf_v2sf
18697     = build_function_type_list (V2SI_type_node,
18698                                 V2SF_type_node, V2SF_type_node, NULL_TREE);
18699   tree pint_type_node    = build_pointer_type (integer_type_node);
18700   tree pdouble_type_node = build_pointer_type (double_type_node);
18701   tree pcdouble_type_node = build_pointer_type (
18702                                 build_type_variant (double_type_node, 1, 0));
18703   tree int_ftype_v2df_v2df
18704     = build_function_type_list (integer_type_node,
18705                                 V2DF_type_node, V2DF_type_node, NULL_TREE);
18706
18707   tree void_ftype_pcvoid
18708     = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
18709   tree v4sf_ftype_v4si
18710     = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
18711   tree v4si_ftype_v4sf
18712     = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
18713   tree v2df_ftype_v4si
18714     = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
18715   tree v4si_ftype_v2df
18716     = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
18717   tree v4si_ftype_v2df_v2df
18718     = build_function_type_list (V4SI_type_node,
18719                                 V2DF_type_node, V2DF_type_node, NULL_TREE);
18720   tree v2si_ftype_v2df
18721     = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
18722   tree v4sf_ftype_v2df
18723     = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
18724   tree v2df_ftype_v2si
18725     = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
18726   tree v2df_ftype_v4sf
18727     = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
18728   tree int_ftype_v2df
18729     = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
18730   tree int64_ftype_v2df
18731     = build_function_type_list (long_long_integer_type_node,
18732                                 V2DF_type_node, NULL_TREE);
18733   tree v2df_ftype_v2df_int
18734     = build_function_type_list (V2DF_type_node,
18735                                 V2DF_type_node, integer_type_node, NULL_TREE);
18736   tree v2df_ftype_v2df_int64
18737     = build_function_type_list (V2DF_type_node,
18738                                 V2DF_type_node, long_long_integer_type_node,
18739                                 NULL_TREE);
18740   tree v4sf_ftype_v4sf_v2df
18741     = build_function_type_list (V4SF_type_node,
18742                                 V4SF_type_node, V2DF_type_node, NULL_TREE);
18743   tree v2df_ftype_v2df_v4sf
18744     = build_function_type_list (V2DF_type_node,
18745                                 V2DF_type_node, V4SF_type_node, NULL_TREE);
18746   tree v2df_ftype_v2df_v2df_int
18747     = build_function_type_list (V2DF_type_node,
18748                                 V2DF_type_node, V2DF_type_node,
18749                                 integer_type_node,
18750                                 NULL_TREE);
18751   tree v2df_ftype_v2df_pcdouble
18752     = build_function_type_list (V2DF_type_node,
18753                                 V2DF_type_node, pcdouble_type_node, NULL_TREE);
18754   tree void_ftype_pdouble_v2df
18755     = build_function_type_list (void_type_node,
18756                                 pdouble_type_node, V2DF_type_node, NULL_TREE);
18757   tree void_ftype_pint_int
18758     = build_function_type_list (void_type_node,
18759                                 pint_type_node, integer_type_node, NULL_TREE);
18760   tree void_ftype_v16qi_v16qi_pchar
18761     = build_function_type_list (void_type_node,
18762                                 V16QI_type_node, V16QI_type_node,
18763                                 pchar_type_node, NULL_TREE);
18764   tree v2df_ftype_pcdouble
18765     = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
18766   tree v2df_ftype_v2df_v2df
18767     = build_function_type_list (V2DF_type_node,
18768                                 V2DF_type_node, V2DF_type_node, NULL_TREE);
18769   tree v16qi_ftype_v16qi_v16qi
18770     = build_function_type_list (V16QI_type_node,
18771                                 V16QI_type_node, V16QI_type_node, NULL_TREE);
18772   tree v8hi_ftype_v8hi_v8hi
18773     = build_function_type_list (V8HI_type_node,
18774                                 V8HI_type_node, V8HI_type_node, NULL_TREE);
18775   tree v4si_ftype_v4si_v4si
18776     = build_function_type_list (V4SI_type_node,
18777                                 V4SI_type_node, V4SI_type_node, NULL_TREE);
18778   tree v2di_ftype_v2di_v2di
18779     = build_function_type_list (V2DI_type_node,
18780                                 V2DI_type_node, V2DI_type_node, NULL_TREE);
18781   tree v2di_ftype_v2df_v2df
18782     = build_function_type_list (V2DI_type_node,
18783                                 V2DF_type_node, V2DF_type_node, NULL_TREE);
18784   tree v2df_ftype_v2df
18785     = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
18786   tree v2di_ftype_v2di_int
18787     = build_function_type_list (V2DI_type_node,
18788                                 V2DI_type_node, integer_type_node, NULL_TREE);
18789   tree v2di_ftype_v2di_v2di_int
18790     = build_function_type_list (V2DI_type_node, V2DI_type_node,
18791                                 V2DI_type_node, integer_type_node, NULL_TREE);
18792   tree v4si_ftype_v4si_int
18793     = build_function_type_list (V4SI_type_node,
18794                                 V4SI_type_node, integer_type_node, NULL_TREE);
18795   tree v8hi_ftype_v8hi_int
18796     = build_function_type_list (V8HI_type_node,
18797                                 V8HI_type_node, integer_type_node, NULL_TREE);
18798   tree v4si_ftype_v8hi_v8hi
18799     = build_function_type_list (V4SI_type_node,
18800                                 V8HI_type_node, V8HI_type_node, NULL_TREE);
18801   tree di_ftype_v8qi_v8qi
18802     = build_function_type_list (long_long_unsigned_type_node,
18803                                 V8QI_type_node, V8QI_type_node, NULL_TREE);
18804   tree di_ftype_v2si_v2si
18805     = build_function_type_list (long_long_unsigned_type_node,
18806                                 V2SI_type_node, V2SI_type_node, NULL_TREE);
18807   tree v2di_ftype_v16qi_v16qi
18808     = build_function_type_list (V2DI_type_node,
18809                                 V16QI_type_node, V16QI_type_node, NULL_TREE);
18810   tree v2di_ftype_v4si_v4si
18811     = build_function_type_list (V2DI_type_node,
18812                                 V4SI_type_node, V4SI_type_node, NULL_TREE);
18813   tree int_ftype_v16qi
18814     = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
18815   tree v16qi_ftype_pcchar
18816     = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
18817   tree void_ftype_pchar_v16qi
18818     = build_function_type_list (void_type_node,
18819                                 pchar_type_node, V16QI_type_node, NULL_TREE);
18820
18821   tree v2di_ftype_v2di_unsigned_unsigned
18822     = build_function_type_list (V2DI_type_node, V2DI_type_node,
18823                                 unsigned_type_node, unsigned_type_node,
18824                                 NULL_TREE);
18825   tree v2di_ftype_v2di_v2di_unsigned_unsigned
18826     = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
18827                                 unsigned_type_node, unsigned_type_node,
18828                                 NULL_TREE);
18829   tree v2di_ftype_v2di_v16qi
18830     = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
18831                                 NULL_TREE);
18832   tree v2df_ftype_v2df_v2df_v2df
18833     = build_function_type_list (V2DF_type_node,
18834                                 V2DF_type_node, V2DF_type_node,
18835                                 V2DF_type_node, NULL_TREE);
18836   tree v4sf_ftype_v4sf_v4sf_v4sf
18837     = build_function_type_list (V4SF_type_node,
18838                                 V4SF_type_node, V4SF_type_node,
18839                                 V4SF_type_node, NULL_TREE);
18840   tree v8hi_ftype_v16qi
18841     = build_function_type_list (V8HI_type_node, V16QI_type_node,
18842                                 NULL_TREE);
18843   tree v4si_ftype_v16qi
18844     = build_function_type_list (V4SI_type_node, V16QI_type_node,
18845                                 NULL_TREE);
18846   tree v2di_ftype_v16qi
18847     = build_function_type_list (V2DI_type_node, V16QI_type_node,
18848                                 NULL_TREE);
18849   tree v4si_ftype_v8hi
18850     = build_function_type_list (V4SI_type_node, V8HI_type_node,
18851                                 NULL_TREE);
18852   tree v2di_ftype_v8hi
18853     = build_function_type_list (V2DI_type_node, V8HI_type_node,
18854                                 NULL_TREE);
18855   tree v2di_ftype_v4si
18856     = build_function_type_list (V2DI_type_node, V4SI_type_node,
18857                                 NULL_TREE);
18858   tree v2di_ftype_pv2di
18859     = build_function_type_list (V2DI_type_node, pv2di_type_node,
18860                                 NULL_TREE);
18861   tree v16qi_ftype_v16qi_v16qi_int
18862     = build_function_type_list (V16QI_type_node, V16QI_type_node,
18863                                 V16QI_type_node, integer_type_node,
18864                                 NULL_TREE);
18865   tree v16qi_ftype_v16qi_v16qi_v16qi
18866     = build_function_type_list (V16QI_type_node, V16QI_type_node,
18867                                 V16QI_type_node, V16QI_type_node,
18868                                 NULL_TREE);
18869   tree v8hi_ftype_v8hi_v8hi_int
18870     = build_function_type_list (V8HI_type_node, V8HI_type_node,
18871                                 V8HI_type_node, integer_type_node,
18872                                 NULL_TREE);
18873   tree v4si_ftype_v4si_v4si_int
18874     = build_function_type_list (V4SI_type_node, V4SI_type_node,
18875                                 V4SI_type_node, integer_type_node,
18876                                 NULL_TREE);
18877   tree int_ftype_v2di_v2di
18878     = build_function_type_list (integer_type_node,
18879                                 V2DI_type_node, V2DI_type_node,
18880                                 NULL_TREE);
18881   tree int_ftype_v16qi_int_v16qi_int_int
18882     = build_function_type_list (integer_type_node,
18883                                 V16QI_type_node,
18884                                 integer_type_node,
18885                                 V16QI_type_node,
18886                                 integer_type_node,
18887                                 integer_type_node,
18888                                 NULL_TREE);
18889   tree v16qi_ftype_v16qi_int_v16qi_int_int
18890     = build_function_type_list (V16QI_type_node,
18891                                 V16QI_type_node,
18892                                 integer_type_node,
18893                                 V16QI_type_node,
18894                                 integer_type_node,
18895                                 integer_type_node,
18896                                 NULL_TREE);
18897   tree int_ftype_v16qi_v16qi_int
18898     = build_function_type_list (integer_type_node,
18899                                 V16QI_type_node,
18900                                 V16QI_type_node,
18901                                 integer_type_node,
18902                                 NULL_TREE);
18903
18904   /* SSE5 instructions */
18905   tree v2di_ftype_v2di_v2di_v2di
18906     = build_function_type_list (V2DI_type_node,
18907                                 V2DI_type_node,
18908                                 V2DI_type_node,
18909                                 V2DI_type_node,
18910                                 NULL_TREE);
18911
18912   tree v4si_ftype_v4si_v4si_v4si
18913     = build_function_type_list (V4SI_type_node,
18914                                 V4SI_type_node,
18915                                 V4SI_type_node,
18916                                 V4SI_type_node,
18917                                 NULL_TREE);
18918
18919   tree v4si_ftype_v4si_v4si_v2di
18920     = build_function_type_list (V4SI_type_node,
18921                                 V4SI_type_node,
18922                                 V4SI_type_node,
18923                                 V2DI_type_node,
18924                                 NULL_TREE);
18925
18926   tree v8hi_ftype_v8hi_v8hi_v8hi
18927     = build_function_type_list (V8HI_type_node,
18928                                 V8HI_type_node,
18929                                 V8HI_type_node,
18930                                 V8HI_type_node,
18931                                 NULL_TREE);
18932
18933   tree v8hi_ftype_v8hi_v8hi_v4si
18934     = build_function_type_list (V8HI_type_node,
18935                                 V8HI_type_node,
18936                                 V8HI_type_node,
18937                                 V4SI_type_node,
18938                                 NULL_TREE);
18939
18940   tree v2df_ftype_v2df_v2df_v16qi
18941     = build_function_type_list (V2DF_type_node,
18942                                 V2DF_type_node,
18943                                 V2DF_type_node,
18944                                 V16QI_type_node,
18945                                 NULL_TREE);
18946
18947   tree v4sf_ftype_v4sf_v4sf_v16qi
18948     = build_function_type_list (V4SF_type_node,
18949                                 V4SF_type_node,
18950                                 V4SF_type_node,
18951                                 V16QI_type_node,
18952                                 NULL_TREE);
18953
18954   tree v2di_ftype_v2di_si
18955     = build_function_type_list (V2DI_type_node,
18956                                 V2DI_type_node,
18957                                 integer_type_node,
18958                                 NULL_TREE);
18959
18960   tree v4si_ftype_v4si_si
18961     = build_function_type_list (V4SI_type_node,
18962                                 V4SI_type_node,
18963                                 integer_type_node,
18964                                 NULL_TREE);
18965
18966   tree v8hi_ftype_v8hi_si
18967     = build_function_type_list (V8HI_type_node,
18968                                 V8HI_type_node,
18969                                 integer_type_node,
18970                                 NULL_TREE);
18971
18972   tree v16qi_ftype_v16qi_si
18973     = build_function_type_list (V16QI_type_node,
18974                                 V16QI_type_node,
18975                                 integer_type_node,
18976                                 NULL_TREE);
18977   tree v4sf_ftype_v4hi
18978     = build_function_type_list (V4SF_type_node,
18979                                 V4HI_type_node,
18980                                 NULL_TREE);
18981
18982   tree v4hi_ftype_v4sf
18983     = build_function_type_list (V4HI_type_node,
18984                                 V4SF_type_node,
18985                                 NULL_TREE);
18986
18987   tree v2di_ftype_v2di
18988     = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
18989
18990   tree ftype;
18991
18992   /* The __float80 type.  */
18993   if (TYPE_MODE (long_double_type_node) == XFmode)
18994     (*lang_hooks.types.register_builtin_type) (long_double_type_node,
18995                                                "__float80");
18996   else
18997     {
18998       /* The __float80 type.  */
18999       tree float80_type_node = make_node (REAL_TYPE);
19000
19001       TYPE_PRECISION (float80_type_node) = 80;
19002       layout_type (float80_type_node);
19003       (*lang_hooks.types.register_builtin_type) (float80_type_node,
19004                                                  "__float80");
19005     }
19006
19007   if (TARGET_64BIT)
19008     {
19009       tree float128_type_node = make_node (REAL_TYPE);
19010
19011       TYPE_PRECISION (float128_type_node) = 128;
19012       layout_type (float128_type_node);
19013       (*lang_hooks.types.register_builtin_type) (float128_type_node,
19014                                                  "__float128");
19015
19016       /* TFmode support builtins.  */
19017       ftype = build_function_type (float128_type_node,
19018                                    void_list_node);
19019       def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_infq", ftype, IX86_BUILTIN_INFQ);
19020
19021       ftype = build_function_type_list (float128_type_node,
19022                                         float128_type_node,
19023                                         NULL_TREE);
19024       def_builtin_const (OPTION_MASK_ISA_64BIT, "__builtin_fabsq", ftype, IX86_BUILTIN_FABSQ);
19025
19026       ftype = build_function_type_list (float128_type_node,
19027                                         float128_type_node,
19028                                         float128_type_node,
19029                                         NULL_TREE);
19030       def_builtin_const (OPTION_MASK_ISA_64BIT, "__builtin_copysignq", ftype, IX86_BUILTIN_COPYSIGNQ);
19031     }
19032
19033   /* Add all SSE builtins that are more or less simple operations on
19034      three operands.  */
19035   for (i = 0, d = bdesc_sse_3arg;
19036        i < ARRAY_SIZE (bdesc_sse_3arg);
19037        i++, d++)
19038     {
19039       /* Use one of the operands; the target can have a different mode for
19040          mask-generating compares.  */
19041       enum machine_mode mode;
19042       tree type;
19043
19044       if (d->name == 0)
19045         continue;
19046       mode = insn_data[d->icode].operand[1].mode;
19047
19048       switch (mode)
19049         {
19050         case V16QImode:
19051           type = v16qi_ftype_v16qi_v16qi_int;
19052           break;
19053         case V8HImode:
19054           type = v8hi_ftype_v8hi_v8hi_int;
19055           break;
19056         case V4SImode:
19057           type = v4si_ftype_v4si_v4si_int;
19058           break;
19059         case V2DImode:
19060           type = v2di_ftype_v2di_v2di_int;
19061           break;
19062         case V2DFmode:
19063           type = v2df_ftype_v2df_v2df_int;
19064           break;
19065         case V4SFmode:
19066           type = v4sf_ftype_v4sf_v4sf_int;
19067           break;
19068         default:
19069           gcc_unreachable ();
19070         }
19071
19072       /* Override for variable blends.  */
19073       switch (d->icode)
19074         {
19075         case CODE_FOR_sse4_1_blendvpd:
19076           type = v2df_ftype_v2df_v2df_v2df;
19077           break;
19078         case CODE_FOR_sse4_1_blendvps:
19079           type = v4sf_ftype_v4sf_v4sf_v4sf;
19080           break;
19081         case CODE_FOR_sse4_1_pblendvb:
19082           type = v16qi_ftype_v16qi_v16qi_v16qi;
19083           break;
19084         default:
19085           break;
19086         }
19087
19088       def_builtin_const (d->mask, d->name, type, d->code);
19089     }
19090
19091   /* Add all builtins that are more or less simple operations on two
19092      operands.  */
19093   for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
19094     {
19095       /* Use one of the operands; the target can have a different mode for
19096          mask-generating compares.  */
19097       enum machine_mode mode;
19098       tree type;
19099
19100       if (d->name == 0)
19101         continue;
19102       mode = insn_data[d->icode].operand[1].mode;
19103
19104       switch (mode)
19105         {
19106         case V16QImode:
19107           type = v16qi_ftype_v16qi_v16qi;
19108           break;
19109         case V8HImode:
19110           type = v8hi_ftype_v8hi_v8hi;
19111           break;
19112         case V4SImode:
19113           type = v4si_ftype_v4si_v4si;
19114           break;
19115         case V2DImode:
19116           type = v2di_ftype_v2di_v2di;
19117           break;
19118         case V2DFmode:
19119           type = v2df_ftype_v2df_v2df;
19120           break;
19121         case V4SFmode:
19122           type = v4sf_ftype_v4sf_v4sf;
19123           break;
19124         case V8QImode:
19125           type = v8qi_ftype_v8qi_v8qi;
19126           break;
19127         case V4HImode:
19128           type = v4hi_ftype_v4hi_v4hi;
19129           break;
19130         case V2SImode:
19131           type = v2si_ftype_v2si_v2si;
19132           break;
19133         case DImode:
19134           type = di_ftype_di_di;
19135           break;
19136
19137         default:
19138           gcc_unreachable ();
19139         }
19140
19141       /* Override for comparisons.  */
19142       if (d->icode == CODE_FOR_sse_maskcmpv4sf3
19143           || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
19144         type = v4si_ftype_v4sf_v4sf;
19145
19146       if (d->icode == CODE_FOR_sse2_maskcmpv2df3
19147           || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
19148         type = v2di_ftype_v2df_v2df;
19149
19150       if (d->icode == CODE_FOR_vec_pack_sfix_v2df)
19151         type = v4si_ftype_v2df_v2df;
19152
19153       def_builtin_const (d->mask, d->name, type, d->code);
19154     }
19155
19156   /* Add all builtins that are more or less simple operations on 1 operand.  */
19157   for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
19158     {
19159       enum machine_mode mode;
19160       tree type;
19161
19162       if (d->name == 0)
19163         continue;
19164       mode = insn_data[d->icode].operand[1].mode;
19165
19166       switch (mode)
19167         {
19168         case V16QImode:
19169           type = v16qi_ftype_v16qi;
19170           break;
19171         case V8HImode:
19172           type = v8hi_ftype_v8hi;
19173           break;
19174         case V4SImode:
19175           type = v4si_ftype_v4si;
19176           break;
19177         case V2DFmode:
19178           type = v2df_ftype_v2df;
19179           break;
19180         case V4SFmode:
19181           type = v4sf_ftype_v4sf;
19182           break;
19183         case V8QImode:
19184           type = v8qi_ftype_v8qi;
19185           break;
19186         case V4HImode:
19187           type = v4hi_ftype_v4hi;
19188           break;
19189         case V2SImode:
19190           type = v2si_ftype_v2si;
19191           break;
19192
19193         default:
19194           abort ();
19195         }
19196
19197       def_builtin_const (d->mask, d->name, type, d->code);
19198     }
19199
19200   /* pcmpestr[im] insns.  */
19201   for (i = 0, d = bdesc_pcmpestr;
19202        i < ARRAY_SIZE (bdesc_pcmpestr);
19203        i++, d++)
19204     {
19205       if (d->code == IX86_BUILTIN_PCMPESTRM128)
19206         ftype = v16qi_ftype_v16qi_int_v16qi_int_int;
19207       else
19208         ftype = int_ftype_v16qi_int_v16qi_int_int;
19209       def_builtin_const (d->mask, d->name, ftype, d->code);
19210     }
19211
19212   /* pcmpistr[im] insns.  */
19213   for (i = 0, d = bdesc_pcmpistr;
19214        i < ARRAY_SIZE (bdesc_pcmpistr);
19215        i++, d++)
19216     {
19217       if (d->code == IX86_BUILTIN_PCMPISTRM128)
19218         ftype = v16qi_ftype_v16qi_v16qi_int;
19219       else
19220         ftype = int_ftype_v16qi_v16qi_int;
19221       def_builtin_const (d->mask, d->name, ftype, d->code);
19222     }
19223
19224   /* Add the remaining MMX insns with somewhat more complicated types.  */
19225   def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
19226   def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
19227   def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
19228   def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
19229
19230   def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
19231   def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
19232   def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
19233
19234   def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
19235   def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
19236
19237   def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
19238   def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
19239
19240   /* comi/ucomi insns.  */
19241   for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
19242     if (d->mask == OPTION_MASK_ISA_SSE2)
19243       def_builtin_const (d->mask, d->name, int_ftype_v2df_v2df, d->code);
19244     else
19245       def_builtin_const (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
19246
19247   /* ptest insns.  */
19248   for (i = 0, d = bdesc_ptest; i < ARRAY_SIZE (bdesc_ptest); i++, d++)
19249     def_builtin_const (d->mask, d->name, int_ftype_v2di_v2di, d->code);
19250
19251   def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
19252   def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
19253   def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
19254
19255   def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
19256   def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
19257   def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
19258   def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
19259   def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
19260   def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
19261   def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
19262   def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
19263   def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
19264   def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
19265   def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
19266
19267   def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
19268
19269   def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
19270   def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
19271
19272   def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
19273   def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
19274   def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
19275   def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
19276
19277   def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
19278   def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
19279   def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
19280   def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
19281
19282   def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
19283
19284   def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
19285
19286   def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
19287   def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
19288   def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
19289   def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rsqrtps_nr", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS_NR);
19290   def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
19291   ftype = build_function_type_list (float_type_node,
19292                                     float_type_node,
19293                                     NULL_TREE);
19294   def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rsqrtf", ftype, IX86_BUILTIN_RSQRTF);
19295   def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
19296   def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_sqrtps_nr", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS_NR);
19297   def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
19298
19299   def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
19300
19301   /* Original 3DNow!  */
19302   def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
19303   def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
19304   def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
19305   def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
19306   def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
19307   def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
19308   def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
19309   def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
19310   def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
19311   def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
19312   def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
19313   def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
19314   def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
19315   def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
19316   def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
19317   def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
19318   def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
19319   def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
19320   def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
19321   def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
19322
19323   /* 3DNow! extension as used in the Athlon CPU.  */
19324   def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
19325   def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
19326   def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
19327   def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
19328   def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
19329   def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
19330
19331   /* SSE2 */
19332   def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
19333
19334   def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
19335   def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
19336
19337   def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
19338   def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
19339
19340   def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
19341   def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
19342   def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
19343   def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
19344   def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
19345
19346   def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
19347   def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
19348   def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
19349   def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
19350
19351   def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
19352   def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
19353
19354   def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
19355
19356   def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
19357   def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
19358
19359   def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
19360   def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
19361   def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
19362   def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
19363   def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
19364
19365   def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
19366
19367   def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
19368   def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
19369   def_builtin_const (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
19370   def_builtin_const (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
19371
19372   def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
19373   def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
19374   def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
19375
19376   def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
19377   def_builtin_const (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
19378   def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
19379   def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
19380
19381   def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
19382   def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
19383   x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
19384
19385   def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
19386   def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
19387
19388   def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
19389   def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
19390
19391   def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
19392   def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
19393   def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
19394   def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
19395   def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSLLW128);
19396   def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSLLD128);
19397   def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
19398
19399   def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
19400   def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
19401   def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
19402   def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
19403   def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRLW128);
19404   def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRLD128);
19405   def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
19406
19407   def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
19408   def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
19409   def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRAW128);
19410   def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRAD128);
19411
19412   def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
19413
19414   /* Prescott New Instructions.  */
19415   def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor", void_ftype_pcvoid_unsigned_unsigned, IX86_BUILTIN_MONITOR);
19416   def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait", void_ftype_unsigned_unsigned, IX86_BUILTIN_MWAIT);
19417   def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_lddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
19418
19419   /* SSSE3.  */
19420   def_builtin_const (OPTION_MASK_ISA_SSSE3, "__builtin_ia32_palignr128", v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PALIGNR128);
19421   def_builtin_const (OPTION_MASK_ISA_SSSE3, "__builtin_ia32_palignr", di_ftype_di_di_int, IX86_BUILTIN_PALIGNR);
19422
19423   /* SSE4.1. */
19424   def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_movntdqa", v2di_ftype_pv2di, IX86_BUILTIN_MOVNTDQA);
19425   def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxbw128", v8hi_ftype_v16qi, IX86_BUILTIN_PMOVSXBW128);
19426   def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxbd128", v4si_ftype_v16qi, IX86_BUILTIN_PMOVSXBD128);
19427   def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxbq128", v2di_ftype_v16qi, IX86_BUILTIN_PMOVSXBQ128);
19428   def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxwd128", v4si_ftype_v8hi, IX86_BUILTIN_PMOVSXWD128);
19429   def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxwq128", v2di_ftype_v8hi, IX86_BUILTIN_PMOVSXWQ128);
19430   def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxdq128", v2di_ftype_v4si, IX86_BUILTIN_PMOVSXDQ128);
19431   def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxbw128", v8hi_ftype_v16qi, IX86_BUILTIN_PMOVZXBW128);
19432   def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxbd128", v4si_ftype_v16qi, IX86_BUILTIN_PMOVZXBD128);
19433   def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxbq128", v2di_ftype_v16qi, IX86_BUILTIN_PMOVZXBQ128);
19434   def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxwd128", v4si_ftype_v8hi, IX86_BUILTIN_PMOVZXWD128);
19435   def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxwq128", v2di_ftype_v8hi, IX86_BUILTIN_PMOVZXWQ128);
19436   def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxdq128", v2di_ftype_v4si, IX86_BUILTIN_PMOVZXDQ128);
19437   def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmuldq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULDQ128);
19438
19439   /* SSE4.1 and SSE5 */
19440   def_builtin_const (OPTION_MASK_ISA_ROUND, "__builtin_ia32_roundpd", v2df_ftype_v2df_int, IX86_BUILTIN_ROUNDPD);
19441   def_builtin_const (OPTION_MASK_ISA_ROUND, "__builtin_ia32_roundps", v4sf_ftype_v4sf_int, IX86_BUILTIN_ROUNDPS);
19442   def_builtin_const (OPTION_MASK_ISA_ROUND, "__builtin_ia32_roundsd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_ROUNDSD);
19443   def_builtin_const (OPTION_MASK_ISA_ROUND, "__builtin_ia32_roundss", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_ROUNDSS);
19444
19445   /* SSE4.2. */
19446   ftype = build_function_type_list (unsigned_type_node,
19447                                     unsigned_type_node,
19448                                     unsigned_char_type_node,
19449                                     NULL_TREE);
19450   def_builtin_const (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32qi", ftype, IX86_BUILTIN_CRC32QI);
19451   ftype = build_function_type_list (unsigned_type_node,
19452                                     unsigned_type_node,
19453                                     short_unsigned_type_node,
19454                                     NULL_TREE);
19455   def_builtin_const (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32hi", ftype, IX86_BUILTIN_CRC32HI);
19456   ftype = build_function_type_list (unsigned_type_node,
19457                                     unsigned_type_node,
19458                                     unsigned_type_node,
19459                                     NULL_TREE);
19460   def_builtin_const (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32si", ftype, IX86_BUILTIN_CRC32SI);
19461   ftype = build_function_type_list (long_long_unsigned_type_node,
19462                                     long_long_unsigned_type_node,
19463                                     long_long_unsigned_type_node,
19464                                     NULL_TREE);
19465   def_builtin_const (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32di", ftype, IX86_BUILTIN_CRC32DI);
19466
19467   /* AMDFAM10 SSE4A New built-ins  */
19468   def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_movntsd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTSD);
19469   def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_movntss", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTSS);
19470   def_builtin_const (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_extrqi", v2di_ftype_v2di_unsigned_unsigned, IX86_BUILTIN_EXTRQI);
19471   def_builtin_const (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_extrq", v2di_ftype_v2di_v16qi,  IX86_BUILTIN_EXTRQ);
19472   def_builtin_const (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_insertqi", v2di_ftype_v2di_v2di_unsigned_unsigned, IX86_BUILTIN_INSERTQI);
19473   def_builtin_const (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_insertq", v2di_ftype_v2di_v2di, IX86_BUILTIN_INSERTQ);
19474
19475   /* Access to the vec_init patterns.  */
19476   ftype = build_function_type_list (V2SI_type_node, integer_type_node,
19477                                     integer_type_node, NULL_TREE);
19478   def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si", ftype, IX86_BUILTIN_VEC_INIT_V2SI);
19479
19480   ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
19481                                     short_integer_type_node,
19482                                     short_integer_type_node,
19483                                     short_integer_type_node, NULL_TREE);
19484   def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi", ftype, IX86_BUILTIN_VEC_INIT_V4HI);
19485
19486   ftype = build_function_type_list (V8QI_type_node, char_type_node,
19487                                     char_type_node, char_type_node,
19488                                     char_type_node, char_type_node,
19489                                     char_type_node, char_type_node,
19490                                     char_type_node, NULL_TREE);
19491   def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi", ftype, IX86_BUILTIN_VEC_INIT_V8QI);
19492
19493   /* Access to the vec_extract patterns.  */
19494   ftype = build_function_type_list (double_type_node, V2DF_type_node,
19495                                     integer_type_node, NULL_TREE);
19496   def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df", ftype, IX86_BUILTIN_VEC_EXT_V2DF);
19497
19498   ftype = build_function_type_list (long_long_integer_type_node,
19499                                     V2DI_type_node, integer_type_node,
19500                                     NULL_TREE);
19501   def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di", ftype, IX86_BUILTIN_VEC_EXT_V2DI);
19502
19503   ftype = build_function_type_list (float_type_node, V4SF_type_node,
19504                                     integer_type_node, NULL_TREE);
19505   def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf", ftype, IX86_BUILTIN_VEC_EXT_V4SF);
19506
19507   ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
19508                                     integer_type_node, NULL_TREE);
19509   def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si", ftype, IX86_BUILTIN_VEC_EXT_V4SI);
19510
19511   ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
19512                                     integer_type_node, NULL_TREE);
19513   def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi", ftype, IX86_BUILTIN_VEC_EXT_V8HI);
19514
19515   ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
19516                                     integer_type_node, NULL_TREE);
19517   def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_ext_v4hi", ftype, IX86_BUILTIN_VEC_EXT_V4HI);
19518
19519   ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
19520                                     integer_type_node, NULL_TREE);
19521   def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si", ftype, IX86_BUILTIN_VEC_EXT_V2SI);
19522
19523   ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
19524                                     integer_type_node, NULL_TREE);
19525   def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype, IX86_BUILTIN_VEC_EXT_V16QI);
19526
19527   /* Access to the vec_set patterns.  */
19528   ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
19529                                     intDI_type_node,
19530                                     integer_type_node, NULL_TREE);
19531   def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_vec_set_v2di", ftype, IX86_BUILTIN_VEC_SET_V2DI);
19532
19533   ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
19534                                     float_type_node,
19535                                     integer_type_node, NULL_TREE);
19536   def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf", ftype, IX86_BUILTIN_VEC_SET_V4SF);
19537
19538   ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
19539                                     intSI_type_node,
19540                                     integer_type_node, NULL_TREE);
19541   def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si", ftype, IX86_BUILTIN_VEC_SET_V4SI);
19542
19543   ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
19544                                     intHI_type_node,
19545                                     integer_type_node, NULL_TREE);
19546   def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi", ftype, IX86_BUILTIN_VEC_SET_V8HI);
19547
19548   ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
19549                                     intHI_type_node,
19550                                     integer_type_node, NULL_TREE);
19551   def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_set_v4hi", ftype, IX86_BUILTIN_VEC_SET_V4HI);
19552
19553   ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
19554                                     intQI_type_node,
19555                                     integer_type_node, NULL_TREE);
19556   def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi", ftype, IX86_BUILTIN_VEC_SET_V16QI);
19557
19558   /* Add SSE5 multi-arg argument instructions */
19559   for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
19560     {
19561       tree mtype = NULL_TREE;
19562
19563       if (d->name == 0)
19564         continue;
19565
19566       switch ((enum multi_arg_type)d->flag)
19567         {
19568         case MULTI_ARG_3_SF:     mtype = v4sf_ftype_v4sf_v4sf_v4sf;     break;
19569         case MULTI_ARG_3_DF:     mtype = v2df_ftype_v2df_v2df_v2df;     break;
19570         case MULTI_ARG_3_DI:     mtype = v2di_ftype_v2di_v2di_v2di;     break;
19571         case MULTI_ARG_3_SI:     mtype = v4si_ftype_v4si_v4si_v4si;     break;
19572         case MULTI_ARG_3_SI_DI:  mtype = v4si_ftype_v4si_v4si_v2di;     break;
19573         case MULTI_ARG_3_HI:     mtype = v8hi_ftype_v8hi_v8hi_v8hi;     break;
19574         case MULTI_ARG_3_HI_SI:  mtype = v8hi_ftype_v8hi_v8hi_v4si;     break;
19575         case MULTI_ARG_3_QI:     mtype = v16qi_ftype_v16qi_v16qi_v16qi; break;
19576         case MULTI_ARG_3_PERMPS: mtype = v4sf_ftype_v4sf_v4sf_v16qi;    break;
19577         case MULTI_ARG_3_PERMPD: mtype = v2df_ftype_v2df_v2df_v16qi;    break;
19578         case MULTI_ARG_2_SF:     mtype = v4sf_ftype_v4sf_v4sf;          break;
19579         case MULTI_ARG_2_DF:     mtype = v2df_ftype_v2df_v2df;          break;
19580         case MULTI_ARG_2_DI:     mtype = v2di_ftype_v2di_v2di;          break;
19581         case MULTI_ARG_2_SI:     mtype = v4si_ftype_v4si_v4si;          break;
19582         case MULTI_ARG_2_HI:     mtype = v8hi_ftype_v8hi_v8hi;          break;
19583         case MULTI_ARG_2_QI:     mtype = v16qi_ftype_v16qi_v16qi;       break;
19584         case MULTI_ARG_2_DI_IMM: mtype = v2di_ftype_v2di_si;            break;
19585         case MULTI_ARG_2_SI_IMM: mtype = v4si_ftype_v4si_si;            break;
19586         case MULTI_ARG_2_HI_IMM: mtype = v8hi_ftype_v8hi_si;            break;
19587         case MULTI_ARG_2_QI_IMM: mtype = v16qi_ftype_v16qi_si;          break;
19588         case MULTI_ARG_2_SF_CMP: mtype = v4sf_ftype_v4sf_v4sf;          break;
19589         case MULTI_ARG_2_DF_CMP: mtype = v2df_ftype_v2df_v2df;          break;
19590         case MULTI_ARG_2_DI_CMP: mtype = v2di_ftype_v2di_v2di;          break;
19591         case MULTI_ARG_2_SI_CMP: mtype = v4si_ftype_v4si_v4si;          break;
19592         case MULTI_ARG_2_HI_CMP: mtype = v8hi_ftype_v8hi_v8hi;          break;
19593         case MULTI_ARG_2_QI_CMP: mtype = v16qi_ftype_v16qi_v16qi;       break;
19594         case MULTI_ARG_2_SF_TF:  mtype = v4sf_ftype_v4sf_v4sf;          break;
19595         case MULTI_ARG_2_DF_TF:  mtype = v2df_ftype_v2df_v2df;          break;
19596         case MULTI_ARG_2_DI_TF:  mtype = v2di_ftype_v2di_v2di;          break;
19597         case MULTI_ARG_2_SI_TF:  mtype = v4si_ftype_v4si_v4si;          break;
19598         case MULTI_ARG_2_HI_TF:  mtype = v8hi_ftype_v8hi_v8hi;          break;
19599         case MULTI_ARG_2_QI_TF:  mtype = v16qi_ftype_v16qi_v16qi;       break;
19600         case MULTI_ARG_1_SF:     mtype = v4sf_ftype_v4sf;               break;
19601         case MULTI_ARG_1_DF:     mtype = v2df_ftype_v2df;               break;
19602         case MULTI_ARG_1_DI:     mtype = v2di_ftype_v2di;               break;
19603         case MULTI_ARG_1_SI:     mtype = v4si_ftype_v4si;               break;
19604         case MULTI_ARG_1_HI:     mtype = v8hi_ftype_v8hi;               break;
19605         case MULTI_ARG_1_QI:     mtype = v16qi_ftype_v16qi;             break;
19606         case MULTI_ARG_1_SI_DI:  mtype = v2di_ftype_v4si;               break;
19607         case MULTI_ARG_1_HI_DI:  mtype = v2di_ftype_v8hi;               break;
19608         case MULTI_ARG_1_HI_SI:  mtype = v4si_ftype_v8hi;               break;
19609         case MULTI_ARG_1_QI_DI:  mtype = v2di_ftype_v16qi;              break;
19610         case MULTI_ARG_1_QI_SI:  mtype = v4si_ftype_v16qi;              break;
19611         case MULTI_ARG_1_QI_HI:  mtype = v8hi_ftype_v16qi;              break;
19612         case MULTI_ARG_1_PH2PS:  mtype = v4sf_ftype_v4hi;               break;
19613         case MULTI_ARG_1_PS2PH:  mtype = v4hi_ftype_v4sf;               break;
19614         case MULTI_ARG_UNKNOWN:
19615         default:
19616           gcc_unreachable ();
19617         }
19618
19619       if (mtype)
19620         def_builtin_const (d->mask, d->name, mtype, d->code);
19621     }
19622 }
19623
19624 static void
19625 ix86_init_builtins (void)
19626 {
19627   if (TARGET_MMX)
19628     ix86_init_mmx_sse_builtins ();
19629 }
19630
19631 /* Errors in the source file can cause expand_expr to return const0_rtx
19632    where we expect a vector.  To avoid crashing, use one of the vector
19633    clear instructions.  */
19634 static rtx
19635 safe_vector_operand (rtx x, enum machine_mode mode)
19636 {
19637   if (x == const0_rtx)
19638     x = CONST0_RTX (mode);
19639   return x;
19640 }
19641
19642 /* Subroutine of ix86_expand_builtin to take care of SSE insns with
19643    4 operands. The third argument must be a constant smaller than 8
19644    bits or xmm0.  */
19645
19646 static rtx
19647 ix86_expand_sse_4_operands_builtin (enum insn_code icode, tree exp,
19648                                     rtx target)
19649 {
19650   rtx pat;
19651   tree arg0 = CALL_EXPR_ARG (exp, 0);
19652   tree arg1 = CALL_EXPR_ARG (exp, 1);
19653   tree arg2 = CALL_EXPR_ARG (exp, 2);
19654   rtx op0 = expand_normal (arg0);
19655   rtx op1 = expand_normal (arg1);
19656   rtx op2 = expand_normal (arg2);
19657   enum machine_mode tmode = insn_data[icode].operand[0].mode;
19658   enum machine_mode mode1 = insn_data[icode].operand[1].mode;
19659   enum machine_mode mode2 = insn_data[icode].operand[2].mode;
19660   enum machine_mode mode3 = insn_data[icode].operand[3].mode;
19661
19662   if (VECTOR_MODE_P (mode1))
19663     op0 = safe_vector_operand (op0, mode1);
19664   if (VECTOR_MODE_P (mode2))
19665     op1 = safe_vector_operand (op1, mode2);
19666   if (VECTOR_MODE_P (mode3))
19667     op2 = safe_vector_operand (op2, mode3);
19668
19669   if (optimize
19670       || target == 0
19671       || GET_MODE (target) != tmode
19672       || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19673     target = gen_reg_rtx (tmode);
19674
19675   if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19676     op0 = copy_to_mode_reg (mode1, op0);
19677   if ((optimize && !register_operand (op1, mode2))
19678       || !(*insn_data[icode].operand[2].predicate) (op1, mode2))
19679     op1 = copy_to_mode_reg (mode2, op1);
19680
19681   if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
19682     switch (icode)
19683       {
19684       case CODE_FOR_sse4_1_blendvpd:
19685       case CODE_FOR_sse4_1_blendvps:
19686       case CODE_FOR_sse4_1_pblendvb:
19687         op2 = copy_to_mode_reg (mode3, op2);
19688         break;
19689
19690       case CODE_FOR_sse4_1_roundsd:
19691       case CODE_FOR_sse4_1_roundss:
19692         error ("the third argument must be a 4-bit immediate");
19693         return const0_rtx;
19694
19695       default:
19696         error ("the third argument must be an 8-bit immediate");
19697         return const0_rtx;
19698       }
19699
19700   pat = GEN_FCN (icode) (target, op0, op1, op2);
19701   if (! pat)
19702     return 0;
19703   emit_insn (pat);
19704   return target;
19705 }
19706
19707 /* Subroutine of ix86_expand_builtin to take care of crc32 insns.  */
19708
19709 static rtx
19710 ix86_expand_crc32 (enum insn_code icode, tree exp, rtx target)
19711 {
19712   rtx pat;
19713   tree arg0 = CALL_EXPR_ARG (exp, 0);
19714   tree arg1 = CALL_EXPR_ARG (exp, 1);
19715   rtx op0 = expand_normal (arg0);
19716   rtx op1 = expand_normal (arg1);
19717   enum machine_mode tmode = insn_data[icode].operand[0].mode;
19718   enum machine_mode mode0 = insn_data[icode].operand[1].mode;
19719   enum machine_mode mode1 = insn_data[icode].operand[2].mode;
19720
19721   if (optimize
19722       || !target
19723       || GET_MODE (target) != tmode
19724       || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19725     target = gen_reg_rtx (tmode);
19726
19727   if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
19728     op0 = copy_to_mode_reg (mode0, op0);
19729   if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
19730     {
19731       op1 = copy_to_reg (op1);
19732       op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0);
19733     }
19734
19735   pat = GEN_FCN (icode) (target, op0, op1);
19736   if (! pat)
19737     return 0;
19738   emit_insn (pat);
19739   return target;
19740 }
19741
19742 /* Subroutine of ix86_expand_builtin to take care of binop insns.  */
19743
19744 static rtx
19745 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
19746 {
19747   rtx pat, xops[3];
19748   tree arg0 = CALL_EXPR_ARG (exp, 0);
19749   tree arg1 = CALL_EXPR_ARG (exp, 1);
19750   rtx op0 = expand_normal (arg0);
19751   rtx op1 = expand_normal (arg1);
19752   enum machine_mode tmode = insn_data[icode].operand[0].mode;
19753   enum machine_mode mode0 = insn_data[icode].operand[1].mode;
19754   enum machine_mode mode1 = insn_data[icode].operand[2].mode;
19755
19756   if (VECTOR_MODE_P (mode0))
19757     op0 = safe_vector_operand (op0, mode0);
19758   if (VECTOR_MODE_P (mode1))
19759     op1 = safe_vector_operand (op1, mode1);
19760
19761   if (optimize || !target
19762       || GET_MODE (target) != tmode
19763       || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19764     target = gen_reg_rtx (tmode);
19765
19766   if (GET_MODE (op1) == SImode && mode1 == TImode)
19767     {
19768       rtx x = gen_reg_rtx (V4SImode);
19769       emit_insn (gen_sse2_loadd (x, op1));
19770       op1 = gen_lowpart (TImode, x);
19771     }
19772
19773   if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
19774     op0 = copy_to_mode_reg (mode0, op0);
19775   if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
19776     op1 = copy_to_mode_reg (mode1, op1);
19777
19778   /* ??? Using ix86_fixup_binary_operands is problematic when
19779      we've got mismatched modes.  Fake it.  */
19780
19781   xops[0] = target;
19782   xops[1] = op0;
19783   xops[2] = op1;
19784
19785   if (tmode == mode0 && tmode == mode1)
19786     {
19787       target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
19788       op0 = xops[1];
19789       op1 = xops[2];
19790     }
19791   else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
19792     {
19793       op0 = force_reg (mode0, op0);
19794       op1 = force_reg (mode1, op1);
19795       target = gen_reg_rtx (tmode);
19796     }
19797
19798   pat = GEN_FCN (icode) (target, op0, op1);
19799   if (! pat)
19800     return 0;
19801   emit_insn (pat);
19802   return target;
19803 }
19804
19805 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns.  */
19806
19807 static rtx
19808 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
19809                                enum multi_arg_type m_type,
19810                                enum insn_code sub_code)
19811 {
19812   rtx pat;
19813   int i;
19814   int nargs;
19815   bool comparison_p = false;
19816   bool tf_p = false;
19817   bool last_arg_constant = false;
19818   int num_memory = 0;
19819   struct {
19820     rtx op;
19821     enum machine_mode mode;
19822   } args[4];
19823
19824   enum machine_mode tmode = insn_data[icode].operand[0].mode;
19825
19826   switch (m_type)
19827     {
19828     case MULTI_ARG_3_SF:
19829     case MULTI_ARG_3_DF:
19830     case MULTI_ARG_3_DI:
19831     case MULTI_ARG_3_SI:
19832     case MULTI_ARG_3_SI_DI:
19833     case MULTI_ARG_3_HI:
19834     case MULTI_ARG_3_HI_SI:
19835     case MULTI_ARG_3_QI:
19836     case MULTI_ARG_3_PERMPS:
19837     case MULTI_ARG_3_PERMPD:
19838       nargs = 3;
19839       break;
19840
19841     case MULTI_ARG_2_SF:
19842     case MULTI_ARG_2_DF:
19843     case MULTI_ARG_2_DI:
19844     case MULTI_ARG_2_SI:
19845     case MULTI_ARG_2_HI:
19846     case MULTI_ARG_2_QI:
19847       nargs = 2;
19848       break;
19849
19850     case MULTI_ARG_2_DI_IMM:
19851     case MULTI_ARG_2_SI_IMM:
19852     case MULTI_ARG_2_HI_IMM:
19853     case MULTI_ARG_2_QI_IMM:
19854       nargs = 2;
19855       last_arg_constant = true;
19856       break;
19857
19858     case MULTI_ARG_1_SF:
19859     case MULTI_ARG_1_DF:
19860     case MULTI_ARG_1_DI:
19861     case MULTI_ARG_1_SI:
19862     case MULTI_ARG_1_HI:
19863     case MULTI_ARG_1_QI:
19864     case MULTI_ARG_1_SI_DI:
19865     case MULTI_ARG_1_HI_DI:
19866     case MULTI_ARG_1_HI_SI:
19867     case MULTI_ARG_1_QI_DI:
19868     case MULTI_ARG_1_QI_SI:
19869     case MULTI_ARG_1_QI_HI:
19870     case MULTI_ARG_1_PH2PS:
19871     case MULTI_ARG_1_PS2PH:
19872       nargs = 1;
19873       break;
19874
19875     case MULTI_ARG_2_SF_CMP:
19876     case MULTI_ARG_2_DF_CMP:
19877     case MULTI_ARG_2_DI_CMP:
19878     case MULTI_ARG_2_SI_CMP:
19879     case MULTI_ARG_2_HI_CMP:
19880     case MULTI_ARG_2_QI_CMP:
19881       nargs = 2;
19882       comparison_p = true;
19883       break;
19884
19885     case MULTI_ARG_2_SF_TF:
19886     case MULTI_ARG_2_DF_TF:
19887     case MULTI_ARG_2_DI_TF:
19888     case MULTI_ARG_2_SI_TF:
19889     case MULTI_ARG_2_HI_TF:
19890     case MULTI_ARG_2_QI_TF:
19891       nargs = 2;
19892       tf_p = true;
19893       break;
19894
19895     case MULTI_ARG_UNKNOWN:
19896     default:
19897       gcc_unreachable ();
19898     }
19899
19900   if (optimize || !target
19901       || GET_MODE (target) != tmode
19902       || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19903     target = gen_reg_rtx (tmode);
19904
19905   gcc_assert (nargs <= 4);
19906
19907   for (i = 0; i < nargs; i++)
19908     {
19909       tree arg = CALL_EXPR_ARG (exp, i);
19910       rtx op = expand_normal (arg);
19911       int adjust = (comparison_p) ? 1 : 0;
19912       enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
19913
19914       if (last_arg_constant && i == nargs-1)
19915         {
19916           if (GET_CODE (op) != CONST_INT)
19917             {
19918               error ("last argument must be an immediate");
19919               return gen_reg_rtx (tmode);
19920             }
19921         }
19922       else
19923         {
19924           if (VECTOR_MODE_P (mode))
19925             op = safe_vector_operand (op, mode);
19926
19927           /* If we aren't optimizing, only allow one memory operand to be
19928              generated.  */
19929           if (memory_operand (op, mode))
19930             num_memory++;
19931
19932           gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
19933
19934           if (optimize
19935               || ! (*insn_data[icode].operand[i+adjust+1].predicate) (op, mode)
19936               || num_memory > 1)
19937             op = force_reg (mode, op);
19938         }
19939
19940       args[i].op = op;
19941       args[i].mode = mode;
19942     }
19943
19944   switch (nargs)
19945     {
19946     case 1:
19947       pat = GEN_FCN (icode) (target, args[0].op);
19948       break;
19949
19950     case 2:
19951       if (tf_p)
19952         pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
19953                                GEN_INT ((int)sub_code));
19954       else if (! comparison_p)
19955         pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
19956       else
19957         {
19958           rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
19959                                        args[0].op,
19960                                        args[1].op);
19961
19962           pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
19963         }
19964       break;
19965
19966     case 3:
19967       pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
19968       break;
19969
19970     default:
19971       gcc_unreachable ();
19972     }
19973
19974   if (! pat)
19975     return 0;
19976
19977   emit_insn (pat);
19978   return target;
19979 }
19980
19981 /* Subroutine of ix86_expand_builtin to take care of stores.  */
19982
19983 static rtx
19984 ix86_expand_store_builtin (enum insn_code icode, tree exp)
19985 {
19986   rtx pat;
19987   tree arg0 = CALL_EXPR_ARG (exp, 0);
19988   tree arg1 = CALL_EXPR_ARG (exp, 1);
19989   rtx op0 = expand_normal (arg0);
19990   rtx op1 = expand_normal (arg1);
19991   enum machine_mode mode0 = insn_data[icode].operand[0].mode;
19992   enum machine_mode mode1 = insn_data[icode].operand[1].mode;
19993
19994   if (VECTOR_MODE_P (mode1))
19995     op1 = safe_vector_operand (op1, mode1);
19996
19997   op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
19998   op1 = copy_to_mode_reg (mode1, op1);
19999
20000   pat = GEN_FCN (icode) (op0, op1);
20001   if (pat)
20002     emit_insn (pat);
20003   return 0;
20004 }
20005
20006 /* Subroutine of ix86_expand_builtin to take care of unop insns.  */
20007
20008 static rtx
20009 ix86_expand_unop_builtin (enum insn_code icode, tree exp,
20010                           rtx target, int do_load)
20011 {
20012   rtx pat;
20013   tree arg0 = CALL_EXPR_ARG (exp, 0);
20014   rtx op0 = expand_normal (arg0);
20015   enum machine_mode tmode = insn_data[icode].operand[0].mode;
20016   enum machine_mode mode0 = insn_data[icode].operand[1].mode;
20017
20018   if (optimize || !target
20019       || GET_MODE (target) != tmode
20020       || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20021     target = gen_reg_rtx (tmode);
20022   if (do_load)
20023     op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
20024   else
20025     {
20026       if (VECTOR_MODE_P (mode0))
20027         op0 = safe_vector_operand (op0, mode0);
20028
20029       if ((optimize && !register_operand (op0, mode0))
20030           || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
20031         op0 = copy_to_mode_reg (mode0, op0);
20032     }
20033
20034   switch (icode)
20035     {
20036     case CODE_FOR_sse4_1_roundpd:
20037     case CODE_FOR_sse4_1_roundps:
20038         {
20039           tree arg1 = CALL_EXPR_ARG (exp, 1);
20040           rtx op1 = expand_normal (arg1);
20041           enum machine_mode mode1 = insn_data[icode].operand[2].mode;
20042
20043           if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
20044             {
20045               error ("the second argument must be a 4-bit immediate");
20046               return const0_rtx;
20047             }
20048           pat = GEN_FCN (icode) (target, op0, op1);
20049         }
20050       break;
20051     default:
20052       pat = GEN_FCN (icode) (target, op0);
20053       break;
20054     }
20055
20056   if (! pat)
20057     return 0;
20058   emit_insn (pat);
20059   return target;
20060 }
20061
20062 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
20063    sqrtss, rsqrtss, rcpss.  */
20064
20065 static rtx
20066 ix86_expand_unop1_builtin (enum insn_code icode, tree exp, rtx target)
20067 {
20068   rtx pat;
20069   tree arg0 = CALL_EXPR_ARG (exp, 0);
20070   rtx op1, op0 = expand_normal (arg0);
20071   enum machine_mode tmode = insn_data[icode].operand[0].mode;
20072   enum machine_mode mode0 = insn_data[icode].operand[1].mode;
20073
20074   if (optimize || !target
20075       || GET_MODE (target) != tmode
20076       || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20077     target = gen_reg_rtx (tmode);
20078
20079   if (VECTOR_MODE_P (mode0))
20080     op0 = safe_vector_operand (op0, mode0);
20081
20082   if ((optimize && !register_operand (op0, mode0))
20083       || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
20084     op0 = copy_to_mode_reg (mode0, op0);
20085
20086   op1 = op0;
20087   if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
20088     op1 = copy_to_mode_reg (mode0, op1);
20089
20090   pat = GEN_FCN (icode) (target, op0, op1);
20091   if (! pat)
20092     return 0;
20093   emit_insn (pat);
20094   return target;
20095 }
20096
20097 /* Subroutine of ix86_expand_builtin to take care of comparison insns.  */
20098
20099 static rtx
20100 ix86_expand_sse_compare (const struct builtin_description *d, tree exp,
20101                          rtx target)
20102 {
20103   rtx pat;
20104   tree arg0 = CALL_EXPR_ARG (exp, 0);
20105   tree arg1 = CALL_EXPR_ARG (exp, 1);
20106   rtx op0 = expand_normal (arg0);
20107   rtx op1 = expand_normal (arg1);
20108   rtx op2;
20109   enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
20110   enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
20111   enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
20112   enum rtx_code comparison = d->comparison;
20113
20114   if (VECTOR_MODE_P (mode0))
20115     op0 = safe_vector_operand (op0, mode0);
20116   if (VECTOR_MODE_P (mode1))
20117     op1 = safe_vector_operand (op1, mode1);
20118
20119   /* Swap operands if we have a comparison that isn't available in
20120      hardware.  */
20121   if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
20122     {
20123       rtx tmp = gen_reg_rtx (mode1);
20124       emit_move_insn (tmp, op1);
20125       op1 = op0;
20126       op0 = tmp;
20127     }
20128
20129   if (optimize || !target
20130       || GET_MODE (target) != tmode
20131       || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
20132     target = gen_reg_rtx (tmode);
20133
20134   if ((optimize && !register_operand (op0, mode0))
20135       || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
20136     op0 = copy_to_mode_reg (mode0, op0);
20137   if ((optimize && !register_operand (op1, mode1))
20138       || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
20139     op1 = copy_to_mode_reg (mode1, op1);
20140
20141   op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
20142   pat = GEN_FCN (d->icode) (target, op0, op1, op2);
20143   if (! pat)
20144     return 0;
20145   emit_insn (pat);
20146   return target;
20147 }
20148
20149 /* Subroutine of ix86_expand_builtin to take care of comi insns.  */
20150
20151 static rtx
20152 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
20153                       rtx target)
20154 {
20155   rtx pat;
20156   tree arg0 = CALL_EXPR_ARG (exp, 0);
20157   tree arg1 = CALL_EXPR_ARG (exp, 1);
20158   rtx op0 = expand_normal (arg0);
20159   rtx op1 = expand_normal (arg1);
20160   enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
20161   enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
20162   enum rtx_code comparison = d->comparison;
20163
20164   if (VECTOR_MODE_P (mode0))
20165     op0 = safe_vector_operand (op0, mode0);
20166   if (VECTOR_MODE_P (mode1))
20167     op1 = safe_vector_operand (op1, mode1);
20168
20169   /* Swap operands if we have a comparison that isn't available in
20170      hardware.  */
20171   if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
20172     {
20173       rtx tmp = op1;
20174       op1 = op0;
20175       op0 = tmp;
20176     }
20177
20178   target = gen_reg_rtx (SImode);
20179   emit_move_insn (target, const0_rtx);
20180   target = gen_rtx_SUBREG (QImode, target, 0);
20181
20182   if ((optimize && !register_operand (op0, mode0))
20183       || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
20184     op0 = copy_to_mode_reg (mode0, op0);
20185   if ((optimize && !register_operand (op1, mode1))
20186       || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
20187     op1 = copy_to_mode_reg (mode1, op1);
20188
20189   pat = GEN_FCN (d->icode) (op0, op1);
20190   if (! pat)
20191     return 0;
20192   emit_insn (pat);
20193   emit_insn (gen_rtx_SET (VOIDmode,
20194                           gen_rtx_STRICT_LOW_PART (VOIDmode, target),
20195                           gen_rtx_fmt_ee (comparison, QImode,
20196                                           SET_DEST (pat),
20197                                           const0_rtx)));
20198
20199   return SUBREG_REG (target);
20200 }
20201
20202 /* Subroutine of ix86_expand_builtin to take care of ptest insns.  */
20203
20204 static rtx
20205 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
20206                        rtx target)
20207 {
20208   rtx pat;
20209   tree arg0 = CALL_EXPR_ARG (exp, 0);
20210   tree arg1 = CALL_EXPR_ARG (exp, 1);
20211   rtx op0 = expand_normal (arg0);
20212   rtx op1 = expand_normal (arg1);
20213   enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
20214   enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
20215   enum rtx_code comparison = d->comparison;
20216
20217   if (VECTOR_MODE_P (mode0))
20218     op0 = safe_vector_operand (op0, mode0);
20219   if (VECTOR_MODE_P (mode1))
20220     op1 = safe_vector_operand (op1, mode1);
20221
20222   target = gen_reg_rtx (SImode);
20223   emit_move_insn (target, const0_rtx);
20224   target = gen_rtx_SUBREG (QImode, target, 0);
20225
20226   if ((optimize && !register_operand (op0, mode0))
20227       || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
20228     op0 = copy_to_mode_reg (mode0, op0);
20229   if ((optimize && !register_operand (op1, mode1))
20230       || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
20231     op1 = copy_to_mode_reg (mode1, op1);
20232
20233   pat = GEN_FCN (d->icode) (op0, op1);
20234   if (! pat)
20235     return 0;
20236   emit_insn (pat);
20237   emit_insn (gen_rtx_SET (VOIDmode,
20238                           gen_rtx_STRICT_LOW_PART (VOIDmode, target),
20239                           gen_rtx_fmt_ee (comparison, QImode,
20240                                           SET_DEST (pat),
20241                                           const0_rtx)));
20242
20243   return SUBREG_REG (target);
20244 }
20245
20246 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns.  */
20247
20248 static rtx
20249 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
20250                           tree exp, rtx target)
20251 {
20252   rtx pat;
20253   tree arg0 = CALL_EXPR_ARG (exp, 0);
20254   tree arg1 = CALL_EXPR_ARG (exp, 1);
20255   tree arg2 = CALL_EXPR_ARG (exp, 2);
20256   tree arg3 = CALL_EXPR_ARG (exp, 3);
20257   tree arg4 = CALL_EXPR_ARG (exp, 4);
20258   rtx scratch0, scratch1;
20259   rtx op0 = expand_normal (arg0);
20260   rtx op1 = expand_normal (arg1);
20261   rtx op2 = expand_normal (arg2);
20262   rtx op3 = expand_normal (arg3);
20263   rtx op4 = expand_normal (arg4);
20264   enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
20265
20266   tmode0 = insn_data[d->icode].operand[0].mode;
20267   tmode1 = insn_data[d->icode].operand[1].mode;
20268   modev2 = insn_data[d->icode].operand[2].mode;
20269   modei3 = insn_data[d->icode].operand[3].mode;
20270   modev4 = insn_data[d->icode].operand[4].mode;
20271   modei5 = insn_data[d->icode].operand[5].mode;
20272   modeimm = insn_data[d->icode].operand[6].mode;
20273
20274   if (VECTOR_MODE_P (modev2))
20275     op0 = safe_vector_operand (op0, modev2);
20276   if (VECTOR_MODE_P (modev4))
20277     op2 = safe_vector_operand (op2, modev4);
20278
20279   if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
20280     op0 = copy_to_mode_reg (modev2, op0);
20281   if (! (*insn_data[d->icode].operand[3].predicate) (op1, modei3))
20282     op1 = copy_to_mode_reg (modei3, op1);
20283   if ((optimize && !register_operand (op2, modev4))
20284       || !(*insn_data[d->icode].operand[4].predicate) (op2, modev4))
20285     op2 = copy_to_mode_reg (modev4, op2);
20286   if (! (*insn_data[d->icode].operand[5].predicate) (op3, modei5))
20287     op3 = copy_to_mode_reg (modei5, op3);
20288
20289   if (! (*insn_data[d->icode].operand[6].predicate) (op4, modeimm))
20290     {
20291       error ("the fifth argument must be a 8-bit immediate");
20292       return const0_rtx;
20293     }
20294
20295   if (d->code == IX86_BUILTIN_PCMPESTRI128)
20296     {
20297       if (optimize || !target
20298           || GET_MODE (target) != tmode0
20299           || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
20300         target = gen_reg_rtx (tmode0);
20301
20302       scratch1 = gen_reg_rtx (tmode1);
20303
20304       pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
20305     }
20306   else if (d->code == IX86_BUILTIN_PCMPESTRM128)
20307     {
20308       if (optimize || !target
20309           || GET_MODE (target) != tmode1
20310           || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
20311         target = gen_reg_rtx (tmode1);
20312
20313       scratch0 = gen_reg_rtx (tmode0);
20314
20315       pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
20316     }
20317   else
20318     {
20319       gcc_assert (d->flag);
20320
20321       scratch0 = gen_reg_rtx (tmode0);
20322       scratch1 = gen_reg_rtx (tmode1);
20323
20324       pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
20325     }
20326
20327   if (! pat)
20328     return 0;
20329
20330   emit_insn (pat);
20331
20332   if (d->flag)
20333     {
20334       target = gen_reg_rtx (SImode);
20335       emit_move_insn (target, const0_rtx);
20336       target = gen_rtx_SUBREG (QImode, target, 0);
20337
20338       emit_insn
20339         (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
20340                       gen_rtx_fmt_ee (EQ, QImode,
20341                                       gen_rtx_REG ((enum machine_mode) d->flag,
20342                                                    FLAGS_REG),
20343                                       const0_rtx)));
20344       return SUBREG_REG (target);
20345     }
20346   else
20347     return target;
20348 }
20349
20350
20351 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns.  */
20352
20353 static rtx
20354 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
20355                           tree exp, rtx target)
20356 {
20357   rtx pat;
20358   tree arg0 = CALL_EXPR_ARG (exp, 0);
20359   tree arg1 = CALL_EXPR_ARG (exp, 1);
20360   tree arg2 = CALL_EXPR_ARG (exp, 2);
20361   rtx scratch0, scratch1;
20362   rtx op0 = expand_normal (arg0);
20363   rtx op1 = expand_normal (arg1);
20364   rtx op2 = expand_normal (arg2);
20365   enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
20366
20367   tmode0 = insn_data[d->icode].operand[0].mode;
20368   tmode1 = insn_data[d->icode].operand[1].mode;
20369   modev2 = insn_data[d->icode].operand[2].mode;
20370   modev3 = insn_data[d->icode].operand[3].mode;
20371   modeimm = insn_data[d->icode].operand[4].mode;
20372
20373   if (VECTOR_MODE_P (modev2))
20374     op0 = safe_vector_operand (op0, modev2);
20375   if (VECTOR_MODE_P (modev3))
20376     op1 = safe_vector_operand (op1, modev3);
20377
20378   if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
20379     op0 = copy_to_mode_reg (modev2, op0);
20380   if ((optimize && !register_operand (op1, modev3))
20381       || !(*insn_data[d->icode].operand[3].predicate) (op1, modev3))
20382     op1 = copy_to_mode_reg (modev3, op1);
20383
20384   if (! (*insn_data[d->icode].operand[4].predicate) (op2, modeimm))
20385     {
20386       error ("the third argument must be a 8-bit immediate");
20387       return const0_rtx;
20388     }
20389
20390   if (d->code == IX86_BUILTIN_PCMPISTRI128)
20391     {
20392       if (optimize || !target
20393           || GET_MODE (target) != tmode0
20394           || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
20395         target = gen_reg_rtx (tmode0);
20396
20397       scratch1 = gen_reg_rtx (tmode1);
20398
20399       pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
20400     }
20401   else if (d->code == IX86_BUILTIN_PCMPISTRM128)
20402     {
20403       if (optimize || !target
20404           || GET_MODE (target) != tmode1
20405           || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
20406         target = gen_reg_rtx (tmode1);
20407
20408       scratch0 = gen_reg_rtx (tmode0);
20409
20410       pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
20411     }
20412   else
20413     {
20414       gcc_assert (d->flag);
20415
20416       scratch0 = gen_reg_rtx (tmode0);
20417       scratch1 = gen_reg_rtx (tmode1);
20418
20419       pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
20420     }
20421
20422   if (! pat)
20423     return 0;
20424
20425   emit_insn (pat);
20426
20427   if (d->flag)
20428     {
20429       target = gen_reg_rtx (SImode);
20430       emit_move_insn (target, const0_rtx);
20431       target = gen_rtx_SUBREG (QImode, target, 0);
20432
20433       emit_insn
20434         (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
20435                       gen_rtx_fmt_ee (EQ, QImode,
20436                                       gen_rtx_REG ((enum machine_mode) d->flag,
20437                                                    FLAGS_REG),
20438                                       const0_rtx)));
20439       return SUBREG_REG (target);
20440     }
20441   else
20442     return target;
20443 }
20444
20445 /* Return the integer constant in ARG.  Constrain it to be in the range
20446    of the subparts of VEC_TYPE; issue an error if not.  */
20447
20448 static int
20449 get_element_number (tree vec_type, tree arg)
20450 {
20451   unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
20452
20453   if (!host_integerp (arg, 1)
20454       || (elt = tree_low_cst (arg, 1), elt > max))
20455     {
20456       error ("selector must be an integer constant in the range 0..%wi", max);
20457       return 0;
20458     }
20459
20460   return elt;
20461 }
20462
20463 /* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
20464    ix86_expand_vector_init.  We DO have language-level syntax for this, in
20465    the form of  (type){ init-list }.  Except that since we can't place emms
20466    instructions from inside the compiler, we can't allow the use of MMX
20467    registers unless the user explicitly asks for it.  So we do *not* define
20468    vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md.  Instead
20469    we have builtins invoked by mmintrin.h that gives us license to emit
20470    these sorts of instructions.  */
20471
20472 static rtx
20473 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
20474 {
20475   enum machine_mode tmode = TYPE_MODE (type);
20476   enum machine_mode inner_mode = GET_MODE_INNER (tmode);
20477   int i, n_elt = GET_MODE_NUNITS (tmode);
20478   rtvec v = rtvec_alloc (n_elt);
20479
20480   gcc_assert (VECTOR_MODE_P (tmode));
20481   gcc_assert (call_expr_nargs (exp) == n_elt);
20482
20483   for (i = 0; i < n_elt; ++i)
20484     {
20485       rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
20486       RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
20487     }
20488
20489   if (!target || !register_operand (target, tmode))
20490     target = gen_reg_rtx (tmode);
20491
20492   ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
20493   return target;
20494 }
20495
20496 /* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
20497    ix86_expand_vector_extract.  They would be redundant (for non-MMX) if we
20498    had a language-level syntax for referencing vector elements.  */
20499
20500 static rtx
20501 ix86_expand_vec_ext_builtin (tree exp, rtx target)
20502 {
20503   enum machine_mode tmode, mode0;
20504   tree arg0, arg1;
20505   int elt;
20506   rtx op0;
20507
20508   arg0 = CALL_EXPR_ARG (exp, 0);
20509   arg1 = CALL_EXPR_ARG (exp, 1);
20510
20511   op0 = expand_normal (arg0);
20512   elt = get_element_number (TREE_TYPE (arg0), arg1);
20513
20514   tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
20515   mode0 = TYPE_MODE (TREE_TYPE (arg0));
20516   gcc_assert (VECTOR_MODE_P (mode0));
20517
20518   op0 = force_reg (mode0, op0);
20519
20520   if (optimize || !target || !register_operand (target, tmode))
20521     target = gen_reg_rtx (tmode);
20522
20523   ix86_expand_vector_extract (true, target, op0, elt);
20524
20525   return target;
20526 }
20527
20528 /* A subroutine of ix86_expand_builtin.  Expand a vec_set builtin call EXP:
20529    argument 0 is the vector, argument 1 the new element value and argument 2
20530    the element number.  The update itself is done by ix86_expand_vector_set
20531    on a copy of the vector, and that copy is the value returned.  */
20532
20533 static rtx
20534 ix86_expand_vec_set_builtin (tree exp)
20535 {
20536   tree vec_arg = CALL_EXPR_ARG (exp, 0);
20537   tree val_arg = CALL_EXPR_ARG (exp, 1);
20538   tree idx_arg = CALL_EXPR_ARG (exp, 2);
20539   tree vec_type = TREE_TYPE (vec_arg);
20540   enum machine_mode vec_mode = TYPE_MODE (vec_type);
20541   enum machine_mode elt_mode = TYPE_MODE (TREE_TYPE (vec_type));
20542   rtx vec, val, result;
20543   int idx;
20544
20545   gcc_assert (VECTOR_MODE_P (vec_mode));
20546
20547   vec = expand_expr (vec_arg, NULL_RTX, vec_mode, EXPAND_NORMAL);
20548   val = expand_expr (val_arg, NULL_RTX, elt_mode, EXPAND_NORMAL);
20549   idx = get_element_number (vec_type, idx_arg);
20550
20551   /* Convert the value to the element mode unless it already has that
20552      mode or carries VOIDmode.  */
20553   if (GET_MODE (val) != VOIDmode && GET_MODE (val) != elt_mode)
20554     val = convert_modes (elt_mode, GET_MODE (val), val, true);
20555
20556   vec = force_reg (vec_mode, vec);
20557   val = force_reg (elt_mode, val);
20558
20559   /* The source vector must stay untouched, so the element is stored into
20560      a fresh register that starts out as a copy of it.  */
20561   result = gen_reg_rtx (vec_mode);
20562   emit_move_insn (result, vec);
20563   ix86_expand_vector_set (true, result, val, idx);
20564   return result;
20565 }
20566
20567 /* Expand an expression EXP that calls a built-in function,
20568    with result going to TARGET if that's convenient
20569    (and in mode MODE if that's convenient).
20570    SUBTARGET may be used as the target for computing one of EXP's operands.
20571    IGNORE is nonzero if the value is to be ignored.

         The function code is taken from the decl that EXP calls.  Codes that
         need hand-written operand handling are expanded in the switch below;
         every other code is searched for in the bdesc_* tables, one table
         after another, and passed to that table's expander.  A code that is
         found nowhere reaches gcc_unreachable.

         SUBTARGET and IGNORE are not referenced in the body.  MODE is
         assigned and used in the PALIGNR cases and read in the INFQ case.

         Cases expanded inline that produce no value return 0, NULL_RTX or
         const0_rtx; cases that produce a value return the rtx holding it;
         cases that delegate return whatever the helper returns.  When
         GEN_FCN yields a null pattern the case returns 0 / NULL_RTX without
         emitting anything.  After reporting a non-immediate operand with
         error, a case returns either const0_rtx or a fresh pseudo of the
         result mode.  */
20572
20573 static rtx
20574 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
20575                      enum machine_mode mode ATTRIBUTE_UNUSED,
20576                      int ignore ATTRIBUTE_UNUSED)
20577 {
20578   const struct builtin_description *d;
20579   size_t i;
20580   enum insn_code icode;
20581   tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
20582   tree arg0, arg1, arg2, arg3;
20583   rtx op0, op1, op2, op3, pat;
20584   enum machine_mode tmode, mode0, mode1, mode2, mode3, mode4;
20585   unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
20586
        /* Builtins that need their own operand handling.  */
20587   switch (fcode)
20588     {
20589     case IX86_BUILTIN_EMMS:
20590       emit_insn (gen_mmx_emms ());
20591       return 0;
20592
20593     case IX86_BUILTIN_SFENCE:
20594       emit_insn (gen_sse_sfence ());
20595       return 0;
20596
20597     case IX86_BUILTIN_MASKMOVQ:
20598     case IX86_BUILTIN_MASKMOVDQU:
20599       icode = (fcode == IX86_BUILTIN_MASKMOVQ
20600                ? CODE_FOR_mmx_maskmovq
20601                : CODE_FOR_sse2_maskmovdqu);
20602       /* Note the arg order is different from the operand order.  */
20603       arg1 = CALL_EXPR_ARG (exp, 0);
20604       arg2 = CALL_EXPR_ARG (exp, 1);
20605       arg0 = CALL_EXPR_ARG (exp, 2);
20606       op0 = expand_normal (arg0);
20607       op1 = expand_normal (arg1);
20608       op2 = expand_normal (arg2);
20609       mode0 = insn_data[icode].operand[0].mode;
20610       mode1 = insn_data[icode].operand[1].mode;
20611       mode2 = insn_data[icode].operand[2].mode;
20612
            /* The third call argument is used as an address: it is forced
               into a Pmode register and wrapped in a MEM that becomes
               operand 0.
               NOTE(review): the MEM is built in MODE1 but then checked
               against operand 0's predicate in MODE0 -- presumably both
               operands have the same mode; confirm in the insn patterns.  */
20613       op0 = force_reg (Pmode, op0);
20614       op0 = gen_rtx_MEM (mode1, op0);
20615
20616       if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
20617         op0 = copy_to_mode_reg (mode0, op0);
20618       if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
20619         op1 = copy_to_mode_reg (mode1, op1);
20620       if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
20621         op2 = copy_to_mode_reg (mode2, op2);
20622       pat = GEN_FCN (icode) (op0, op1, op2);
20623       if (! pat)
20624         return 0;
20625       emit_insn (pat);
20626       return 0;
20627
20628     case IX86_BUILTIN_RSQRTF:
20629       return ix86_expand_unop1_builtin (CODE_FOR_rsqrtsf2, exp, target);
20630
20631     case IX86_BUILTIN_SQRTSS:
20632       return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, exp, target);
20633     case IX86_BUILTIN_RSQRTSS:
20634       return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, exp, target);
20635     case IX86_BUILTIN_RCPSS:
20636       return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, exp, target);
20637
20638     case IX86_BUILTIN_LOADUPS:
20639       return ix86_expand_unop_builtin (CODE_FOR_sse_movups, exp, target, 1);
20640
20641     case IX86_BUILTIN_STOREUPS:
20642       return ix86_expand_store_builtin (CODE_FOR_sse_movups, exp);
20643
20644     case IX86_BUILTIN_LOADHPS:
20645     case IX86_BUILTIN_LOADLPS:
20646     case IX86_BUILTIN_LOADHPD:
20647     case IX86_BUILTIN_LOADLPD:
20648       icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
20649                : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
20650                : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
20651                : CODE_FOR_sse2_loadlpd);
20652       arg0 = CALL_EXPR_ARG (exp, 0);
20653       arg1 = CALL_EXPR_ARG (exp, 1);
20654       op0 = expand_normal (arg0);
20655       op1 = expand_normal (arg1);
20656       tmode = insn_data[icode].operand[0].mode;
20657       mode0 = insn_data[icode].operand[1].mode;
20658       mode1 = insn_data[icode].operand[2].mode;
20659
            /* The first argument goes in a register; the second is used as
               an address and turned into a MEM for operand 2.  */
20660       op0 = force_reg (mode0, op0);
20661       op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
20662       if (optimize || target == 0
20663           || GET_MODE (target) != tmode
20664           || !register_operand (target, tmode))
20665         target = gen_reg_rtx (tmode);
20666       pat = GEN_FCN (icode) (target, op0, op1);
20667       if (! pat)
20668         return 0;
20669       emit_insn (pat);
20670       return target;
20671
20672     case IX86_BUILTIN_STOREHPS:
20673     case IX86_BUILTIN_STORELPS:
20674       icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
20675                : CODE_FOR_sse_storelps);
20676       arg0 = CALL_EXPR_ARG (exp, 0);
20677       arg1 = CALL_EXPR_ARG (exp, 1);
20678       op0 = expand_normal (arg0);
20679       op1 = expand_normal (arg1);
20680       mode0 = insn_data[icode].operand[0].mode;
20681       mode1 = insn_data[icode].operand[1].mode;
20682
            /* The first argument is the address of the MEM passed as
               operand 0; the second is forced into a register.  */
20683       op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
20684       op1 = force_reg (mode1, op1);
20685
20686       pat = GEN_FCN (icode) (op0, op1);
20687       if (! pat)
20688         return 0;
20689       emit_insn (pat);
20690       return const0_rtx;
20691
20692     case IX86_BUILTIN_MOVNTPS:
20693       return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, exp);
20694     case IX86_BUILTIN_MOVNTQ:
20695       return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, exp);
20696
20697     case IX86_BUILTIN_LDMXCSR:
            /* The argument is first stored to an SImode stack slot, and the
               slot is what the ldmxcsr pattern receives.  */
20698       op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
20699       target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
20700       emit_move_insn (target, op0);
20701       emit_insn (gen_sse_ldmxcsr (target));
20702       return 0;
20703
20704     case IX86_BUILTIN_STMXCSR:
            /* An SImode stack slot is passed to the stmxcsr pattern and its
               contents are then copied into a new SImode register, which is
               the value returned.  */
20705       target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
20706       emit_insn (gen_sse_stmxcsr (target));
20707       return copy_to_mode_reg (SImode, target);
20708
20709     case IX86_BUILTIN_SHUFPS:
20710     case IX86_BUILTIN_SHUFPD:
20711       icode = (fcode == IX86_BUILTIN_SHUFPS
20712                ? CODE_FOR_sse_shufps
20713                : CODE_FOR_sse2_shufpd);
20714       arg0 = CALL_EXPR_ARG (exp, 0);
20715       arg1 = CALL_EXPR_ARG (exp, 1);
20716       arg2 = CALL_EXPR_ARG (exp, 2);
20717       op0 = expand_normal (arg0);
20718       op1 = expand_normal (arg1);
20719       op2 = expand_normal (arg2);
20720       tmode = insn_data[icode].operand[0].mode;
20721       mode0 = insn_data[icode].operand[1].mode;
20722       mode1 = insn_data[icode].operand[2].mode;
20723       mode2 = insn_data[icode].operand[3].mode;
20724
20725       if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
20726         op0 = copy_to_mode_reg (mode0, op0);
            /* When optimizing, the second input is copied to a register
               whenever it is not one already, even if the predicate would
               accept it.  */
20727       if ((optimize && !register_operand (op1, mode1))
20728           || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
20729         op1 = copy_to_mode_reg (mode1, op1);
20730       if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
20731         {
20732           /* @@@ better error message */
20733           error ("mask must be an immediate");
20734           return gen_reg_rtx (tmode);
20735         }
20736       if (optimize || target == 0
20737           || GET_MODE (target) != tmode
20738           || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20739         target = gen_reg_rtx (tmode);
20740       pat = GEN_FCN (icode) (target, op0, op1, op2);
20741       if (! pat)
20742         return 0;
20743       emit_insn (pat);
20744       return target;
20745
20746     case IX86_BUILTIN_PSHUFW:
20747     case IX86_BUILTIN_PSHUFD:
20748     case IX86_BUILTIN_PSHUFHW:
20749     case IX86_BUILTIN_PSHUFLW:
20750       icode = (  fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
20751                : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
20752                : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
20753                : CODE_FOR_mmx_pshufw);
20754       arg0 = CALL_EXPR_ARG (exp, 0);
20755       arg1 = CALL_EXPR_ARG (exp, 1);
20756       op0 = expand_normal (arg0);
20757       op1 = expand_normal (arg1);
20758       tmode = insn_data[icode].operand[0].mode;
20759       mode1 = insn_data[icode].operand[1].mode;
20760       mode2 = insn_data[icode].operand[2].mode;
20761
20762       if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
20763         op0 = copy_to_mode_reg (mode1, op0);
20764       if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
20765         {
20766           /* @@@ better error message */
20767           error ("mask must be an immediate");
20768           return const0_rtx;
20769         }
            /* Unlike most cases in this switch, an acceptable TARGET is
               reused here even when optimizing.  */
20770       if (target == 0
20771           || GET_MODE (target) != tmode
20772           || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20773         target = gen_reg_rtx (tmode);
20774       pat = GEN_FCN (icode) (target, op0, op1);
20775       if (! pat)
20776         return 0;
20777       emit_insn (pat);
20778       return target;
20779
          /* Each pair of shift builtins below (with and without the I
             suffix) selects one shift pattern and shares the expansion code
             at do_pshift.  */
20780     case IX86_BUILTIN_PSLLW128:
20781     case IX86_BUILTIN_PSLLWI128:
20782       icode = CODE_FOR_ashlv8hi3;
20783       goto do_pshift;
20784     case IX86_BUILTIN_PSLLD128:
20785     case IX86_BUILTIN_PSLLDI128:
20786       icode = CODE_FOR_ashlv4si3;
20787       goto do_pshift;
20788     case IX86_BUILTIN_PSLLQ128:
20789     case IX86_BUILTIN_PSLLQI128:
20790       icode = CODE_FOR_ashlv2di3;
20791       goto do_pshift;
20792     case IX86_BUILTIN_PSRAW128:
20793     case IX86_BUILTIN_PSRAWI128:
20794       icode = CODE_FOR_ashrv8hi3;
20795       goto do_pshift;
20796     case IX86_BUILTIN_PSRAD128:
20797     case IX86_BUILTIN_PSRADI128:
20798       icode = CODE_FOR_ashrv4si3;
20799       goto do_pshift;
20800     case IX86_BUILTIN_PSRLW128:
20801     case IX86_BUILTIN_PSRLWI128:
20802       icode = CODE_FOR_lshrv8hi3;
20803       goto do_pshift;
20804     case IX86_BUILTIN_PSRLD128:
20805     case IX86_BUILTIN_PSRLDI128:
20806       icode = CODE_FOR_lshrv4si3;
20807       goto do_pshift;
20808     case IX86_BUILTIN_PSRLQ128:
20809     case IX86_BUILTIN_PSRLQI128:
20810       icode = CODE_FOR_lshrv2di3;
            /* Falls through into do_pshift.  */
20811
20812     do_pshift:
20813       arg0 = CALL_EXPR_ARG (exp, 0);
20814       arg1 = CALL_EXPR_ARG (exp, 1);
20815       op0 = expand_normal (arg0);
20816       op1 = expand_normal (arg1);
20817
20818       tmode = insn_data[icode].operand[0].mode;
20819       mode1 = insn_data[icode].operand[1].mode;
20820
20821       if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
20822         op0 = copy_to_reg (op0);
20823
            /* A count that is not a CONST_INT is viewed in SImode through a
               subreg before it is checked against operand 2's predicate.  */
20824       if (!CONST_INT_P (op1))
20825         op1 = simplify_gen_subreg (SImode, op1, GET_MODE (op1), 0);
20826
20827       if (! (*insn_data[icode].operand[2].predicate) (op1, SImode))
20828         op1 = copy_to_reg (op1);
20829
            /* The result always goes to a fresh pseudo; the incoming TARGET
               is not consulted.  */
20830       target = gen_reg_rtx (tmode);
20831       pat = GEN_FCN (icode) (target, op0, op1);
20832       if (!pat)
20833         return 0;
20834       emit_insn (pat);
20835       return target;
20836
20837     case IX86_BUILTIN_PSLLDQI128:
20838     case IX86_BUILTIN_PSRLDQI128:
20839       icode = (fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
20840                : CODE_FOR_sse2_lshrti3);
20841       arg0 = CALL_EXPR_ARG (exp, 0);
20842       arg1 = CALL_EXPR_ARG (exp, 1);
20843       op0 = expand_normal (arg0);
20844       op1 = expand_normal (arg1);
20845       tmode = insn_data[icode].operand[0].mode;
20846       mode1 = insn_data[icode].operand[1].mode;
20847       mode2 = insn_data[icode].operand[2].mode;
20848
20849       if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
20850         {
20851           op0 = copy_to_reg (op0);
20852           op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
20853         }
20854       if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
20855         {
20856           error ("shift must be an immediate");
20857           return const0_rtx;
20858         }
            /* The returned pseudo is V2DImode; the pattern, whose operand 0
               has mode TMODE, is given a TMODE subreg of it.  */
20859       target = gen_reg_rtx (V2DImode);
20860       pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0),
20861                              op0, op1);
20862       if (! pat)
20863         return 0;
20864       emit_insn (pat);
20865       return target;
20866
20867     case IX86_BUILTIN_FEMMS:
20868       emit_insn (gen_mmx_femms ());
20869       return NULL_RTX;
20870
          /* The cases from here to PSWAPDSF only pick an mmx_* insn code and
             hand it to the generic unary or binary expander.  */
20871     case IX86_BUILTIN_PAVGUSB:
20872       return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, exp, target);
20873
20874     case IX86_BUILTIN_PF2ID:
20875       return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, exp, target, 0);
20876
20877     case IX86_BUILTIN_PFACC:
20878       return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, exp, target);
20879
20880     case IX86_BUILTIN_PFADD:
20881      return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, exp, target);
20882
20883     case IX86_BUILTIN_PFCMPEQ:
20884       return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, exp, target);
20885
20886     case IX86_BUILTIN_PFCMPGE:
20887       return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, exp, target);
20888
20889     case IX86_BUILTIN_PFCMPGT:
20890       return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, exp, target);
20891
20892     case IX86_BUILTIN_PFMAX:
20893       return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, exp, target);
20894
20895     case IX86_BUILTIN_PFMIN:
20896       return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, exp, target);
20897
20898     case IX86_BUILTIN_PFMUL:
20899       return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, exp, target);
20900
20901     case IX86_BUILTIN_PFRCP:
20902       return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, exp, target, 0);
20903
20904     case IX86_BUILTIN_PFRCPIT1:
20905       return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, exp, target);
20906
20907     case IX86_BUILTIN_PFRCPIT2:
20908       return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, exp, target);
20909
20910     case IX86_BUILTIN_PFRSQIT1:
20911       return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, exp, target);
20912
20913     case IX86_BUILTIN_PFRSQRT:
20914       return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, exp, target, 0);
20915
20916     case IX86_BUILTIN_PFSUB:
20917       return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, exp, target);
20918
20919     case IX86_BUILTIN_PFSUBR:
20920       return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, exp, target);
20921
20922     case IX86_BUILTIN_PI2FD:
20923       return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, exp, target, 0);
20924
20925     case IX86_BUILTIN_PMULHRW:
20926       return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, exp, target);
20927
20928     case IX86_BUILTIN_PF2IW:
20929       return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, exp, target, 0);
20930
20931     case IX86_BUILTIN_PFNACC:
20932       return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, exp, target);
20933
20934     case IX86_BUILTIN_PFPNACC:
20935       return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, exp, target);
20936
20937     case IX86_BUILTIN_PI2FW:
20938       return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, exp, target, 0);
20939
20940     case IX86_BUILTIN_PSWAPDSI:
20941       return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, exp, target, 0);
20942
20943     case IX86_BUILTIN_PSWAPDSF:
20944       return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, exp, target, 0);
20945
20946     case IX86_BUILTIN_SQRTSD:
20947       return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, exp, target);
20948     case IX86_BUILTIN_LOADUPD:
20949       return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, exp, target, 1);
20950     case IX86_BUILTIN_STOREUPD:
20951       return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, exp);
20952
20953     case IX86_BUILTIN_MFENCE:
20954         emit_insn (gen_sse2_mfence ());
20955         return 0;
20956     case IX86_BUILTIN_LFENCE:
20957         emit_insn (gen_sse2_lfence ());
20958         return 0;
20959
20960     case IX86_BUILTIN_CLFLUSH:
20961         arg0 = CALL_EXPR_ARG (exp, 0);
20962         op0 = expand_normal (arg0);
20963         icode = CODE_FOR_sse2_clflush;
              /* Copy the operand to a Pmode register when the pattern's
                 predicate rejects it as is.  */
20964         if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
20965             op0 = copy_to_mode_reg (Pmode, op0);
20966
20967         emit_insn (gen_sse2_clflush (op0));
20968         return 0;
20969
20970     case IX86_BUILTIN_MOVNTPD:
20971       return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, exp);
20972     case IX86_BUILTIN_MOVNTDQ:
20973       return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, exp);
20974     case IX86_BUILTIN_MOVNTI:
20975       return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, exp);
20976
20977     case IX86_BUILTIN_LOADDQU:
20978       return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, exp, target, 1);
20979     case IX86_BUILTIN_STOREDQU:
20980       return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, exp);
20981
20982     case IX86_BUILTIN_MONITOR:
20983       arg0 = CALL_EXPR_ARG (exp, 0);
20984       arg1 = CALL_EXPR_ARG (exp, 1);
20985       arg2 = CALL_EXPR_ARG (exp, 2);
20986       op0 = expand_normal (arg0);
20987       op1 = expand_normal (arg1);
20988       op2 = expand_normal (arg2);
            /* All three operands must be registers: the first in Pmode, the
               other two in SImode.  */
20989       if (!REG_P (op0))
20990         op0 = copy_to_mode_reg (Pmode, op0);
20991       if (!REG_P (op1))
20992         op1 = copy_to_mode_reg (SImode, op1);
20993       if (!REG_P (op2))
20994         op2 = copy_to_mode_reg (SImode, op2);
            /* A separate pattern is used for the 64-bit target.  */
20995       if (!TARGET_64BIT)
20996         emit_insn (gen_sse3_monitor (op0, op1, op2));
20997       else
20998         emit_insn (gen_sse3_monitor64 (op0, op1, op2));
20999       return 0;
21000
21001     case IX86_BUILTIN_MWAIT:
21002       arg0 = CALL_EXPR_ARG (exp, 0);
21003       arg1 = CALL_EXPR_ARG (exp, 1);
21004       op0 = expand_normal (arg0);
21005       op1 = expand_normal (arg1);
            /* Both operands are forced into SImode registers.  */
21006       if (!REG_P (op0))
21007         op0 = copy_to_mode_reg (SImode, op0);
21008       if (!REG_P (op1))
21009         op1 = copy_to_mode_reg (SImode, op1);
21010       emit_insn (gen_sse3_mwait (op0, op1));
21011       return 0;
21012
21013     case IX86_BUILTIN_LDDQU:
21014       return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, exp,
21015                                        target, 1);
21016
21017     case IX86_BUILTIN_PALIGNR:
21018     case IX86_BUILTIN_PALIGNR128:
            /* MODE becomes the mode of the returned pseudo: DImode for
               PALIGNR, V2DImode for PALIGNR128.  The pattern itself works
               in the modes recorded in insn_data, reached through
               subregs.  */
21019       if (fcode == IX86_BUILTIN_PALIGNR)
21020         {
21021           icode = CODE_FOR_ssse3_palignrdi;
21022           mode = DImode;
21023         }
21024       else
21025         {
21026           icode = CODE_FOR_ssse3_palignrti;
21027           mode = V2DImode;
21028         }
21029       arg0 = CALL_EXPR_ARG (exp, 0);
21030       arg1 = CALL_EXPR_ARG (exp, 1);
21031       arg2 = CALL_EXPR_ARG (exp, 2);
21032       op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
21033       op1 = expand_expr (arg1, NULL_RTX, VOIDmode, EXPAND_NORMAL);
21034       op2 = expand_expr (arg2, NULL_RTX, VOIDmode, EXPAND_NORMAL);
21035       tmode = insn_data[icode].operand[0].mode;
21036       mode1 = insn_data[icode].operand[1].mode;
21037       mode2 = insn_data[icode].operand[2].mode;
21038       mode3 = insn_data[icode].operand[3].mode;
21039
21040       if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
21041         {
21042           op0 = copy_to_reg (op0);
21043           op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
21044         }
21045       if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
21046         {
21047           op1 = copy_to_reg (op1);
21048           op1 = simplify_gen_subreg (mode2, op1, GET_MODE (op1), 0);
21049         }
21050       if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
21051         {
21052           error ("shift must be an immediate");
21053           return const0_rtx;
21054         }
21055       target = gen_reg_rtx (mode);
21056       pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, mode, 0),
21057                              op0, op1, op2);
21058       if (! pat)
21059         return 0;
21060       emit_insn (pat);
21061       return target;
21062
21063     case IX86_BUILTIN_MOVNTDQA:
21064       return ix86_expand_unop_builtin (CODE_FOR_sse4_1_movntdqa, exp,
21065                                        target, 1);
21066
21067     case IX86_BUILTIN_MOVNTSD:
21068       return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv2df, exp);
21069
21070     case IX86_BUILTIN_MOVNTSS:
21071       return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv4sf, exp);
21072
21073     case IX86_BUILTIN_INSERTQ:
21074     case IX86_BUILTIN_EXTRQ:
21075       icode = (fcode == IX86_BUILTIN_EXTRQ
21076                ? CODE_FOR_sse4a_extrq
21077                : CODE_FOR_sse4a_insertq);
21078       arg0 = CALL_EXPR_ARG (exp, 0);
21079       arg1 = CALL_EXPR_ARG (exp, 1);
21080       op0 = expand_normal (arg0);
21081       op1 = expand_normal (arg1);
21082       tmode = insn_data[icode].operand[0].mode;
21083       mode1 = insn_data[icode].operand[1].mode;
21084       mode2 = insn_data[icode].operand[2].mode;
21085       if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
21086         op0 = copy_to_mode_reg (mode1, op0);
21087       if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
21088         op1 = copy_to_mode_reg (mode2, op1);
21089       if (optimize || target == 0
21090           || GET_MODE (target) != tmode
21091           || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
21092         target = gen_reg_rtx (tmode);
21093       pat = GEN_FCN (icode) (target, op0, op1);
21094       if (! pat)
21095         return NULL_RTX;
21096       emit_insn (pat);
21097       return target;
21098
21099     case IX86_BUILTIN_EXTRQI:
21100       icode = CODE_FOR_sse4a_extrqi;
21101       arg0 = CALL_EXPR_ARG (exp, 0);
21102       arg1 = CALL_EXPR_ARG (exp, 1);
21103       arg2 = CALL_EXPR_ARG (exp, 2);
21104       op0 = expand_normal (arg0);
21105       op1 = expand_normal (arg1);
21106       op2 = expand_normal (arg2);
21107       tmode = insn_data[icode].operand[0].mode;
21108       mode1 = insn_data[icode].operand[1].mode;
21109       mode2 = insn_data[icode].operand[2].mode;
21110       mode3 = insn_data[icode].operand[3].mode;
21111       if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
21112         op0 = copy_to_mode_reg (mode1, op0);
            /* The second and third arguments are not copied to registers:
               if either fails its predicate an error is reported and a
               dummy pseudo is returned.  */
21113       if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
21114         {
21115           error ("index mask must be an immediate");
21116           return gen_reg_rtx (tmode);
21117         }
21118       if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
21119         {
21120           error ("length mask must be an immediate");
21121           return gen_reg_rtx (tmode);
21122         }
21123       if (optimize || target == 0
21124           || GET_MODE (target) != tmode
21125           || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
21126         target = gen_reg_rtx (tmode);
21127       pat = GEN_FCN (icode) (target, op0, op1, op2);
21128       if (! pat)
21129         return NULL_RTX;
21130       emit_insn (pat);
21131       return target;
21132
21133     case IX86_BUILTIN_INSERTQI:
21134       icode = CODE_FOR_sse4a_insertqi;
21135       arg0 = CALL_EXPR_ARG (exp, 0);
21136       arg1 = CALL_EXPR_ARG (exp, 1);
21137       arg2 = CALL_EXPR_ARG (exp, 2);
21138       arg3 = CALL_EXPR_ARG (exp, 3);
21139       op0 = expand_normal (arg0);
21140       op1 = expand_normal (arg1);
21141       op2 = expand_normal (arg2);
21142       op3 = expand_normal (arg3);
21143       tmode = insn_data[icode].operand[0].mode;
21144       mode1 = insn_data[icode].operand[1].mode;
21145       mode2 = insn_data[icode].operand[2].mode;
21146       mode3 = insn_data[icode].operand[3].mode;
21147       mode4 = insn_data[icode].operand[4].mode;
21148
21149       if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
21150         op0 = copy_to_mode_reg (mode1, op0);
21151
21152       if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
21153         op1 = copy_to_mode_reg (mode2, op1);
21154
            /* The last two arguments are not copied to registers: if either
               fails its predicate an error is reported and a dummy pseudo
               is returned.  */
21155       if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
21156         {
21157           error ("index mask must be an immediate");
21158           return gen_reg_rtx (tmode);
21159         }
21160       if (! (*insn_data[icode].operand[4].predicate) (op3, mode4))
21161         {
21162           error ("length mask must be an immediate");
21163           return gen_reg_rtx (tmode);
21164         }
21165       if (optimize || target == 0
21166           || GET_MODE (target) != tmode
21167           || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
21168         target = gen_reg_rtx (tmode);
21169       pat = GEN_FCN (icode) (target, op0, op1, op2, op3);
21170       if (! pat)
21171         return NULL_RTX;
21172       emit_insn (pat);
21173       return target;
21174
          /* The vector init / extract / set builtins are forwarded to their
             dedicated subroutines.  */
21175     case IX86_BUILTIN_VEC_INIT_V2SI:
21176     case IX86_BUILTIN_VEC_INIT_V4HI:
21177     case IX86_BUILTIN_VEC_INIT_V8QI:
21178       return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
21179
21180     case IX86_BUILTIN_VEC_EXT_V2DF:
21181     case IX86_BUILTIN_VEC_EXT_V2DI:
21182     case IX86_BUILTIN_VEC_EXT_V4SF:
21183     case IX86_BUILTIN_VEC_EXT_V4SI:
21184     case IX86_BUILTIN_VEC_EXT_V8HI:
21185     case IX86_BUILTIN_VEC_EXT_V2SI:
21186     case IX86_BUILTIN_VEC_EXT_V4HI:
21187     case IX86_BUILTIN_VEC_EXT_V16QI:
21188       return ix86_expand_vec_ext_builtin (exp, target);
21189
21190     case IX86_BUILTIN_VEC_SET_V2DI:
21191     case IX86_BUILTIN_VEC_SET_V4SF:
21192     case IX86_BUILTIN_VEC_SET_V4SI:
21193     case IX86_BUILTIN_VEC_SET_V8HI:
21194     case IX86_BUILTIN_VEC_SET_V4HI:
21195     case IX86_BUILTIN_VEC_SET_V16QI:
21196       return ix86_expand_vec_set_builtin (exp);
21197
21198     case IX86_BUILTIN_INFQ:
21199       {
21200         REAL_VALUE_TYPE inf;
21201         rtx tmp;
21202
              /* Turn the value set up by real_inf into a MODE constant,
                 force it into memory and move it into TARGET (a new pseudo
                 when no TARGET was supplied).  */
21203         real_inf (&inf);
21204         tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
21205
21206         tmp = validize_mem (force_const_mem (mode, tmp));
21207
21208         if (target == 0)
21209           target = gen_reg_rtx (mode);
21210
21211         emit_move_insn (target, tmp);
21212         return target;
21213       }
21214
21215     case IX86_BUILTIN_FABSQ:
21216       return ix86_expand_unop_builtin (CODE_FOR_abstf2, exp, target, 0);
21217
21218     case IX86_BUILTIN_COPYSIGNQ:
21219       return ix86_expand_binop_builtin (CODE_FOR_copysigntf3, exp, target);
21220
21221     default:
21222       break;
21223     }
21224
        /* Not handled above: search the descriptor tables in turn.  The
           first table that lists FCODE decides which expander runs.  */
21225   for (i = 0, d = bdesc_sse_3arg;
21226        i < ARRAY_SIZE (bdesc_sse_3arg);
21227        i++, d++)
21228     if (d->code == fcode)
21229       return ix86_expand_sse_4_operands_builtin (d->icode, exp,
21230                                                  target);
21231
21232   for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
21233     if (d->code == fcode)
21234       {
21235         /* Compares are treated specially.  */
21236         if (d->icode == CODE_FOR_sse_maskcmpv4sf3
21237             || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
21238             || d->icode == CODE_FOR_sse2_maskcmpv2df3
21239             || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
21240           return ix86_expand_sse_compare (d, exp, target);
21241
21242         return ix86_expand_binop_builtin (d->icode, exp, target);
21243       }
21244
21245   for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
21246     if (d->code == fcode)
21247       return ix86_expand_unop_builtin (d->icode, exp, target, 0);
21248
21249   for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
21250     if (d->code == fcode)
21251       return ix86_expand_sse_comi (d, exp, target);
21252
21253   for (i = 0, d = bdesc_ptest; i < ARRAY_SIZE (bdesc_ptest); i++, d++)
21254     if (d->code == fcode)
21255       return ix86_expand_sse_ptest (d, exp, target);
21256
21257   for (i = 0, d = bdesc_crc32; i < ARRAY_SIZE (bdesc_crc32); i++, d++)
21258     if (d->code == fcode)
21259       return ix86_expand_crc32 (d->icode, exp, target);
21260
21261   for (i = 0, d = bdesc_pcmpestr;
21262        i < ARRAY_SIZE (bdesc_pcmpestr);
21263        i++, d++)
21264     if (d->code == fcode)
21265       return ix86_expand_sse_pcmpestr (d, exp, target);
21266
21267   for (i = 0, d = bdesc_pcmpistr;
21268        i < ARRAY_SIZE (bdesc_pcmpistr);
21269        i++, d++)
21270     if (d->code == fcode)
21271       return ix86_expand_sse_pcmpistr (d, exp, target);
21272
        /* The multi-argument table also supplies the argument-shape flag
           and comparison code that its expander takes.  */
21273   for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
21274     if (d->code == fcode)
21275       return ix86_expand_multi_arg_builtin (d->icode, exp, target,
21276                                             (enum multi_arg_type)d->flag,
21277                                             d->comparison);
21278
        /* Every builtin code must have been handled by now.  */
21279   gcc_unreachable ();
21280 }
21281
21282 /* Return the decl of the ix86 builtin that implements the builtin with
21283    function code FN on whole vectors, where TYPE_OUT is the result vector
21284    type and TYPE_IN the argument vector type.  Requests not answered here
21285    are offered to the vectorization library handler when one is set;
21286    NULL_TREE is returned when nothing is available.  */
21287
21288 static tree
21289 ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
21290                                   tree type_in)
21291 {
21292   enum machine_mode out_elt, in_elt;
21293   int out_len, in_len;
21294   bool v2df_in, v4sf_in, v4si_out;
21295
21296   /* Only vector-to-vector mappings are considered.  */
21297   if (TREE_CODE (type_out) != VECTOR_TYPE
21298       || TREE_CODE (type_in) != VECTOR_TYPE)
21299     return NULL_TREE;
21300
21301   out_elt = TYPE_MODE (TREE_TYPE (type_out));
21302   out_len = TYPE_VECTOR_SUBPARTS (type_out);
21303   in_elt = TYPE_MODE (TREE_TYPE (type_in));
21304   in_len = TYPE_VECTOR_SUBPARTS (type_in);
21305
21306   /* Element mode / element count combinations tested more than once.  */
21307   v2df_in = in_elt == DFmode && in_len == 2;
21308   v4sf_in = in_elt == SFmode && in_len == 4;
21309   v4si_out = out_elt == SImode && out_len == 4;
21310
21311   /* BUILT_IN_SQRT: two DFmode elements in, two DFmode elements out.  */
21312   if (fn == BUILT_IN_SQRT
21313       && out_elt == DFmode && out_len == 2 && v2df_in)
21314     return ix86_builtins[IX86_BUILTIN_SQRTPD];
21315
21316   /* BUILT_IN_SQRTF: four SFmode elements in, four SFmode elements out.  */
21317   if (fn == BUILT_IN_SQRTF
21318       && out_elt == SFmode && out_len == 4 && v4sf_in)
21319     return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
21320
21321   /* BUILT_IN_LRINT: two DFmode elements in, four SImode elements out.  */
21322   if (fn == BUILT_IN_LRINT
21323       && v4si_out && v2df_in)
21324     return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
21325
21326   /* BUILT_IN_LRINTF: four SFmode elements in, four SImode elements out.  */
21327   if (fn == BUILT_IN_LRINTF
21328       && v4si_out && v4sf_in)
21329     return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
21330
21331   /* Anything else, including a known code with an unexpected vector
21332      shape, goes to the vectorization library handler if there is one.  */
21333   if (!ix86_veclib_handler)
21334     return NULL_TREE;
21335
21336   return (*ix86_veclib_handler)(fn, type_out, type_in);
21337 }
21338
21339 /* Handler for an ACML-style interface to a library with vectorized
21340    intrinsics.  */
21341
21342 static tree
21343 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
21344 {
21345   char name[20] = "__vr.._";
21346   tree fntype, new_fndecl, args;
21347   unsigned arity;
21348   const char *bname;
21349   enum machine_mode el_mode, in_mode;
21350   int n, in_n;
21351
21352   /* The ACML is 64bits only and suitable for unsafe math only as
21353      it does not correctly support parts of IEEE with the required
21354      precision such as denormals.  */
21355   if (!TARGET_64BIT
21356       || !flag_unsafe_math_optimizations)
21357     return NULL_TREE;
21358
21359   el_mode = TYPE_MODE (TREE_TYPE (type_out));
21360   n = TYPE_VECTOR_SUBPARTS (type_out);
21361   in_mode = TYPE_MODE (TREE_TYPE (type_in));
21362   in_n = TYPE_VECTOR_SUBPARTS (type_in);
21363   if (el_mode != in_mode
21364       || n != in_n)
21365     return NULL_TREE;
21366
21367   switch (fn)
21368     {
21369     case BUILT_IN_SIN:
21370     case BUILT_IN_COS:
21371     case BUILT_IN_EXP:
21372     case BUILT_IN_LOG:
21373     case BUILT_IN_LOG2:
21374     case BUILT_IN_LOG10:
21375       name[4] = 'd';
21376       name[5] = '2';
21377       if (el_mode != DFmode
21378           || n != 2)
21379         return NULL_TREE;
21380       break;
21381
21382     case BUILT_IN_SINF:
21383     case BUILT_IN_COSF:
21384     case BUILT_IN_EXPF:
21385     case BUILT_IN_POWF:
21386     case BUILT_IN_LOGF:
21387     case BUILT_IN_LOG2F:
21388     case BUILT_IN_LOG10F:
21389       name[4] = 's';
21390       name[5] = '4';
21391       if (el_mode != SFmode
21392           || n != 4)
21393         return NULL_TREE;
21394       break;
21395
21396     default:
21397       return NULL_TREE;
21398     }
21399
21400   bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
21401   sprintf (name + 7, "%s", bname+10);
21402
21403   arity = 0;
21404   for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
21405        args = TREE_CHAIN (args))
21406     arity++;
21407
21408   if (arity == 1)
21409     fntype = build_function_type_list (type_out, type_in, NULL);
21410   else
21411     fntype = build_function_type_list (type_out, type_in, type_in, NULL);
21412
21413   /* Build a function declaration for the vectorized function.  */
21414   new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
21415   TREE_PUBLIC (new_fndecl) = 1;
21416   DECL_EXTERNAL (new_fndecl) = 1;
21417   DECL_IS_NOVOPS (new_fndecl) = 1;
21418   TREE_READONLY (new_fndecl) = 1;
21419
21420   return new_fndecl;
21421 }
21422
21423
21424 /* Returns a decl of a function that implements conversion of the
21425    input vector of type TYPE, or NULL_TREE if it is not available.  */
21426
21427 static tree
21428 ix86_vectorize_builtin_conversion (unsigned int code, tree type)
21429 {
21430   if (TREE_CODE (type) != VECTOR_TYPE)
21431     return NULL_TREE;
21432
21433   switch (code)
21434     {
21435     case FLOAT_EXPR:
21436       switch (TYPE_MODE (type))
21437         {
21438         case V4SImode:
21439           return ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
21440         default:
21441           return NULL_TREE;
21442         }
21443
21444     case FIX_TRUNC_EXPR:
21445       switch (TYPE_MODE (type))
21446         {
21447         case V4SFmode:
21448           return ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
21449         default:
21450           return NULL_TREE;
21451         }
21452     default:
21453       return NULL_TREE;
21454
21455     }
21456 }
21457
21458 /* Returns a code for a target-specific builtin that implements
21459    reciprocal of the function, or NULL_TREE if not available.  */
21460
21461 static tree
21462 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
21463                          bool sqrt ATTRIBUTE_UNUSED)
21464 {
21465   if (! (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
21466          && flag_finite_math_only && !flag_trapping_math
21467          && flag_unsafe_math_optimizations))
21468     return NULL_TREE;
21469
21470   if (md_fn)
21471     /* Machine dependent builtins.  */
21472     switch (fn)
21473       {
21474         /* Vectorized version of sqrt to rsqrt conversion.  */
21475       case IX86_BUILTIN_SQRTPS_NR:
21476         return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
21477
21478       default:
21479         return NULL_TREE;
21480       }
21481   else
21482     /* Normal builtins.  */
21483     switch (fn)
21484       {
21485         /* Sqrt to rsqrt conversion.  */
21486       case BUILT_IN_SQRTF:
21487         return ix86_builtins[IX86_BUILTIN_RSQRTF];
21488
21489       default:
21490         return NULL_TREE;
21491       }
21492 }
21493
21494 /* Store OPERAND to the memory after reload is completed.  This means
21495    that we can't easily use assign_stack_local.  */
21496 rtx
21497 ix86_force_to_memory (enum machine_mode mode, rtx operand)
21498 {
21499   rtx result;
21500
21501   gcc_assert (reload_completed);
21502   if (TARGET_RED_ZONE)
21503     {
21504       result = gen_rtx_MEM (mode,
21505                             gen_rtx_PLUS (Pmode,
21506                                           stack_pointer_rtx,
21507                                           GEN_INT (-RED_ZONE_SIZE)));
21508       emit_move_insn (result, operand);
21509     }
21510   else if (!TARGET_RED_ZONE && TARGET_64BIT)
21511     {
21512       switch (mode)
21513         {
21514         case HImode:
21515         case SImode:
21516           operand = gen_lowpart (DImode, operand);
21517           /* FALLTHRU */
21518         case DImode:
21519           emit_insn (
21520                       gen_rtx_SET (VOIDmode,
21521                                    gen_rtx_MEM (DImode,
21522                                                 gen_rtx_PRE_DEC (DImode,
21523                                                         stack_pointer_rtx)),
21524                                    operand));
21525           break;
21526         default:
21527           gcc_unreachable ();
21528         }
21529       result = gen_rtx_MEM (mode, stack_pointer_rtx);
21530     }
21531   else
21532     {
21533       switch (mode)
21534         {
21535         case DImode:
21536           {
21537             rtx operands[2];
21538             split_di (&operand, 1, operands, operands + 1);
21539             emit_insn (
21540                         gen_rtx_SET (VOIDmode,
21541                                      gen_rtx_MEM (SImode,
21542                                                   gen_rtx_PRE_DEC (Pmode,
21543                                                         stack_pointer_rtx)),
21544                                      operands[1]));
21545             emit_insn (
21546                         gen_rtx_SET (VOIDmode,
21547                                      gen_rtx_MEM (SImode,
21548                                                   gen_rtx_PRE_DEC (Pmode,
21549                                                         stack_pointer_rtx)),
21550                                      operands[0]));
21551           }
21552           break;
21553         case HImode:
21554           /* Store HImodes as SImodes.  */
21555           operand = gen_lowpart (SImode, operand);
21556           /* FALLTHRU */
21557         case SImode:
21558           emit_insn (
21559                       gen_rtx_SET (VOIDmode,
21560                                    gen_rtx_MEM (GET_MODE (operand),
21561                                                 gen_rtx_PRE_DEC (SImode,
21562                                                         stack_pointer_rtx)),
21563                                    operand));
21564           break;
21565         default:
21566           gcc_unreachable ();
21567         }
21568       result = gen_rtx_MEM (mode, stack_pointer_rtx);
21569     }
21570   return result;
21571 }
21572
21573 /* Free operand from the memory.  */
21574 void
21575 ix86_free_from_memory (enum machine_mode mode)
21576 {
21577   if (!TARGET_RED_ZONE)
21578     {
21579       int size;
21580
21581       if (mode == DImode || TARGET_64BIT)
21582         size = 8;
21583       else
21584         size = 4;
21585       /* Use LEA to deallocate stack space.  In peephole2 it will be converted
21586          to pop or add instruction if registers are available.  */
21587       emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
21588                               gen_rtx_PLUS (Pmode, stack_pointer_rtx,
21589                                             GEN_INT (size))));
21590     }
21591 }
21592
21593 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
21594    QImode must go into class Q_REGS.
21595    Narrow ALL_REGS to GENERAL_REGS.  This supports allowing movsf and
21596    movdf to do mem-to-mem moves through integer regs.  */
21597 enum reg_class
21598 ix86_preferred_reload_class (rtx x, enum reg_class regclass)
21599 {
21600   enum machine_mode mode = GET_MODE (x);
21601
21602   /* We're only allowed to return a subclass of CLASS.  Many of the
21603      following checks fail for NO_REGS, so eliminate that early.  */
21604   if (regclass == NO_REGS)
21605     return NO_REGS;
21606
21607   /* All classes can load zeros.  */
21608   if (x == CONST0_RTX (mode))
21609     return regclass;
21610
21611   /* Force constants into memory if we are loading a (nonzero) constant into
21612      an MMX or SSE register.  This is because there are no MMX/SSE instructions
21613      to load from a constant.  */
21614   if (CONSTANT_P (x)
21615       && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
21616     return NO_REGS;
21617
21618   /* Prefer SSE regs only, if we can use them for math.  */
21619   if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
21620     return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
21621
21622   /* Floating-point constants need more complex checks.  */
21623   if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
21624     {
21625       /* General regs can load everything.  */
21626       if (reg_class_subset_p (regclass, GENERAL_REGS))
21627         return regclass;
21628
21629       /* Floats can load 0 and 1 plus some others.  Note that we eliminated
21630          zero above.  We only want to wind up preferring 80387 registers if
21631          we plan on doing computation with them.  */
21632       if (TARGET_80387
21633           && standard_80387_constant_p (x))
21634         {
21635           /* Limit class to non-sse.  */
21636           if (regclass == FLOAT_SSE_REGS)
21637             return FLOAT_REGS;
21638           if (regclass == FP_TOP_SSE_REGS)
21639             return FP_TOP_REG;
21640           if (regclass == FP_SECOND_SSE_REGS)
21641             return FP_SECOND_REG;
21642           if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
21643             return regclass;
21644         }
21645
21646       return NO_REGS;
21647     }
21648
21649   /* Generally when we see PLUS here, it's the function invariant
21650      (plus soft-fp const_int).  Which can only be computed into general
21651      regs.  */
21652   if (GET_CODE (x) == PLUS)
21653     return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
21654
21655   /* QImode constants are easy to load, but non-constant QImode data
21656      must go into Q_REGS.  */
21657   if (GET_MODE (x) == QImode && !CONSTANT_P (x))
21658     {
21659       if (reg_class_subset_p (regclass, Q_REGS))
21660         return regclass;
21661       if (reg_class_subset_p (Q_REGS, regclass))
21662         return Q_REGS;
21663       return NO_REGS;
21664     }
21665
21666   return regclass;
21667 }
21668
21669 /* Discourage putting floating-point values in SSE registers unless
21670    SSE math is being used, and likewise for the 387 registers.  */
21671 enum reg_class
21672 ix86_preferred_output_reload_class (rtx x, enum reg_class regclass)
21673 {
21674   enum machine_mode mode = GET_MODE (x);
21675
21676   /* Restrict the output reload class to the register bank that we are doing
21677      math on.  If we would like not to return a subset of CLASS, reject this
21678      alternative: if reload cannot do this, it will still use its choice.  */
21679   mode = GET_MODE (x);
21680   if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
21681     return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
21682
21683   if (X87_FLOAT_MODE_P (mode))
21684     {
21685       if (regclass == FP_TOP_SSE_REGS)
21686         return FP_TOP_REG;
21687       else if (regclass == FP_SECOND_SSE_REGS)
21688         return FP_SECOND_REG;
21689       else
21690         return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
21691     }
21692
21693   return regclass;
21694 }
21695
21696 /* If we are copying between general and FP registers, we need a memory
21697    location. The same is true for SSE and MMX registers.
21698
21699    To optimize register_move_cost performance, allow inline variant.
21700
21701    The macro can't work reliably when one of the CLASSES is class containing
21702    registers from multiple units (SSE, MMX, integer).  We avoid this by never
21703    combining those units in single alternative in the machine description.
21704    Ensure that this constraint holds to avoid unexpected surprises.
21705
21706    When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
21707    enforce these sanity checks.  */
21708
21709 static inline int
21710 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
21711                               enum machine_mode mode, int strict)
21712 {
21713   if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
21714       || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
21715       || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
21716       || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
21717       || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
21718       || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
21719     {
21720       gcc_assert (!strict);
21721       return true;
21722     }
21723
21724   if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
21725     return true;
21726
21727   /* ??? This is a lie.  We do have moves between mmx/general, and for
21728      mmx/sse2.  But by saying we need secondary memory we discourage the
21729      register allocator from using the mmx registers unless needed.  */
21730   if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
21731     return true;
21732
21733   if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
21734     {
21735       /* SSE1 doesn't have any direct moves from other classes.  */
21736       if (!TARGET_SSE2)
21737         return true;
21738
21739       /* If the target says that inter-unit moves are more expensive
21740          than moving through memory, then don't generate them.  */
21741       if (!TARGET_INTER_UNIT_MOVES)
21742         return true;
21743
21744       /* Between SSE and general, we have moves no larger than word size.  */
21745       if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
21746         return true;
21747     }
21748
21749   return false;
21750 }
21751
21752 int
21753 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
21754                               enum machine_mode mode, int strict)
21755 {
21756   return inline_secondary_memory_needed (class1, class2, mode, strict);
21757 }
21758
21759 /* Return true if the registers in CLASS cannot represent the change from
21760    modes FROM to TO.  */
21761
21762 bool
21763 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
21764                                enum reg_class regclass)
21765 {
21766   if (from == to)
21767     return false;
21768
21769   /* x87 registers can't do subreg at all, as all values are reformatted
21770      to extended precision.  */
21771   if (MAYBE_FLOAT_CLASS_P (regclass))
21772     return true;
21773
21774   if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
21775     {
21776       /* Vector registers do not support QI or HImode loads.  If we don't
21777          disallow a change to these modes, reload will assume it's ok to
21778          drop the subreg from (subreg:SI (reg:HI 100) 0).  This affects
21779          the vec_dupv4hi pattern.  */
21780       if (GET_MODE_SIZE (from) < 4)
21781         return true;
21782
21783       /* Vector registers do not support subreg with nonzero offsets, which
21784          are otherwise valid for integer registers.  Since we can't see
21785          whether we have a nonzero offset from here, prohibit all
21786          nonparadoxical subregs changing size.  */
21787       if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
21788         return true;
21789     }
21790
21791   return false;
21792 }
21793
21794 /* Return the cost of moving data of mode M between a
21795    register and memory.  A value of 2 is the default; this cost is
21796    relative to those in `REGISTER_MOVE_COST'.
21797
21798    This function is used extensively by register_move_cost that is used to
21799    build tables at startup.  Make it inline in this case.
21800    When IN is 2, return maximum of in and out move cost.
21801
21802    If moving between registers and memory is more expensive than
21803    between two registers, you should define this macro to express the
21804    relative cost.
21805
21806    Model also increased moving costs of QImode registers in non
21807    Q_REGS classes.
21808  */
21809 static inline int
21810 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
21811                          int in)
21812 {
21813   int cost;
21814   if (FLOAT_CLASS_P (regclass))
21815     {
21816       int index;
21817       switch (mode)
21818         {
21819           case SFmode:
21820             index = 0;
21821             break;
21822           case DFmode:
21823             index = 1;
21824             break;
21825           case XFmode:
21826             index = 2;
21827             break;
21828           default:
21829             return 100;
21830         }
21831       if (in == 2)
21832         return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
21833       return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
21834     }
21835   if (SSE_CLASS_P (regclass))
21836     {
21837       int index;
21838       switch (GET_MODE_SIZE (mode))
21839         {
21840           case 4:
21841             index = 0;
21842             break;
21843           case 8:
21844             index = 1;
21845             break;
21846           case 16:
21847             index = 2;
21848             break;
21849           default:
21850             return 100;
21851         }
21852       if (in == 2)
21853         return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
21854       return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
21855     }
21856   if (MMX_CLASS_P (regclass))
21857     {
21858       int index;
21859       switch (GET_MODE_SIZE (mode))
21860         {
21861           case 4:
21862             index = 0;
21863             break;
21864           case 8:
21865             index = 1;
21866             break;
21867           default:
21868             return 100;
21869         }
21870       if (in)
21871         return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
21872       return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
21873     }
21874   switch (GET_MODE_SIZE (mode))
21875     {
21876       case 1:
21877         if (Q_CLASS_P (regclass) || TARGET_64BIT)
21878           {
21879             if (!in)
21880               return ix86_cost->int_store[0];
21881             if (TARGET_PARTIAL_REG_DEPENDENCY && !optimize_size)
21882               cost = ix86_cost->movzbl_load;
21883             else
21884               cost = ix86_cost->int_load[0];
21885             if (in == 2)
21886               return MAX (cost, ix86_cost->int_store[0]);
21887             return cost;
21888           }
21889         else
21890           {
21891            if (in == 2)
21892              return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
21893            if (in)
21894              return ix86_cost->movzbl_load;
21895            else
21896              return ix86_cost->int_store[0] + 4;
21897           }
21898         break;
21899       case 2:
21900         if (in == 2)
21901           return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
21902         return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
21903       default:
21904         /* Compute number of 32bit moves needed.  TFmode is moved as XFmode.  */
21905         if (mode == TFmode)
21906           mode = XFmode;
21907         if (in == 2)
21908           cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
21909         else if (in)
21910           cost = ix86_cost->int_load[2];
21911         else
21912           cost = ix86_cost->int_store[2];
21913         return (cost * (((int) GET_MODE_SIZE (mode)
21914                         + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
21915     }
21916 }
21917
21918 int
21919 ix86_memory_move_cost (enum machine_mode mode, enum reg_class regclass, int in)
21920 {
21921   return inline_memory_move_cost (mode, regclass, in);
21922 }
21923
21924
21925 /* Return the cost of moving data from a register in class CLASS1 to
21926    one in class CLASS2.
21927
21928    It is not required that the cost always equal 2 when FROM is the same as TO;
21929    on some machines it is expensive to move between registers if they are not
21930    general registers.  */
21931
21932 int
21933 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
21934                          enum reg_class class2)
21935 {
21936   /* In case we require secondary memory, compute cost of the store followed
21937      by load.  In order to avoid bad register allocation choices, we need
21938      for this to be *at least* as high as the symmetric MEMORY_MOVE_COST.  */
21939
21940   if (inline_secondary_memory_needed (class1, class2, mode, 0))
21941     {
21942       int cost = 1;
21943
21944       cost += inline_memory_move_cost (mode, class1, 2);
21945       cost += inline_memory_move_cost (mode, class2, 2);
21946
21947       /* In case of copying from general_purpose_register we may emit multiple
21948          stores followed by single load causing memory size mismatch stall.
21949          Count this as arbitrarily high cost of 20.  */
21950       if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
21951         cost += 20;
21952
21953       /* In the case of FP/MMX moves, the registers actually overlap, and we
21954          have to switch modes in order to treat them differently.  */
21955       if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
21956           || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
21957         cost += 20;
21958
21959       return cost;
21960     }
21961
21962   /* Moves between SSE/MMX and integer unit are expensive.  */
21963   if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
21964       || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
21965
21966     /* ??? By keeping returned value relatively high, we limit the number
21967        of moves between integer and MMX/SSE registers for all targets.
21968        Additionally, high value prevents problem with x86_modes_tieable_p(),
21969        where integer modes in MMX/SSE registers are not tieable
21970        because of missing QImode and HImode moves to, from or between
21971        MMX/SSE registers.  */
21972     return MAX (ix86_cost->mmxsse_to_integer, 8);
21973
21974   if (MAYBE_FLOAT_CLASS_P (class1))
21975     return ix86_cost->fp_move;
21976   if (MAYBE_SSE_CLASS_P (class1))
21977     return ix86_cost->sse_move;
21978   if (MAYBE_MMX_CLASS_P (class1))
21979     return ix86_cost->mmx_move;
21980   return 2;
21981 }
21982
21983 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE.  */
21984
21985 bool
21986 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
21987 {
21988   /* Flags and only flags can only hold CCmode values.  */
21989   if (CC_REGNO_P (regno))
21990     return GET_MODE_CLASS (mode) == MODE_CC;
21991   if (GET_MODE_CLASS (mode) == MODE_CC
21992       || GET_MODE_CLASS (mode) == MODE_RANDOM
21993       || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
21994     return 0;
21995   if (FP_REGNO_P (regno))
21996     return VALID_FP_MODE_P (mode);
21997   if (SSE_REGNO_P (regno))
21998     {
21999       /* We implement the move patterns for all vector modes into and
22000          out of SSE registers, even when no operation instructions
22001          are available.  */
22002       return (VALID_SSE_REG_MODE (mode)
22003               || VALID_SSE2_REG_MODE (mode)
22004               || VALID_MMX_REG_MODE (mode)
22005               || VALID_MMX_REG_MODE_3DNOW (mode));
22006     }
22007   if (MMX_REGNO_P (regno))
22008     {
22009       /* We implement the move patterns for 3DNOW modes even in MMX mode,
22010          so if the register is available at all, then we can move data of
22011          the given mode into or out of it.  */
22012       return (VALID_MMX_REG_MODE (mode)
22013               || VALID_MMX_REG_MODE_3DNOW (mode));
22014     }
22015
22016   if (mode == QImode)
22017     {
22018       /* Take care for QImode values - they can be in non-QI regs,
22019          but then they do cause partial register stalls.  */
22020       if (regno < 4 || TARGET_64BIT)
22021         return 1;
22022       if (!TARGET_PARTIAL_REG_STALL)
22023         return 1;
22024       return reload_in_progress || reload_completed;
22025     }
22026   /* We handle both integer and floats in the general purpose registers.  */
22027   else if (VALID_INT_MODE_P (mode))
22028     return 1;
22029   else if (VALID_FP_MODE_P (mode))
22030     return 1;
22031   else if (VALID_DFP_MODE_P (mode))
22032     return 1;
22033   /* Lots of MMX code casts 8 byte vector modes to DImode.  If we then go
22034      on to use that value in smaller contexts, this can easily force a
22035      pseudo to be allocated to GENERAL_REGS.  Since this is no worse than
22036      supporting DImode, allow it.  */
22037   else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
22038     return 1;
22039
22040   return 0;
22041 }
22042
22043 /* A subroutine of ix86_modes_tieable_p.  Return true if MODE is a
22044    tieable integer mode.  */
22045
22046 static bool
22047 ix86_tieable_integer_mode_p (enum machine_mode mode)
22048 {
22049   switch (mode)
22050     {
22051     case HImode:
22052     case SImode:
22053       return true;
22054
22055     case QImode:
22056       return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
22057
22058     case DImode:
22059       return TARGET_64BIT;
22060
22061     default:
22062       return false;
22063     }
22064 }
22065
22066 /* Return true if MODE1 is accessible in a register that can hold MODE2
22067    without copying.  That is, all register classes that can hold MODE2
22068    can also hold MODE1.  */
22069
22070 bool
22071 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
22072 {
22073   if (mode1 == mode2)
22074     return true;
22075
22076   if (ix86_tieable_integer_mode_p (mode1)
22077       && ix86_tieable_integer_mode_p (mode2))
22078     return true;
22079
22080   /* MODE2 being XFmode implies fp stack or general regs, which means we
22081      can tie any smaller floating point modes to it.  Note that we do not
22082      tie this with TFmode.  */
22083   if (mode2 == XFmode)
22084     return mode1 == SFmode || mode1 == DFmode;
22085
22086   /* MODE2 being DFmode implies fp stack, general or sse regs, which means
22087      that we can tie it with SFmode.  */
22088   if (mode2 == DFmode)
22089     return mode1 == SFmode;
22090
22091   /* If MODE2 is only appropriate for an SSE register, then tie with
22092      any other mode acceptable to SSE registers.  */
22093   if (GET_MODE_SIZE (mode2) == 16
22094       && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
22095     return (GET_MODE_SIZE (mode1) == 16
22096             && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
22097
22098   /* If MODE2 is appropriate for an MMX register, then tie
22099      with any other mode acceptable to MMX registers.  */
22100   if (GET_MODE_SIZE (mode2) == 8
22101       && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
22102     return (GET_MODE_SIZE (mode1) == 8
22103             && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
22104
22105   return false;
22106 }
22107
22108 /* Compute a (partial) cost for rtx X.  Return true if the complete
22109    cost has been computed, and false if subexpressions should be
22110    scanned.  In either case, *TOTAL contains the cost result.  */
22111
22112 static bool
22113 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total)
22114 {
22115   enum rtx_code outer_code = (enum rtx_code) outer_code_i;
22116   enum machine_mode mode = GET_MODE (x);
22117
22118   switch (code)
22119     {
22120     case CONST_INT:
22121     case CONST:
22122     case LABEL_REF:
22123     case SYMBOL_REF:
22124       if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
22125         *total = 3;
22126       else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
22127         *total = 2;
22128       else if (flag_pic && SYMBOLIC_CONST (x)
22129                && (!TARGET_64BIT
22130                    || (!GET_CODE (x) != LABEL_REF
22131                        && (GET_CODE (x) != SYMBOL_REF
22132                            || !SYMBOL_REF_LOCAL_P (x)))))
22133         *total = 1;
22134       else
22135         *total = 0;
22136       return true;
22137
22138     case CONST_DOUBLE:
22139       if (mode == VOIDmode)
22140         *total = 0;
22141       else
22142         switch (standard_80387_constant_p (x))
22143           {
22144           case 1: /* 0.0 */
22145             *total = 1;
22146             break;
22147           default: /* Other constants */
22148             *total = 2;
22149             break;
22150           case 0:
22151           case -1:
22152             /* Start with (MEM (SYMBOL_REF)), since that's where
22153                it'll probably end up.  Add a penalty for size.  */
22154             *total = (COSTS_N_INSNS (1)
22155                       + (flag_pic != 0 && !TARGET_64BIT)
22156                       + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
22157             break;
22158           }
22159       return true;
22160
22161     case ZERO_EXTEND:
22162       /* The zero extensions is often completely free on x86_64, so make
22163          it as cheap as possible.  */
22164       if (TARGET_64BIT && mode == DImode
22165           && GET_MODE (XEXP (x, 0)) == SImode)
22166         *total = 1;
22167       else if (TARGET_ZERO_EXTEND_WITH_AND)
22168         *total = ix86_cost->add;
22169       else
22170         *total = ix86_cost->movzx;
22171       return false;
22172
22173     case SIGN_EXTEND:
22174       *total = ix86_cost->movsx;
22175       return false;
22176
22177     case ASHIFT:
22178       if (CONST_INT_P (XEXP (x, 1))
22179           && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
22180         {
22181           HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
22182           if (value == 1)
22183             {
22184               *total = ix86_cost->add;
22185               return false;
22186             }
22187           if ((value == 2 || value == 3)
22188               && ix86_cost->lea <= ix86_cost->shift_const)
22189             {
22190               *total = ix86_cost->lea;
22191               return false;
22192             }
22193         }
22194       /* FALLTHRU */
22195
22196     case ROTATE:
22197     case ASHIFTRT:
22198     case LSHIFTRT:
22199     case ROTATERT:
22200       if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
22201         {
22202           if (CONST_INT_P (XEXP (x, 1)))
22203             {
22204               if (INTVAL (XEXP (x, 1)) > 32)
22205                 *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
22206               else
22207                 *total = ix86_cost->shift_const * 2;
22208             }
22209           else
22210             {
22211               if (GET_CODE (XEXP (x, 1)) == AND)
22212                 *total = ix86_cost->shift_var * 2;
22213               else
22214                 *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
22215             }
22216         }
22217       else
22218         {
22219           if (CONST_INT_P (XEXP (x, 1)))
22220             *total = ix86_cost->shift_const;
22221           else
22222             *total = ix86_cost->shift_var;
22223         }
22224       return false;
22225
22226     case MULT:
22227       if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22228         {
22229           /* ??? SSE scalar cost should be used here.  */
22230           *total = ix86_cost->fmul;
22231           return false;
22232         }
22233       else if (X87_FLOAT_MODE_P (mode))
22234         {
22235           *total = ix86_cost->fmul;
22236           return false;
22237         }
22238       else if (FLOAT_MODE_P (mode))
22239         {
22240           /* ??? SSE vector cost should be used here.  */
22241           *total = ix86_cost->fmul;
22242           return false;
22243         }
22244       else
22245         {
22246           rtx op0 = XEXP (x, 0);
22247           rtx op1 = XEXP (x, 1);
22248           int nbits;
22249           if (CONST_INT_P (XEXP (x, 1)))
22250             {
22251               unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
22252               for (nbits = 0; value != 0; value &= value - 1)
22253                 nbits++;
22254             }
22255           else
22256             /* This is arbitrary.  */
22257             nbits = 7;
22258
22259           /* Compute costs correctly for widening multiplication.  */
22260           if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
22261               && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
22262                  == GET_MODE_SIZE (mode))
22263             {
22264               int is_mulwiden = 0;
22265               enum machine_mode inner_mode = GET_MODE (op0);
22266
22267               if (GET_CODE (op0) == GET_CODE (op1))
22268                 is_mulwiden = 1, op1 = XEXP (op1, 0);
22269               else if (CONST_INT_P (op1))
22270                 {
22271                   if (GET_CODE (op0) == SIGN_EXTEND)
22272                     is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
22273                                   == INTVAL (op1);
22274                   else
22275                     is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
22276                 }
22277
22278               if (is_mulwiden)
22279                 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
22280             }
22281
22282           *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
22283                     + nbits * ix86_cost->mult_bit
22284                     + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
22285
22286           return true;
22287         }
22288
22289     case DIV:
22290     case UDIV:
22291     case MOD:
22292     case UMOD:
22293       if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22294         /* ??? SSE cost should be used here.  */
22295         *total = ix86_cost->fdiv;
22296       else if (X87_FLOAT_MODE_P (mode))
22297         *total = ix86_cost->fdiv;
22298       else if (FLOAT_MODE_P (mode))
22299         /* ??? SSE vector cost should be used here.  */
22300         *total = ix86_cost->fdiv;
22301       else
22302         *total = ix86_cost->divide[MODE_INDEX (mode)];
22303       return false;
22304
22305     case PLUS:
22306       if (GET_MODE_CLASS (mode) == MODE_INT
22307                && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
22308         {
22309           if (GET_CODE (XEXP (x, 0)) == PLUS
22310               && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
22311               && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
22312               && CONSTANT_P (XEXP (x, 1)))
22313             {
22314               HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
22315               if (val == 2 || val == 4 || val == 8)
22316                 {
22317                   *total = ix86_cost->lea;
22318                   *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
22319                   *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
22320                                       outer_code);
22321                   *total += rtx_cost (XEXP (x, 1), outer_code);
22322                   return true;
22323                 }
22324             }
22325           else if (GET_CODE (XEXP (x, 0)) == MULT
22326                    && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
22327             {
22328               HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
22329               if (val == 2 || val == 4 || val == 8)
22330                 {
22331                   *total = ix86_cost->lea;
22332                   *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
22333                   *total += rtx_cost (XEXP (x, 1), outer_code);
22334                   return true;
22335                 }
22336             }
22337           else if (GET_CODE (XEXP (x, 0)) == PLUS)
22338             {
22339               *total = ix86_cost->lea;
22340               *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
22341               *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
22342               *total += rtx_cost (XEXP (x, 1), outer_code);
22343               return true;
22344             }
22345         }
22346       /* FALLTHRU */
22347
22348     case MINUS:
22349       if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22350         {
22351           /* ??? SSE cost should be used here.  */
22352           *total = ix86_cost->fadd;
22353           return false;
22354         }
22355       else if (X87_FLOAT_MODE_P (mode))
22356         {
22357           *total = ix86_cost->fadd;
22358           return false;
22359         }
22360       else if (FLOAT_MODE_P (mode))
22361         {
22362           /* ??? SSE vector cost should be used here.  */
22363           *total = ix86_cost->fadd;
22364           return false;
22365         }
22366       /* FALLTHRU */
22367
22368     case AND:
22369     case IOR:
22370     case XOR:
22371       if (!TARGET_64BIT && mode == DImode)
22372         {
22373           *total = (ix86_cost->add * 2
22374                     + (rtx_cost (XEXP (x, 0), outer_code)
22375                        << (GET_MODE (XEXP (x, 0)) != DImode))
22376                     + (rtx_cost (XEXP (x, 1), outer_code)
22377                        << (GET_MODE (XEXP (x, 1)) != DImode)));
22378           return true;
22379         }
22380       /* FALLTHRU */
22381
22382     case NEG:
22383       if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22384         {
22385           /* ??? SSE cost should be used here.  */
22386           *total = ix86_cost->fchs;
22387           return false;
22388         }
22389       else if (X87_FLOAT_MODE_P (mode))
22390         {
22391           *total = ix86_cost->fchs;
22392           return false;
22393         }
22394       else if (FLOAT_MODE_P (mode))
22395         {
22396           /* ??? SSE vector cost should be used here.  */
22397           *total = ix86_cost->fchs;
22398           return false;
22399         }
22400       /* FALLTHRU */
22401
22402     case NOT:
22403       if (!TARGET_64BIT && mode == DImode)
22404         *total = ix86_cost->add * 2;
22405       else
22406         *total = ix86_cost->add;
22407       return false;
22408
22409     case COMPARE:
22410       if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
22411           && XEXP (XEXP (x, 0), 1) == const1_rtx
22412           && CONST_INT_P (XEXP (XEXP (x, 0), 2))
22413           && XEXP (x, 1) == const0_rtx)
22414         {
22415           /* This kind of construct is implemented using test[bwl].
22416              Treat it as if we had an AND.  */
22417           *total = (ix86_cost->add
22418                     + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
22419                     + rtx_cost (const1_rtx, outer_code));
22420           return true;
22421         }
22422       return false;
22423
22424     case FLOAT_EXTEND:
22425       if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
22426         *total = 0;
22427       return false;
22428
22429     case ABS:
22430       if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22431         /* ??? SSE cost should be used here.  */
22432         *total = ix86_cost->fabs;
22433       else if (X87_FLOAT_MODE_P (mode))
22434         *total = ix86_cost->fabs;
22435       else if (FLOAT_MODE_P (mode))
22436         /* ??? SSE vector cost should be used here.  */
22437         *total = ix86_cost->fabs;
22438       return false;
22439
22440     case SQRT:
22441       if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22442         /* ??? SSE cost should be used here.  */
22443         *total = ix86_cost->fsqrt;
22444       else if (X87_FLOAT_MODE_P (mode))
22445         *total = ix86_cost->fsqrt;
22446       else if (FLOAT_MODE_P (mode))
22447         /* ??? SSE vector cost should be used here.  */
22448         *total = ix86_cost->fsqrt;
22449       return false;
22450
22451     case UNSPEC:
22452       if (XINT (x, 1) == UNSPEC_TP)
22453         *total = 0;
22454       return false;
22455
22456     default:
22457       return false;
22458     }
22459 }
22460
22461 #if TARGET_MACHO
22462
22463 static int current_machopic_label_num;
22464
22465 /* Given a symbol name and its associated stub, write out the
22466    definition of the stub.  */
22467
22468 void
22469 machopic_output_stub (FILE *file, const char *symb, const char *stub)
22470 {
22471   unsigned int length;
22472   char *binder_name, *symbol_name, lazy_ptr_name[32];
22473   int label = ++current_machopic_label_num;
22474
22475   /* For 64-bit we shouldn't get here.  */
22476   gcc_assert (!TARGET_64BIT);
22477
22478   /* Lose our funky encoding stuff so it doesn't contaminate the stub.  */
22479   symb = (*targetm.strip_name_encoding) (symb);
22480
22481   length = strlen (stub);
22482   binder_name = alloca (length + 32);
22483   GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
22484
22485   length = strlen (symb);
22486   symbol_name = alloca (length + 32);
22487   GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
22488
22489   sprintf (lazy_ptr_name, "L%d$lz", label);
22490
22491   if (MACHOPIC_PURE)
22492     switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
22493   else
22494     switch_to_section (darwin_sections[machopic_symbol_stub_section]);
22495
22496   fprintf (file, "%s:\n", stub);
22497   fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
22498
22499   if (MACHOPIC_PURE)
22500     {
22501       fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
22502       fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
22503       fprintf (file, "\tjmp\t*%%edx\n");
22504     }
22505   else
22506     fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
22507
22508   fprintf (file, "%s:\n", binder_name);
22509
22510   if (MACHOPIC_PURE)
22511     {
22512       fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
22513       fprintf (file, "\tpushl\t%%eax\n");
22514     }
22515   else
22516     fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
22517
22518   fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
22519
22520   switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
22521   fprintf (file, "%s:\n", lazy_ptr_name);
22522   fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
22523   fprintf (file, "\t.long %s\n", binder_name);
22524 }
22525
/* End-of-file hook for Darwin on x86: run the Darwin end-of-file
   processing first, then the i386 one.  */

void
darwin_x86_file_end (void)
{
  darwin_file_end ();
  ix86_file_end ();
}
22532 #endif /* TARGET_MACHO */
22533
22534 /* Order the registers for register allocator.  */
22535
22536 void
22537 x86_order_regs_for_local_alloc (void)
22538 {
22539    int pos = 0;
22540    int i;
22541
22542    /* First allocate the local general purpose registers.  */
22543    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
22544      if (GENERAL_REGNO_P (i) && call_used_regs[i])
22545         reg_alloc_order [pos++] = i;
22546
22547    /* Global general purpose registers.  */
22548    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
22549      if (GENERAL_REGNO_P (i) && !call_used_regs[i])
22550         reg_alloc_order [pos++] = i;
22551
22552    /* x87 registers come first in case we are doing FP math
22553       using them.  */
22554    if (!TARGET_SSE_MATH)
22555      for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
22556        reg_alloc_order [pos++] = i;
22557
22558    /* SSE registers.  */
22559    for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
22560      reg_alloc_order [pos++] = i;
22561    for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
22562      reg_alloc_order [pos++] = i;
22563
22564    /* x87 registers.  */
22565    if (TARGET_SSE_MATH)
22566      for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
22567        reg_alloc_order [pos++] = i;
22568
22569    for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
22570      reg_alloc_order [pos++] = i;
22571
22572    /* Initialize the rest of array as we do not allocate some registers
22573       at all.  */
22574    while (pos < FIRST_PSEUDO_REGISTER)
22575      reg_alloc_order [pos++] = 0;
22576 }
22577
22578 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
22579    struct attribute_spec.handler.  */
22580 static tree
22581 ix86_handle_struct_attribute (tree *node, tree name,
22582                               tree args ATTRIBUTE_UNUSED,
22583                               int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
22584 {
22585   tree *type = NULL;
22586   if (DECL_P (*node))
22587     {
22588       if (TREE_CODE (*node) == TYPE_DECL)
22589         type = &TREE_TYPE (*node);
22590     }
22591   else
22592     type = node;
22593
22594   if (!(type && (TREE_CODE (*type) == RECORD_TYPE
22595                  || TREE_CODE (*type) == UNION_TYPE)))
22596     {
22597       warning (OPT_Wattributes, "%qs attribute ignored",
22598                IDENTIFIER_POINTER (name));
22599       *no_add_attrs = true;
22600     }
22601
22602   else if ((is_attribute_p ("ms_struct", name)
22603             && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
22604            || ((is_attribute_p ("gcc_struct", name)
22605                 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
22606     {
22607       warning (OPT_Wattributes, "%qs incompatible attribute ignored",
22608                IDENTIFIER_POINTER (name));
22609       *no_add_attrs = true;
22610     }
22611
22612   return NULL_TREE;
22613 }
22614
22615 static bool
22616 ix86_ms_bitfield_layout_p (const_tree record_type)
22617 {
22618   return (TARGET_MS_BITFIELD_LAYOUT &&
22619           !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
22620     || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
22621 }
22622
22623 /* Returns an expression indicating where the this parameter is
22624    located on entry to the FUNCTION.  */
22625
22626 static rtx
22627 x86_this_parameter (tree function)
22628 {
22629   tree type = TREE_TYPE (function);
22630   bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
22631
22632   if (TARGET_64BIT)
22633     {
22634       const int *parm_regs;
22635
22636       if (TARGET_64BIT_MS_ABI)
22637         parm_regs = x86_64_ms_abi_int_parameter_registers;
22638       else
22639         parm_regs = x86_64_int_parameter_registers;
22640       return gen_rtx_REG (DImode, parm_regs[aggr]);
22641     }
22642
22643   if (ix86_function_regparm (type, function) > 0 && !stdarg_p (type))
22644     {
22645       int regno = AX_REG;
22646       if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
22647         regno = CX_REG;
22648       return gen_rtx_REG (SImode, regno);
22649     }
22650
22651   return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
22652 }
22653
22654 /* Determine whether x86_output_mi_thunk can succeed.  */
22655
22656 static bool
22657 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
22658                          HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
22659                          HOST_WIDE_INT vcall_offset, const_tree function)
22660 {
22661   /* 64-bit can handle anything.  */
22662   if (TARGET_64BIT)
22663     return true;
22664
22665   /* For 32-bit, everything's fine if we have one free register.  */
22666   if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
22667     return true;
22668
22669   /* Need a free register for vcall_offset.  */
22670   if (vcall_offset)
22671     return false;
22672
22673   /* Need a free register for GOT references.  */
22674   if (flag_pic && !(*targetm.binds_local_p) (function))
22675     return false;
22676
22677   /* Otherwise ok.  */
22678   return true;
22679 }
22680
22681 /* Output the assembler code for a thunk function.  THUNK_DECL is the
22682    declaration for the thunk function itself, FUNCTION is the decl for
22683    the target function.  DELTA is an immediate constant offset to be
22684    added to THIS.  If VCALL_OFFSET is nonzero, the word at
22685    *(*this + vcall_offset) should be added to THIS.  */
22686
22687 static void
22688 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
22689                      tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
22690                      HOST_WIDE_INT vcall_offset, tree function)
22691 {
22692   rtx xops[3];
22693   rtx this_param = x86_this_parameter (function);
22694   rtx this_reg, tmp;
22695
22696   /* If VCALL_OFFSET, we'll need THIS in a register.  Might as well
22697      pull it in now and let DELTA benefit.  */
22698   if (REG_P (this_param))
22699     this_reg = this_param;
22700   else if (vcall_offset)
22701     {
22702       /* Put the this parameter into %eax.  */
22703       xops[0] = this_param;
22704       xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG);
22705       output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
22706     }
22707   else
22708     this_reg = NULL_RTX;
22709
22710   /* Adjust the this parameter by a fixed constant.  */
22711   if (delta)
22712     {
22713       xops[0] = GEN_INT (delta);
22714       xops[1] = this_reg ? this_reg : this_param;
22715       if (TARGET_64BIT)
22716         {
22717           if (!x86_64_general_operand (xops[0], DImode))
22718             {
22719               tmp = gen_rtx_REG (DImode, R10_REG);
22720               xops[1] = tmp;
22721               output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
22722               xops[0] = tmp;
22723               xops[1] = this_param;
22724             }
22725           output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
22726         }
22727       else
22728         output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
22729     }
22730
22731   /* Adjust the this parameter by a value stored in the vtable.  */
22732   if (vcall_offset)
22733     {
22734       if (TARGET_64BIT)
22735         tmp = gen_rtx_REG (DImode, R10_REG);
22736       else
22737         {
22738           int tmp_regno = CX_REG;
22739           if (lookup_attribute ("fastcall",
22740                                 TYPE_ATTRIBUTES (TREE_TYPE (function))))
22741             tmp_regno = AX_REG;
22742           tmp = gen_rtx_REG (SImode, tmp_regno);
22743         }
22744
22745       xops[0] = gen_rtx_MEM (Pmode, this_reg);
22746       xops[1] = tmp;
22747       if (TARGET_64BIT)
22748         output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
22749       else
22750         output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
22751
22752       /* Adjust the this parameter.  */
22753       xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
22754       if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
22755         {
22756           rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
22757           xops[0] = GEN_INT (vcall_offset);
22758           xops[1] = tmp2;
22759           output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
22760           xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
22761         }
22762       xops[1] = this_reg;
22763       if (TARGET_64BIT)
22764         output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
22765       else
22766         output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
22767     }
22768
22769   /* If necessary, drop THIS back to its stack slot.  */
22770   if (this_reg && this_reg != this_param)
22771     {
22772       xops[0] = this_reg;
22773       xops[1] = this_param;
22774       output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
22775     }
22776
22777   xops[0] = XEXP (DECL_RTL (function), 0);
22778   if (TARGET_64BIT)
22779     {
22780       if (!flag_pic || (*targetm.binds_local_p) (function))
22781         output_asm_insn ("jmp\t%P0", xops);
22782       /* All thunks should be in the same object as their target,
22783          and thus binds_local_p should be true.  */
22784       else if (TARGET_64BIT_MS_ABI)
22785         gcc_unreachable ();
22786       else
22787         {
22788           tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
22789           tmp = gen_rtx_CONST (Pmode, tmp);
22790           tmp = gen_rtx_MEM (QImode, tmp);
22791           xops[0] = tmp;
22792           output_asm_insn ("jmp\t%A0", xops);
22793         }
22794     }
22795   else
22796     {
22797       if (!flag_pic || (*targetm.binds_local_p) (function))
22798         output_asm_insn ("jmp\t%P0", xops);
22799       else
22800 #if TARGET_MACHO
22801         if (TARGET_MACHO)
22802           {
22803             rtx sym_ref = XEXP (DECL_RTL (function), 0);
22804             tmp = (gen_rtx_SYMBOL_REF
22805                    (Pmode,
22806                     machopic_indirection_name (sym_ref, /*stub_p=*/true)));
22807             tmp = gen_rtx_MEM (QImode, tmp);
22808             xops[0] = tmp;
22809             output_asm_insn ("jmp\t%0", xops);
22810           }
22811         else
22812 #endif /* TARGET_MACHO */
22813         {
22814           tmp = gen_rtx_REG (SImode, CX_REG);
22815           output_set_got (tmp, NULL_RTX);
22816
22817           xops[1] = tmp;
22818           output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
22819           output_asm_insn ("jmp\t{*}%1", xops);
22820         }
22821     }
22822 }
22823
22824 static void
22825 x86_file_start (void)
22826 {
22827   default_file_start ();
22828 #if TARGET_MACHO
22829   darwin_file_start ();
22830 #endif
22831   if (X86_FILE_START_VERSION_DIRECTIVE)
22832     fputs ("\t.version\t\"01.01\"\n", asm_out_file);
22833   if (X86_FILE_START_FLTUSED)
22834     fputs ("\t.global\t__fltused\n", asm_out_file);
22835   if (ix86_asm_dialect == ASM_INTEL)
22836     fputs ("\t.intel_syntax noprefix\n", asm_out_file);
22837 }
22838
22839 int
22840 x86_field_alignment (tree field, int computed)
22841 {
22842   enum machine_mode mode;
22843   tree type = TREE_TYPE (field);
22844
22845   if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
22846     return computed;
22847   mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
22848                     ? get_inner_array_type (type) : type);
22849   if (mode == DFmode || mode == DCmode
22850       || GET_MODE_CLASS (mode) == MODE_INT
22851       || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
22852     return MIN (32, computed);
22853   return computed;
22854 }
22855
22856 /* Output assembler code to FILE to increment profiler label # LABELNO
22857    for profiling a function entry.  */
22858 void
22859 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
22860 {
22861   if (TARGET_64BIT)
22862     {
22863 #ifndef NO_PROFILE_COUNTERS
22864       fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
22865 #endif
22866
22867       if (!TARGET_64BIT_MS_ABI && flag_pic)
22868         fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
22869       else
22870         fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
22871     }
22872   else if (flag_pic)
22873     {
22874 #ifndef NO_PROFILE_COUNTERS
22875       fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
22876                LPREFIX, labelno, PROFILE_COUNT_REGISTER);
22877 #endif
22878       fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
22879     }
22880   else
22881     {
22882 #ifndef NO_PROFILE_COUNTERS
22883       fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
22884                PROFILE_COUNT_REGISTER);
22885 #endif
22886       fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
22887     }
22888 }
22889
22890 /* We don't have exact information about the insn sizes, but we may assume
22891    quite safely that we are informed about all 1 byte insns and memory
22892    address sizes.  This is enough to eliminate unnecessary padding in
22893    99% of cases.  */
22894
22895 static int
22896 min_insn_size (rtx insn)
22897 {
22898   int l = 0;
22899
22900   if (!INSN_P (insn) || !active_insn_p (insn))
22901     return 0;
22902
22903   /* Discard alignments we've emit and jump instructions.  */
22904   if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
22905       && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
22906     return 0;
22907   if (JUMP_P (insn)
22908       && (GET_CODE (PATTERN (insn)) == ADDR_VEC
22909           || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
22910     return 0;
22911
22912   /* Important case - calls are always 5 bytes.
22913      It is common to have many calls in the row.  */
22914   if (CALL_P (insn)
22915       && symbolic_reference_mentioned_p (PATTERN (insn))
22916       && !SIBLING_CALL_P (insn))
22917     return 5;
22918   if (get_attr_length (insn) <= 1)
22919     return 1;
22920
22921   /* For normal instructions we may rely on the sizes of addresses
22922      and the presence of symbol to require 4 bytes of encoding.
22923      This is not the case for jumps where references are PC relative.  */
22924   if (!JUMP_P (insn))
22925     {
22926       l = get_attr_length_address (insn);
22927       if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
22928         l = 4;
22929     }
22930   if (l)
22931     return 1+l;
22932   else
22933     return 2;
22934 }
22935
22936 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
22937    window.  */
22938
22939 static void
22940 ix86_avoid_jump_misspredicts (void)
22941 {
22942   rtx insn, start = get_insns ();
22943   int nbytes = 0, njumps = 0;
22944   int isjump = 0;
22945
22946   /* Look for all minimal intervals of instructions containing 4 jumps.
22947      The intervals are bounded by START and INSN.  NBYTES is the total
22948      size of instructions in the interval including INSN and not including
22949      START.  When the NBYTES is smaller than 16 bytes, it is possible
22950      that the end of START and INSN ends up in the same 16byte page.
22951
22952      The smallest offset in the page INSN can start is the case where START
22953      ends on the offset 0.  Offset of INSN is then NBYTES - sizeof (INSN).
22954      We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
22955      */
22956   for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
22957     {
22958
22959       nbytes += min_insn_size (insn);
22960       if (dump_file)
22961         fprintf(dump_file, "Insn %i estimated to %i bytes\n",
22962                 INSN_UID (insn), min_insn_size (insn));
22963       if ((JUMP_P (insn)
22964            && GET_CODE (PATTERN (insn)) != ADDR_VEC
22965            && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
22966           || CALL_P (insn))
22967         njumps++;
22968       else
22969         continue;
22970
22971       while (njumps > 3)
22972         {
22973           start = NEXT_INSN (start);
22974           if ((JUMP_P (start)
22975                && GET_CODE (PATTERN (start)) != ADDR_VEC
22976                && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
22977               || CALL_P (start))
22978             njumps--, isjump = 1;
22979           else
22980             isjump = 0;
22981           nbytes -= min_insn_size (start);
22982         }
22983       gcc_assert (njumps >= 0);
22984       if (dump_file)
22985         fprintf (dump_file, "Interval %i to %i has %i bytes\n",
22986                 INSN_UID (start), INSN_UID (insn), nbytes);
22987
22988       if (njumps == 3 && isjump && nbytes < 16)
22989         {
22990           int padsize = 15 - nbytes + min_insn_size (insn);
22991
22992           if (dump_file)
22993             fprintf (dump_file, "Padding insn %i by %i bytes!\n",
22994                      INSN_UID (insn), padsize);
22995           emit_insn_before (gen_align (GEN_INT (padsize)), insn);
22996         }
22997     }
22998 }
22999
23000 /* AMD Athlon works faster
23001    when RET is not destination of conditional jump or directly preceded
23002    by other jump instruction.  We avoid the penalty by inserting NOP just
23003    before the RET instructions in such cases.  */
23004 static void
23005 ix86_pad_returns (void)
23006 {
23007   edge e;
23008   edge_iterator ei;
23009
23010   FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
23011     {
23012       basic_block bb = e->src;
23013       rtx ret = BB_END (bb);
23014       rtx prev;
23015       bool replace = false;
23016
23017       if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
23018           || !maybe_hot_bb_p (bb))
23019         continue;
23020       for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
23021         if (active_insn_p (prev) || LABEL_P (prev))
23022           break;
23023       if (prev && LABEL_P (prev))
23024         {
23025           edge e;
23026           edge_iterator ei;
23027
23028           FOR_EACH_EDGE (e, ei, bb->preds)
23029             if (EDGE_FREQUENCY (e) && e->src->index >= 0
23030                 && !(e->flags & EDGE_FALLTHRU))
23031               replace = true;
23032         }
23033       if (!replace)
23034         {
23035           prev = prev_active_insn (ret);
23036           if (prev
23037               && ((JUMP_P (prev) && any_condjump_p (prev))
23038                   || CALL_P (prev)))
23039             replace = true;
23040           /* Empty functions get branch mispredict even when the jump destination
23041              is not visible to us.  */
23042           if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
23043             replace = true;
23044         }
23045       if (replace)
23046         {
23047           emit_insn_before (gen_return_internal_long (), ret);
23048           delete_insn (ret);
23049         }
23050     }
23051 }
23052
23053 /* Implement machine specific optimizations.  We implement padding of returns
23054    for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window.  */
23055 static void
23056 ix86_reorg (void)
23057 {
23058   if (TARGET_PAD_RETURNS && optimize && !optimize_size)
23059     ix86_pad_returns ();
23060   if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
23061     ix86_avoid_jump_misspredicts ();
23062 }
23063
23064 /* Return nonzero when QImode register that must be represented via REX prefix
23065    is used.  */
23066 bool
23067 x86_extended_QIreg_mentioned_p (rtx insn)
23068 {
23069   int i;
23070   extract_insn_cached (insn);
23071   for (i = 0; i < recog_data.n_operands; i++)
23072     if (REG_P (recog_data.operand[i])
23073         && REGNO (recog_data.operand[i]) >= 4)
23074        return true;
23075   return false;
23076 }
23077
23078 /* Return nonzero when P points to register encoded via REX prefix.
23079    Called via for_each_rtx.  */
23080 static int
23081 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
23082 {
23083    unsigned int regno;
23084    if (!REG_P (*p))
23085      return 0;
23086    regno = REGNO (*p);
23087    return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
23088 }
23089
23090 /* Return true when INSN mentions register that must be encoded using REX
23091    prefix.  */
23092 bool
23093 x86_extended_reg_mentioned_p (rtx insn)
23094 {
23095   return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
23096 }
23097
23098 /* Generate an unsigned DImode/SImode to FP conversion.  This is the same code
23099    optabs would emit if we didn't have TFmode patterns.  */
23100
23101 void
23102 x86_emit_floatuns (rtx operands[2])
23103 {
23104   rtx neglab, donelab, i0, i1, f0, in, out;
23105   enum machine_mode mode, inmode;
23106
23107   inmode = GET_MODE (operands[1]);
23108   gcc_assert (inmode == SImode || inmode == DImode);
23109
23110   out = operands[0];
23111   in = force_reg (inmode, operands[1]);
23112   mode = GET_MODE (out);
23113   neglab = gen_label_rtx ();
23114   donelab = gen_label_rtx ();
23115   f0 = gen_reg_rtx (mode);
23116
23117   emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
23118
23119   expand_float (out, in, 0);
23120
23121   emit_jump_insn (gen_jump (donelab));
23122   emit_barrier ();
23123
23124   emit_label (neglab);
23125
23126   i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
23127                             1, OPTAB_DIRECT);
23128   i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
23129                             1, OPTAB_DIRECT);
23130   i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
23131
23132   expand_float (f0, i0, 0);
23133
23134   emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
23135
23136   emit_label (donelab);
23137 }
23138 \f
/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   with all elements equal to VAL.  Return true if successful.

   MODE is the vector mode being built.  When MMX_OK is false the
   V2SImode, V2SFmode, V4HImode and V8QImode cases return false.  Modes
   not listed in the switch also return false.  */

static bool
ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
                                   rtx target, rtx val)
{
  /* Used by the "widen" path: SMODE is the current element mode, WSMODE
     the scalar mode VAL is widened to, and WVMODE the vector mode passed
     to the recursive call.  */
  enum machine_mode smode, wsmode, wvmode;
  rtx x;

  switch (mode)
    {
    case V2SImode:
    case V2SFmode:
      if (!mmx_ok)
        return false;
      /* FALLTHRU */

    case V2DFmode:
    case V2DImode:
    case V4SFmode:
    case V4SImode:
      /* These modes are emitted directly as a VEC_DUPLICATE of VAL.  */
      val = force_reg (GET_MODE_INNER (mode), val);
      x = gen_rtx_VEC_DUPLICATE (mode, val);
      emit_insn (gen_rtx_SET (VOIDmode, target, x));
      return true;

    case V4HImode:
      if (!mmx_ok)
        return false;
      if (TARGET_SSE || TARGET_3DNOW_A)
        {
          /* Duplicate a TRUNCATE of the SImode lowpart of VAL.  */
          val = gen_lowpart (SImode, val);
          x = gen_rtx_TRUNCATE (HImode, val);
          x = gen_rtx_VEC_DUPLICATE (mode, x);
          emit_insn (gen_rtx_SET (VOIDmode, target, x));
          return true;
        }
      else
        {
          smode = HImode;
          wsmode = SImode;
          wvmode = V2SImode;
          goto widen;
        }

    case V8QImode:
      if (!mmx_ok)
        return false;
      smode = QImode;
      wsmode = HImode;
      wvmode = V4HImode;
      goto widen;
    case V8HImode:
      if (TARGET_SSE2)
        {
          rtx tmp1, tmp2;
          /* Extend HImode to SImode using a paradoxical SUBREG.  */
          tmp1 = gen_reg_rtx (SImode);
          emit_move_insn (tmp1, gen_lowpart (SImode, val));
          /* Insert the SImode value as low element of V4SImode vector. */
          tmp2 = gen_reg_rtx (V4SImode);
          tmp1 = gen_rtx_VEC_MERGE (V4SImode,
                                    gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
                                    CONST0_RTX (V4SImode),
                                    const1_rtx);
          emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
          /* Cast the V4SImode vector back to a V8HImode vector.  */
          tmp1 = gen_reg_rtx (V8HImode);
          emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
          /* Duplicate the low short through the whole low SImode word.  */
          emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
          /* Cast the V8HImode vector back to a V4SImode vector.  */
          tmp2 = gen_reg_rtx (V4SImode);
          emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
          /* Replicate the low element of the V4SImode vector.  */
          emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
          /* Cast the V4SImode back to V8HImode, and store in target.  */
          emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
          return true;
        }
      smode = HImode;
      wsmode = SImode;
      wvmode = V4SImode;
      goto widen;
    case V16QImode:
      if (TARGET_SSE2)
        {
          rtx tmp1, tmp2;
          /* Extend QImode to SImode using a paradoxical SUBREG.  */
          tmp1 = gen_reg_rtx (SImode);
          emit_move_insn (tmp1, gen_lowpart (SImode, val));
          /* Insert the SImode value as low element of V4SImode vector. */
          tmp2 = gen_reg_rtx (V4SImode);
          tmp1 = gen_rtx_VEC_MERGE (V4SImode,
                                    gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
                                    CONST0_RTX (V4SImode),
                                    const1_rtx);
          emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
          /* Cast the V4SImode vector back to a V16QImode vector.  */
          tmp1 = gen_reg_rtx (V16QImode);
          emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
          /* Duplicate the low byte through the whole low SImode word.
             Two unpack steps are needed: the first fills the low HImode
             word, the second the low SImode word.  */
          emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
          emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
          /* Cast the V16QImode vector back to a V4SImode vector.  */
          tmp2 = gen_reg_rtx (V4SImode);
          emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
          /* Replicate the low element of the V4SImode vector.  */
          emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
          /* Cast the V4SImode back to V16QImode, and store in target.  */
          emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
          return true;
        }
      smode = QImode;
      wsmode = HImode;
      wvmode = V8HImode;
      goto widen;
    widen:
      /* Replicate the value once into the next wider mode and recurse.
         VAL becomes (VAL << bitsize (SMODE)) | VAL in WSMODE.  */
      val = convert_modes (wsmode, smode, val, true);
      x = expand_simple_binop (wsmode, ASHIFT, val,
                               GEN_INT (GET_MODE_BITSIZE (smode)),
                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
      val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);

      /* Build the wider vector in a fresh pseudo; the recursive call is
         required to succeed.  */
      x = gen_reg_rtx (wvmode);
      if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
        gcc_unreachable ();
      emit_move_insn (target, gen_lowpart (mode, x));
      return true;

    default:
      return false;
    }
}
23275
/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   whose ONE_VAR element is VAR, and other elements are zero.  Return true
   if successful.

   MODE is the vector mode being built.  When MMX_OK is false the
   V2SFmode, V2SImode, V4HImode and V8QImode cases return false.  Apart
   from V4SFmode and V4SImode, only ONE_VAR == 0 is handled; other
   positions return false so that the caller can try another strategy.  */

static bool
ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
                                     rtx target, rtx var, int one_var)
{
  enum machine_mode vsimode;
  rtx new_target;
  rtx x, tmp;

  switch (mode)
    {
    case V2SFmode:
    case V2SImode:
      if (!mmx_ok)
        return false;
      /* FALLTHRU */

    case V2DFmode:
    case V2DImode:
      if (one_var != 0)
        return false;
      /* Element 0 is VAR and element 1 is zero.  */
      var = force_reg (GET_MODE_INNER (mode), var);
      x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
      emit_insn (gen_rtx_SET (VOIDmode, target, x));
      return true;

    case V4SFmode:
    case V4SImode:
      /* Work in a fresh pseudo unless TARGET is already a pseudo
         register.  */
      if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
        new_target = gen_reg_rtx (mode);
      else
        new_target = target;
      /* First build the vector with VAR in element 0 and zeros in the
         remaining elements.  */
      var = force_reg (GET_MODE_INNER (mode), var);
      x = gen_rtx_VEC_DUPLICATE (mode, var);
      x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
      if (one_var != 0)
        {
          /* We need to shuffle the value to the correct position, so
             create a new pseudo to store the intermediate result.  */

          /* In both shuffles below, position ONE_VAR selects element 0
             of the intermediate (VAR) and every other position selects
             element 1 (a zero).  */

          /* With SSE2, we can use the integer shuffle insns.  */
          if (mode != V4SFmode && TARGET_SSE2)
            {
              emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
                                            GEN_INT (1),
                                            GEN_INT (one_var == 1 ? 0 : 1),
                                            GEN_INT (one_var == 2 ? 0 : 1),
                                            GEN_INT (one_var == 3 ? 0 : 1)));
              if (target != new_target)
                emit_move_insn (target, new_target);
              return true;
            }

          /* Otherwise convert the intermediate result to V4SFmode and
             use the SSE1 shuffle instructions.  */
          if (mode != V4SFmode)
            {
              tmp = gen_reg_rtx (V4SFmode);
              emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
            }
          else
            tmp = new_target;

          /* The last two selectors carry a +4 offset because they index
             the second source operand of the shuffle pattern.  */
          emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
                                       GEN_INT (1),
                                       GEN_INT (one_var == 1 ? 0 : 1),
                                       GEN_INT (one_var == 2 ? 0+4 : 1+4),
                                       GEN_INT (one_var == 3 ? 0+4 : 1+4)));

          if (mode != V4SFmode)
            emit_move_insn (target, gen_lowpart (V4SImode, tmp));
          else if (tmp != target)
            emit_move_insn (target, tmp);
        }
      else if (target != new_target)
        emit_move_insn (target, new_target);
      return true;

    case V8HImode:
    case V16QImode:
      vsimode = V4SImode;
      goto widen;
    case V4HImode:
    case V8QImode:
      if (!mmx_ok)
        return false;
      vsimode = V2SImode;
      goto widen;
    widen:
      /* Only the lowest element can be handled by widening.  */
      if (one_var != 0)
        return false;

      /* Zero extend the variable element to SImode and recurse.  */
      var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);

      x = gen_reg_rtx (vsimode);
      if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
                                                var, one_var))
        gcc_unreachable ();

      /* Reinterpret the SImode-element vector in the requested mode.  */
      emit_move_insn (target, gen_lowpart (mode, x));
      return true;

    default:
      return false;
    }
}
23387
/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   consisting of the values in VALS.  It is known that all elements
   except ONE_VAR are constants.  Return true if successful.

   The strategy is to load a constant vector in which element ONE_VAR
   has been replaced by zero, and then overwrite that element with the
   variable value through ix86_expand_vector_set.  MMX_OK is passed on
   to ix86_expand_vector_set.  */

static bool
ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
                                 rtx target, rtx vals, int one_var)
{
  rtx var = XVECEXP (vals, 0, one_var);
  enum machine_mode wmode;
  rtx const_vec, x;

  /* Build the all-constant vector on a copy so that VALS itself is not
     modified.  */
  const_vec = copy_rtx (vals);
  XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
  const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));

  switch (mode)
    {
    case V2DFmode:
    case V2DImode:
    case V2SFmode:
    case V2SImode:
      /* For the two element vectors, it's just as easy to use
         the general case.  */
      return false;

    case V4SFmode:
    case V4SImode:
    case V8HImode:
    case V4HImode:
      /* Handled by the common code after the switch.  */
      break;

    case V16QImode:
      wmode = V8HImode;
      goto widen;
    case V8QImode:
      wmode = V4HImode;
      goto widen;
    widen:
      /* There's no way to set one QImode entry easily.  Combine
         the variable value with its adjacent constant value, and
         promote to an HImode set.  */
      x = XVECEXP (vals, 0, one_var ^ 1);
      if (one_var & 1)
        {
          /* VAR is the odd element: it forms the high byte and the
             constant neighbour the low byte.  */
          var = convert_modes (HImode, QImode, var, true);
          var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
                                     NULL_RTX, 1, OPTAB_LIB_WIDEN);
          x = GEN_INT (INTVAL (x) & 0xff);
        }
      else
        {
          /* VAR is the even element: it forms the low byte and the
             constant neighbour the high byte.  */
          var = convert_modes (HImode, QImode, var, true);
          x = gen_int_mode (INTVAL (x) << 8, HImode);
        }
      /* Skip the IOR when the constant part is zero.  */
      if (x != const0_rtx)
        var = expand_simple_binop (HImode, IOR, var, x, var,
                                   1, OPTAB_LIB_WIDEN);

      /* Load the constants viewed as a vector of HImode elements and
         store the combined value into HImode element ONE_VAR / 2.  */
      x = gen_reg_rtx (wmode);
      emit_move_insn (x, gen_lowpart (wmode, const_vec));
      ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);

      emit_move_insn (target, gen_lowpart (mode, x));
      return true;

    default:
      return false;
    }

  /* Load the constant vector, then overwrite the variable element.  */
  emit_move_insn (target, const_vec);
  ix86_expand_vector_set (mmx_ok, target, var, one_var);
  return true;
}
23462
/* A subroutine of ix86_expand_vector_init.  Handle the most general case:
   all values variable, and none identical.

   Store into TARGET, a vector of mode MODE, the elements given in VALS.
   Two strategies are used.  Two-element vectors, and V4SF/V4SI built
   from two halves, are emitted as a VEC_CONCAT.  The remaining modes
   pack the elements into word_mode integers with shifts and IORs and
   then move those words into the vector.  Modes not listed in the
   switch are not expected here (gcc_unreachable).  */

static void
ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
                                 rtx target, rtx vals)
{
  /* Mode of each VEC_CONCAT operand: the element mode for two-element
     vectors, or the V2 vector mode for V4SF/V4SI.  */
  enum machine_mode half_mode = GET_MODE_INNER (mode);
  rtx op0 = NULL, op1 = NULL;
  bool use_vec_concat = false;

  switch (mode)
    {
    case V2SFmode:
    case V2SImode:
      /* Without MMX_OK or SSE, fall back to the word-packing code.  */
      if (!mmx_ok && !TARGET_SSE)
        break;
      /* FALLTHRU */

    case V2DFmode:
    case V2DImode:
      /* For the two element vectors, we always implement VEC_CONCAT.  */
      op0 = XVECEXP (vals, 0, 0);
      op1 = XVECEXP (vals, 0, 1);
      use_vec_concat = true;
      break;

    case V4SFmode:
      half_mode = V2SFmode;
      goto half;
    case V4SImode:
      half_mode = V2SImode;
      goto half;
    half:
      {
        rtvec v;

        /* For V4SF and V4SI, we implement a concat of two V2 vectors.
           Recurse to load the two halves.  */

        op0 = gen_reg_rtx (half_mode);
        v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
        ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));

        op1 = gen_reg_rtx (half_mode);
        v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
        ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));

        use_vec_concat = true;
      }
      break;

    case V8HImode:
    case V16QImode:
    case V4HImode:
    case V8QImode:
      /* These use the word-packing code below.  */
      break;

    default:
      gcc_unreachable ();
    }

  if (use_vec_concat)
    {
      if (!register_operand (op0, half_mode))
        op0 = force_reg (half_mode, op0);
      if (!register_operand (op1, half_mode))
        op1 = force_reg (half_mode, op1);

      emit_insn (gen_rtx_SET (VOIDmode, target,
                              gen_rtx_VEC_CONCAT (mode, op0, op1)));
    }
  else
    {
      int i, j, n_elts, n_words, n_elt_per_word;
      enum machine_mode inner_mode;
      rtx words[4], shift;

      inner_mode = GET_MODE_INNER (mode);
      n_elts = GET_MODE_NUNITS (mode);
      n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
      n_elt_per_word = n_elts / n_words;
      shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));

      for (i = 0; i < n_words; ++i)
        {
          rtx word = NULL_RTX;

          /* Walk the elements of word I from the highest index down, so
             that after the shifts the lowest-indexed element ends up in
             the least significant bits.  */
          for (j = 0; j < n_elt_per_word; ++j)
            {
              rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
              elt = convert_modes (word_mode, inner_mode, elt, true);

              if (j == 0)
                word = elt;
              else
                {
                  word = expand_simple_binop (word_mode, ASHIFT, word, shift,
                                              word, 1, OPTAB_LIB_WIDEN);
                  word = expand_simple_binop (word_mode, IOR, word, elt,
                                              word, 1, OPTAB_LIB_WIDEN);
                }
            }

          words[i] = word;
        }

      if (n_words == 1)
        emit_move_insn (target, gen_lowpart (mode, words[0]));
      else if (n_words == 2)
        {
          /* Assemble the two words in a temporary.  The CLOBBER is
             emitted before the two partial stores into TMP.  */
          rtx tmp = gen_reg_rtx (mode);
          emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
          emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
          emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
          emit_move_insn (target, tmp);
        }
      else if (n_words == 4)
        {
          /* Treat the four words as the elements of a V4SImode vector
             and recurse.  */
          rtx tmp = gen_reg_rtx (V4SImode);
          vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
          ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
          emit_move_insn (target, gen_lowpart (mode, tmp));
        }
      else
        gcc_unreachable ();
    }
}
23591
23592 /* Initialize vector TARGET via VALS.  Suppress the use of MMX
23593    instructions unless MMX_OK is true.  */
23594
23595 void
23596 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
23597 {
23598   enum machine_mode mode = GET_MODE (target);
23599   enum machine_mode inner_mode = GET_MODE_INNER (mode);
23600   int n_elts = GET_MODE_NUNITS (mode);
23601   int n_var = 0, one_var = -1;
23602   bool all_same = true, all_const_zero = true;
23603   int i;
23604   rtx x;
23605
23606   for (i = 0; i < n_elts; ++i)
23607     {
23608       x = XVECEXP (vals, 0, i);
23609       if (!(CONST_INT_P (x)
23610             || GET_CODE (x) == CONST_DOUBLE
23611             || GET_CODE (x) == CONST_FIXED))
23612         n_var++, one_var = i;
23613       else if (x != CONST0_RTX (inner_mode))
23614         all_const_zero = false;
23615       if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
23616         all_same = false;
23617     }
23618
23619   /* Constants are best loaded from the constant pool.  */
23620   if (n_var == 0)
23621     {
23622       emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
23623       return;
23624     }
23625
23626   /* If all values are identical, broadcast the value.  */
23627   if (all_same
23628       && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
23629                                             XVECEXP (vals, 0, 0)))
23630     return;
23631
23632   /* Values where only one field is non-constant are best loaded from
23633      the pool and overwritten via move later.  */
23634   if (n_var == 1)
23635     {
23636       if (all_const_zero
23637           && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
23638                                                   XVECEXP (vals, 0, one_var),
23639                                                   one_var))
23640         return;
23641
23642       if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
23643         return;
23644     }
23645
23646   ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
23647 }
23648
23649 void
23650 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
23651 {
23652   enum machine_mode mode = GET_MODE (target);
23653   enum machine_mode inner_mode = GET_MODE_INNER (mode);
23654   bool use_vec_merge = false;
23655   rtx tmp;
23656
23657   switch (mode)
23658     {
23659     case V2SFmode:
23660     case V2SImode:
23661       if (mmx_ok)
23662         {
23663           tmp = gen_reg_rtx (GET_MODE_INNER (mode));
23664           ix86_expand_vector_extract (true, tmp, target, 1 - elt);
23665           if (elt == 0)
23666             tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
23667           else
23668             tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
23669           emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
23670           return;
23671         }
23672       break;
23673
23674     case V2DImode:
23675       use_vec_merge = TARGET_SSE4_1;
23676       if (use_vec_merge)
23677         break;
23678
23679     case V2DFmode:
23680       {
23681         rtx op0, op1;
23682
23683         /* For the two element vectors, we implement a VEC_CONCAT with
23684            the extraction of the other element.  */
23685
23686         tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
23687         tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
23688
23689         if (elt == 0)
23690           op0 = val, op1 = tmp;
23691         else
23692           op0 = tmp, op1 = val;
23693
23694         tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
23695         emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
23696       }
23697       return;
23698
23699     case V4SFmode:
23700       use_vec_merge = TARGET_SSE4_1;
23701       if (use_vec_merge)
23702         break;
23703
23704       switch (elt)
23705         {
23706         case 0:
23707           use_vec_merge = true;
23708           break;
23709
23710         case 1:
23711           /* tmp = target = A B C D */
23712           tmp = copy_to_reg (target);
23713           /* target = A A B B */
23714           emit_insn (gen_sse_unpcklps (target, target, target));
23715           /* target = X A B B */
23716           ix86_expand_vector_set (false, target, val, 0);
23717           /* target = A X C D  */
23718           emit_insn (gen_sse_shufps_1 (target, target, tmp,
23719                                        GEN_INT (1), GEN_INT (0),
23720                                        GEN_INT (2+4), GEN_INT (3+4)));
23721           return;
23722
23723         case 2:
23724           /* tmp = target = A B C D */
23725           tmp = copy_to_reg (target);
23726           /* tmp = X B C D */
23727           ix86_expand_vector_set (false, tmp, val, 0);
23728           /* target = A B X D */
23729           emit_insn (gen_sse_shufps_1 (target, target, tmp,
23730                                        GEN_INT (0), GEN_INT (1),
23731                                        GEN_INT (0+4), GEN_INT (3+4)));
23732           return;
23733
23734         case 3:
23735           /* tmp = target = A B C D */
23736           tmp = copy_to_reg (target);
23737           /* tmp = X B C D */
23738           ix86_expand_vector_set (false, tmp, val, 0);
23739           /* target = A B X D */
23740           emit_insn (gen_sse_shufps_1 (target, target, tmp,
23741                                        GEN_INT (0), GEN_INT (1),
23742                                        GEN_INT (2+4), GEN_INT (0+4)));
23743           return;
23744
23745         default:
23746           gcc_unreachable ();
23747         }
23748       break;
23749
23750     case V4SImode:
23751       use_vec_merge = TARGET_SSE4_1;
23752       if (use_vec_merge)
23753         break;
23754
23755       /* Element 0 handled by vec_merge below.  */
23756       if (elt == 0)
23757         {
23758           use_vec_merge = true;
23759           break;
23760         }
23761
23762       if (TARGET_SSE2)
23763         {
23764           /* With SSE2, use integer shuffles to swap element 0 and ELT,
23765              store into element 0, then shuffle them back.  */
23766
23767           rtx order[4];
23768
23769           order[0] = GEN_INT (elt);
23770           order[1] = const1_rtx;
23771           order[2] = const2_rtx;
23772           order[3] = GEN_INT (3);
23773           order[elt] = const0_rtx;
23774
23775           emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
23776                                         order[1], order[2], order[3]));
23777
23778           ix86_expand_vector_set (false, target, val, 0);
23779
23780           emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
23781                                         order[1], order[2], order[3]));
23782         }
23783       else
23784         {
23785           /* For SSE1, we have to reuse the V4SF code.  */
23786           ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
23787                                   gen_lowpart (SFmode, val), elt);
23788         }
23789       return;
23790
23791     case V8HImode:
23792       use_vec_merge = TARGET_SSE2;
23793       break;
23794     case V4HImode:
23795       use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
23796       break;
23797
23798     case V16QImode:
23799       use_vec_merge = TARGET_SSE4_1;
23800       break;
23801
23802     case V8QImode:
23803     default:
23804       break;
23805     }
23806
23807   if (use_vec_merge)
23808     {
23809       tmp = gen_rtx_VEC_DUPLICATE (mode, val);
23810       tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
23811       emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
23812     }
23813   else
23814     {
23815       rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
23816
23817       emit_move_insn (mem, target);
23818
23819       tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
23820       emit_move_insn (tmp, val);
23821
23822       emit_move_insn (target, mem);
23823     }
23824 }
23825
/* Extract element ELT of vector VEC into TARGET.  Suppress the use of
   MMX instructions unless MMX_OK is true.

   Where the mode and enabled ISA extensions allow it, the extraction is
   emitted as a VEC_SELECT, possibly after a shuffle that moves the
   wanted element to position 0.  Otherwise VEC is copied to a stack
   temporary and the element is loaded from memory.  */

void
ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
{
  enum machine_mode mode = GET_MODE (vec);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  bool use_vec_extr = false;
  rtx tmp;

  switch (mode)
    {
    case V2SImode:
    case V2SFmode:
      /* Without MMX go through memory.  */
      if (!mmx_ok)
        break;
      /* FALLTHRU */

    case V2DFmode:
    case V2DImode:
      use_vec_extr = true;
      break;

    case V4SFmode:
      use_vec_extr = TARGET_SSE4_1;
      if (use_vec_extr)
        break;

      /* Otherwise bring the wanted element to position 0 of a
         temporary and extract element 0 from that.  */
      switch (elt)
        {
        case 0:
          tmp = vec;
          break;

        case 1:
        case 3:
          /* Every selector names element ELT; the last two carry a +4
             offset because they index the second source operand.  */
          tmp = gen_reg_rtx (mode);
          emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
                                       GEN_INT (elt), GEN_INT (elt),
                                       GEN_INT (elt+4), GEN_INT (elt+4)));
          break;

        case 2:
          tmp = gen_reg_rtx (mode);
          emit_insn (gen_sse_unpckhps (tmp, vec, vec));
          break;

        default:
          gcc_unreachable ();
        }
      vec = tmp;
      use_vec_extr = true;
      elt = 0;
      break;

    case V4SImode:
      use_vec_extr = TARGET_SSE4_1;
      if (use_vec_extr)
        break;

      if (TARGET_SSE2)
        {
          /* Same approach as V4SFmode, using the integer shuffles.  */
          switch (elt)
            {
            case 0:
              tmp = vec;
              break;

            case 1:
            case 3:
              tmp = gen_reg_rtx (mode);
              emit_insn (gen_sse2_pshufd_1 (tmp, vec,
                                            GEN_INT (elt), GEN_INT (elt),
                                            GEN_INT (elt), GEN_INT (elt)));
              break;

            case 2:
              tmp = gen_reg_rtx (mode);
              emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
              break;

            default:
              gcc_unreachable ();
            }
          vec = tmp;
          use_vec_extr = true;
          elt = 0;
        }
      else
        {
          /* For SSE1, we have to reuse the V4SF code.  */
          ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
                                      gen_lowpart (V4SFmode, vec), elt);
          return;
        }
      break;

    case V8HImode:
      use_vec_extr = TARGET_SSE2;
      break;
    case V4HImode:
      use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
      break;

    case V16QImode:
      use_vec_extr = TARGET_SSE4_1;
      break;

    case V8QImode:
      /* ??? Could extract the appropriate HImode element and shift.  */
    default:
      break;
    }

  if (use_vec_extr)
    {
      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
      tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);

      /* Let the rtl optimizers know about the zero extension performed.
         TARGET is replaced by its SImode lowpart to match.  */
      if (inner_mode == QImode || inner_mode == HImode)
        {
          tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
          target = gen_lowpart (SImode, target);
        }

      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
    }
  else
    {
      /* Spill the vector and load the element from its stack slot.  */
      rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);

      emit_move_insn (mem, vec);

      tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
      emit_move_insn (target, tmp);
    }
}
23962
23963 /* Expand a vector reduction on V4SFmode for SSE1.  FN is the binary
23964    pattern to reduce; DEST is the destination; IN is the input vector.  */
23965
23966 void
23967 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
23968 {
23969   rtx tmp1, tmp2, tmp3;
23970
23971   tmp1 = gen_reg_rtx (V4SFmode);
23972   tmp2 = gen_reg_rtx (V4SFmode);
23973   tmp3 = gen_reg_rtx (V4SFmode);
23974
23975   emit_insn (gen_sse_movhlps (tmp1, in, in));
23976   emit_insn (fn (tmp2, tmp1, in));
23977
23978   emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
23979                                GEN_INT (1), GEN_INT (1),
23980                                GEN_INT (1+4), GEN_INT (1+4)));
23981   emit_insn (fn (dest, tmp2, tmp3));
23982 }
23983 \f
23984 /* Target hook for scalar_mode_supported_p.  */
23985 static bool
23986 ix86_scalar_mode_supported_p (enum machine_mode mode)
23987 {
23988   if (DECIMAL_FLOAT_MODE_P (mode))
23989     return true;
23990   else if (mode == TFmode)
23991     return TARGET_64BIT;
23992   else
23993     return default_scalar_mode_supported_p (mode);
23994 }
23995
23996 /* Implements target hook vector_mode_supported_p.  */
23997 static bool
23998 ix86_vector_mode_supported_p (enum machine_mode mode)
23999 {
24000   if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
24001     return true;
24002   if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
24003     return true;
24004   if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
24005     return true;
24006   if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
24007     return true;
24008   return false;
24009 }
24010
24011 /* Target hook for c_mode_for_suffix.  */
24012 static enum machine_mode
24013 ix86_c_mode_for_suffix (char suffix)
24014 {
24015   if (TARGET_64BIT && suffix == 'q')
24016     return TFmode;
24017   if (TARGET_MMX && suffix == 'w')
24018     return XFmode;
24019
24020   return VOIDmode;
24021 }
24022
24023 /* Worker function for TARGET_MD_ASM_CLOBBERS.
24024
24025    We do this in the new i386 backend to maintain source compatibility
24026    with the old cc0-based compiler.  */
24027
24028 static tree
24029 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
24030                       tree inputs ATTRIBUTE_UNUSED,
24031                       tree clobbers)
24032 {
24033   clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
24034                         clobbers);
24035   clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
24036                         clobbers);
24037   return clobbers;
24038 }
24039
24040 /* Implements target vector targetm.asm.encode_section_info.  This
24041    is not used by netware.  */
24042
24043 static void ATTRIBUTE_UNUSED
24044 ix86_encode_section_info (tree decl, rtx rtl, int first)
24045 {
24046   default_encode_section_info (decl, rtl, first);
24047
24048   if (TREE_CODE (decl) == VAR_DECL
24049       && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
24050       && ix86_in_large_data_p (decl))
24051     SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
24052 }
24053
24054 /* Worker function for REVERSE_CONDITION.  */
24055
24056 enum rtx_code
24057 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
24058 {
24059   return (mode != CCFPmode && mode != CCFPUmode
24060           ? reverse_condition (code)
24061           : reverse_condition_maybe_unordered (code));
24062 }
24063
24064 /* Output code to perform an x87 FP register move, from OPERANDS[1]
24065    to OPERANDS[0].  */
24066
24067 const char *
24068 output_387_reg_move (rtx insn, rtx *operands)
24069 {
24070   if (REG_P (operands[0]))
24071     {
24072       if (REG_P (operands[1])
24073           && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
24074         {
24075           if (REGNO (operands[0]) == FIRST_STACK_REG)
24076             return output_387_ffreep (operands, 0);
24077           return "fstp\t%y0";
24078         }
24079       if (STACK_TOP_P (operands[0]))
24080         return "fld%z1\t%y1";
24081       return "fst\t%y0";
24082     }
24083   else if (MEM_P (operands[0]))
24084     {
24085       gcc_assert (REG_P (operands[1]));
24086       if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
24087         return "fstp%z0\t%y0";
24088       else
24089         {
24090           /* There is no non-popping store to memory for XFmode.
24091              So if we need one, follow the store with a load.  */
24092           if (GET_MODE (operands[0]) == XFmode)
24093             return "fstp%z0\t%y0\n\tfld%z0\t%y0";
24094           else
24095             return "fst%z0\t%y0";
24096         }
24097     }
24098   else
24099     gcc_unreachable();
24100 }
24101
24102 /* Output code to perform a conditional jump to LABEL, if C2 flag in
24103    FP status register is set.  */
24104
24105 void
24106 ix86_emit_fp_unordered_jump (rtx label)
24107 {
24108   rtx reg = gen_reg_rtx (HImode);
24109   rtx temp;
24110
24111   emit_insn (gen_x86_fnstsw_1 (reg));
24112
24113   if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_size))
24114     {
24115       emit_insn (gen_x86_sahf_1 (reg));
24116
24117       temp = gen_rtx_REG (CCmode, FLAGS_REG);
24118       temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
24119     }
24120   else
24121     {
24122       emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
24123
24124       temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24125       temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
24126     }
24127
24128   temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
24129                               gen_rtx_LABEL_REF (VOIDmode, label),
24130                               pc_rtx);
24131   temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
24132
24133   emit_jump_insn (temp);
24134   predict_jump (REG_BR_PROB_BASE * 10 / 100);
24135 }
24136
24137 /* Output code to perform a log1p XFmode calculation.  */
24138
24139 void ix86_emit_i387_log1p (rtx op0, rtx op1)
24140 {
24141   rtx label1 = gen_label_rtx ();
24142   rtx label2 = gen_label_rtx ();
24143
24144   rtx tmp = gen_reg_rtx (XFmode);
24145   rtx tmp2 = gen_reg_rtx (XFmode);
24146
24147   emit_insn (gen_absxf2 (tmp, op1));
24148   emit_insn (gen_cmpxf (tmp,
24149     CONST_DOUBLE_FROM_REAL_VALUE (
24150        REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
24151        XFmode)));
24152   emit_jump_insn (gen_bge (label1));
24153
24154   emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
24155   emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
24156   emit_jump (label2);
24157
24158   emit_label (label1);
24159   emit_move_insn (tmp, CONST1_RTX (XFmode));
24160   emit_insn (gen_addxf3 (tmp, op1, tmp));
24161   emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
24162   emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
24163
24164   emit_label (label2);
24165 }
24166
24167 /* Output code to perform a Newton-Rhapson approximation of a single precision
24168    floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm].  */
24169
24170 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
24171 {
24172   rtx x0, x1, e0, e1, two;
24173
24174   x0 = gen_reg_rtx (mode);
24175   e0 = gen_reg_rtx (mode);
24176   e1 = gen_reg_rtx (mode);
24177   x1 = gen_reg_rtx (mode);
24178
24179   two = CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode);
24180
24181   if (VECTOR_MODE_P (mode))
24182     two = ix86_build_const_vector (SFmode, true, two);
24183
24184   two = force_reg (mode, two);
24185
24186   /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */
24187
24188   /* x0 = rcp(b) estimate */
24189   emit_insn (gen_rtx_SET (VOIDmode, x0,
24190                           gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
24191                                           UNSPEC_RCP)));
24192   /* e0 = x0 * b */
24193   emit_insn (gen_rtx_SET (VOIDmode, e0,
24194                           gen_rtx_MULT (mode, x0, b)));
24195   /* e1 = 2. - e0 */
24196   emit_insn (gen_rtx_SET (VOIDmode, e1,
24197                           gen_rtx_MINUS (mode, two, e0)));
24198   /* x1 = x0 * e1 */
24199   emit_insn (gen_rtx_SET (VOIDmode, x1,
24200                           gen_rtx_MULT (mode, x0, e1)));
24201   /* res = a * x1 */
24202   emit_insn (gen_rtx_SET (VOIDmode, res,
24203                           gen_rtx_MULT (mode, a, x1)));
24204 }
24205
24206 /* Output code to perform a Newton-Rhapson approximation of a
24207    single precision floating point [reciprocal] square root.  */
24208
24209 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
24210                          bool recip)
24211 {
24212   rtx x0, e0, e1, e2, e3, mthree, mhalf;
24213   REAL_VALUE_TYPE r;
24214
24215   x0 = gen_reg_rtx (mode);
24216   e0 = gen_reg_rtx (mode);
24217   e1 = gen_reg_rtx (mode);
24218   e2 = gen_reg_rtx (mode);
24219   e3 = gen_reg_rtx (mode);
24220
24221   real_arithmetic (&r, NEGATE_EXPR, &dconst3, NULL);
24222   mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
24223
24224   real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
24225   mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
24226
24227   if (VECTOR_MODE_P (mode))
24228     {
24229       mthree = ix86_build_const_vector (SFmode, true, mthree);
24230       mhalf = ix86_build_const_vector (SFmode, true, mhalf);
24231     }
24232
24233   /* sqrt(a)  = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
24234      rsqrt(a) = -0.5     * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
24235
24236   /* x0 = rsqrt(a) estimate */
24237   emit_insn (gen_rtx_SET (VOIDmode, x0,
24238                           gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
24239                                           UNSPEC_RSQRT)));
24240
24241   /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0).  */
24242   if (!recip)
24243     {
24244       rtx zero, mask;
24245
24246       zero = gen_reg_rtx (mode);
24247       mask = gen_reg_rtx (mode);
24248
24249       zero = force_reg (mode, CONST0_RTX(mode));
24250       emit_insn (gen_rtx_SET (VOIDmode, mask,
24251                               gen_rtx_NE (mode, zero, a)));
24252
24253       emit_insn (gen_rtx_SET (VOIDmode, x0,
24254                               gen_rtx_AND (mode, x0, mask)));
24255     }
24256
24257   /* e0 = x0 * a */
24258   emit_insn (gen_rtx_SET (VOIDmode, e0,
24259                           gen_rtx_MULT (mode, x0, a)));
24260   /* e1 = e0 * x0 */
24261   emit_insn (gen_rtx_SET (VOIDmode, e1,
24262                           gen_rtx_MULT (mode, e0, x0)));
24263
24264   /* e2 = e1 - 3. */
24265   mthree = force_reg (mode, mthree);
24266   emit_insn (gen_rtx_SET (VOIDmode, e2,
24267                           gen_rtx_PLUS (mode, e1, mthree)));
24268
24269   mhalf = force_reg (mode, mhalf);
24270   if (recip)
24271     /* e3 = -.5 * x0 */
24272     emit_insn (gen_rtx_SET (VOIDmode, e3,
24273                             gen_rtx_MULT (mode, x0, mhalf)));
24274   else
24275     /* e3 = -.5 * e0 */
24276     emit_insn (gen_rtx_SET (VOIDmode, e3,
24277                             gen_rtx_MULT (mode, e0, mhalf)));
24278   /* ret = e2 * e3 */
24279   emit_insn (gen_rtx_SET (VOIDmode, res,
24280                           gen_rtx_MULT (mode, e2, e3)));
24281 }
24282
24283 /* Solaris implementation of TARGET_ASM_NAMED_SECTION.  */
24284
24285 static void ATTRIBUTE_UNUSED
24286 i386_solaris_elf_named_section (const char *name, unsigned int flags,
24287                                 tree decl)
24288 {
24289   /* With Binutils 2.15, the "@unwind" marker must be specified on
24290      every occurrence of the ".eh_frame" section, not just the first
24291      one.  */
24292   if (TARGET_64BIT
24293       && strcmp (name, ".eh_frame") == 0)
24294     {
24295       fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
24296                flags & SECTION_WRITE ? "aw" : "a");
24297       return;
24298     }
24299   default_elf_asm_named_section (name, flags, decl);
24300 }
24301
24302 /* Return the mangling of TYPE if it is an extended fundamental type.  */
24303
24304 static const char *
24305 ix86_mangle_type (const_tree type)
24306 {
24307   type = TYPE_MAIN_VARIANT (type);
24308
24309   if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
24310       && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
24311     return NULL;
24312
24313   switch (TYPE_MODE (type))
24314     {
24315     case TFmode:
24316       /* __float128 is "g".  */
24317       return "g";
24318     case XFmode:
24319       /* "long double" or __float80 is "e".  */
24320       return "e";
24321     default:
24322       return NULL;
24323     }
24324 }
24325
24326 /* For 32-bit code we can save PIC register setup by using
24327    __stack_chk_fail_local hidden function instead of calling
24328    __stack_chk_fail directly.  64-bit code doesn't need to setup any PIC
24329    register, so it is better to call __stack_chk_fail directly.  */
24330
24331 static tree
24332 ix86_stack_protect_fail (void)
24333 {
24334   return TARGET_64BIT
24335          ? default_external_stack_protect_fail ()
24336          : default_hidden_stack_protect_fail ();
24337 }
24338
24339 /* Select a format to encode pointers in exception handling data.  CODE
24340    is 0 for data, 1 for code labels, 2 for function pointers.  GLOBAL is
24341    true if the symbol may be affected by dynamic relocations.
24342
24343    ??? All x86 object file formats are capable of representing this.
24344    After all, the relocation needed is the same as for the call insn.
24345    Whether or not a particular assembler allows us to enter such, I
24346    guess we'll have to see.  */
24347 int
24348 asm_preferred_eh_data_format (int code, int global)
24349 {
24350   if (flag_pic)
24351     {
24352       int type = DW_EH_PE_sdata8;
24353       if (!TARGET_64BIT
24354           || ix86_cmodel == CM_SMALL_PIC
24355           || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
24356         type = DW_EH_PE_sdata4;
24357       return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
24358     }
24359   if (ix86_cmodel == CM_SMALL
24360       || (ix86_cmodel == CM_MEDIUM && code))
24361     return DW_EH_PE_udata4;
24362   return DW_EH_PE_absptr;
24363 }
24364 \f
24365 /* Expand copysign from SIGN to the positive value ABS_VALUE
24366    storing in RESULT.  If MASK is non-null, it shall be a mask to mask out
24367    the sign-bit.  */
24368 static void
24369 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
24370 {
24371   enum machine_mode mode = GET_MODE (sign);
24372   rtx sgn = gen_reg_rtx (mode);
24373   if (mask == NULL_RTX)
24374     {
24375       mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
24376       if (!VECTOR_MODE_P (mode))
24377         {
24378           /* We need to generate a scalar mode mask in this case.  */
24379           rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
24380           tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
24381           mask = gen_reg_rtx (mode);
24382           emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
24383         }
24384     }
24385   else
24386     mask = gen_rtx_NOT (mode, mask);
24387   emit_insn (gen_rtx_SET (VOIDmode, sgn,
24388                           gen_rtx_AND (mode, mask, sign)));
24389   emit_insn (gen_rtx_SET (VOIDmode, result,
24390                           gen_rtx_IOR (mode, abs_value, sgn)));
24391 }
24392
24393 /* Expand fabs (OP0) and return a new rtx that holds the result.  The
24394    mask for masking out the sign-bit is stored in *SMASK, if that is
24395    non-null.  */
24396 static rtx
24397 ix86_expand_sse_fabs (rtx op0, rtx *smask)
24398 {
24399   enum machine_mode mode = GET_MODE (op0);
24400   rtx xa, mask;
24401
24402   xa = gen_reg_rtx (mode);
24403   mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
24404   if (!VECTOR_MODE_P (mode))
24405     {
24406       /* We need to generate a scalar mode mask in this case.  */
24407       rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
24408       tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
24409       mask = gen_reg_rtx (mode);
24410       emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
24411     }
24412   emit_insn (gen_rtx_SET (VOIDmode, xa,
24413                           gen_rtx_AND (mode, op0, mask)));
24414
24415   if (smask)
24416     *smask = mask;
24417
24418   return xa;
24419 }
24420
24421 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
24422    swapping the operands if SWAP_OPERANDS is true.  The expanded
24423    code is a forward jump to a newly created label in case the
24424    comparison is true.  The generated label rtx is returned.  */
24425 static rtx
24426 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
24427                                   bool swap_operands)
24428 {
24429   rtx label, tmp;
24430
24431   if (swap_operands)
24432     {
24433       tmp = op0;
24434       op0 = op1;
24435       op1 = tmp;
24436     }
24437
24438   label = gen_label_rtx ();
24439   tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
24440   emit_insn (gen_rtx_SET (VOIDmode, tmp,
24441                           gen_rtx_COMPARE (CCFPUmode, op0, op1)));
24442   tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
24443   tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
24444                               gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
24445   tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
24446   JUMP_LABEL (tmp) = label;
24447
24448   return label;
24449 }
24450
24451 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
24452    using comparison code CODE.  Operands are swapped for the comparison if
24453    SWAP_OPERANDS is true.  Returns a rtx for the generated mask.  */
24454 static rtx
24455 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
24456                               bool swap_operands)
24457 {
24458   enum machine_mode mode = GET_MODE (op0);
24459   rtx mask = gen_reg_rtx (mode);
24460
24461   if (swap_operands)
24462     {
24463       rtx tmp = op0;
24464       op0 = op1;
24465       op1 = tmp;
24466     }
24467
24468   if (mode == DFmode)
24469     emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
24470                                     gen_rtx_fmt_ee (code, mode, op0, op1)));
24471   else
24472     emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
24473                                    gen_rtx_fmt_ee (code, mode, op0, op1)));
24474
24475   return mask;
24476 }
24477
24478 /* Generate and return a rtx of mode MODE for 2**n where n is the number
24479    of bits of the mantissa of MODE, which must be one of DFmode or SFmode.  */
24480 static rtx
24481 ix86_gen_TWO52 (enum machine_mode mode)
24482 {
24483   REAL_VALUE_TYPE TWO52r;
24484   rtx TWO52;
24485
24486   real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
24487   TWO52 = const_double_from_real_value (TWO52r, mode);
24488   TWO52 = force_reg (mode, TWO52);
24489
24490   return TWO52;
24491 }
24492
24493 /* Expand SSE sequence for computing lround from OP1 storing
24494    into OP0.  */
24495 void
24496 ix86_expand_lround (rtx op0, rtx op1)
24497 {
24498   /* C code for the stuff we're doing below:
24499        tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
24500        return (long)tmp;
24501    */
24502   enum machine_mode mode = GET_MODE (op1);
24503   const struct real_format *fmt;
24504   REAL_VALUE_TYPE pred_half, half_minus_pred_half;
24505   rtx adj;
24506
24507   /* load nextafter (0.5, 0.0) */
24508   fmt = REAL_MODE_FORMAT (mode);
24509   real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
24510   REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
24511
24512   /* adj = copysign (0.5, op1) */
24513   adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
24514   ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
24515
24516   /* adj = op1 + adj */
24517   adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
24518
24519   /* op0 = (imode)adj */
24520   expand_fix (op0, adj, 0);
24521 }
24522
24523 /* Expand SSE2 sequence for computing lround from OPERAND1 storing
24524    into OPERAND0.  */
24525 void
24526 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
24527 {
24528   /* C code for the stuff we're doing below (for do_floor):
24529         xi = (long)op1;
24530         xi -= (double)xi > op1 ? 1 : 0;
24531         return xi;
24532    */
24533   enum machine_mode fmode = GET_MODE (op1);
24534   enum machine_mode imode = GET_MODE (op0);
24535   rtx ireg, freg, label, tmp;
24536
24537   /* reg = (long)op1 */
24538   ireg = gen_reg_rtx (imode);
24539   expand_fix (ireg, op1, 0);
24540
24541   /* freg = (double)reg */
24542   freg = gen_reg_rtx (fmode);
24543   expand_float (freg, ireg, 0);
24544
24545   /* ireg = (freg > op1) ? ireg - 1 : ireg */
24546   label = ix86_expand_sse_compare_and_jump (UNLE,
24547                                             freg, op1, !do_floor);
24548   tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
24549                              ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
24550   emit_move_insn (ireg, tmp);
24551
24552   emit_label (label);
24553   LABEL_NUSES (label) = 1;
24554
24555   emit_move_insn (op0, ireg);
24556 }
24557
24558 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
24559    result in OPERAND0.  */
24560 void
24561 ix86_expand_rint (rtx operand0, rtx operand1)
24562 {
24563   /* C code for the stuff we're doing below:
24564         xa = fabs (operand1);
24565         if (!isless (xa, 2**52))
24566           return operand1;
24567         xa = xa + 2**52 - 2**52;
24568         return copysign (xa, operand1);
24569    */
24570   enum machine_mode mode = GET_MODE (operand0);
24571   rtx res, xa, label, TWO52, mask;
24572
24573   res = gen_reg_rtx (mode);
24574   emit_move_insn (res, operand1);
24575
24576   /* xa = abs (operand1) */
24577   xa = ix86_expand_sse_fabs (res, &mask);
24578
24579   /* if (!isless (xa, TWO52)) goto label; */
24580   TWO52 = ix86_gen_TWO52 (mode);
24581   label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
24582
24583   xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
24584   xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
24585
24586   ix86_sse_copysign_to_positive (res, xa, res, mask);
24587
24588   emit_label (label);
24589   LABEL_NUSES (label) = 1;
24590
24591   emit_move_insn (operand0, res);
24592 }
24593
24594 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
24595    into OPERAND0.  */
24596 void
24597 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
24598 {
24599   /* C code for the stuff we expand below.
24600         double xa = fabs (x), x2;
24601         if (!isless (xa, TWO52))
24602           return x;
24603         xa = xa + TWO52 - TWO52;
24604         x2 = copysign (xa, x);
24605      Compensate.  Floor:
24606         if (x2 > x)
24607           x2 -= 1;
24608      Compensate.  Ceil:
24609         if (x2 < x)
24610           x2 -= -1;
24611         return x2;
24612    */
24613   enum machine_mode mode = GET_MODE (operand0);
24614   rtx xa, TWO52, tmp, label, one, res, mask;
24615
24616   TWO52 = ix86_gen_TWO52 (mode);
24617
24618   /* Temporary for holding the result, initialized to the input
24619      operand to ease control flow.  */
24620   res = gen_reg_rtx (mode);
24621   emit_move_insn (res, operand1);
24622
24623   /* xa = abs (operand1) */
24624   xa = ix86_expand_sse_fabs (res, &mask);
24625
24626   /* if (!isless (xa, TWO52)) goto label; */
24627   label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
24628
24629   /* xa = xa + TWO52 - TWO52; */
24630   xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
24631   xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
24632
24633   /* xa = copysign (xa, operand1) */
24634   ix86_sse_copysign_to_positive (xa, xa, res, mask);
24635
24636   /* generate 1.0 or -1.0 */
24637   one = force_reg (mode,
24638                    const_double_from_real_value (do_floor
24639                                                  ? dconst1 : dconstm1, mode));
24640
24641   /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
24642   tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
24643   emit_insn (gen_rtx_SET (VOIDmode, tmp,
24644                           gen_rtx_AND (mode, one, tmp)));
24645   /* We always need to subtract here to preserve signed zero.  */
24646   tmp = expand_simple_binop (mode, MINUS,
24647                              xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
24648   emit_move_insn (res, tmp);
24649
24650   emit_label (label);
24651   LABEL_NUSES (label) = 1;
24652
24653   emit_move_insn (operand0, res);
24654 }
24655
24656 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
24657    into OPERAND0.  */
24658 void
24659 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
24660 {
24661   /* C code for the stuff we expand below.
24662         double xa = fabs (x), x2;
24663         if (!isless (xa, TWO52))
24664           return x;
24665         x2 = (double)(long)x;
24666      Compensate.  Floor:
24667         if (x2 > x)
24668           x2 -= 1;
24669      Compensate.  Ceil:
24670         if (x2 < x)
24671           x2 += 1;
24672         if (HONOR_SIGNED_ZEROS (mode))
24673           return copysign (x2, x);
24674         return x2;
24675    */
24676   enum machine_mode mode = GET_MODE (operand0);
24677   rtx xa, xi, TWO52, tmp, label, one, res, mask;
24678
24679   TWO52 = ix86_gen_TWO52 (mode);
24680
24681   /* Temporary for holding the result, initialized to the input
24682      operand to ease control flow.  */
24683   res = gen_reg_rtx (mode);
24684   emit_move_insn (res, operand1);
24685
24686   /* xa = abs (operand1) */
24687   xa = ix86_expand_sse_fabs (res, &mask);
24688
24689   /* if (!isless (xa, TWO52)) goto label; */
24690   label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
24691
24692   /* xa = (double)(long)x */
24693   xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
24694   expand_fix (xi, res, 0);
24695   expand_float (xa, xi, 0);
24696
24697   /* generate 1.0 */
24698   one = force_reg (mode, const_double_from_real_value (dconst1, mode));
24699
24700   /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
24701   tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
24702   emit_insn (gen_rtx_SET (VOIDmode, tmp,
24703                           gen_rtx_AND (mode, one, tmp)));
24704   tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
24705                              xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
24706   emit_move_insn (res, tmp);
24707
24708   if (HONOR_SIGNED_ZEROS (mode))
24709     ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
24710
24711   emit_label (label);
24712   LABEL_NUSES (label) = 1;
24713
24714   emit_move_insn (operand0, res);
24715 }
24716
24717 /* Expand SSE sequence for computing round from OPERAND1 storing
24718    into OPERAND0.  Sequence that works without relying on DImode truncation
24719    via cvttsd2siq that is only available on 64bit targets.  */
24720 void
24721 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
24722 {
24723   /* C code for the stuff we expand below.
24724         double xa = fabs (x), xa2, x2;
24725         if (!isless (xa, TWO52))
24726           return x;
24727      Using the absolute value and copying back sign makes
24728      -0.0 -> -0.0 correct.
24729         xa2 = xa + TWO52 - TWO52;
24730      Compensate.
24731         dxa = xa2 - xa;
24732         if (dxa <= -0.5)
24733           xa2 += 1;
24734         else if (dxa > 0.5)
24735           xa2 -= 1;
24736         x2 = copysign (xa2, x);
24737         return x2;
24738    */
24739   enum machine_mode mode = GET_MODE (operand0);
24740   rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
24741
24742   TWO52 = ix86_gen_TWO52 (mode);
24743
24744   /* Temporary for holding the result, initialized to the input
24745      operand to ease control flow.  */
24746   res = gen_reg_rtx (mode);
24747   emit_move_insn (res, operand1);
24748
24749   /* xa = abs (operand1) */
24750   xa = ix86_expand_sse_fabs (res, &mask);
24751
24752   /* if (!isless (xa, TWO52)) goto label; */
24753   label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
24754
24755   /* xa2 = xa + TWO52 - TWO52; */
24756   xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
24757   xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
24758
24759   /* dxa = xa2 - xa; */
24760   dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
24761
24762   /* generate 0.5, 1.0 and -0.5 */
24763   half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
24764   one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
24765   mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
24766                                0, OPTAB_DIRECT);
24767
24768   /* Compensate.  */
24769   tmp = gen_reg_rtx (mode);
24770   /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
24771   tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
24772   emit_insn (gen_rtx_SET (VOIDmode, tmp,
24773                           gen_rtx_AND (mode, one, tmp)));
24774   xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
24775   /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
24776   tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
24777   emit_insn (gen_rtx_SET (VOIDmode, tmp,
24778                           gen_rtx_AND (mode, one, tmp)));
24779   xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
24780
24781   /* res = copysign (xa2, operand1) */
24782   ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
24783
24784   emit_label (label);
24785   LABEL_NUSES (label) = 1;
24786
24787   emit_move_insn (operand0, res);
24788 }
24789
24790 /* Expand SSE sequence for computing trunc from OPERAND1 storing
24791    into OPERAND0.  */
24792 void
24793 ix86_expand_trunc (rtx operand0, rtx operand1)
24794 {
24795   /* C code for SSE variant we expand below.
24796         double xa = fabs (x), x2;
24797         if (!isless (xa, TWO52))
24798           return x;
24799         x2 = (double)(long)x;
24800         if (HONOR_SIGNED_ZEROS (mode))
24801           return copysign (x2, x);
24802         return x2;
24803    */
24804   enum machine_mode mode = GET_MODE (operand0);
24805   rtx xa, xi, TWO52, label, res, mask;
24806
24807   TWO52 = ix86_gen_TWO52 (mode);
24808
24809   /* Temporary for holding the result, initialized to the input
24810      operand to ease control flow.  */
24811   res = gen_reg_rtx (mode);
24812   emit_move_insn (res, operand1);
24813
24814   /* xa = abs (operand1) */
24815   xa = ix86_expand_sse_fabs (res, &mask);
24816
24817   /* if (!isless (xa, TWO52)) goto label; */
24818   label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
24819
24820   /* x = (double)(long)x */
24821   xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
24822   expand_fix (xi, res, 0);
24823   expand_float (res, xi, 0);
24824
24825   if (HONOR_SIGNED_ZEROS (mode))
24826     ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
24827
24828   emit_label (label);
24829   LABEL_NUSES (label) = 1;
24830
24831   emit_move_insn (operand0, res);
24832 }
24833
24834 /* Expand SSE sequence for computing trunc from OPERAND1 storing
24835    into OPERAND0.
         The mode of OPERAND0 is used for every temporary.  This body calls
         neither expand_fix nor expand_float: the rounding is done by adding
         and subtracting TWO52, followed by a compare-and-mask compensation
         step and a final copysign.  */
24836 void
24837 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
24838 {
24839   enum machine_mode mode = GET_MODE (operand0);
24840   rtx xa, mask, TWO52, label, one, res, smask, tmp;
24841
24842   /* C code for SSE variant we expand below.
24843         double xa = fabs (x), xa2, x2;
24844         if (!isless (xa, TWO52))
24845           return x;
24846         xa2 = xa + TWO52 - TWO52;
24847      Compensate:
24848         if (xa2 > xa)
24849           xa2 -= 1.0;
24850         x2 = copysign (xa2, x);
24851         return x2;
24852    */
24853
        /* TWO52 comes from ix86_gen_TWO52 (MODE); the exact constant it
           yields for each mode is not visible in this function.  */
24854   TWO52 = ix86_gen_TWO52 (mode);
24855
24856   /* Temporary for holding the result, initialized to the input
24857      operand to ease control flow.  */
24858   res = gen_reg_rtx (mode);
24859   emit_move_insn (res, operand1);
24860
24861   /* xa = abs (operand1) */
        /* The helper is also handed &SMASK; SMASK is passed on unchanged to
           the copysign step near the end of this function.  */
24862   xa = ix86_expand_sse_fabs (res, &smask);
24863
24864   /* if (!isless (xa, TWO52)) goto label; */
        /* LABEL is emitted just before the final store, so when the jump is
           taken RES still holds the copy of OPERAND1 made above.  */
24865   label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
24866
24867   /* res = xa + TWO52 - TWO52; */
24868   tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
24869   tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
24870   emit_move_insn (res, tmp);
24871
24872   /* generate 1.0 */
24873   one = force_reg (mode, const_double_from_real_value (dconst1, mode));
24874
24875   /* Compensate: res = xa2 - (res > xa ? 1 : 0)  */
        /* MASK is ANDed with ONE before being subtracted from RES.
           NOTE(review): this presumably relies on the compare helper
           returning an all-ones / all-zeros bit pattern, so the AND yields
           1.0 or 0.0 -- confirm against ix86_expand_sse_compare_mask.  */
24876   mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
24877   emit_insn (gen_rtx_SET (VOIDmode, mask,
24878                           gen_rtx_AND (mode, mask, one)));
24879   tmp = expand_simple_binop (mode, MINUS,
24880                              res, mask, NULL_RTX, 0, OPTAB_DIRECT);
24881   emit_move_insn (res, tmp);
24882
24883   /* res = copysign (res, operand1) */
24884   ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
24885
        /* Join point for the early exit above; the label's use count is set
           to 1 by hand.  */
24886   emit_label (label);
24887   LABEL_NUSES (label) = 1;
24888
24889   emit_move_insn (operand0, res);
24890 }
24891
24892 /* Expand SSE sequence for computing round from OPERAND1 storing
24893    into OPERAND0.
         The mode of OPERAND0 is used for the floating-point temporaries.
         The sequence adds a constant slightly below 0.5 to the absolute
         value, converts to an integer register and back via expand_fix /
         expand_float, and finally restores the sign of OPERAND1.  */
24894 void
24895 ix86_expand_round (rtx operand0, rtx operand1)
24896 {
24897   /* C code for the stuff we're doing below:
24898         double xa = fabs (x);
24899         if (!isless (xa, TWO52))
24900           return x;
24901         xa = (double)(long)(xa + nextafter (0.5, 0.0));
24902         return copysign (xa, x);
24903    */
24904   enum machine_mode mode = GET_MODE (operand0);
24905   rtx res, TWO52, xa, label, xi, half, mask;
24906   const struct real_format *fmt;
24907   REAL_VALUE_TYPE pred_half, half_minus_pred_half;
24908
24909   /* Temporary for holding the result, initialized to the input
24910      operand to ease control flow.  */
24911   res = gen_reg_rtx (mode);
24912   emit_move_insn (res, operand1);
24913
        /* xa = abs (operand1); if (!isless (xa, TWO52)) goto label;
           (per the C sketch above).  LABEL is emitted just before the final
           store, so on that path RES still holds the copy of OPERAND1.  */
24914   TWO52 = ix86_gen_TWO52 (mode);
24915   xa = ix86_expand_sse_fabs (res, &mask);
24916   label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
24917
24918   /* load nextafter (0.5, 0.0) */
        /* PRED_HALF is computed as dconsthalf minus the value produced by
           real_2expN for exponent -(fmt->p) - 1, where fmt->p is taken from
           the real format of MODE.  */
24919   fmt = REAL_MODE_FORMAT (mode);
24920   real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
24921   REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
24922
24923   /* xa = xa + 0.5 */
        /* More precisely: HALF holds PRED_HALF computed above (the
           nextafter (0.5, 0.0) of the C sketch), not exactly 0.5.  */
24924   half = force_reg (mode, const_double_from_real_value (pred_half, mode));
24925   xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
24926
24927   /* xa = (double)(int64_t)xa */
        /* The integer temporary is DImode when MODE is DFmode and SImode
           for any other mode.  */
24928   xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
24929   expand_fix (xi, xa, 0);
24930   expand_float (xa, xi, 0);
24931
24932   /* res = copysign (xa, operand1) */
24933   ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
24934
        /* Join point for the early exit above; the label's use count is set
           to 1 by hand.  */
24935   emit_label (label);
24936   LABEL_NUSES (label) = 1;
24937
24938   emit_move_insn (operand0, res);
24939 }
24940
24941 \f
24942 /* Validate whether a SSE5 instruction is valid or not.
24943    OPERANDS is the array of operands.
24944    NUM is the number of operands.
24945    USES_OC0 is true if the instruction uses OC0 and provides 4 variants.
24946    NUM_MEMORY is the maximum number of memory operands to accept.
         INSN is the instruction itself; its PATTERN is examined only when an
         operand is neither a register nor a memory operand.
         Returns true if the operand combination is accepted and false if it
         is rejected.  When at least one memory operand is present and none
         of the NUM / NUM_MEMORY cases below applies, control reaches
         gcc_unreachable ().  */
24947 bool
24948 ix86_sse5_valid_op_p (rtx operands[], rtx insn, int num, bool uses_oc0, int num_memory)
24949 {
24950   int mem_mask;
24951   int mem_count;
24952   int i;
24953
24954   /* Count the number of memory arguments */
        /* Bit I of MEM_MASK is set when operands[I] is a memory operand;
           MEM_COUNT is the number of such operands.  */
24955   mem_mask = 0;
24956   mem_count = 0;
24957   for (i = 0; i < num; i++)
24958     {
24959       enum machine_mode mode = GET_MODE (operands[i]);
            /* Register operands need no bookkeeping.  */
24960       if (register_operand (operands[i], mode))
24961         ;
24962
24963       else if (memory_operand (operands[i], mode))
24964         {
24965           mem_mask |= (1 << i);
24966           mem_count++;
24967         }
24968
24969       else
24970         {
24971           rtx pattern = PATTERN (insn);
24972
24973           /* allow 0 for pcmov */
                /* Any other operand is accepted only if it is the zero
                   constant of MODE, sits at index 2 or higher, and the insn
                   pattern is a SET whose source is an IF_THEN_ELSE.  */
24974           if (GET_CODE (pattern) != SET
24975               || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE
24976               || i < 2
24977               || operands[i] != CONST0_RTX (mode))
24978             return false;
24979         }
24980     }
24981
24982   /* If there were no memory operations, allow the insn */
24983   if (mem_mask == 0)
24984     return true;
24985
24986   /* Do not allow the destination register to be a memory operand.  */
24987   else if (mem_mask & (1 << 0))
24988     return false;
24989
24990   /* If there are too many memory operations, disallow the instruction.  While
24991      the hardware only allows 1 memory reference, before register allocation
24992      for some insns, we allow two memory operations sometimes in order to allow
24993      code like the following to be optimized:
24994
24995         float fmadd (float *a, float *b, float *c) { return (*a * *b) + *c; }
24996
24997     or similar cases that are vectorized into using the fmaddss
24998     instruction.  */
24999   else if (mem_count > num_memory)
25000     return false;
25001
25002   /* Don't allow more than one memory operation if not optimizing.  */
25003   else if (mem_count > 1 && !optimize)
25004     return false;
25005
25006   else if (num == 4 && mem_count == 1)
25007     {
25008       /* formats (destination is the first argument), example fmaddss:
25009          xmm1, xmm1, xmm2, xmm3/mem
25010          xmm1, xmm1, xmm2/mem, xmm3
25011          xmm1, xmm2, xmm3/mem, xmm1
25012          xmm1, xmm2/mem, xmm3, xmm1 */
25013       if (uses_oc0)
25014         return ((mem_mask == (1 << 1))
25015                 || (mem_mask == (1 << 2))
25016                 || (mem_mask == (1 << 3)));
25017
25018       /* format, example pmacsdd:
25019          xmm1, xmm2, xmm3/mem, xmm1 */
25020       else
25021         return (mem_mask == (1 << 2));
25022     }
25023
        /* With NUM == 4, reaching this test means MEM_COUNT is 2: a count of
           zero returned above, a count of one was handled by the previous
           branch, and counts above NUM_MEMORY were rejected.  */
25024   else if (num == 4 && num_memory == 2)
25025     {
25026       /* If there are two memory operations, we can load one of the memory ops
25027          into the destination register.  This is for optimizing the
25028          multiply/add ops, which the combiner has optimized both the multiply
25029          and the add insns to have a memory operation.  We have to be careful
25030          that the destination doesn't overlap with the inputs.  */
25031       rtx op0 = operands[0];
25032
25033       if (reg_mentioned_p (op0, operands[1])
25034           || reg_mentioned_p (op0, operands[2])
25035           || reg_mentioned_p (op0, operands[3]))
25036         return false;
25037
25038       /* formats (destination is the first argument), example fmaddss:
25039          xmm1, xmm1, xmm2, xmm3/mem
25040          xmm1, xmm1, xmm2/mem, xmm3
25041          xmm1, xmm2, xmm3/mem, xmm1
25042          xmm1, xmm2/mem, xmm3, xmm1
25043
25044          For the oc0 case, we will load either operands[1] or operands[3] into
25045          operands[0], so any combination of 2 memory operands is ok.  */
25046       if (uses_oc0)
25047         return true;
25048
25049       /* format, example pmacsdd:
25050          xmm1, xmm2, xmm3/mem, xmm1
25051
25052          For the integer multiply/add instructions be more restrictive and
25053          require operands[2] and operands[3] to be the memory operands.  */
25054       else
25055         return (mem_mask == ((1 << 2) | (1 << 3)));
25056     }
25057
25058   else if (num == 3 && num_memory == 1)
25059     {
25060       /* formats, example protb:
25061          xmm1, xmm2, xmm3/mem
25062          xmm1, xmm2/mem, xmm3 */
25063       if (uses_oc0)
25064         return ((mem_mask == (1 << 1)) || (mem_mask == (1 << 2)));
25065
25066       /* format, example comeq:
25067          xmm1, xmm2, xmm3/mem */
25068       else
25069         return (mem_mask == (1 << 2));
25070     }
25071
25072   else
25073     gcc_unreachable ();
25074
        /* Every branch of the chain above either returns or ends in
           gcc_unreachable (), so this is only a fallback value.  */
25075   return false;
25076 }
25077
25078 \f
25079 /* Fixup an SSE5 instruction that has 2 memory input references into a form the
25080    hardware will allow by using the destination register to load one of the
25081    memory operations.  Presently this is used by the multiply/add routines to
25082    allow 2 memory references.
         OPERANDS is the operand array; operands[0] is the destination.  The
         array is updated in place: the operand that gets loaded (operands[1]
         or operands[3]) is replaced by operands[0].
         NUM is the number of operands and must be 4.
         MODE is the mode used for the memory_operand tests.  */
25083
25084 void
25085 ix86_expand_sse5_multiple_memory (rtx operands[],
25086                                   int num,
25087                                   enum machine_mode mode)
25088 {
25089   rtx op0 = operands[0];
        /* Preconditions: exactly four operands, a destination that is not a
           memory operand, and a destination that is not mentioned in any of
           the three inputs.  Anything else ends in gcc_unreachable ().  */
25090   if (num != 4
25091       || memory_operand (op0, mode)
25092       || reg_mentioned_p (op0, operands[1])
25093       || reg_mentioned_p (op0, operands[2])
25094       || reg_mentioned_p (op0, operands[3]))
25095     gcc_unreachable ();
25096
25097   /* For 2 memory operands, pick either operands[1] or operands[3] to move into
25098      the destination register.  */
        /* operands[1] is tried first; operands[3] is considered only when
           operands[1] is not a memory operand.  If neither is a memory
           operand, gcc_unreachable () is reached.  */
25099   if (memory_operand (operands[1], mode))
25100     {
25101       emit_move_insn (op0, operands[1]);
25102       operands[1] = op0;
25103     }
25104   else if (memory_operand (operands[3], mode))
25105     {
25106       emit_move_insn (op0, operands[3]);
25107       operands[3] = op0;
25108     }
25109   else
25110     gcc_unreachable ();
25111
25112   return;
25113 }
25114
25115 \f
25116 /* Table of valid machine attributes.
         Each entry follows the column legend given on the first line inside
         the initializer.  The dllimport/dllexport/shared entries are present
         only when TARGET_DLLIMPORT_DECL_ATTRIBUTES is nonzero, and
         SUBTARGET_ATTRIBUTE_TABLE, when defined, contributes further
         entries.  */
25117 static const struct attribute_spec ix86_attribute_table[] =
25118 {
25119   /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
25120   /* Stdcall attribute says callee is responsible for popping arguments
25121      if they are not variable.  */
25122   { "stdcall",   0, 0, false, true,  true,  ix86_handle_cconv_attribute },
25123   /* Fastcall attribute says callee is responsible for popping arguments
25124      if they are not variable.  */
25125   { "fastcall",  0, 0, false, true,  true,  ix86_handle_cconv_attribute },
25126   /* Cdecl attribute says the callee is a normal C declaration */
25127   { "cdecl",     0, 0, false, true,  true,  ix86_handle_cconv_attribute },
25128   /* Regparm attribute specifies how many integer arguments are to be
25129      passed in registers.  */
        /* min_len and max_len are both 1 here, unlike the other calling
           convention entries, which use 0 for both.  */
25130   { "regparm",   1, 1, false, true,  true,  ix86_handle_cconv_attribute },
25131   /* Sseregparm attribute says we are using x86_64 calling conventions
25132      for FP arguments.  */
25133   { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
25134   /* force_align_arg_pointer says this function realigns the stack at entry.  */
        /* The name for this entry is taken from the address of
           ix86_force_align_arg_pointer_string, cast to const char *, rather
           than from a string literal.  */
25135   { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
25136     false, true,  true, ix86_handle_cconv_attribute },
25137 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
25138   { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
25139   { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
25140   { "shared",    0, 0, true,  false, false, ix86_handle_shared_attribute },
25141 #endif
        /* Both struct-layout attributes share ix86_handle_struct_attribute.  */
25142   { "ms_struct", 0, 0, false, false,  false, ix86_handle_struct_attribute },
25143   { "gcc_struct", 0, 0, false, false,  false, ix86_handle_struct_attribute },
25144 #ifdef SUBTARGET_ATTRIBUTE_TABLE
25145   SUBTARGET_ATTRIBUTE_TABLE,
25146 #endif
        /* Final entry with a NULL name and NULL handler; presumably the
           end-of-table marker -- confirm against the attribute_spec users.  */
25147   { NULL,        0, 0, false, false, false, NULL }
25148 };
25149
25150 /* Implement targetm.vectorize.builtin_vectorization_cost.
         RUNTIME_TEST selects which cost is requested.  When it is true the
         result is the negated cond_taken_branch_cost field of ix86_cost;
         otherwise the result is 0.  */
25151 static int
25152 x86_builtin_vectorization_cost (bool runtime_test)
25153 {
25154   /* If the branch of the runtime test is taken - i.e. - the vectorized
25155      version is skipped - this incurs a misprediction cost (because the
25156      vectorized version is expected to be the fall-through).  So we subtract
25157      the latency of a mispredicted branch from the costs that are incurred
25158      when the vectorized version is executed.
25159
25160      TODO: The values in individual target tables have to be tuned or new
25161      fields may be needed. For eg. on K8, the default branch path is the
25162      not-taken path. If the taken path is predicted correctly, the minimum
25163      penalty of going down the taken-path is 1 cycle. If the taken-path is
25164      not predicted correctly, then the minimum penalty is 10 cycles.  */
25165
25166   if (runtime_test)
25167     {
25168       return (-(ix86_cost->cond_taken_branch_cost));
25169     }
25170   else
25171     return 0;
25172 }
25173
25174 /* Initialize the GCC target structure.
         Every hook macro in this section is first #undef'ed and then
         redefined to the i386 implementation (or to a generic hook_*
         helper).  The section ends with the definition of targetm from
         TARGET_INITIALIZER, which is presumably built from these macros --
         TARGET_INITIALIZER itself is defined outside this file; confirm in
         target-def.h.  */
25175 #undef TARGET_ATTRIBUTE_TABLE
25176 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
25177 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
25178 #  undef TARGET_MERGE_DECL_ATTRIBUTES
25179 #  define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
25180 #endif
25181
25182 #undef TARGET_COMP_TYPE_ATTRIBUTES
25183 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
25184
25185 #undef TARGET_INIT_BUILTINS
25186 #define TARGET_INIT_BUILTINS ix86_init_builtins
25187 #undef TARGET_EXPAND_BUILTIN
25188 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
25189
25190 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
25191 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
25192   ix86_builtin_vectorized_function
25193
25194 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
25195 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
25196
25197 #undef TARGET_BUILTIN_RECIPROCAL
25198 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
25199
25200 #undef TARGET_ASM_FUNCTION_EPILOGUE
25201 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
25202
      /* A subtarget may supply its own section-info encoder through
         SUBTARGET_ENCODE_SECTION_INFO; otherwise the i386 one is used.  */
25203 #undef TARGET_ENCODE_SECTION_INFO
25204 #ifndef SUBTARGET_ENCODE_SECTION_INFO
25205 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
25206 #else
25207 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
25208 #endif
25209
25210 #undef TARGET_ASM_OPEN_PAREN
25211 #define TARGET_ASM_OPEN_PAREN ""
25212 #undef TARGET_ASM_CLOSE_PAREN
25213 #define TARGET_ASM_CLOSE_PAREN ""
25214
      /* The aligned DI directive is overridden only when ASM_QUAD is
         defined; otherwise whatever definition was already in effect is
         left alone.  */
25215 #undef TARGET_ASM_ALIGNED_HI_OP
25216 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
25217 #undef TARGET_ASM_ALIGNED_SI_OP
25218 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
25219 #ifdef ASM_QUAD
25220 #undef TARGET_ASM_ALIGNED_DI_OP
25221 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
25222 #endif
25223
      /* The unaligned HI/SI/DI directives are defined as the corresponding
         aligned ones.  */
25224 #undef TARGET_ASM_UNALIGNED_HI_OP
25225 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
25226 #undef TARGET_ASM_UNALIGNED_SI_OP
25227 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
25228 #undef TARGET_ASM_UNALIGNED_DI_OP
25229 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
25230
25231 #undef TARGET_SCHED_ADJUST_COST
25232 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
25233 #undef TARGET_SCHED_ISSUE_RATE
25234 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
25235 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
25236 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
25237   ia32_multipass_dfa_lookahead
25238
25239 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
25240 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
25241
      /* TARGET_HAVE_TLS is overridden only when HAVE_AS_TLS is defined.  */
25242 #ifdef HAVE_AS_TLS
25243 #undef TARGET_HAVE_TLS
25244 #define TARGET_HAVE_TLS true
25245 #endif
25246 #undef TARGET_CANNOT_FORCE_CONST_MEM
25247 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
25248 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
25249 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
25250
25251 #undef TARGET_DELEGITIMIZE_ADDRESS
25252 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
25253
25254 #undef TARGET_MS_BITFIELD_LAYOUT_P
25255 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
25256
      /* If both TARGET_MACHO and TARGET_DLLIMPORT_DECL_ATTRIBUTES are
         nonzero, the second block wins because it #undefs the first
         definition before redefining the macro.  */
25257 #if TARGET_MACHO
25258 #undef TARGET_BINDS_LOCAL_P
25259 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
25260 #endif
25261 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
25262 #undef TARGET_BINDS_LOCAL_P
25263 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
25264 #endif
25265
25266 #undef TARGET_ASM_OUTPUT_MI_THUNK
25267 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
25268 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
25269 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
25270
25271 #undef TARGET_ASM_FILE_START
25272 #define TARGET_ASM_FILE_START x86_file_start
25273
      /* The default target flags are the bitwise OR of three separately
         configured default masks.  */
25274 #undef TARGET_DEFAULT_TARGET_FLAGS
25275 #define TARGET_DEFAULT_TARGET_FLAGS     \
25276   (TARGET_DEFAULT                       \
25277    | TARGET_SUBTARGET_DEFAULT           \
25278    | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
25279
25280 #undef TARGET_HANDLE_OPTION
25281 #define TARGET_HANDLE_OPTION ix86_handle_option
25282
25283 #undef TARGET_RTX_COSTS
25284 #define TARGET_RTX_COSTS ix86_rtx_costs
25285 #undef TARGET_ADDRESS_COST
25286 #define TARGET_ADDRESS_COST ix86_address_cost
25287
25288 #undef TARGET_FIXED_CONDITION_CODE_REGS
25289 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
25290 #undef TARGET_CC_MODES_COMPATIBLE
25291 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
25292
25293 #undef TARGET_MACHINE_DEPENDENT_REORG
25294 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
25295
25296 #undef TARGET_BUILD_BUILTIN_VA_LIST
25297 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
25298
25299 #undef TARGET_EXPAND_BUILTIN_VA_START
25300 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
25301
25302 #undef TARGET_MD_ASM_CLOBBERS
25303 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
25304
25305 #undef TARGET_PROMOTE_PROTOTYPES
25306 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
25307 #undef TARGET_STRUCT_VALUE_RTX
25308 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
25309 #undef TARGET_SETUP_INCOMING_VARARGS
25310 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
25311 #undef TARGET_MUST_PASS_IN_STACK
25312 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
25313 #undef TARGET_PASS_BY_REFERENCE
25314 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
25315 #undef TARGET_INTERNAL_ARG_POINTER
25316 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
25317 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
25318 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
25319 #undef TARGET_STRICT_ARGUMENT_NAMING
25320 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
25321
25322 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
25323 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
25324
25325 #undef TARGET_SCALAR_MODE_SUPPORTED_P
25326 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
25327
25328 #undef TARGET_VECTOR_MODE_SUPPORTED_P
25329 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
25330
25331 #undef TARGET_C_MODE_FOR_SUFFIX
25332 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
25333
25334 #ifdef HAVE_AS_TLS
25335 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
25336 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
25337 #endif
25338
25339 #ifdef SUBTARGET_INSERT_ATTRIBUTES
25340 #undef TARGET_INSERT_ATTRIBUTES
25341 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
25342 #endif
25343
25344 #undef TARGET_MANGLE_TYPE
25345 #define TARGET_MANGLE_TYPE ix86_mangle_type
25346
25347 #undef TARGET_STACK_PROTECT_FAIL
25348 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
25349
25350 #undef TARGET_FUNCTION_VALUE
25351 #define TARGET_FUNCTION_VALUE ix86_function_value
25352
25353 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
25354 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST x86_builtin_vectorization_cost
25355
      /* The target vector itself, initialized from TARGET_INITIALIZER.  */
25356 struct gcc_target targetm = TARGET_INITIALIZER;
25357 \f
25358 #include "gt-i386.h"