gcc/config/i386/i386.c

   1 /* Subroutines used for code generation on IA-32.
   2    Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   3    2002, 2003, 2004, 2005, 2006, 2007, 2008
   4    Free Software Foundation, Inc.
   5
   6 This file is part of GCC.
   7
   8 GCC is free software; you can redistribute it and/or modify
   9 it under the terms of the GNU General Public License as published by
  10 the Free Software Foundation; either version 3, or (at your option)
  11 any later version.
  12
  13 GCC is distributed in the hope that it will be useful,
  14 but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 GNU General Public License for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GCC; see the file COPYING3.  If not see
  20 <http://www.gnu.org/licenses/>.  */
  21
  22 #include "config.h"
  23 #include "system.h"
  24 #include "coretypes.h"
  25 #include "tm.h"
  26 #include "rtl.h"
  27 #include "tree.h"
  28 #include "tm_p.h"
  29 #include "regs.h"
  30 #include "hard-reg-set.h"
  31 #include "real.h"
  32 #include "insn-config.h"
  33 #include "conditions.h"
  34 #include "output.h"
  35 #include "insn-codes.h"
  36 #include "insn-attr.h"
  37 #include "flags.h"
  38 #include "c-common.h"
  39 #include "except.h"
  40 #include "function.h"
  41 #include "recog.h"
  42 #include "expr.h"
  43 #include "optabs.h"
  44 #include "toplev.h"
  45 #include "basic-block.h"
  46 #include "ggc.h"
  47 #include "target.h"
  48 #include "target-def.h"
  49 #include "langhooks.h"
  50 #include "cgraph.h"
  51 #include "tree-gimple.h"
  52 #include "dwarf2.h"
  53 #include "df.h"
  54 #include "tm-constrs.h"
  55 #include "params.h"
  56
  57 static int x86_builtin_vectorization_cost (bool);   /* forward declaration of a file-local (static) function */
  58 static rtx legitimize_dllimport_symbol (rtx, bool); /* forward declaration of a file-local (static) function */
  59
  60 #ifndef CHECK_STACK_LIMIT   /* supply a default only when nothing defined it earlier */
  61 #define CHECK_STACK_LIMIT (-1)   /* NOTE(review): -1 presumably means "no stack limit check" -- confirm at the uses */
  62 #endif
  63
  64 /* Return index of given mode in mult and division cost tables:
        QImode -> 0, HImode -> 1, SImode -> 2, DImode -> 3, any other mode -> 4.
        MODE may be evaluated up to four times, so pass an expression without
        side effects.  */
  65 #define MODE_INDEX(mode)                                        \
  66   ((mode) == QImode ? 0                                         \
  67    : (mode) == HImode ? 1                                       \
  68    : (mode) == SImode ? 2                                       \
  69    : (mode) == DImode ? 3                                       \
  70    : 4)
  71
  72 /* Processor costs (relative to an add) */
  73 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.
        Under that assumption COSTS_N_BYTES (2), the size of one add, equals
        COSTS_N_INSNS (1), so size-based costs stay on the same scale.  */
  74 #define COSTS_N_BYTES(N) ((N) * 2)
  75
  76 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}   /* placeholder string-op entry that names libcall everywhere; fills the second slot of the string-op pairs in the per-processor tables below */
  77
  78 static const
  79 struct processor_costs size_cost = {    /* costs for tuning for size; most
                                             entries are given in COSTS_N_BYTES
                                             units instead of COSTS_N_INSNS */
  80   COSTS_N_BYTES (2),                    /* cost of an add instruction */
  81   COSTS_N_BYTES (3),                    /* cost of a lea instruction */
  82   COSTS_N_BYTES (2),                    /* variable shift costs */
  83   COSTS_N_BYTES (3),                    /* constant shift costs */
  84   {COSTS_N_BYTES (3),                   /* cost of starting multiply for QI */
  85    COSTS_N_BYTES (3),                   /*                               HI */
  86    COSTS_N_BYTES (3),                   /*                               SI */
  87    COSTS_N_BYTES (3),                   /*                               DI */
  88    COSTS_N_BYTES (5)},                  /*                            other */
  89   0,                                    /* cost of multiply per each bit set */
  90   {COSTS_N_BYTES (3),                   /* cost of a divide/mod for QI */
  91    COSTS_N_BYTES (3),                   /*                          HI */
  92    COSTS_N_BYTES (3),                   /*                          SI */
  93    COSTS_N_BYTES (3),                   /*                          DI */
  94    COSTS_N_BYTES (5)},                  /*                       other */
  95   COSTS_N_BYTES (3),                    /* cost of movsx */
  96   COSTS_N_BYTES (3),                    /* cost of movzx */
  97   0,                                    /* "large" insn */
  98   2,                                    /* MOVE_RATIO */
  99   2,                                    /* cost for loading QImode using movzbl */
 100   {2, 2, 2},                            /* cost of loading integer registers
 101                                            in QImode, HImode and SImode.
 102                                            Relative to reg-reg move (2).  */
 103   {2, 2, 2},                            /* cost of storing integer registers */
 104   2,                                    /* cost of reg,reg fld/fst */
 105   {2, 2, 2},                            /* cost of loading fp registers
 106                                            in SFmode, DFmode and XFmode */
 107   {2, 2, 2},                            /* cost of storing fp registers
 108                                            in SFmode, DFmode and XFmode */
 109   3,                                    /* cost of moving MMX register */
 110   {3, 3},                               /* cost of loading MMX registers
 111                                            in SImode and DImode */
 112   {3, 3},                               /* cost of storing MMX registers
 113                                            in SImode and DImode */
 114   3,                                    /* cost of moving SSE register */
 115   {3, 3, 3},                            /* cost of loading SSE registers
 116                                            in SImode, DImode and TImode */
 117   {3, 3, 3},                            /* cost of storing SSE registers
 118                                            in SImode, DImode and TImode */
 119   3,                                    /* MMX or SSE register to integer */
 120   0,                                    /* size of l1 cache  */
 121   0,                                    /* size of l2 cache  */
 122   0,                                    /* size of prefetch block */
 123   0,                                    /* number of parallel prefetches */
 124   2,                                    /* Branch cost */
 125   COSTS_N_BYTES (2),                    /* cost of FADD and FSUB insns.  */
 126   COSTS_N_BYTES (2),                    /* cost of FMUL instruction.  */
 127   COSTS_N_BYTES (2),                    /* cost of FDIV instruction.  */
 128   COSTS_N_BYTES (2),                    /* cost of FABS instruction.  */
 129   COSTS_N_BYTES (2),                    /* cost of FCHS instruction.  */
 130   COSTS_N_BYTES (2),                    /* cost of FSQRT instruction.  */
 131   {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},   /* 1st string-op pair; NOTE(review): presumably memcpy -- confirm */
 132    {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
 133   {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},   /* 2nd string-op pair; NOTE(review): presumably memset -- confirm */
 134    {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
 135   1,                                    /* scalar_stmt_cost.  */
 136   1,                                    /* scalar_load_cost.  */
 137   1,                                    /* scalar_store_cost.  */
 138   1,                                    /* vec_stmt_cost.  */
 139   1,                                    /* vec_to_scalar_cost.  */
 140   1,                                    /* scalar_to_vec_cost.  */
 141   1,                                    /* vec_align_load_cost.  */
 142   1,                                    /* vec_unalign_load_cost.  */
 143   1,                                    /* vec_store_cost.  */
 144   1,                                    /* cond_taken_branch_cost.  */
 145   1,                                    /* cond_not_taken_branch_cost.  */
 146 };
 147
 148 /* Processor costs (relative to an add) */
 149 static const
 150 struct processor_costs i386_cost = {    /* 386 specific costs; instruction
                                             costs are given via COSTS_N_INSNS */
 151   COSTS_N_INSNS (1),                    /* cost of an add instruction */
 152   COSTS_N_INSNS (1),                    /* cost of a lea instruction */
 153   COSTS_N_INSNS (3),                    /* variable shift costs */
 154   COSTS_N_INSNS (2),                    /* constant shift costs */
 155   {COSTS_N_INSNS (6),                   /* cost of starting multiply for QI */
 156    COSTS_N_INSNS (6),                   /*                               HI */
 157    COSTS_N_INSNS (6),                   /*                               SI */
 158    COSTS_N_INSNS (6),                   /*                               DI */
 159    COSTS_N_INSNS (6)},                  /*                               other */
 160   COSTS_N_INSNS (1),                    /* cost of multiply per each bit set */
 161   {COSTS_N_INSNS (23),                  /* cost of a divide/mod for QI */
 162    COSTS_N_INSNS (23),                  /*                          HI */
 163    COSTS_N_INSNS (23),                  /*                          SI */
 164    COSTS_N_INSNS (23),                  /*                          DI */
 165    COSTS_N_INSNS (23)},                 /*                          other */
 166   COSTS_N_INSNS (3),                    /* cost of movsx */
 167   COSTS_N_INSNS (2),                    /* cost of movzx */
 168   15,                                   /* "large" insn */
 169   3,                                    /* MOVE_RATIO */
 170   4,                                    /* cost for loading QImode using movzbl */
 171   {2, 4, 2},                            /* cost of loading integer registers
 172                                            in QImode, HImode and SImode.
 173                                            Relative to reg-reg move (2).  */
 174   {2, 4, 2},                            /* cost of storing integer registers */
 175   2,                                    /* cost of reg,reg fld/fst */
 176   {8, 8, 8},                            /* cost of loading fp registers
 177                                            in SFmode, DFmode and XFmode */
 178   {8, 8, 8},                            /* cost of storing fp registers
 179                                            in SFmode, DFmode and XFmode */
 180   2,                                    /* cost of moving MMX register */
 181   {4, 8},                               /* cost of loading MMX registers
 182                                            in SImode and DImode */
 183   {4, 8},                               /* cost of storing MMX registers
 184                                            in SImode and DImode */
 185   2,                                    /* cost of moving SSE register */
 186   {4, 8, 16},                           /* cost of loading SSE registers
 187                                            in SImode, DImode and TImode */
 188   {4, 8, 16},                           /* cost of storing SSE registers
 189                                            in SImode, DImode and TImode */
 190   3,                                    /* MMX or SSE register to integer */
 191   0,                                    /* size of l1 cache  */
 192   0,                                    /* size of l2 cache  */
 193   0,                                    /* size of prefetch block */
 194   0,                                    /* number of parallel prefetches */
 195   1,                                    /* Branch cost */
 196   COSTS_N_INSNS (23),                   /* cost of FADD and FSUB insns.  */
 197   COSTS_N_INSNS (27),                   /* cost of FMUL instruction.  */
 198   COSTS_N_INSNS (88),                   /* cost of FDIV instruction.  */
 199   COSTS_N_INSNS (22),                   /* cost of FABS instruction.  */
 200   COSTS_N_INSNS (24),                   /* cost of FCHS instruction.  */
 201   COSTS_N_INSNS (122),                  /* cost of FSQRT instruction.  */
 202   {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},   /* 1st string-op pair; NOTE(review): presumably memcpy -- confirm */
 203    DUMMY_STRINGOP_ALGS},
 204   {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},   /* 2nd string-op pair; NOTE(review): presumably memset -- confirm */
 205    DUMMY_STRINGOP_ALGS},
 206   1,                                    /* scalar_stmt_cost.  */
 207   1,                                    /* scalar_load_cost.  */
 208   1,                                    /* scalar_store_cost.  */
 209   1,                                    /* vec_stmt_cost.  */
 210   1,                                    /* vec_to_scalar_cost.  */
 211   1,                                    /* scalar_to_vec_cost.  */
 212   1,                                    /* vec_align_load_cost.  */
 213   2,                                    /* vec_unalign_load_cost.  */
 214   1,                                    /* vec_store_cost.  */
 215   3,                                    /* cond_taken_branch_cost.  */
 216   1,                                    /* cond_not_taken_branch_cost.  */
 217 };
 218
 219 static const
 220 struct processor_costs i486_cost = {    /* 486 specific costs; instruction
                                             costs are given via COSTS_N_INSNS */
 221   COSTS_N_INSNS (1),                    /* cost of an add instruction */
 222   COSTS_N_INSNS (1),                    /* cost of a lea instruction */
 223   COSTS_N_INSNS (3),                    /* variable shift costs */
 224   COSTS_N_INSNS (2),                    /* constant shift costs */
 225   {COSTS_N_INSNS (12),                  /* cost of starting multiply for QI */
 226    COSTS_N_INSNS (12),                  /*                               HI */
 227    COSTS_N_INSNS (12),                  /*                               SI */
 228    COSTS_N_INSNS (12),                  /*                               DI */
 229    COSTS_N_INSNS (12)},                 /*                               other */
 230   1,                                    /* cost of multiply per each bit set;
                                            NOTE(review): a bare 1 here, while
                                            i386_cost uses COSTS_N_INSNS (1) for
                                            this entry -- confirm intended */
 231   {COSTS_N_INSNS (40),                  /* cost of a divide/mod for QI */
 232    COSTS_N_INSNS (40),                  /*                          HI */
 233    COSTS_N_INSNS (40),                  /*                          SI */
 234    COSTS_N_INSNS (40),                  /*                          DI */
 235    COSTS_N_INSNS (40)},                 /*                          other */
 236   COSTS_N_INSNS (3),                    /* cost of movsx */
 237   COSTS_N_INSNS (2),                    /* cost of movzx */
 238   15,                                   /* "large" insn */
 239   3,                                    /* MOVE_RATIO */
 240   4,                                    /* cost for loading QImode using movzbl */
 241   {2, 4, 2},                            /* cost of loading integer registers
 242                                            in QImode, HImode and SImode.
 243                                            Relative to reg-reg move (2).  */
 244   {2, 4, 2},                            /* cost of storing integer registers */
 245   2,                                    /* cost of reg,reg fld/fst */
 246   {8, 8, 8},                            /* cost of loading fp registers
 247                                            in SFmode, DFmode and XFmode */
 248   {8, 8, 8},                            /* cost of storing fp registers
 249                                            in SFmode, DFmode and XFmode */
 250   2,                                    /* cost of moving MMX register */
 251   {4, 8},                               /* cost of loading MMX registers
 252                                            in SImode and DImode */
 253   {4, 8},                               /* cost of storing MMX registers
 254                                            in SImode and DImode */
 255   2,                                    /* cost of moving SSE register */
 256   {4, 8, 16},                           /* cost of loading SSE registers
 257                                            in SImode, DImode and TImode */
 258   {4, 8, 16},                           /* cost of storing SSE registers
 259                                            in SImode, DImode and TImode */
 260   3,                                    /* MMX or SSE register to integer */
 261   4,                                    /* size of l1 cache.  486 has 8kB cache
 262                                            shared for code and data, so 4kB is
 263                                            not really precise.  */
 264   4,                                    /* size of l2 cache  */
 265   0,                                    /* size of prefetch block */
 266   0,                                    /* number of parallel prefetches */
 267   1,                                    /* Branch cost */
 268   COSTS_N_INSNS (8),                    /* cost of FADD and FSUB insns.  */
 269   COSTS_N_INSNS (16),                   /* cost of FMUL instruction.  */
 270   COSTS_N_INSNS (73),                   /* cost of FDIV instruction.  */
 271   COSTS_N_INSNS (3),                    /* cost of FABS instruction.  */
 272   COSTS_N_INSNS (3),                    /* cost of FCHS instruction.  */
 273   COSTS_N_INSNS (83),                   /* cost of FSQRT instruction.  */
 274   {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},   /* 1st string-op pair; NOTE(review): presumably memcpy -- confirm */
 275    DUMMY_STRINGOP_ALGS},
 276   {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},   /* 2nd string-op pair; NOTE(review): presumably memset -- confirm */
 277    DUMMY_STRINGOP_ALGS},
 278   1,                                    /* scalar_stmt_cost.  */
 279   1,                                    /* scalar_load_cost.  */
 280   1,                                    /* scalar_store_cost.  */
 281   1,                                    /* vec_stmt_cost.  */
 282   1,                                    /* vec_to_scalar_cost.  */
 283   1,                                    /* scalar_to_vec_cost.  */
 284   1,                                    /* vec_align_load_cost.  */
 285   2,                                    /* vec_unalign_load_cost.  */
 286   1,                                    /* vec_store_cost.  */
 287   3,                                    /* cond_taken_branch_cost.  */
 288   1,                                    /* cond_not_taken_branch_cost.  */
 289 };
 290
 291 static const
 292 struct processor_costs pentium_cost = { /* Pentium specific costs; instruction
                                             costs are given via COSTS_N_INSNS */
 293   COSTS_N_INSNS (1),                    /* cost of an add instruction */
 294   COSTS_N_INSNS (1),                    /* cost of a lea instruction */
 295   COSTS_N_INSNS (4),                    /* variable shift costs */
 296   COSTS_N_INSNS (1),                    /* constant shift costs */
 297   {COSTS_N_INSNS (11),                  /* cost of starting multiply for QI */
 298    COSTS_N_INSNS (11),                  /*                               HI */
 299    COSTS_N_INSNS (11),                  /*                               SI */
 300    COSTS_N_INSNS (11),                  /*                               DI */
 301    COSTS_N_INSNS (11)},                 /*                               other */
 302   0,                                    /* cost of multiply per each bit set */
 303   {COSTS_N_INSNS (25),                  /* cost of a divide/mod for QI */
 304    COSTS_N_INSNS (25),                  /*                          HI */
 305    COSTS_N_INSNS (25),                  /*                          SI */
 306    COSTS_N_INSNS (25),                  /*                          DI */
 307    COSTS_N_INSNS (25)},                 /*                          other */
 308   COSTS_N_INSNS (3),                    /* cost of movsx */
 309   COSTS_N_INSNS (2),                    /* cost of movzx */
 310   8,                                    /* "large" insn */
 311   6,                                    /* MOVE_RATIO */
 312   6,                                    /* cost for loading QImode using movzbl */
 313   {2, 4, 2},                            /* cost of loading integer registers
 314                                            in QImode, HImode and SImode.
 315                                            Relative to reg-reg move (2).  */
 316   {2, 4, 2},                            /* cost of storing integer registers */
 317   2,                                    /* cost of reg,reg fld/fst */
 318   {2, 2, 6},                            /* cost of loading fp registers
 319                                            in SFmode, DFmode and XFmode */
 320   {4, 4, 6},                            /* cost of storing fp registers
 321                                            in SFmode, DFmode and XFmode */
 322   8,                                    /* cost of moving MMX register */
 323   {8, 8},                               /* cost of loading MMX registers
 324                                            in SImode and DImode */
 325   {8, 8},                               /* cost of storing MMX registers
 326                                            in SImode and DImode */
 327   2,                                    /* cost of moving SSE register */
 328   {4, 8, 16},                           /* cost of loading SSE registers
 329                                            in SImode, DImode and TImode */
 330   {4, 8, 16},                           /* cost of storing SSE registers
 331                                            in SImode, DImode and TImode */
 332   3,                                    /* MMX or SSE register to integer */
 333   8,                                    /* size of l1 cache.  */
 334   8,                                    /* size of l2 cache  */
 335   0,                                    /* size of prefetch block */
 336   0,                                    /* number of parallel prefetches */
 337   2,                                    /* Branch cost */
 338   COSTS_N_INSNS (3),                    /* cost of FADD and FSUB insns.  */
 339   COSTS_N_INSNS (3),                    /* cost of FMUL instruction.  */
 340   COSTS_N_INSNS (39),                   /* cost of FDIV instruction.  */
 341   COSTS_N_INSNS (1),                    /* cost of FABS instruction.  */
 342   COSTS_N_INSNS (1),                    /* cost of FCHS instruction.  */
 343   COSTS_N_INSNS (70),                   /* cost of FSQRT instruction.  */
 344   {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},   /* 1st string-op pair; NOTE(review): presumably memcpy -- confirm */
 345    DUMMY_STRINGOP_ALGS},
 346   {{libcall, {{-1, rep_prefix_4_byte}}},   /* 2nd string-op pair; NOTE(review): presumably memset -- confirm */
 347    DUMMY_STRINGOP_ALGS},
 348   1,                                    /* scalar_stmt_cost.  */
 349   1,                                    /* scalar_load_cost.  */
 350   1,                                    /* scalar_store_cost.  */
 351   1,                                    /* vec_stmt_cost.  */
 352   1,                                    /* vec_to_scalar_cost.  */
 353   1,                                    /* scalar_to_vec_cost.  */
 354   1,                                    /* vec_align_load_cost.  */
 355   2,                                    /* vec_unalign_load_cost.  */
 356   1,                                    /* vec_store_cost.  */
 357   3,                                    /* cond_taken_branch_cost.  */
 358   1,                                    /* cond_not_taken_branch_cost.  */
 359 };
 360
 361 static const
 362 struct processor_costs pentiumpro_cost = { /* PentiumPro specific costs; instruction
                                                costs are given via COSTS_N_INSNS */
 363   COSTS_N_INSNS (1),                    /* cost of an add instruction */
 364   COSTS_N_INSNS (1),                    /* cost of a lea instruction */
 365   COSTS_N_INSNS (1),                    /* variable shift costs */
 366   COSTS_N_INSNS (1),                    /* constant shift costs */
 367   {COSTS_N_INSNS (4),                   /* cost of starting multiply for QI */
 368    COSTS_N_INSNS (4),                   /*                               HI */
 369    COSTS_N_INSNS (4),                   /*                               SI */
 370    COSTS_N_INSNS (4),                   /*                               DI */
 371    COSTS_N_INSNS (4)},                  /*                               other */
 372   0,                                    /* cost of multiply per each bit set */
 373   {COSTS_N_INSNS (17),                  /* cost of a divide/mod for QI */
 374    COSTS_N_INSNS (17),                  /*                          HI */
 375    COSTS_N_INSNS (17),                  /*                          SI */
 376    COSTS_N_INSNS (17),                  /*                          DI */
 377    COSTS_N_INSNS (17)},                 /*                          other */
 378   COSTS_N_INSNS (1),                    /* cost of movsx */
 379   COSTS_N_INSNS (1),                    /* cost of movzx */
 380   8,                                    /* "large" insn */
 381   6,                                    /* MOVE_RATIO */
 382   2,                                    /* cost for loading QImode using movzbl */
 383   {4, 4, 4},                            /* cost of loading integer registers
 384                                            in QImode, HImode and SImode.
 385                                            Relative to reg-reg move (2).  */
 386   {2, 2, 2},                            /* cost of storing integer registers */
 387   2,                                    /* cost of reg,reg fld/fst */
 388   {2, 2, 6},                            /* cost of loading fp registers
 389                                            in SFmode, DFmode and XFmode */
 390   {4, 4, 6},                            /* cost of storing fp registers
 391                                            in SFmode, DFmode and XFmode */
 392   2,                                    /* cost of moving MMX register */
 393   {2, 2},                               /* cost of loading MMX registers
 394                                            in SImode and DImode */
 395   {2, 2},                               /* cost of storing MMX registers
 396                                            in SImode and DImode */
 397   2,                                    /* cost of moving SSE register */
 398   {2, 2, 8},                            /* cost of loading SSE registers
 399                                            in SImode, DImode and TImode */
 400   {2, 2, 8},                            /* cost of storing SSE registers
 401                                            in SImode, DImode and TImode */
 402   3,                                    /* MMX or SSE register to integer */
 403   8,                                    /* size of l1 cache.  */
 404   256,                                  /* size of l2 cache  */
 405   32,                                   /* size of prefetch block */
 406   6,                                    /* number of parallel prefetches */
 407   2,                                    /* Branch cost */
 408   COSTS_N_INSNS (3),                    /* cost of FADD and FSUB insns.  */
 409   COSTS_N_INSNS (5),                    /* cost of FMUL instruction.  */
 410   COSTS_N_INSNS (56),                   /* cost of FDIV instruction.  */
 411   COSTS_N_INSNS (2),                    /* cost of FABS instruction.  */
 412   COSTS_N_INSNS (2),                    /* cost of FCHS instruction.  */
 413   COSTS_N_INSNS (56),                   /* cost of FSQRT instruction.  */
 414   /* PentiumPro has optimized rep instructions for blocks aligned to 8 bytes (we ensure
 415      the alignment).  For small blocks an inline loop is still a noticeable win; for bigger
 416      blocks either rep movsl or rep movsb is the way to go.  Rep movsb apparently has a
 417      more expensive startup time in the CPU, but after 4K the difference is down in the noise.
 418    */
 419   {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},   /* 1st string-op pair; NOTE(review): presumably memcpy -- confirm */
 420                         {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
 421    DUMMY_STRINGOP_ALGS},
 422   {{rep_prefix_4_byte, {{1024, unrolled_loop},   /* 2nd string-op pair; NOTE(review): presumably memset, and entries look like
                                                      {size limit, algorithm} with -1 as the catch-all -- confirm */
 423                         {8192, rep_prefix_4_byte}, {-1, libcall}}},
 424    DUMMY_STRINGOP_ALGS},
 425   1,                                    /* scalar_stmt_cost.  */
 426   1,                                    /* scalar_load_cost.  */
 427   1,                                    /* scalar_store_cost.  */
 428   1,                                    /* vec_stmt_cost.  */
 429   1,                                    /* vec_to_scalar_cost.  */
 430   1,                                    /* scalar_to_vec_cost.  */
 431   1,                                    /* vec_align_load_cost.  */
 432   2,                                    /* vec_unalign_load_cost.  */
 433   1,                                    /* vec_store_cost.  */
 434   3,                                    /* cond_taken_branch_cost.  */
 435   1,                                    /* cond_not_taken_branch_cost.  */
 436 };
 437
 438 static const
 439 struct processor_costs geode_cost = {   /* Geode specific costs; instruction
                                             costs are given via COSTS_N_INSNS */
 440   COSTS_N_INSNS (1),                    /* cost of an add instruction */
 441   COSTS_N_INSNS (1),                    /* cost of a lea instruction */
 442   COSTS_N_INSNS (2),                    /* variable shift costs */
 443   COSTS_N_INSNS (1),                    /* constant shift costs */
 444   {COSTS_N_INSNS (3),                   /* cost of starting multiply for QI */
 445    COSTS_N_INSNS (4),                   /*                               HI */
 446    COSTS_N_INSNS (7),                   /*                               SI */
 447    COSTS_N_INSNS (7),                   /*                               DI */
 448    COSTS_N_INSNS (7)},                  /*                               other */
 449   0,                                    /* cost of multiply per each bit set */
 450   {COSTS_N_INSNS (15),                  /* cost of a divide/mod for QI */
 451    COSTS_N_INSNS (23),                  /*                          HI */
 452    COSTS_N_INSNS (39),                  /*                          SI */
 453    COSTS_N_INSNS (39),                  /*                          DI */
 454    COSTS_N_INSNS (39)},                 /*                          other */
 455   COSTS_N_INSNS (1),                    /* cost of movsx */
 456   COSTS_N_INSNS (1),                    /* cost of movzx */
 457   8,                                    /* "large" insn */
 458   4,                                    /* MOVE_RATIO */
 459   1,                                    /* cost for loading QImode using movzbl */
 460   {1, 1, 1},                            /* cost of loading integer registers
 461                                            in QImode, HImode and SImode.
 462                                            Relative to reg-reg move (2).  */
 463   {1, 1, 1},                            /* cost of storing integer registers */
 464   1,                                    /* cost of reg,reg fld/fst */
 465   {1, 1, 1},                            /* cost of loading fp registers
 466                                            in SFmode, DFmode and XFmode */
 467   {4, 6, 6},                            /* cost of storing fp registers
 468                                            in SFmode, DFmode and XFmode */
 469
 470   1,                                    /* cost of moving MMX register */
 471   {1, 1},                               /* cost of loading MMX registers
 472                                            in SImode and DImode */
 473   {1, 1},                               /* cost of storing MMX registers
 474                                            in SImode and DImode */
 475   1,                                    /* cost of moving SSE register */
 476   {1, 1, 1},                            /* cost of loading SSE registers
 477                                            in SImode, DImode and TImode */
 478   {1, 1, 1},                            /* cost of storing SSE registers
 479                                            in SImode, DImode and TImode */
 480   1,                                    /* MMX or SSE register to integer */
 481   64,                                   /* size of l1 cache.  */
 482   128,                                  /* size of l2 cache.  */
 483   32,                                   /* size of prefetch block */
 484   1,                                    /* number of parallel prefetches */
 485   1,                                    /* Branch cost */
 486   COSTS_N_INSNS (6),                    /* cost of FADD and FSUB insns.  */
 487   COSTS_N_INSNS (11),                   /* cost of FMUL instruction.  */
 488   COSTS_N_INSNS (47),                   /* cost of FDIV instruction.  */
 489   COSTS_N_INSNS (1),                    /* cost of FABS instruction.  */
 490   COSTS_N_INSNS (1),                    /* cost of FCHS instruction.  */
 491   COSTS_N_INSNS (54),                   /* cost of FSQRT instruction.  */
 492   {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},   /* 1st string-op pair; NOTE(review): presumably memcpy -- confirm */
 493    DUMMY_STRINGOP_ALGS},
 494   {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},   /* 2nd string-op pair; NOTE(review): presumably memset -- confirm */
 495    DUMMY_STRINGOP_ALGS},
 496   1,                                    /* scalar_stmt_cost.  */
 497   1,                                    /* scalar_load_cost.  */
 498   1,                                    /* scalar_store_cost.  */
 499   1,                                    /* vec_stmt_cost.  */
 500   1,                                    /* vec_to_scalar_cost.  */
 501   1,                                    /* scalar_to_vec_cost.  */
 502   1,                                    /* vec_align_load_cost.  */
 503   2,                                    /* vec_unalign_load_cost.  */
 504   1,                                    /* vec_store_cost.  */
 505   3,                                    /* cond_taken_branch_cost.  */
 506   1,                                    /* cond_not_taken_branch_cost.  */
 507 };
 508
 509 static const
 510 struct processor_costs k6_cost = {  /* Cost parameters for K6; initializers are positional.  */
 511   COSTS_N_INSNS (1),                    /* cost of an add instruction */
 512   COSTS_N_INSNS (2),                    /* cost of a lea instruction */
 513   COSTS_N_INSNS (1),                    /* variable shift costs */
 514   COSTS_N_INSNS (1),                    /* constant shift costs */
 515   {COSTS_N_INSNS (3),                   /* cost of starting multiply for QI */
 516    COSTS_N_INSNS (3),                   /*                               HI */
 517    COSTS_N_INSNS (3),                   /*                               SI */
 518    COSTS_N_INSNS (3),                   /*                               DI */
 519    COSTS_N_INSNS (3)},                  /*                               other */
 520   0,                                    /* cost of multiply per each bit set */
 521   {COSTS_N_INSNS (18),                  /* cost of a divide/mod for QI */
 522    COSTS_N_INSNS (18),                  /*                          HI */
 523    COSTS_N_INSNS (18),                  /*                          SI */
 524    COSTS_N_INSNS (18),                  /*                          DI */
 525    COSTS_N_INSNS (18)},                 /*                          other */
 526   COSTS_N_INSNS (2),                    /* cost of movsx */
 527   COSTS_N_INSNS (2),                    /* cost of movzx */
 528   8,                                    /* "large" insn */
 529   4,                                    /* MOVE_RATIO */
 530   3,                                    /* cost for loading QImode using movzbl */
 531   {4, 5, 4},                            /* cost of loading integer registers
 532                                            in QImode, HImode and SImode.
 533                                            Relative to reg-reg move (2).  */
 534   {2, 3, 2},                            /* cost of storing integer registers */
 535   4,                                    /* cost of reg,reg fld/fst */
 536   {6, 6, 6},                            /* cost of loading fp registers
 537                                            in SFmode, DFmode and XFmode */
 538   {4, 4, 4},                            /* cost of storing fp registers
 539                                            in SFmode, DFmode and XFmode */
 540   2,                                    /* cost of moving MMX register */
 541   {2, 2},                               /* cost of loading MMX registers
 542                                            in SImode and DImode */
 543   {2, 2},                               /* cost of storing MMX registers
 544                                            in SImode and DImode */
 545   2,                                    /* cost of moving SSE register */
 546   {2, 2, 8},                            /* cost of loading SSE registers
 547                                            in SImode, DImode and TImode */
 548   {2, 2, 8},                            /* cost of storing SSE registers
 549                                            in SImode, DImode and TImode */
 550   6,                                    /* MMX or SSE register to integer */
 551   32,                                   /* size of l1 cache.  */
 552   32,                                   /* size of l2 cache.  Some models
 553                                            have integrated l2 cache, but
 554                                            optimizing for k6 is not important
 555                                            enough to worry about that.  */
 556   32,                                   /* size of prefetch block */
 557   1,                                    /* number of parallel prefetches */
 558   1,                                    /* Branch cost */
 559   COSTS_N_INSNS (2),                    /* cost of FADD and FSUB insns.  */
 560   COSTS_N_INSNS (2),                    /* cost of FMUL instruction.  */
 561   COSTS_N_INSNS (56),                   /* cost of FDIV instruction.  */
 562   COSTS_N_INSNS (2),                    /* cost of FABS instruction.  */
 563   COSTS_N_INSNS (2),                    /* cost of FCHS instruction.  */
 564   COSTS_N_INSNS (56),                   /* cost of FSQRT instruction.  */
 565   {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
 566    DUMMY_STRINGOP_ALGS},                /* 1st stringop pair: presumably memcpy (confirm vs. struct def) */
 567   {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
 568    DUMMY_STRINGOP_ALGS},                /* 2nd stringop pair: presumably memset (confirm vs. struct def) */
 569   1,                                    /* scalar_stmt_cost.  */
 570   1,                                    /* scalar_load_cost.  */
 571   1,                                    /* scalar_store_cost.  */
 572   1,                                    /* vec_stmt_cost.  */
 573   1,                                    /* vec_to_scalar_cost.  */
 574   1,                                    /* scalar_to_vec_cost.  */
 575   1,                                    /* vec_align_load_cost.  */
 576   2,                                    /* vec_unalign_load_cost.  */
 577   1,                                    /* vec_store_cost.  */
 578   3,                                    /* cond_taken_branch_cost.  */
 579   1,                                    /* cond_not_taken_branch_cost.  */
 580 };
 581
 582 static const
 583 struct processor_costs athlon_cost = {  /* Cost parameters for Athlon; initializers are positional.  */
 584   COSTS_N_INSNS (1),                    /* cost of an add instruction */
 585   COSTS_N_INSNS (2),                    /* cost of a lea instruction */
 586   COSTS_N_INSNS (1),                    /* variable shift costs */
 587   COSTS_N_INSNS (1),                    /* constant shift costs */
 588   {COSTS_N_INSNS (5),                   /* cost of starting multiply for QI */
 589    COSTS_N_INSNS (5),                   /*                               HI */
 590    COSTS_N_INSNS (5),                   /*                               SI */
 591    COSTS_N_INSNS (5),                   /*                               DI */
 592    COSTS_N_INSNS (5)},                  /*                               other */
 593   0,                                    /* cost of multiply per each bit set */
 594   {COSTS_N_INSNS (18),                  /* cost of a divide/mod for QI */
 595    COSTS_N_INSNS (26),                  /*                          HI */
 596    COSTS_N_INSNS (42),                  /*                          SI */
 597    COSTS_N_INSNS (74),                  /*                          DI */
 598    COSTS_N_INSNS (74)},                 /*                          other */
 599   COSTS_N_INSNS (1),                    /* cost of movsx */
 600   COSTS_N_INSNS (1),                    /* cost of movzx */
 601   8,                                    /* "large" insn */
 602   9,                                    /* MOVE_RATIO */
 603   4,                                    /* cost for loading QImode using movzbl */
 604   {3, 4, 3},                            /* cost of loading integer registers
 605                                            in QImode, HImode and SImode.
 606                                            Relative to reg-reg move (2).  */
 607   {3, 4, 3},                            /* cost of storing integer registers */
 608   4,                                    /* cost of reg,reg fld/fst */
 609   {4, 4, 12},                           /* cost of loading fp registers
 610                                            in SFmode, DFmode and XFmode */
 611   {6, 6, 8},                            /* cost of storing fp registers
 612                                            in SFmode, DFmode and XFmode */
 613   2,                                    /* cost of moving MMX register */
 614   {4, 4},                               /* cost of loading MMX registers
 615                                            in SImode and DImode */
 616   {4, 4},                               /* cost of storing MMX registers
 617                                            in SImode and DImode */
 618   2,                                    /* cost of moving SSE register */
 619   {4, 4, 6},                            /* cost of loading SSE registers
 620                                            in SImode, DImode and TImode */
 621   {4, 4, 5},                            /* cost of storing SSE registers
 622                                            in SImode, DImode and TImode */
 623   5,                                    /* MMX or SSE register to integer */
 624   64,                                   /* size of l1 cache.  */
 625   256,                                  /* size of l2 cache.  */
 626   64,                                   /* size of prefetch block */
 627   6,                                    /* number of parallel prefetches */
 628   5,                                    /* Branch cost */
 629   COSTS_N_INSNS (4),                    /* cost of FADD and FSUB insns.  */
 630   COSTS_N_INSNS (4),                    /* cost of FMUL instruction.  */
 631   COSTS_N_INSNS (24),                   /* cost of FDIV instruction.  */
 632   COSTS_N_INSNS (2),                    /* cost of FABS instruction.  */
 633   COSTS_N_INSNS (2),                    /* cost of FCHS instruction.  */
 634   COSTS_N_INSNS (35),                   /* cost of FSQRT instruction.  */
 635   /* For some reason, Athlon deals better with REP prefix (relative to loops)
 636      compared to K8.  Alignment becomes important after 8 bytes for memcpy and
 637      128 bytes for memset.  */
 638   {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
 639    DUMMY_STRINGOP_ALGS},                /* 1st stringop pair: presumably memcpy (confirm vs. struct def) */
 640   {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
 641    DUMMY_STRINGOP_ALGS},                /* 2nd stringop pair: presumably memset (confirm vs. struct def) */
 642   1,                                    /* scalar_stmt_cost.  */
 643   1,                                    /* scalar_load_cost.  */
 644   1,                                    /* scalar_store_cost.  */
 645   1,                                    /* vec_stmt_cost.  */
 646   1,                                    /* vec_to_scalar_cost.  */
 647   1,                                    /* scalar_to_vec_cost.  */
 648   1,                                    /* vec_align_load_cost.  */
 649   2,                                    /* vec_unalign_load_cost.  */
 650   1,                                    /* vec_store_cost.  */
 651   3,                                    /* cond_taken_branch_cost.  */
 652   1,                                    /* cond_not_taken_branch_cost.  */
 653 };
 654
 655 static const
 656 struct processor_costs k8_cost = {  /* Cost parameters for K8; initializers are positional.  */
 657   COSTS_N_INSNS (1),                    /* cost of an add instruction */
 658   COSTS_N_INSNS (2),                    /* cost of a lea instruction */
 659   COSTS_N_INSNS (1),                    /* variable shift costs */
 660   COSTS_N_INSNS (1),                    /* constant shift costs */
 661   {COSTS_N_INSNS (3),                   /* cost of starting multiply for QI */
 662    COSTS_N_INSNS (4),                   /*                               HI */
 663    COSTS_N_INSNS (3),                   /*                               SI */
 664    COSTS_N_INSNS (4),                   /*                               DI */
 665    COSTS_N_INSNS (5)},                  /*                               other */
 666   0,                                    /* cost of multiply per each bit set */
 667   {COSTS_N_INSNS (18),                  /* cost of a divide/mod for QI */
 668    COSTS_N_INSNS (26),                  /*                          HI */
 669    COSTS_N_INSNS (42),                  /*                          SI */
 670    COSTS_N_INSNS (74),                  /*                          DI */
 671    COSTS_N_INSNS (74)},                 /*                          other */
 672   COSTS_N_INSNS (1),                    /* cost of movsx */
 673   COSTS_N_INSNS (1),                    /* cost of movzx */
 674   8,                                    /* "large" insn */
 675   9,                                    /* MOVE_RATIO */
 676   4,                                    /* cost for loading QImode using movzbl */
 677   {3, 4, 3},                            /* cost of loading integer registers
 678                                            in QImode, HImode and SImode.
 679                                            Relative to reg-reg move (2).  */
 680   {3, 4, 3},                            /* cost of storing integer registers */
 681   4,                                    /* cost of reg,reg fld/fst */
 682   {4, 4, 12},                           /* cost of loading fp registers
 683                                            in SFmode, DFmode and XFmode */
 684   {6, 6, 8},                            /* cost of storing fp registers
 685                                            in SFmode, DFmode and XFmode */
 686   2,                                    /* cost of moving MMX register */
 687   {3, 3},                               /* cost of loading MMX registers
 688                                            in SImode and DImode */
 689   {4, 4},                               /* cost of storing MMX registers
 690                                            in SImode and DImode */
 691   2,                                    /* cost of moving SSE register */
 692   {4, 3, 6},                            /* cost of loading SSE registers
 693                                            in SImode, DImode and TImode */
 694   {4, 4, 5},                            /* cost of storing SSE registers
 695                                            in SImode, DImode and TImode */
 696   5,                                    /* MMX or SSE register to integer */
 697   64,                                   /* size of l1 cache.  */
 698   512,                                  /* size of l2 cache.  */
 699   64,                                   /* size of prefetch block */
 700   /* New AMD processors never drop prefetches; if they cannot be performed
 701      immediately, they are queued.  We set number of simultaneous prefetches
 702      to a large constant to reflect this (it probably is not a good idea not
 703      to limit number of prefetches at all, as their execution also takes some
 704      time).  */
 705   100,                                  /* number of parallel prefetches */
 706   3,                                    /* Branch cost */
 707   COSTS_N_INSNS (4),                    /* cost of FADD and FSUB insns.  */
 708   COSTS_N_INSNS (4),                    /* cost of FMUL instruction.  */
 709   COSTS_N_INSNS (19),                   /* cost of FDIV instruction.  */
 710   COSTS_N_INSNS (2),                    /* cost of FABS instruction.  */
 711   COSTS_N_INSNS (2),                    /* cost of FCHS instruction.  */
 712   COSTS_N_INSNS (35),                   /* cost of FSQRT instruction.  */
 713   /* K8 has optimized REP instruction for medium sized blocks, but for very small
 714      blocks it is better to use a loop.  For large blocks, libcall can do
 715      non-temporal accesses and beat inline considerably.  */
 716   {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
 717    {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
 718   {{libcall, {{8, loop}, {24, unrolled_loop},
 719               {2048, rep_prefix_4_byte}, {-1, libcall}}},
 720    {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
 721   4,                                    /* scalar_stmt_cost.  */
 722   2,                                    /* scalar_load_cost.  */
 723   2,                                    /* scalar_store_cost.  */
 724   5,                                    /* vec_stmt_cost.  */
 725   0,                                    /* vec_to_scalar_cost.  */
 726   2,                                    /* scalar_to_vec_cost.  */
 727   2,                                    /* vec_align_load_cost.  */
 728   3,                                    /* vec_unalign_load_cost.  */
 729   3,                                    /* vec_store_cost.  */
 730   3,                                    /* cond_taken_branch_cost.  */
 731   2,                                    /* cond_not_taken_branch_cost.  */
 732 };
 733
 734 static const struct processor_costs amdfam10_cost = {  /* Cost parameters for AMDFAM10; initializers are positional.  */
 735   COSTS_N_INSNS (1),                    /* cost of an add instruction */
 736   COSTS_N_INSNS (2),                    /* cost of a lea instruction */
 737   COSTS_N_INSNS (1),                    /* variable shift costs */
 738   COSTS_N_INSNS (1),                    /* constant shift costs */
 739   {COSTS_N_INSNS (3),                   /* cost of starting multiply for QI */
 740    COSTS_N_INSNS (4),                   /*                               HI */
 741    COSTS_N_INSNS (3),                   /*                               SI */
 742    COSTS_N_INSNS (4),                   /*                               DI */
 743    COSTS_N_INSNS (5)},                  /*                               other */
 744   0,                                    /* cost of multiply per each bit set */
 745   {COSTS_N_INSNS (19),                  /* cost of a divide/mod for QI */
 746    COSTS_N_INSNS (35),                  /*                          HI */
 747    COSTS_N_INSNS (51),                  /*                          SI */
 748    COSTS_N_INSNS (83),                  /*                          DI */
 749    COSTS_N_INSNS (83)},                 /*                          other */
 750   COSTS_N_INSNS (1),                    /* cost of movsx */
 751   COSTS_N_INSNS (1),                    /* cost of movzx */
 752   8,                                    /* "large" insn */
 753   9,                                    /* MOVE_RATIO */
 754   4,                                    /* cost for loading QImode using movzbl */
 755   {3, 4, 3},                            /* cost of loading integer registers
 756                                            in QImode, HImode and SImode.
 757                                            Relative to reg-reg move (2).  */
 758   {3, 4, 3},                            /* cost of storing integer registers */
 759   4,                                    /* cost of reg,reg fld/fst */
 760   {4, 4, 12},                           /* cost of loading fp registers
 761                                            in SFmode, DFmode and XFmode */
 762   {6, 6, 8},                            /* cost of storing fp registers
 763                                            in SFmode, DFmode and XFmode */
 764   2,                                    /* cost of moving MMX register */
 765   {3, 3},                               /* cost of loading MMX registers
 766                                            in SImode and DImode */
 767   {4, 4},                               /* cost of storing MMX registers
 768                                            in SImode and DImode */
 769   2,                                    /* cost of moving SSE register */
 770   {4, 4, 3},                            /* cost of loading SSE registers
 771                                            in SImode, DImode and TImode */
 772   {4, 4, 5},                            /* cost of storing SSE registers
 773                                            in SImode, DImode and TImode */
 774   3,                                    /* MMX or SSE register to integer */
 775                                         /* On K8
 776                                             MOVD reg64, xmmreg  Double  FSTORE 4
 777                                             MOVD reg32, xmmreg  Double  FSTORE 4
 778                                            On AMDFAM10
 779                                             MOVD reg64, xmmreg  Double  FADD 3
 780                                                                 1/1  1/1
 781                                             MOVD reg32, xmmreg  Double  FADD 3
 782                                                                 1/1  1/1 */
 783   64,                                   /* size of l1 cache.  */
 784   512,                                  /* size of l2 cache.  */
 785   64,                                   /* size of prefetch block */
 786   /* New AMD processors never drop prefetches; if they cannot be performed
 787      immediately, they are queued.  We set number of simultaneous prefetches
 788      to a large constant to reflect this (it probably is not a good idea not
 789      to limit number of prefetches at all, as their execution also takes some
 790      time).  */
 791   100,                                  /* number of parallel prefetches */
 792   2,                                    /* Branch cost */
 793   COSTS_N_INSNS (4),                    /* cost of FADD and FSUB insns.  */
 794   COSTS_N_INSNS (4),                    /* cost of FMUL instruction.  */
 795   COSTS_N_INSNS (19),                   /* cost of FDIV instruction.  */
 796   COSTS_N_INSNS (2),                    /* cost of FABS instruction.  */
 797   COSTS_N_INSNS (2),                    /* cost of FCHS instruction.  */
 798   COSTS_N_INSNS (35),                   /* cost of FSQRT instruction.  */
 799
 800   /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
 801      very small blocks it is better to use a loop.  For large blocks, libcall can
 802      do non-temporal accesses and beat inline considerably.  */
 803   {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
 804    {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
 805   {{libcall, {{8, loop}, {24, unrolled_loop},
 806               {2048, rep_prefix_4_byte}, {-1, libcall}}},
 807    {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
 808   4,                                    /* scalar_stmt_cost.  */
 809   2,                                    /* scalar_load_cost.  */
 810   2,                                    /* scalar_store_cost.  */
 811   6,                                    /* vec_stmt_cost.  */
 812   0,                                    /* vec_to_scalar_cost.  */
 813   2,                                    /* scalar_to_vec_cost.  */
 814   2,                                    /* vec_align_load_cost.  */
 815   2,                                    /* vec_unalign_load_cost.  */
 816   2,                                    /* vec_store_cost.  */
 817   2,                                    /* cond_taken_branch_cost.  */
 818   1,                                    /* cond_not_taken_branch_cost.  */
 819 };
 820
 821 static const
 822 struct processor_costs pentium4_cost = {  /* Cost parameters for Pentium 4; initializers are positional.  */
 823   COSTS_N_INSNS (1),                    /* cost of an add instruction */
 824   COSTS_N_INSNS (3),                    /* cost of a lea instruction */
 825   COSTS_N_INSNS (4),                    /* variable shift costs */
 826   COSTS_N_INSNS (4),                    /* constant shift costs */
 827   {COSTS_N_INSNS (15),                  /* cost of starting multiply for QI */
 828    COSTS_N_INSNS (15),                  /*                               HI */
 829    COSTS_N_INSNS (15),                  /*                               SI */
 830    COSTS_N_INSNS (15),                  /*                               DI */
 831    COSTS_N_INSNS (15)},                 /*                               other */
 832   0,                                    /* cost of multiply per each bit set */
 833   {COSTS_N_INSNS (56),                  /* cost of a divide/mod for QI */
 834    COSTS_N_INSNS (56),                  /*                          HI */
 835    COSTS_N_INSNS (56),                  /*                          SI */
 836    COSTS_N_INSNS (56),                  /*                          DI */
 837    COSTS_N_INSNS (56)},                 /*                          other */
 838   COSTS_N_INSNS (1),                    /* cost of movsx */
 839   COSTS_N_INSNS (1),                    /* cost of movzx */
 840   16,                                   /* "large" insn */
 841   6,                                    /* MOVE_RATIO */
 842   2,                                    /* cost for loading QImode using movzbl */
 843   {4, 5, 4},                            /* cost of loading integer registers
 844                                            in QImode, HImode and SImode.
 845                                            Relative to reg-reg move (2).  */
 846   {2, 3, 2},                            /* cost of storing integer registers */
 847   2,                                    /* cost of reg,reg fld/fst */
 848   {2, 2, 6},                            /* cost of loading fp registers
 849                                            in SFmode, DFmode and XFmode */
 850   {4, 4, 6},                            /* cost of storing fp registers
 851                                            in SFmode, DFmode and XFmode */
 852   2,                                    /* cost of moving MMX register */
 853   {2, 2},                               /* cost of loading MMX registers
 854                                            in SImode and DImode */
 855   {2, 2},                               /* cost of storing MMX registers
 856                                            in SImode and DImode */
 857   12,                                   /* cost of moving SSE register */
 858   {12, 12, 12},                         /* cost of loading SSE registers
 859                                            in SImode, DImode and TImode */
 860   {2, 2, 8},                            /* cost of storing SSE registers
 861                                            in SImode, DImode and TImode */
 862   10,                                   /* MMX or SSE register to integer */
 863   8,                                    /* size of l1 cache.  */
 864   256,                                  /* size of l2 cache.  */
 865   64,                                   /* size of prefetch block */
 866   6,                                    /* number of parallel prefetches */
 867   2,                                    /* Branch cost */
 868   COSTS_N_INSNS (5),                    /* cost of FADD and FSUB insns.  */
 869   COSTS_N_INSNS (7),                    /* cost of FMUL instruction.  */
 870   COSTS_N_INSNS (43),                   /* cost of FDIV instruction.  */
 871   COSTS_N_INSNS (2),                    /* cost of FABS instruction.  */
 872   COSTS_N_INSNS (2),                    /* cost of FCHS instruction.  */
 873   COSTS_N_INSNS (43),                   /* cost of FSQRT instruction.  */
 874   {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
 875    DUMMY_STRINGOP_ALGS},                /* 1st stringop pair: presumably memcpy (confirm vs. struct def) */
 876   {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
 877    {-1, libcall}}},
 878    DUMMY_STRINGOP_ALGS},                /* 2nd stringop pair: presumably memset (confirm vs. struct def) */
 879   1,                                    /* scalar_stmt_cost.  */
 880   1,                                    /* scalar_load_cost.  */
 881   1,                                    /* scalar_store_cost.  */
 882   1,                                    /* vec_stmt_cost.  */
 883   1,                                    /* vec_to_scalar_cost.  */
 884   1,                                    /* scalar_to_vec_cost.  */
 885   1,                                    /* vec_align_load_cost.  */
 886   2,                                    /* vec_unalign_load_cost.  */
 887   1,                                    /* vec_store_cost.  */
 888   3,                                    /* cond_taken_branch_cost.  */
 889   1,                                    /* cond_not_taken_branch_cost.  */
 890 };
 891
 892 static const
 893 struct processor_costs nocona_cost = {
 894   COSTS_N_INSNS (1),                    /* cost of an add instruction */
 895   COSTS_N_INSNS (1),                    /* cost of a lea instruction */
 896   COSTS_N_INSNS (1),                    /* variable shift costs */
 897   COSTS_N_INSNS (1),                    /* constant shift costs */
 898   {COSTS_N_INSNS (10),                  /* cost of starting multiply for QI */
 899    COSTS_N_INSNS (10),                  /*                               HI */
 900    COSTS_N_INSNS (10),                  /*                               SI */
 901    COSTS_N_INSNS (10),                  /*                               DI */
 902    COSTS_N_INSNS (10)},                 /*                               other */
 903   0,                                    /* cost of multiply per each bit set */
 904   {COSTS_N_INSNS (66),                  /* cost of a divide/mod for QI */
 905    COSTS_N_INSNS (66),                  /*                          HI */
 906    COSTS_N_INSNS (66),                  /*                          SI */
 907    COSTS_N_INSNS (66),                  /*                          DI */
 908    COSTS_N_INSNS (66)},                 /*                          other */
 909   COSTS_N_INSNS (1),                    /* cost of movsx */
 910   COSTS_N_INSNS (1),                    /* cost of movzx */
 911   16,                                   /* "large" insn */
 912   17,                                   /* MOVE_RATIO */
 913   4,                                    /* cost for loading QImode using movzbl */
 914   {4, 4, 4},                            /* cost of loading integer registers
 915                                            in QImode, HImode and SImode.
 916                                            Relative to reg-reg move (2).  */
 917   {4, 4, 4},                            /* cost of storing integer registers */
 918   3,                                    /* cost of reg,reg fld/fst */
 919   {12, 12, 12},                         /* cost of loading fp registers
 920                                            in SFmode, DFmode and XFmode */
 921   {4, 4, 4},                            /* cost of storing fp registers
 922                                            in SFmode, DFmode and XFmode */
 923   6,                                    /* cost of moving MMX register */
 924   {12, 12},                             /* cost of loading MMX registers
 925                                            in SImode and DImode */
 926   {12, 12},                             /* cost of storing MMX registers
 927                                            in SImode and DImode */
 928   6,                                    /* cost of moving SSE register */
 929   {12, 12, 12},                         /* cost of loading SSE registers
 930                                            in SImode, DImode and TImode */
 931   {12, 12, 12},                         /* cost of storing SSE registers
 932                                            in SImode, DImode and TImode */
 933   8,                                    /* MMX or SSE register to integer */
 934   8,                                    /* size of l1 cache.  */
 935   1024,                                 /* size of l2 cache.  */
 936   128,                                  /* size of prefetch block */
 937   8,                                    /* number of parallel prefetches */
 938   1,                                    /* Branch cost */
 939   COSTS_N_INSNS (6),                    /* cost of FADD and FSUB insns.  */
 940   COSTS_N_INSNS (8),                    /* cost of FMUL instruction.  */
 941   COSTS_N_INSNS (40),                   /* cost of FDIV instruction.  */
 942   COSTS_N_INSNS (3),                    /* cost of FABS instruction.  */
 943   COSTS_N_INSNS (3),                    /* cost of FCHS instruction.  */
 944   COSTS_N_INSNS (44),                   /* cost of FSQRT instruction.  */
 945   {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
 946    {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
 947               {100000, unrolled_loop}, {-1, libcall}}}},
 948   {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
 949    {-1, libcall}}},
 950    {libcall, {{24, loop}, {64, unrolled_loop},
 951               {8192, rep_prefix_8_byte}, {-1, libcall}}}},
 952   1,                                    /* scalar_stmt_cost.  */
 953   1,                                    /* scalar load_cost.  */
 954   1,                                    /* scalar_store_cost.  */
 955   1,                                    /* vec_stmt_cost.  */
 956   1,                                    /* vec_to_scalar_cost.  */
 957   1,                                    /* scalar_to_vec_cost.  */
 958   1,                                    /* vec_align_load_cost.  */
 959   2,                                    /* vec_unalign_load_cost.  */
 960   1,                                    /* vec_store_cost.  */
 961   3,                                    /* cond_taken_branch_cost.  */
 962   1,                                    /* cond_not_taken_branch_cost.  */
 963 };
 964
 965 static const
 966 struct processor_costs core2_cost = {     /* cost table named for Core 2; presumably used when tuning for it (confirm) */
 967   COSTS_N_INSNS (1),                    /* cost of an add instruction */
 968   COSTS_N_INSNS (1) + 1,                /* cost of a lea instruction */
 969   COSTS_N_INSNS (1),                    /* variable shift costs */
 970   COSTS_N_INSNS (1),                    /* constant shift costs */
 971   {COSTS_N_INSNS (3),                   /* cost of starting multiply for QI */
 972    COSTS_N_INSNS (3),                   /*                               HI */
 973    COSTS_N_INSNS (3),                   /*                               SI */
 974    COSTS_N_INSNS (3),                   /*                               DI */
 975    COSTS_N_INSNS (3)},                  /*                               other */
 976   0,                                    /* cost of multiply per each bit set */
 977   {COSTS_N_INSNS (22),                  /* cost of a divide/mod for QI */
 978    COSTS_N_INSNS (22),                  /*                          HI */
 979    COSTS_N_INSNS (22),                  /*                          SI */
 980    COSTS_N_INSNS (22),                  /*                          DI */
 981    COSTS_N_INSNS (22)},                 /*                          other */
 982   COSTS_N_INSNS (1),                    /* cost of movsx */
 983   COSTS_N_INSNS (1),                    /* cost of movzx */
 984   8,                                    /* "large" insn */
 985   16,                                   /* MOVE_RATIO */
 986   2,                                    /* cost for loading QImode using movzbl */
 987   {6, 6, 6},                            /* cost of loading integer registers
 988                                            in QImode, HImode and SImode.
 989                                            Relative to reg-reg move (2).  */
 990   {4, 4, 4},                            /* cost of storing integer registers */
 991   2,                                    /* cost of reg,reg fld/fst */
 992   {6, 6, 6},                            /* cost of loading fp registers
 993                                            in SFmode, DFmode and XFmode */
 994   {4, 4, 4},                            /* cost of storing fp registers in SFmode, DFmode and XFmode */
 995   2,                                    /* cost of moving MMX register */
 996   {6, 6},                               /* cost of loading MMX registers
 997                                            in SImode and DImode */
 998   {4, 4},                               /* cost of storing MMX registers
 999                                            in SImode and DImode */
1000   2,                                    /* cost of moving SSE register */
1001   {6, 6, 6},                            /* cost of loading SSE registers
1002                                            in SImode, DImode and TImode */
1003   {4, 4, 4},                            /* cost of storing SSE registers
1004                                            in SImode, DImode and TImode */
1005   2,                                    /* MMX or SSE register to integer */
1006   32,                                   /* size of l1 cache.  */
1007   2048,                                 /* size of l2 cache.  */
1008   128,                                  /* size of prefetch block */
1009   8,                                    /* number of parallel prefetches */
1010   3,                                    /* Branch cost */
1011   COSTS_N_INSNS (3),                    /* cost of FADD and FSUB insns.  */
1012   COSTS_N_INSNS (5),                    /* cost of FMUL instruction.  */
1013   COSTS_N_INSNS (32),                   /* cost of FDIV instruction.  */
1014   COSTS_N_INSNS (1),                    /* cost of FABS instruction.  */
1015   COSTS_N_INSNS (1),                    /* cost of FCHS instruction.  */
1016   COSTS_N_INSNS (58),                   /* cost of FSQRT instruction.  */
1017   {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},  /* NOTE(review): presumably memcpy strategies (32-bit entry, then 64-bit) - confirm */
1018    {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1019               {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1020   {{libcall, {{8, loop}, {15, unrolled_loop},  /* NOTE(review): presumably memset strategies (32-bit entry, then 64-bit) - confirm */
1021               {2048, rep_prefix_4_byte}, {-1, libcall}}},
1022    {libcall, {{24, loop}, {32, unrolled_loop},
1023               {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1024   1,                                    /* scalar_stmt_cost.  */
1025   1,                                    /* scalar_load_cost.  */
1026   1,                                    /* scalar_store_cost.  */
1027   1,                                    /* vec_stmt_cost.  */
1028   1,                                    /* vec_to_scalar_cost.  */
1029   1,                                    /* scalar_to_vec_cost.  */
1030   1,                                    /* vec_align_load_cost.  */
1031   2,                                    /* vec_unalign_load_cost.  */
1032   1,                                    /* vec_store_cost.  */
1033   3,                                    /* cond_taken_branch_cost.  */
1034   1,                                    /* cond_not_taken_branch_cost.  */
1035 };
1036
1037 /* Generic64 should produce code tuned for Nocona and K8.  */
1038 static const
1039 struct processor_costs generic64_cost = {
1040   COSTS_N_INSNS (1),                    /* cost of an add instruction */
1041   /* On all chips taken into consideration lea is 2 cycles or more.  With
1042      this cost however our current implementation of synth_mult results in
1043      use of unnecessary temporary registers causing regression on several
1044      SPECfp benchmarks.  */
1045   COSTS_N_INSNS (1) + 1,                /* cost of a lea instruction */
1046   COSTS_N_INSNS (1),                    /* variable shift costs */
1047   COSTS_N_INSNS (1),                    /* constant shift costs */
1048   {COSTS_N_INSNS (3),                   /* cost of starting multiply for QI */
1049    COSTS_N_INSNS (4),                   /*                               HI */
1050    COSTS_N_INSNS (3),                   /*                               SI */
1051    COSTS_N_INSNS (4),                   /*                               DI */
1052    COSTS_N_INSNS (2)},                  /*                               other */
1053   0,                                    /* cost of multiply per each bit set */
1054   {COSTS_N_INSNS (18),                  /* cost of a divide/mod for QI */
1055    COSTS_N_INSNS (26),                  /*                          HI */
1056    COSTS_N_INSNS (42),                  /*                          SI */
1057    COSTS_N_INSNS (74),                  /*                          DI */
1058    COSTS_N_INSNS (74)},                 /*                          other */
1059   COSTS_N_INSNS (1),                    /* cost of movsx */
1060   COSTS_N_INSNS (1),                    /* cost of movzx */
1061   8,                                    /* "large" insn */
1062   17,                                   /* MOVE_RATIO */
1063   4,                                    /* cost for loading QImode using movzbl */
1064   {4, 4, 4},                            /* cost of loading integer registers
1065                                            in QImode, HImode and SImode.
1066                                            Relative to reg-reg move (2).  */
1067   {4, 4, 4},                            /* cost of storing integer registers */
1068   4,                                    /* cost of reg,reg fld/fst */
1069   {12, 12, 12},                         /* cost of loading fp registers
1070                                            in SFmode, DFmode and XFmode */
1071   {6, 6, 8},                            /* cost of storing fp registers
1072                                            in SFmode, DFmode and XFmode */
1073   2,                                    /* cost of moving MMX register */
1074   {8, 8},                               /* cost of loading MMX registers
1075                                            in SImode and DImode */
1076   {8, 8},                               /* cost of storing MMX registers
1077                                            in SImode and DImode */
1078   2,                                    /* cost of moving SSE register */
1079   {8, 8, 8},                            /* cost of loading SSE registers
1080                                            in SImode, DImode and TImode */
1081   {8, 8, 8},                            /* cost of storing SSE registers
1082                                            in SImode, DImode and TImode */
1083   5,                                    /* MMX or SSE register to integer */
1084   32,                                   /* size of l1 cache.  */
1085   512,                                  /* size of l2 cache.  */
1086   64,                                   /* size of prefetch block */
1087   6,                                    /* number of parallel prefetches */
1088   /* Benchmarks show large regressions on K8 sixtrack benchmark when this value
1089      is increased to perhaps more appropriate value of 5.  */
1090   3,                                    /* Branch cost */
1091   COSTS_N_INSNS (8),                    /* cost of FADD and FSUB insns.  */
1092   COSTS_N_INSNS (8),                    /* cost of FMUL instruction.  */
1093   COSTS_N_INSNS (20),                   /* cost of FDIV instruction.  */
1094   COSTS_N_INSNS (8),                    /* cost of FABS instruction.  */
1095   COSTS_N_INSNS (8),                    /* cost of FCHS instruction.  */
1096   COSTS_N_INSNS (40),                   /* cost of FSQRT instruction.  */
1097   {DUMMY_STRINGOP_ALGS,                 /* NOTE(review): presumably memcpy strategies; first (32-bit?) entry is a dummy - confirm */
1098    {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1099   {DUMMY_STRINGOP_ALGS,                 /* NOTE(review): presumably memset strategies; first (32-bit?) entry is a dummy - confirm */
1100    {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1101   1,                                    /* scalar_stmt_cost.  */
1102   1,                                    /* scalar_load_cost.  */
1103   1,                                    /* scalar_store_cost.  */
1104   1,                                    /* vec_stmt_cost.  */
1105   1,                                    /* vec_to_scalar_cost.  */
1106   1,                                    /* scalar_to_vec_cost.  */
1107   1,                                    /* vec_align_load_cost.  */
1108   2,                                    /* vec_unalign_load_cost.  */
1109   1,                                    /* vec_store_cost.  */
1110   3,                                    /* cond_taken_branch_cost.  */
1111   1,                                    /* cond_not_taken_branch_cost.  */
1112 };
1113
1114 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8.  */
1115 static const
1116 struct processor_costs generic32_cost = {
1117   COSTS_N_INSNS (1),                    /* cost of an add instruction */
1118   COSTS_N_INSNS (1) + 1,                /* cost of a lea instruction */
1119   COSTS_N_INSNS (1),                    /* variable shift costs */
1120   COSTS_N_INSNS (1),                    /* constant shift costs */
1121   {COSTS_N_INSNS (3),                   /* cost of starting multiply for QI */
1122    COSTS_N_INSNS (4),                   /*                               HI */
1123    COSTS_N_INSNS (3),                   /*                               SI */
1124    COSTS_N_INSNS (4),                   /*                               DI */
1125    COSTS_N_INSNS (2)},                  /*                               other */
1126   0,                                    /* cost of multiply per each bit set */
1127   {COSTS_N_INSNS (18),                  /* cost of a divide/mod for QI */
1128    COSTS_N_INSNS (26),                  /*                          HI */
1129    COSTS_N_INSNS (42),                  /*                          SI */
1130    COSTS_N_INSNS (74),                  /*                          DI */
1131    COSTS_N_INSNS (74)},                 /*                          other */
1132   COSTS_N_INSNS (1),                    /* cost of movsx */
1133   COSTS_N_INSNS (1),                    /* cost of movzx */
1134   8,                                    /* "large" insn */
1135   17,                                   /* MOVE_RATIO */
1136   4,                                    /* cost for loading QImode using movzbl */
1137   {4, 4, 4},                            /* cost of loading integer registers
1138                                            in QImode, HImode and SImode.
1139                                            Relative to reg-reg move (2).  */
1140   {4, 4, 4},                            /* cost of storing integer registers */
1141   4,                                    /* cost of reg,reg fld/fst */
1142   {12, 12, 12},                         /* cost of loading fp registers
1143                                            in SFmode, DFmode and XFmode */
1144   {6, 6, 8},                            /* cost of storing fp registers
1145                                            in SFmode, DFmode and XFmode */
1146   2,                                    /* cost of moving MMX register */
1147   {8, 8},                               /* cost of loading MMX registers
1148                                            in SImode and DImode */
1149   {8, 8},                               /* cost of storing MMX registers
1150                                            in SImode and DImode */
1151   2,                                    /* cost of moving SSE register */
1152   {8, 8, 8},                            /* cost of loading SSE registers
1153                                            in SImode, DImode and TImode */
1154   {8, 8, 8},                            /* cost of storing SSE registers
1155                                            in SImode, DImode and TImode */
1156   5,                                    /* MMX or SSE register to integer */
1157   32,                                   /* size of l1 cache.  */
1158   256,                                  /* size of l2 cache.  */
1159   64,                                   /* size of prefetch block */
1160   6,                                    /* number of parallel prefetches */
1161   3,                                    /* Branch cost */
1162   COSTS_N_INSNS (8),                    /* cost of FADD and FSUB insns.  */
1163   COSTS_N_INSNS (8),                    /* cost of FMUL instruction.  */
1164   COSTS_N_INSNS (20),                   /* cost of FDIV instruction.  */
1165   COSTS_N_INSNS (8),                    /* cost of FABS instruction.  */
1166   COSTS_N_INSNS (8),                    /* cost of FCHS instruction.  */
1167   COSTS_N_INSNS (40),                   /* cost of FSQRT instruction.  */
1168   {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},  /* NOTE(review): presumably memcpy strategies - confirm */
1169    DUMMY_STRINGOP_ALGS},                /* second (64-bit?) entry is a dummy here */
1170   {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},  /* NOTE(review): presumably memset strategies - confirm */
1171    DUMMY_STRINGOP_ALGS},                /* second (64-bit?) entry is a dummy here */
1172   1,                                    /* scalar_stmt_cost.  */
1173   1,                                    /* scalar_load_cost.  */
1174   1,                                    /* scalar_store_cost.  */
1175   1,                                    /* vec_stmt_cost.  */
1176   1,                                    /* vec_to_scalar_cost.  */
1177   1,                                    /* scalar_to_vec_cost.  */
1178   1,                                    /* vec_align_load_cost.  */
1179   2,                                    /* vec_unalign_load_cost.  */
1180   1,                                    /* vec_store_cost.  */
1181   3,                                    /* cond_taken_branch_cost.  */
1182   1,                                    /* cond_not_taken_branch_cost.  */
1183 };
1184
1185 const struct processor_costs *ix86_cost = &pentium_cost;  /* cost table pointer; starts at pentium_cost, presumably repointed elsewhere (confirm) */
1186
1187 /* Processor feature/optimization bitmasks: one bit per PROCESSOR_* value.  */
1188 #define m_386 (1<<PROCESSOR_I386)
1189 #define m_486 (1<<PROCESSOR_I486)
1190 #define m_PENT (1<<PROCESSOR_PENTIUM)
1191 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1192 #define m_PENT4  (1<<PROCESSOR_PENTIUM4)
1193 #define m_NOCONA  (1<<PROCESSOR_NOCONA)
1194 #define m_CORE2  (1<<PROCESSOR_CORE2)
1195
1196 #define m_GEODE  (1<<PROCESSOR_GEODE)
1197 #define m_K6  (1<<PROCESSOR_K6)
1198 #define m_K6_GEODE  (m_K6 | m_GEODE)
1199 #define m_K8  (1<<PROCESSOR_K8)
1200 #define m_ATHLON  (1<<PROCESSOR_ATHLON)
1201 #define m_ATHLON_K8  (m_K8 | m_ATHLON)
1202 #define m_AMDFAM10  (1<<PROCESSOR_AMDFAM10)
1203 #define m_AMD_MULTIPLE  (m_K8 | m_ATHLON | m_AMDFAM10)
1204
1205 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1206 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1207
1208 /* Generic instruction choice should be the common subset of supported CPUs
1209    (PPro/PENT4/NOCONA/CORE2/Athlon/K8).  */
1210 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1211
1212 /* Feature tests against the various tunings: one m_* processor mask per X86_TUNE_* entry, in the order labelled below.  */
1213 unsigned int ix86_tune_features[X86_TUNE_LAST] = {
1214   /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1215      negatively, so enabling for Generic64 seems like good code size
1216      tradeoff.  We can't enable it for 32bit generic because it does not
1217      work well with PPro base chips.  */
1218   m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2 | m_GENERIC64,
1219
1220   /* X86_TUNE_PUSH_MEMORY */
1221   m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
1222   | m_NOCONA | m_CORE2 | m_GENERIC,
1223
1224   /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1225   m_486 | m_PENT,
1226
1227   /* X86_TUNE_USE_BIT_TEST */
1228   m_386,
1229
1230   /* X86_TUNE_UNROLL_STRLEN */
1231   m_486 | m_PENT | m_PPRO | m_AMD_MULTIPLE | m_K6 | m_CORE2 | m_GENERIC,
1232
1233   /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1234   m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,
1235
1236   /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1237      on simulation result. But after P4 was made, no performance benefit
1238      was observed with branch hints.  It also increases the code size.
1239      As a result, icc never generates branch hints.  */
1240   0,
1241
1242   /* X86_TUNE_DOUBLE_WITH_ADD */
1243   ~m_386,
1244
1245   /* X86_TUNE_USE_SAHF */
1246   m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
1247   | m_NOCONA | m_CORE2 | m_GENERIC,
1248
1249   /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1250      partial dependencies.  */
1251   m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA
1252   | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1253
1254   /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1255      register stalls on Generic32 compilation setting as well.  However
1256      in current implementation the partial register stalls are not eliminated
1257      very well - they can be introduced via subregs synthesized by combine
1258      and can happen in caller/callee saving sequences.  Because this option
1259      pays back little on PPro based chips and is in conflict with partial reg
1260      dependencies used by Athlon/P4 based chips, it is better to leave it off
1261      for generic32 for now.  */
1262   m_PPRO,
1263
1264   /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1265   m_CORE2 | m_GENERIC,
1266
1267   /* X86_TUNE_USE_HIMODE_FIOP */
1268   m_386 | m_486 | m_K6_GEODE,
1269
1270   /* X86_TUNE_USE_SIMODE_FIOP */
1271   ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_CORE2 | m_GENERIC),
1272
1273   /* X86_TUNE_USE_MOV0 */
1274   m_K6,
1275
1276   /* X86_TUNE_USE_CLTD */
1277   ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC),
1278
1279   /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx.  */
1280   m_PENT4,
1281
1282   /* X86_TUNE_SPLIT_LONG_MOVES */
1283   m_PPRO,
1284
1285   /* X86_TUNE_READ_MODIFY_WRITE */
1286   ~m_PENT,
1287
1288   /* X86_TUNE_READ_MODIFY */
1289   ~(m_PENT | m_PPRO),
1290
1291   /* X86_TUNE_PROMOTE_QIMODE */
1292   m_K6_GEODE | m_PENT | m_386 | m_486 | m_AMD_MULTIPLE | m_CORE2
1293   | m_GENERIC /* | m_PENT4 ? */,
1294
1295   /* X86_TUNE_FAST_PREFIX */
1296   ~(m_PENT | m_486 | m_386),
1297
1298   /* X86_TUNE_SINGLE_STRINGOP */
1299   m_386 | m_PENT4 | m_NOCONA,
1300
1301   /* X86_TUNE_QIMODE_MATH */
1302   ~0,
1303
1304   /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1305      register stalls.  Just like X86_TUNE_PARTIAL_REG_STALL this option
1306      might be considered for Generic32 if our scheme for avoiding partial
1307      stalls was more effective.  */
1308   ~m_PPRO,
1309
1310   /* X86_TUNE_PROMOTE_QI_REGS */
1311   0,
1312
1313   /* X86_TUNE_PROMOTE_HI_REGS */
1314   m_PPRO,
1315
1316   /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop.  */
1317   m_AMD_MULTIPLE | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1318
1319   /* X86_TUNE_ADD_ESP_8 */
1320   m_AMD_MULTIPLE | m_PPRO | m_K6_GEODE | m_386
1321   | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1322
1323   /* X86_TUNE_SUB_ESP_4 */
1324   m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1325
1326   /* X86_TUNE_SUB_ESP_8 */
1327   m_AMD_MULTIPLE | m_PPRO | m_386 | m_486
1328   | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1329
1330   /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1331      for DFmode copies */
1332   ~(m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1333     | m_GENERIC | m_GEODE),
1334
1335   /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1336   m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1337
1338   /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1339      conflict here in between PPro/Pentium4 based chips that treat 128bit
1340      SSE registers as single units versus K8 based chips that divide SSE
1341      registers into two 64bit halves.  This knob promotes all store destinations
1342      to be 128bit to allow register renaming on 128bit SSE units, but usually
1343      results in one extra microop on 64bit SSE units.  Experimental results
1344      show that disabling this option on P4 brings over 20% SPECfp regression,
1345      while enabling it on K8 brings roughly 2.4% regression that can be partly
1346      masked by careful scheduling of moves.  */
1347   m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_AMDFAM10,
1348
1349   /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
1350   m_AMDFAM10,
1351
1352   /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1353      are resolved on SSE register parts instead of whole registers, so we may
1354      maintain just lower part of scalar values in proper format leaving the
1355      upper part undefined.  */
1356   m_ATHLON_K8,
1357
1358   /* X86_TUNE_SSE_TYPELESS_STORES */
1359   m_AMD_MULTIPLE,
1360
1361   /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1362   m_PPRO | m_PENT4 | m_NOCONA,
1363
1364   /* X86_TUNE_MEMORY_MISMATCH_STALL */
1365   m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1366
1367   /* X86_TUNE_PROLOGUE_USING_MOVE */
1368   m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1369
1370   /* X86_TUNE_EPILOGUE_USING_MOVE */
1371   m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1372
1373   /* X86_TUNE_SHIFT1 */
1374   ~m_486,
1375
1376   /* X86_TUNE_USE_FFREEP */
1377   m_AMD_MULTIPLE,
1378
1379   /* X86_TUNE_INTER_UNIT_MOVES */
1380   ~(m_AMD_MULTIPLE | m_GENERIC),
1381
1382   /* X86_TUNE_INTER_UNIT_CONVERSIONS */
1383   ~(m_AMDFAM10),
1384
1385   /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1386      than 4 branch instructions in the 16 byte window.  */
1387   m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1388
1389   /* X86_TUNE_SCHEDULE */
1390   m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC,
1391
1392   /* X86_TUNE_USE_BT */
1393   m_AMD_MULTIPLE,
1394
1395   /* X86_TUNE_USE_INCDEC */
1396   ~(m_PENT4 | m_NOCONA | m_GENERIC),
1397
1398   /* X86_TUNE_PAD_RETURNS */
1399   m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1400
1401   /* X86_TUNE_EXT_80387_CONSTANTS */
1402   m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC,
1403
1404   /* X86_TUNE_SHORTEN_X87_SSE */
1405   ~m_K8,
1406
1407   /* X86_TUNE_AVOID_VECTOR_DECODE */
1408   m_K8 | m_GENERIC64,
1409
1410   /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have same latency for HImode
1411      and SImode multiply, but 386 and 486 do HImode multiply faster.  */
1412   ~(m_386 | m_486),
1413
1414   /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
1415      vector path on AMD machines.  */
1416   m_K8 | m_GENERIC64 | m_AMDFAM10,
1417
1418   /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
1419      machines.  */
1420   m_K8 | m_GENERIC64 | m_AMDFAM10,
1421
1422   /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
1423      than a MOV.  */
1424   m_PENT,
1425
1426   /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1427      but one byte longer.  */
1428   m_PENT,
1429
1430   /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
1431      operand that cannot be represented using a modRM byte.  The XOR
1432      replacement is long decoded, so this split helps here as well.  */
1433   m_K6,
1434
1435   /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
1436      from integer to FP. */
1437   m_AMDFAM10,
1438 };
1439
1440 /* Feature tests against the various architecture variations: one m_* processor mask per X86_ARCH_* entry.  */
1441 unsigned int ix86_arch_features[X86_ARCH_LAST] = {
1442   /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro.  */
1443   ~(m_386 | m_486 | m_PENT | m_K6),
1444
1445   /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486.  */
1446   ~m_386,
1447
1448   /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium.  */
1449   ~(m_386 | m_486),
1450
1451   /* X86_ARCH_XADD: Exchange and add was added for 80486.  */
1452   ~m_386,
1453
1454   /* X86_ARCH_BSWAP: Byteswap was added for 80486.  */
1455   ~m_386,
1456 };
1457
1458 static const unsigned int x86_accumulate_outgoing_args  /* m_* processor mask; presumably where accumulating outgoing args is preferred (confirm) */
1459   = m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
1460
1461 static const unsigned int x86_arch_always_fancy_math_387  /* m_* processor mask; presumably CPUs that always have the extended 387 math insns (confirm) */
1462   = m_PENT | m_PPRO | m_AMD_MULTIPLE | m_PENT4
1463     | m_NOCONA | m_CORE2 | m_GENERIC;
1464
1465 static enum stringop_alg stringop_alg = no_stringop;  /* starts as no_stringop; presumably a user-forced string-op strategy set elsewhere (confirm) */
1466
1467 /* In case the average insn count for a single function invocation is
1468    lower than this constant, emit fast (but longer) prologue and
1469    epilogue code.  */
1470 #define FAST_PROLOGUE_INSN_COUNT 20
1471
1472 /* Names for 8-bit (low), 8-bit (high), and 16-bit registers, respectively.  */
1473 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1474 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1475 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1476
1477 /* Array of the smallest class containing reg number REGNO, indexed by
1478    REGNO.  Used by REGNO_REG_CLASS in i386.h.  */
1479
1480 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1481 {
1482   /* ax, dx, cx, bx */
1483   AREG, DREG, CREG, BREG,
1484   /* si, di, bp, sp */
1485   SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1486   /* FP registers: top of stack, second, then the remaining six */
1487   FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1488   FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1489   /* arg pointer */
1490   NON_Q_REGS,
1491   /* flags, fpsr, fpcr, frame */
1492   NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1493   /* SSE registers */
1494   SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1495   SSE_REGS, SSE_REGS,
1496   /* MMX registers */
1497   MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1498   MMX_REGS, MMX_REGS,
1499   /* REX (extended integer) registers */
1500   NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1501   NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1502   /* SSE REX (extended SSE) registers */
1503   SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1504   SSE_REGS, SSE_REGS,
1505 };
1506
1507 /* The "default" register map used in 32bit mode, in gcc regno order; -1 presumably means no debugger number (confirm).  */
1508
1509 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1510 {
1511   0, 2, 1, 3, 6, 7, 4, 5,               /* general regs */
1512   12, 13, 14, 15, 16, 17, 18, 19,       /* fp regs */
1513   -1, -1, -1, -1, -1,                   /* arg, flags, fpsr, fpcr, frame */
1514   21, 22, 23, 24, 25, 26, 27, 28,       /* SSE */
1515   29, 30, 31, 32, 33, 34, 35, 36,       /* MMX */
1516   -1, -1, -1, -1, -1, -1, -1, -1,       /* extended integer registers */
1517   -1, -1, -1, -1, -1, -1, -1, -1,       /* extended SSE registers */
1518 };
1519
1520 static int const x86_64_int_parameter_registers[6] =  /* gcc regnos of the six listed registers, in this order */
1521 {
1522   5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
1523   FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1524 };
1525
1526 static int const x86_64_ms_abi_int_parameter_registers[4] =  /* gcc regnos of the four listed registers, in this order */
1527 {
1528   2 /*RCX*/, 1 /*RDX*/,
1529   FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1530 };
1531
1532 static int const x86_64_int_return_registers[4] =  /* gcc regnos of the four listed registers, in this order */
1533 {
1534   0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
1535 };
1536
1537 /* The "default" register map used in 64bit mode, in gcc regno order; -1 presumably means no debugger number (confirm).  */
1538 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1539 {
1540   0, 1, 2, 3, 4, 5, 6, 7,               /* general regs */
1541   33, 34, 35, 36, 37, 38, 39, 40,       /* fp regs */
1542   -1, -1, -1, -1, -1,                   /* arg, flags, fpsr, fpcr, frame */
1543   17, 18, 19, 20, 21, 22, 23, 24,       /* SSE */
1544   41, 42, 43, 44, 45, 46, 47, 48,       /* MMX */
1545   8,9,10,11,12,13,14,15,                /* extended integer registers */
1546   25, 26, 27, 28, 29, 30, 31, 32,       /* extended SSE registers */
1547 };
1548
1549 /* Define the register numbers to be used in Dwarf debugging information.
1550    The SVR4 reference port C compiler uses the following register numbers
1551    in its Dwarf output code:
1552         0 for %eax (gcc regno = 0)
1553         1 for %ecx (gcc regno = 2)
1554         2 for %edx (gcc regno = 1)
1555         3 for %ebx (gcc regno = 3)
1556         4 for %esp (gcc regno = 7)
1557         5 for %ebp (gcc regno = 6)
1558         6 for %esi (gcc regno = 4)
1559         7 for %edi (gcc regno = 5)
1560    The following three DWARF register numbers are never generated by
1561    the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1562    believes these numbers have these meanings.
1563         8  for %eip    (no gcc equivalent)
1564         9  for %eflags (gcc regno = 17)
1565         10 for %trapno (no gcc equivalent)
1566    It is not at all clear how we should number the FP stack registers
1567    for the x86 architecture.  If the version of SDB on x86/svr4 were
1568    a bit less brain dead with respect to floating-point then we would
1569    have a precedent to follow with respect to DWARF register numbers
1570    for x86 FP registers, but the SDB on x86/svr4 is so completely
1571    broken with respect to FP registers that it is hardly worth thinking
1572    of it as something to strive for compatibility with.
1573    The version of x86/svr4 SDB I have at the moment does (partially)
1574    seem to believe that DWARF register number 11 is associated with
1575    the x86 register %st(0), but that's about all.  Higher DWARF
1576    register numbers don't seem to be associated with anything in
1577    particular, and even for DWARF regno 11, SDB only seems to under-
1578    stand that it should say that a variable lives in %st(0) (when
1579    asked via an `=' command) if we said it was in DWARF regno 11,
1580    but SDB still prints garbage when asked for the value of the
1581    variable in question (via a `/' command).
1582    (Also note that the labels SDB prints for various FP stack regs
1583    when doing an `x' command are all wrong.)
1584    Note that these problems generally don't affect the native SVR4
1585    C compiler because it doesn't allow the use of -O with -g and
1586    because when it is *not* optimizing, it allocates a memory
1587    location for each floating-point variable, and the memory
1588    location is what gets described in the DWARF AT_location
1589    attribute for the variable in question.
1590    Regardless of the severe mental illness of the x86/svr4 SDB, we
1591    do something sensible here and we use the following DWARF
1592    register numbers.  Note that these are all stack-top-relative
1593    numbers.
1594         11 for %st(0) (gcc regno = 8)
1595         12 for %st(1) (gcc regno = 9)
1596         13 for %st(2) (gcc regno = 10)
1597         14 for %st(3) (gcc regno = 11)
1598         15 for %st(4) (gcc regno = 12)
1599         16 for %st(5) (gcc regno = 13)
1600         17 for %st(6) (gcc regno = 14)
1601         18 for %st(7) (gcc regno = 15)
1602 */
/* Table form of the numbering described in the comment above: position N
   holds the DWARF number for gcc regno N.  The general and fp rows and
   the flags entry (9, at gcc regno 17) match that list one for one.  The
   SSE and MMX rows use 21-28 and 29-36, which the comment above does not
   cover.  The extended registers have no number here (-1).  */
1603 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1604 {
1605   0, 2, 1, 3, 6, 7, 5, 4,               /* general regs */
1606   11, 12, 13, 14, 15, 16, 17, 18,       /* fp regs */
1607   -1, 9, -1, -1, -1,                    /* arg, flags, fpsr, fpcr, frame */
1608   21, 22, 23, 24, 25, 26, 27, 28,       /* SSE registers */
1609   29, 30, 31, 32, 33, 34, 35, 36,       /* MMX registers */
1610   -1, -1, -1, -1, -1, -1, -1, -1,       /* extended integer registers */
1611   -1, -1, -1, -1, -1, -1, -1, -1,       /* extended SSE registers */
1612 };
1613
1614 /* Test and compare insns in i386.md store the information needed to
1615    generate branch and scc insns here.  All three start out as NULL_RTX.
   ix86_compare_op0 and ix86_compare_op1 are presumably the two operands
   of the pending comparison; the exact role of ix86_compare_emitted is
   not visible here -- TODO confirm against the expanders that set it.  */
1616
1617 rtx ix86_compare_op0 = NULL_RTX;
1618 rtx ix86_compare_op1 = NULL_RTX;
1619 rtx ix86_compare_emitted = NULL_RTX;
1620
1621 /* Size of the register save area: X86_64_REGPARM_MAX slots of
   UNITS_PER_WORD bytes each, followed in the sum by
   X86_64_SSE_REGPARM_MAX slots of 16 bytes each.  */
1622 #define X86_64_VARARGS_SIZE (X86_64_REGPARM_MAX * UNITS_PER_WORD + X86_64_SSE_REGPARM_MAX * 16)
1623
1624 /* Define the structure for the machine field in struct function.
   Each entry carries a self-typed NEXT pointer, so entries can be chained
   into a singly linked list.  Presumably an entry records the stack slot
   (RTL) handed out for a given (MODE, N) pair -- TODO confirm against the
   code that allocates these entries.  */
1625
1626 struct stack_local_entry GTY(())
1627 {
1628   unsigned short mode;	/* Presumably a machine mode stored as a short.  */
1629   unsigned short n;	/* Presumably the slot index within that mode.  */
1630   rtx rtl;		/* RTL associated with this entry.  */
1631   struct stack_local_entry *next;	/* Next entry, if any.  */
1632 };
1633
1634 /* Structure describing stack frame layout.
1635    Stack grows downward:
1636
1637    [arguments]
1638                                               <- ARG_POINTER
1639    saved pc
1640
1641    saved frame pointer if frame_pointer_needed
1642                                               <- HARD_FRAME_POINTER
1643    [saved regs]
1644
1645    [padding1]          \
1646                         )
1647    [va_arg registers]  (
1648                         > to_allocate         <- FRAME_POINTER
1649    [frame]             (
1650                         )
1651    [padding2]          /
1652   */
/* Sizes and offsets for the regions drawn in the diagram above.  The
   struct is filled in through a pointer by ix86_compute_frame_layout,
   which is declared later in this file.  Units are presumably bytes --
   TODO confirm.  */
1653 struct ix86_frame
1654 {
1655   int nregs;			/* Presumably the number of [saved regs].  */
1656   int padding1;			/* [padding1] in the diagram.  */
1657   int va_arg_size;		/* [va_arg registers] in the diagram.  */
1658   HOST_WIDE_INT frame;		/* [frame] in the diagram.  */
1659   int padding2;			/* [padding2] in the diagram.  */
1660   int outgoing_arguments_size;	/* Not drawn in the diagram above.  */
1661   int red_zone_size;		/* Not drawn in the diagram above.  */
1662
  /* The span marked "to_allocate" in the diagram, i.e. from padding1
     through padding2.  */
1663   HOST_WIDE_INT to_allocate;
1664   /* The offsets relative to ARG_POINTER.  */
1665   HOST_WIDE_INT frame_pointer_offset;
1666   HOST_WIDE_INT hard_frame_pointer_offset;
1667   HOST_WIDE_INT stack_pointer_offset;
1668
1669   /* When save_regs_using_mov is set, emit prologue using
1670      move instead of push instructions.  */
1671   bool save_regs_using_mov;
1672 };
1673
1674 /* Code model option.  Set by override_options, either from the
   -mcmodel= string or from a 32bit/64bit default.  */
1675 enum cmodel ix86_cmodel;
1676 /* Asm dialect.  AT&T syntax unless override_options accepts -masm=intel.  */
1677 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1678 /* TLS dialects.  Starts out as the GNU dialect.  */
1679 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1680
1681 /* Which unit we are generating floating point math for.  */
1682 enum fpmath_unit ix86_fpmath;
1683
1684 /* Which cpu are we scheduling for.  override_options first defaults
   this to ix86_arch when the -march= name is found in its alias table.  */
1685 enum processor_type ix86_tune;
1686
1687 /* Which instruction set architecture to use.  Taken by override_options
   from the alias-table entry whose name matches the -march= string.  */
1688 enum processor_type ix86_arch;
1689
1690 /* true if sse prefetch instruction is not NOOP.  */
1691 int x86_prefetch_sse;
1692
1693 /* ix86_regparm_string as a number */
1694 static int ix86_regparm;
1695
1696 /* -mstackrealign option.  The variable itself is defined elsewhere (only
   an extern declaration appears here).  The string is presumably the
   spelling of the matching function attribute -- TODO confirm.  */
1697 extern int ix86_force_align_arg_pointer;
1698 static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";
1699
1700 /* Preferred alignment for stack boundary in bits.  */
1701 unsigned int ix86_preferred_stack_boundary;
1702
1703 /* Values 1-5: see jump.c */
1704 int ix86_branch_cost;
1705
1706 /* Variables which are this size or smaller are put in the data/bss
1707    or ldata/lbss sections.  The initial value is 65536.
   NOTE(review): the sentence above does not say which sizes go to which
   pair of sections; presumably larger objects are the ones placed in
   ldata/lbss -- confirm against the section-selection code.  */
1708
1709 int ix86_section_threshold = 65536;
1710
1711 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  The buffer is 16 chars;
   internal_label_prefix_len is presumably the length of the prefix
   stored in it.  */
1712 char internal_label_prefix[16];
1713 int internal_label_prefix_len;
1714
1715 /* Fence to use after loop using movnt.  */
1716 tree x86_mfence;
1717
1718 /* Register class used for passing given 64bit part of the argument.
1719    These represent classes as documented by the PS ABI, with the exception
1720    of SSESF, SSEDF classes, that are basically SSE class, just gcc will
1721    use SF or DFmode move instead of DImode to avoid reformatting penalties.
1722
1723    Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1724    whenever possible (upper half does contain padding).

   The enumerators appear in the same order as the strings in
   x86_64_reg_class_name, which is presumably indexed by this enum; keep
   the two lists in step when adding a class.  */
1725 enum x86_64_reg_class
1726   {
1727     X86_64_NO_CLASS,
1728     X86_64_INTEGER_CLASS,
1729     X86_64_INTEGERSI_CLASS,
1730     X86_64_SSE_CLASS,
1731     X86_64_SSESF_CLASS,
1732     X86_64_SSEDF_CLASS,
1733     X86_64_SSEUP_CLASS,
1734     X86_64_X87_CLASS,
1735     X86_64_X87UP_CLASS,
1736     X86_64_COMPLEX_X87_CLASS,
1737     X86_64_MEMORY_CLASS
1738   };
/* Printable names, one per enum x86_64_reg_class value and in the same
   order; presumably used only for diagnostic/debug output.
   NOTE(review): the last string, which lines up with X86_64_MEMORY_CLASS,
   is "no" -- the same text as X86_64_NO_CLASS.  It looks like it was meant
   to be "memory"; confirm before changing, since the text is emitted at
   run time.  */
1739 static const char * const x86_64_reg_class_name[] =
1740 {
1741   "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1742   "sseup", "x87", "x87up", "cplx87", "no"
1743 };
1744
/* Presumably the largest number of x86_64_reg_class values used to
   describe a single argument -- TODO confirm against the classifier.  */
1745 #define MAX_CLASSES 4
1746
1747 /* Table of constants used by fldpi, fldln2, etc....  It has five slots.
   ext_80387_constants_init starts out false; presumably it is set once
   the table has been filled in.  */
1748 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1749 static bool ext_80387_constants_init = 0;
1750
1751 \f
/* Forward declarations of file-local (static) functions, so they can be
   referenced before their definitions appear.  */
1752 static struct machine_function * ix86_init_machine_status (void);
1753 static rtx ix86_function_value (const_tree, const_tree, bool);
1754 static int ix86_function_regparm (const_tree, const_tree);
1755 static void ix86_compute_frame_layout (struct ix86_frame *);
1756 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1757                                                  rtx, rtx, int);
1758
1759 \f
1760 /* The svr4 ABI for the i386 says that records and unions are returned
1761    in memory.  The value 1 is only a fallback: a definition made before
   this point is left alone.  override_options copies the macro into
   flag_pcc_struct_return for 32bit targets when that flag still holds
   the value 2.  */
1762 #ifndef DEFAULT_PCC_STRUCT_RETURN
1763 #define DEFAULT_PCC_STRUCT_RETURN 1
1764 #endif
1765
1766 /* Bit flags that specify the ISA we are compiling for.  The initial
   value combines the 64bit default with the subtarget's ISA default.
   ix86_handle_option and override_options then add or remove
   OPTION_MASK_ISA_* bits.  */
1767 int ix86_isa_flags = TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_ISA_DEFAULT;
1768
1769 /* A mask of ix86_isa_flags that includes bit X if X
1770    was set or cleared on the command line.  override_options consults it
   so that an -march= default does not turn on an ISA bit the user
   addressed explicitly.  */
1771 static int ix86_isa_flags_explicit;
1772
1773 /* Define a set of ISAs which are available when a given ISA is
1774    enabled.  MMX and SSE ISAs are handled separately.

   Each *_SET mask is the ISA's own bit OR-ed with the *_SET mask of the
   ISA it builds on, so the chains expand as follows:
	3DNOW  -> MMX
	SSE4.2 -> SSE4.1 -> SSSE3 -> SSE3 -> SSE2 -> SSE
	SSE5   -> SSE4A -> SSE3 -> SSE2 -> SSE  */
1775
1776 #define OPTION_MASK_ISA_MMX_SET OPTION_MASK_ISA_MMX
1777 #define OPTION_MASK_ISA_3DNOW_SET \
1778   (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_MMX_SET)
1779
1780 #define OPTION_MASK_ISA_SSE_SET OPTION_MASK_ISA_SSE
1781 #define OPTION_MASK_ISA_SSE2_SET \
1782   (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE_SET)
1783 #define OPTION_MASK_ISA_SSE3_SET \
1784   (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2_SET)
1785 #define OPTION_MASK_ISA_SSSE3_SET \
1786   (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3_SET)
1787 #define OPTION_MASK_ISA_SSE4_1_SET \
1788   (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET)
1789 #define OPTION_MASK_ISA_SSE4_2_SET \
1790   (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET)
1791
1792 /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
1793    as -msse4.2.  */
1794 #define OPTION_MASK_ISA_SSE4_SET OPTION_MASK_ISA_SSE4_2_SET
1795
1796 #define OPTION_MASK_ISA_SSE4A_SET \
1797   (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET)
1798 #define OPTION_MASK_ISA_SSE5_SET \
1799   (OPTION_MASK_ISA_SSE5 | OPTION_MASK_ISA_SSE4A_SET)
1800
1801 /* Define a set of ISAs which aren't available when a given ISA is
1802    disabled.  MMX and SSE ISAs are handled separately.

   Each *_UNSET mask is the ISA's own bit OR-ed with the *_UNSET masks of
   the ISAs that build on it, so disabling an ISA also disables its
   dependents (e.g. SSE3 takes SSSE3, SSE4.1, SSE4.2, SSE4A and SSE5 with
   it).  Several masks name a macro that is only defined further down;
   that is fine because macros are expanded at the point of use.  */
1803
1804 #define OPTION_MASK_ISA_MMX_UNSET \
1805   (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_3DNOW_UNSET)
1806 #define OPTION_MASK_ISA_3DNOW_UNSET \
1807   (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A_UNSET)
1808 #define OPTION_MASK_ISA_3DNOW_A_UNSET OPTION_MASK_ISA_3DNOW_A
1809
1810 #define OPTION_MASK_ISA_SSE_UNSET \
1811   (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2_UNSET)
1812 #define OPTION_MASK_ISA_SSE2_UNSET \
1813   (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3_UNSET)
1814 #define OPTION_MASK_ISA_SSE3_UNSET \
1815   (OPTION_MASK_ISA_SSE3 \
1816    | OPTION_MASK_ISA_SSSE3_UNSET \
1817    | OPTION_MASK_ISA_SSE4A_UNSET )
1818 #define OPTION_MASK_ISA_SSSE3_UNSET \
1819   (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET)
1820 #define OPTION_MASK_ISA_SSE4_1_UNSET \
1821   (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET)
1822 #define OPTION_MASK_ISA_SSE4_2_UNSET OPTION_MASK_ISA_SSE4_2
1823
1824 /* SSE4 includes both SSE4.1 and SSE4.2.  -mno-sse4 should be the same
1825    as -mno-sse4.1.  */
1826 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
1827
1828 #define OPTION_MASK_ISA_SSE4A_UNSET \
1829   (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE5_UNSET)
1830
1831 #define OPTION_MASK_ISA_SSE5_UNSET OPTION_MASK_ISA_SSE5
1832
1833 /* Vectorization library interface and handlers.  ix86_veclib_handler is
   a function pointer that starts out NULL.  The two static functions
   declared below have the same signature; presumably one of them is
   installed in the pointer when a vector math library (SVML or ACML) is
   selected -- TODO confirm against the option handling.  */
1834 tree (*ix86_veclib_handler)(enum built_in_function, tree, tree) = NULL;
1835 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
1836 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
1837
1838 /* Implement TARGET_HANDLE_OPTION.  */
1839
1840 static bool
1841 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1842 {
1843   switch (code)
1844     {
1845     case OPT_mmmx:
1846       if (value)
1847         {
1848           ix86_isa_flags |= OPTION_MASK_ISA_MMX_SET;
1849           ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_SET;
1850         }
1851       else
1852         {
1853           ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
1854           ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
1855         }
1856       return true;
1857
1858     case OPT_m3dnow:
1859       if (value)
1860         {
1861           ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_SET;
1862           ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_SET;
1863         }
1864       else
1865         {
1866           ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
1867           ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
1868         }
1869       return true;
1870
1871     case OPT_m3dnowa:
1872       return false;
1873
1874     case OPT_msse:
1875       if (value)
1876         {
1877           ix86_isa_flags |= OPTION_MASK_ISA_SSE_SET;
1878           ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_SET;
1879         }
1880       else
1881         {
1882           ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
1883           ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
1884         }
1885       return true;
1886
1887     case OPT_msse2:
1888       if (value)
1889         {
1890           ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
1891           ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
1892         }
1893       else
1894         {
1895           ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
1896           ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
1897         }
1898       return true;
1899
1900     case OPT_msse3:
1901       if (value)
1902         {
1903           ix86_isa_flags |= OPTION_MASK_ISA_SSE3_SET;
1904           ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_SET;
1905         }
1906       else
1907         {
1908           ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
1909           ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
1910         }
1911       return true;
1912
1913     case OPT_mssse3:
1914       if (value)
1915         {
1916           ix86_isa_flags |= OPTION_MASK_ISA_SSSE3_SET;
1917           ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_SET;
1918         }
1919       else
1920         {
1921           ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
1922           ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
1923         }
1924       return true;
1925
1926     case OPT_msse4_1:
1927       if (value)
1928         {
1929           ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1_SET;
1930           ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_SET;
1931         }
1932       else
1933         {
1934           ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
1935           ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
1936         }
1937       return true;
1938
1939     case OPT_msse4_2:
1940       if (value)
1941         {
1942           ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET;
1943           ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_SET;
1944         }
1945       else
1946         {
1947           ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
1948           ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
1949         }
1950       return true;
1951
1952     case OPT_msse4:
1953       ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET;
1954       ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET;
1955       return true;
1956
1957     case OPT_mno_sse4:
1958       ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
1959       ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
1960       return true;
1961
1962     case OPT_msse4a:
1963       if (value)
1964         {
1965           ix86_isa_flags |= OPTION_MASK_ISA_SSE4A_SET;
1966           ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_SET;
1967         }
1968       else
1969         {
1970           ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
1971           ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
1972         }
1973       return true;
1974
1975     case OPT_msse5:
1976       if (value)
1977         {
1978           ix86_isa_flags |= OPTION_MASK_ISA_SSE5_SET;
1979           ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_SET;
1980         }
1981       else
1982         {
1983           ix86_isa_flags &= ~OPTION_MASK_ISA_SSE5_UNSET;
1984           ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_UNSET;
1985         }
1986       return true;
1987
1988     default:
1989       return true;
1990     }
1991 }
1992
1993 /* Sometimes certain combinations of command options do not make
1994    sense on a particular target machine.  You can define a macro
1995    `OVERRIDE_OPTIONS' to take account of this.  This macro, if
1996    defined, is executed once just after all the command options have
1997    been parsed.
1998
1999    Don't use this macro to turn on various extra optimizations for
2000    `-O'.  That is what `OPTIMIZATION_OPTIONS' is for.  */
2001
2002 void
2003 override_options (void)
2004 {
2005   int i;
2006   int ix86_tune_defaulted = 0;
2007   int ix86_arch_specified = 0;
2008   unsigned int ix86_arch_mask, ix86_tune_mask;
2009
2010   /* Comes from final.c -- no real reason to change it.  */
2011 #define MAX_CODE_ALIGN 16
2012
2013   static struct ptt
2014     {
2015       const struct processor_costs *cost;       /* Processor costs */
2016       const int align_loop;                     /* Default alignments.  */
2017       const int align_loop_max_skip;
2018       const int align_jump;
2019       const int align_jump_max_skip;
2020       const int align_func;
2021     }
2022   const processor_target_table[PROCESSOR_max] =
2023     {
2024       {&i386_cost, 4, 3, 4, 3, 4},
2025       {&i486_cost, 16, 15, 16, 15, 16},
2026       {&pentium_cost, 16, 7, 16, 7, 16},
2027       {&pentiumpro_cost, 16, 15, 16, 10, 16},
2028       {&geode_cost, 0, 0, 0, 0, 0},
2029       {&k6_cost, 32, 7, 32, 7, 32},
2030       {&athlon_cost, 16, 7, 16, 7, 16},
2031       {&pentium4_cost, 0, 0, 0, 0, 0},
2032       {&k8_cost, 16, 7, 16, 7, 16},
2033       {&nocona_cost, 0, 0, 0, 0, 0},
2034       {&core2_cost, 16, 10, 16, 10, 16},
2035       {&generic32_cost, 16, 7, 16, 7, 16},
2036       {&generic64_cost, 16, 10, 16, 10, 16},
2037       {&amdfam10_cost, 32, 24, 32, 7, 32}
2038     };
2039
2040   static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
2041     {
2042       "generic",
2043       "i386",
2044       "i486",
2045       "pentium",
2046       "pentium-mmx",
2047       "pentiumpro",
2048       "pentium2",
2049       "pentium3",
2050       "pentium4",
2051       "pentium-m",
2052       "prescott",
2053       "nocona",
2054       "core2",
2055       "geode",
2056       "k6",
2057       "k6-2",
2058       "k6-3",
2059       "athlon",
2060       "athlon-4",
2061       "k8",
2062       "amdfam10"
2063     };
2064
2065   enum pta_flags
2066     {
2067       PTA_SSE = 1 << 0,
2068       PTA_SSE2 = 1 << 1,
2069       PTA_SSE3 = 1 << 2,
2070       PTA_MMX = 1 << 3,
2071       PTA_PREFETCH_SSE = 1 << 4,
2072       PTA_3DNOW = 1 << 5,
2073       PTA_3DNOW_A = 1 << 6,
2074       PTA_64BIT = 1 << 7,
2075       PTA_SSSE3 = 1 << 8,
2076       PTA_CX16 = 1 << 9,
2077       PTA_POPCNT = 1 << 10,
2078       PTA_ABM = 1 << 11,
2079       PTA_SSE4A = 1 << 12,
2080       PTA_NO_SAHF = 1 << 13,
2081       PTA_SSE4_1 = 1 << 14,
2082       PTA_SSE4_2 = 1 << 15,
2083       PTA_SSE5 = 1 << 16,
2084       PTA_AES = 1 << 17,
2085       PTA_PCLMUL = 1 << 18
2086     };
2087
2088   static struct pta
2089     {
2090       const char *const name;           /* processor name or nickname.  */
2091       const enum processor_type processor;
2092       const unsigned /*enum pta_flags*/ flags;
2093     }
2094   const processor_alias_table[] =
2095     {
2096       {"i386", PROCESSOR_I386, 0},
2097       {"i486", PROCESSOR_I486, 0},
2098       {"i586", PROCESSOR_PENTIUM, 0},
2099       {"pentium", PROCESSOR_PENTIUM, 0},
2100       {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
2101       {"winchip-c6", PROCESSOR_I486, PTA_MMX},
2102       {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
2103       {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
2104       {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
2105       {"i686", PROCESSOR_PENTIUMPRO, 0},
2106       {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
2107       {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
2108       {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
2109       {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
2110       {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_SSE2},
2111       {"pentium4", PROCESSOR_PENTIUM4, PTA_MMX |PTA_SSE | PTA_SSE2},
2112       {"pentium4m", PROCESSOR_PENTIUM4, PTA_MMX | PTA_SSE | PTA_SSE2},
2113       {"prescott", PROCESSOR_NOCONA, PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
2114       {"nocona", PROCESSOR_NOCONA, (PTA_64BIT
2115                                     | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2116                                     | PTA_CX16 | PTA_NO_SAHF)},
2117       {"core2", PROCESSOR_CORE2, (PTA_64BIT
2118                                   | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2119                                   | PTA_SSSE3
2120                                   | PTA_CX16)},
2121       {"geode", PROCESSOR_GEODE, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2122                                   |PTA_PREFETCH_SSE)},
2123       {"k6", PROCESSOR_K6, PTA_MMX},
2124       {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
2125       {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
2126       {"athlon", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2127                                     | PTA_PREFETCH_SSE)},
2128       {"athlon-tbird", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2129                                           | PTA_PREFETCH_SSE)},
2130       {"athlon-4", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2131                                       | PTA_SSE)},
2132       {"athlon-xp", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2133                                        | PTA_SSE)},
2134       {"athlon-mp", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2135                                        | PTA_SSE)},
2136       {"x86-64", PROCESSOR_K8, (PTA_64BIT
2137                                 | PTA_MMX | PTA_SSE | PTA_SSE2
2138                                 | PTA_NO_SAHF)},
2139       {"k8", PROCESSOR_K8, (PTA_64BIT
2140                             | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2141                             | PTA_SSE | PTA_SSE2
2142                             | PTA_NO_SAHF)},
2143       {"k8-sse3", PROCESSOR_K8, (PTA_64BIT
2144                                  | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2145                                  | PTA_SSE | PTA_SSE2 | PTA_SSE3
2146                                  | PTA_NO_SAHF)},
2147       {"opteron", PROCESSOR_K8, (PTA_64BIT
2148                                  | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2149                                  | PTA_SSE | PTA_SSE2
2150                                  | PTA_NO_SAHF)},
2151       {"opteron-sse3", PROCESSOR_K8, (PTA_64BIT
2152                                       | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2153                                       | PTA_SSE | PTA_SSE2 | PTA_SSE3
2154                                       | PTA_NO_SAHF)},
2155       {"athlon64", PROCESSOR_K8, (PTA_64BIT
2156                                   | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2157                                   | PTA_SSE | PTA_SSE2
2158                                   | PTA_NO_SAHF)},
2159       {"athlon64-sse3", PROCESSOR_K8, (PTA_64BIT
2160                                        | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2161                                        | PTA_SSE | PTA_SSE2 | PTA_SSE3
2162                                        | PTA_NO_SAHF)},
2163       {"athlon-fx", PROCESSOR_K8, (PTA_64BIT
2164                                    | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2165                                    | PTA_SSE | PTA_SSE2
2166                                    | PTA_NO_SAHF)},
2167       {"amdfam10", PROCESSOR_AMDFAM10, (PTA_64BIT
2168                                         | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2169                                         | PTA_SSE | PTA_SSE2 | PTA_SSE3
2170                                         | PTA_SSE4A
2171                                         | PTA_CX16 | PTA_ABM)},
2172       {"barcelona", PROCESSOR_AMDFAM10, (PTA_64BIT
2173                                          | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2174                                          | PTA_SSE | PTA_SSE2 | PTA_SSE3
2175                                          | PTA_SSE4A
2176                                          | PTA_CX16 | PTA_ABM)},
2177       {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch.  */ },
2178       {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch.  */ },
2179     };
2180
2181   int const pta_size = ARRAY_SIZE (processor_alias_table);
2182
2183 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2184   SUBTARGET_OVERRIDE_OPTIONS;
2185 #endif
2186
2187 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
2188   SUBSUBTARGET_OVERRIDE_OPTIONS;
2189 #endif
2190
2191   /* -fPIC is the default for x86_64.  */
2192   if (TARGET_MACHO && TARGET_64BIT)
2193     flag_pic = 2;
2194
2195   /* Set the default values for switches whose default depends on TARGET_64BIT
2196      in case they weren't overwritten by command line options.  */
2197   if (TARGET_64BIT)
2198     {
2199       /* Mach-O doesn't support omitting the frame pointer for now.  */
2200       if (flag_omit_frame_pointer == 2)
2201         flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
2202       if (flag_asynchronous_unwind_tables == 2)
2203         flag_asynchronous_unwind_tables = 1;
2204       if (flag_pcc_struct_return == 2)
2205         flag_pcc_struct_return = 0;
2206     }
2207   else
2208     {
2209       if (flag_omit_frame_pointer == 2)
2210         flag_omit_frame_pointer = 0;
2211       if (flag_asynchronous_unwind_tables == 2)
2212         flag_asynchronous_unwind_tables = 0;
2213       if (flag_pcc_struct_return == 2)
2214         flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
2215     }
2216
2217   /* Need to check -mtune=generic first.  */
2218   if (ix86_tune_string)
2219     {
2220       if (!strcmp (ix86_tune_string, "generic")
2221           || !strcmp (ix86_tune_string, "i686")
2222           /* As special support for cross compilers we read -mtune=native
2223              as -mtune=generic.  With native compilers we won't see the
2224              -mtune=native, as it was changed by the driver.  */
2225           || !strcmp (ix86_tune_string, "native"))
2226         {
2227           if (TARGET_64BIT)
2228             ix86_tune_string = "generic64";
2229           else
2230             ix86_tune_string = "generic32";
2231         }
2232       else if (!strncmp (ix86_tune_string, "generic", 7))
2233         error ("bad value (%s) for -mtune= switch", ix86_tune_string);
2234     }
2235   else
2236     {
2237       if (ix86_arch_string)
2238         ix86_tune_string = ix86_arch_string;
2239       if (!ix86_tune_string)
2240         {
2241           ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
2242           ix86_tune_defaulted = 1;
2243         }
2244
2245       /* ix86_tune_string is set to ix86_arch_string or defaulted.  We
2246          need to use a sensible tune option.  */
2247       if (!strcmp (ix86_tune_string, "generic")
2248           || !strcmp (ix86_tune_string, "x86-64")
2249           || !strcmp (ix86_tune_string, "i686"))
2250         {
2251           if (TARGET_64BIT)
2252             ix86_tune_string = "generic64";
2253           else
2254             ix86_tune_string = "generic32";
2255         }
2256     }
2257   if (ix86_stringop_string)
2258     {
2259       if (!strcmp (ix86_stringop_string, "rep_byte"))
2260         stringop_alg = rep_prefix_1_byte;
2261       else if (!strcmp (ix86_stringop_string, "libcall"))
2262         stringop_alg = libcall;
2263       else if (!strcmp (ix86_stringop_string, "rep_4byte"))
2264         stringop_alg = rep_prefix_4_byte;
2265       else if (!strcmp (ix86_stringop_string, "rep_8byte"))
2266         stringop_alg = rep_prefix_8_byte;
2267       else if (!strcmp (ix86_stringop_string, "byte_loop"))
2268         stringop_alg = loop_1_byte;
2269       else if (!strcmp (ix86_stringop_string, "loop"))
2270         stringop_alg = loop;
2271       else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
2272         stringop_alg = unrolled_loop;
2273       else
2274         error ("bad value (%s) for -mstringop-strategy= switch", ix86_stringop_string);
2275     }
2276   if (!strcmp (ix86_tune_string, "x86-64"))
2277     warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated.  Use -mtune=k8 or "
2278              "-mtune=generic instead as appropriate.");
2279
2280   if (!ix86_arch_string)
2281     ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
2282   else
2283     ix86_arch_specified = 1;
2284
2285   if (!strcmp (ix86_arch_string, "generic"))
2286     error ("generic CPU can be used only for -mtune= switch");
2287   if (!strncmp (ix86_arch_string, "generic", 7))
2288     error ("bad value (%s) for -march= switch", ix86_arch_string);
2289
2290   if (ix86_cmodel_string != 0)
2291     {
2292       if (!strcmp (ix86_cmodel_string, "small"))
2293         ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2294       else if (!strcmp (ix86_cmodel_string, "medium"))
2295         ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
2296       else if (!strcmp (ix86_cmodel_string, "large"))
2297         ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
2298       else if (flag_pic)
2299         error ("code model %s does not support PIC mode", ix86_cmodel_string);
2300       else if (!strcmp (ix86_cmodel_string, "32"))
2301         ix86_cmodel = CM_32;
2302       else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
2303         ix86_cmodel = CM_KERNEL;
2304       else
2305         error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
2306     }
2307   else
2308     {
2309       /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
2310          use of rip-relative addressing.  This eliminates fixups that
2311          would otherwise be needed if this object is to be placed in a
2312          DLL, and is essentially just as efficient as direct addressing.  */
2313       if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
2314         ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
2315       else if (TARGET_64BIT)
2316         ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2317       else
2318         ix86_cmodel = CM_32;
2319     }
2320   if (ix86_asm_string != 0)
2321     {
2322       if (! TARGET_MACHO
2323           && !strcmp (ix86_asm_string, "intel"))
2324         ix86_asm_dialect = ASM_INTEL;
2325       else if (!strcmp (ix86_asm_string, "att"))
2326         ix86_asm_dialect = ASM_ATT;
2327       else
2328         error ("bad value (%s) for -masm= switch", ix86_asm_string);
2329     }
2330   if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
2331     error ("code model %qs not supported in the %s bit mode",
2332            ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
2333   if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
2334     sorry ("%i-bit mode not compiled in",
2335            (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
2336
2337   for (i = 0; i < pta_size; i++)
2338     if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
2339       {
2340         ix86_arch = processor_alias_table[i].processor;
2341         /* Default cpu tuning to the architecture.  */
2342         ix86_tune = ix86_arch;
2343
2344         if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2345           error ("CPU you selected does not support x86-64 "
2346                  "instruction set");
2347
2348         if (processor_alias_table[i].flags & PTA_MMX
2349             && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
2350           ix86_isa_flags |= OPTION_MASK_ISA_MMX;
2351         if (processor_alias_table[i].flags & PTA_3DNOW
2352             && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
2353           ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
2354         if (processor_alias_table[i].flags & PTA_3DNOW_A
2355             && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
2356           ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
2357         if (processor_alias_table[i].flags & PTA_SSE
2358             && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
2359           ix86_isa_flags |= OPTION_MASK_ISA_SSE;
2360         if (processor_alias_table[i].flags & PTA_SSE2
2361             && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
2362           ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
2363         if (processor_alias_table[i].flags & PTA_SSE3
2364             && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
2365           ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2366         if (processor_alias_table[i].flags & PTA_SSSE3
2367             && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
2368           ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
2369         if (processor_alias_table[i].flags & PTA_SSE4_1
2370             && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
2371           ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
2372         if (processor_alias_table[i].flags & PTA_SSE4_2
2373             && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
2374           ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
2375         if (processor_alias_table[i].flags & PTA_SSE4A
2376             && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
2377           ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
2378         if (processor_alias_table[i].flags & PTA_SSE5
2379             && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE5))
2380           ix86_isa_flags |= OPTION_MASK_ISA_SSE5;
2381
2382         if (processor_alias_table[i].flags & PTA_ABM)
2383           x86_abm = true;
2384         if (processor_alias_table[i].flags & PTA_CX16)
2385           x86_cmpxchg16b = true;
2386         if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM))
2387           x86_popcnt = true;
2388         if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
2389           x86_prefetch_sse = true;
2390         if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF)))
2391           x86_sahf = true;
2392         if (processor_alias_table[i].flags & PTA_AES)
2393           x86_aes = true;
2394         if (processor_alias_table[i].flags & PTA_PCLMUL)
2395           x86_pclmul = true;
2396
2397         break;
2398       }
2399
2400   if (i == pta_size)
2401     error ("bad value (%s) for -march= switch", ix86_arch_string);
2402
2403   ix86_arch_mask = 1u << ix86_arch;
2404   for (i = 0; i < X86_ARCH_LAST; ++i)
2405     ix86_arch_features[i] &= ix86_arch_mask;
2406
2407   for (i = 0; i < pta_size; i++)
2408     if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
2409       {
2410         ix86_tune = processor_alias_table[i].processor;
2411         if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2412           {
2413             if (ix86_tune_defaulted)
2414               {
2415                 ix86_tune_string = "x86-64";
2416                 for (i = 0; i < pta_size; i++)
2417                   if (! strcmp (ix86_tune_string,
2418                                 processor_alias_table[i].name))
2419                     break;
2420                 ix86_tune = processor_alias_table[i].processor;
2421               }
2422             else
2423               error ("CPU you selected does not support x86-64 "
2424                      "instruction set");
2425           }
2426         /* Intel CPUs have always interpreted SSE prefetch instructions as
2427            NOPs; so, we can enable SSE prefetch instructions even when
2428            -mtune (rather than -march) points us to a processor that has them.
2429            However, the VIA C3 gives a SIGILL, so we only do that for i686 and
2430            higher processors.  */
2431         if (TARGET_CMOVE
2432             && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
2433           x86_prefetch_sse = true;
2434         break;
2435       }
2436   if (i == pta_size)
2437     error ("bad value (%s) for -mtune= switch", ix86_tune_string);
2438
2439   /* Enable SSE2 if AES or PCLMUL is enabled.  */
2440   if ((x86_aes || x86_pclmul)
2441       && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
2442     {
2443       ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
2444       ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
2445     }
2446
2447   ix86_tune_mask = 1u << ix86_tune;
2448   for (i = 0; i < X86_TUNE_LAST; ++i)
2449     ix86_tune_features[i] &= ix86_tune_mask;
2450
2451   if (optimize_size)
2452     ix86_cost = &size_cost;
2453   else
2454     ix86_cost = processor_target_table[ix86_tune].cost;
2455
2456   /* Arrange to set up i386_stack_locals for all functions.  */
2457   init_machine_status = ix86_init_machine_status;
2458
2459   /* Validate -mregparm= value.  */
2460   if (ix86_regparm_string)
2461     {
2462       if (TARGET_64BIT)
2463         warning (0, "-mregparm is ignored in 64-bit mode");
2464       i = atoi (ix86_regparm_string);
2465       if (i < 0 || i > REGPARM_MAX)
2466         error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
2467       else
2468         ix86_regparm = i;
2469     }
2470   if (TARGET_64BIT)
2471     ix86_regparm = REGPARM_MAX;
2472
2473   /* If the user has provided any of the -malign-* options,
2474      warn and use that value only if -falign-* is not set.
2475      Remove this code in GCC 3.2 or later.  */
2476   if (ix86_align_loops_string)
2477     {
2478       warning (0, "-malign-loops is obsolete, use -falign-loops");
2479       if (align_loops == 0)
2480         {
2481           i = atoi (ix86_align_loops_string);
2482           if (i < 0 || i > MAX_CODE_ALIGN)
2483             error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2484           else
2485             align_loops = 1 << i;
2486         }
2487     }
2488
2489   if (ix86_align_jumps_string)
2490     {
2491       warning (0, "-malign-jumps is obsolete, use -falign-jumps");
2492       if (align_jumps == 0)
2493         {
2494           i = atoi (ix86_align_jumps_string);
2495           if (i < 0 || i > MAX_CODE_ALIGN)
2496             error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2497           else
2498             align_jumps = 1 << i;
2499         }
2500     }
2501
2502   if (ix86_align_funcs_string)
2503     {
2504       warning (0, "-malign-functions is obsolete, use -falign-functions");
2505       if (align_functions == 0)
2506         {
2507           i = atoi (ix86_align_funcs_string);
2508           if (i < 0 || i > MAX_CODE_ALIGN)
2509             error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2510           else
2511             align_functions = 1 << i;
2512         }
2513     }
2514
2515   /* Default align_* from the processor table.  */
2516   if (align_loops == 0)
2517     {
2518       align_loops = processor_target_table[ix86_tune].align_loop;
2519       align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
2520     }
2521   if (align_jumps == 0)
2522     {
2523       align_jumps = processor_target_table[ix86_tune].align_jump;
2524       align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
2525     }
2526   if (align_functions == 0)
2527     {
2528       align_functions = processor_target_table[ix86_tune].align_func;
2529     }
2530
2531   /* Validate -mbranch-cost= value, or provide default.  */
2532   ix86_branch_cost = ix86_cost->branch_cost;
2533   if (ix86_branch_cost_string)
2534     {
2535       i = atoi (ix86_branch_cost_string);
2536       if (i < 0 || i > 5)
2537         error ("-mbranch-cost=%d is not between 0 and 5", i);
2538       else
2539         ix86_branch_cost = i;
2540     }
2541   if (ix86_section_threshold_string)
2542     {
2543       i = atoi (ix86_section_threshold_string);
2544       if (i < 0)
2545         error ("-mlarge-data-threshold=%d is negative", i);
2546       else
2547         ix86_section_threshold = i;
2548     }
2549
2550   if (ix86_tls_dialect_string)
2551     {
2552       if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
2553         ix86_tls_dialect = TLS_DIALECT_GNU;
2554       else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
2555         ix86_tls_dialect = TLS_DIALECT_GNU2;
2556       else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
2557         ix86_tls_dialect = TLS_DIALECT_SUN;
2558       else
2559         error ("bad value (%s) for -mtls-dialect= switch",
2560                ix86_tls_dialect_string);
2561     }
2562
2563   if (ix87_precision_string)
2564     {
2565       i = atoi (ix87_precision_string);
2566       if (i != 32 && i != 64 && i != 80)
2567         error ("pc%d is not valid precision setting (32, 64 or 80)", i);
2568     }
2569
2570   if (TARGET_64BIT)
2571     {
2572       target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
2573
2574       /* Enable by default the SSE and MMX builtins.  Do allow the user to
2575          explicitly disable any of these.  In particular, disabling SSE and
2576          MMX for kernel code is extremely useful.  */
2577       if (!ix86_arch_specified)
2578       ix86_isa_flags
2579         |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
2580              | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
2581
2582       if (TARGET_RTD)
2583         warning (0, "-mrtd is ignored in 64bit mode");
2584     }
2585   else
2586     {
2587       target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
2588
2589       if (!ix86_arch_specified)
2590       ix86_isa_flags
2591         |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
2592
2593       /* i386 ABI does not specify red zone.  It still makes sense to use it
2594          when programmer takes care to stack from being destroyed.  */
2595       if (!(target_flags_explicit & MASK_NO_RED_ZONE))
2596         target_flags |= MASK_NO_RED_ZONE;
2597     }
2598
2599   /* Keep nonleaf frame pointers.  */
2600   if (flag_omit_frame_pointer)
2601     target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
2602   else if (TARGET_OMIT_LEAF_FRAME_POINTER)
2603     flag_omit_frame_pointer = 1;
2604
2605   /* If we're doing fast math, we don't care about comparison order
2606      wrt NaNs.  This lets us use a shorter comparison sequence.  */
2607   if (flag_finite_math_only)
2608     target_flags &= ~MASK_IEEE_FP;
2609
2610   /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
2611      since the insns won't need emulation.  */
2612   if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
2613     target_flags &= ~MASK_NO_FANCY_MATH_387;
2614
2615   /* Likewise, if the target doesn't have a 387, or we've specified
2616      software floating point, don't use 387 inline intrinsics.  */
2617   if (!TARGET_80387)
2618     target_flags |= MASK_NO_FANCY_MATH_387;
2619
2620   /* Turn on MMX builtins for -msse.  */
2621   if (TARGET_SSE)
2622     {
2623       ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
2624       x86_prefetch_sse = true;
2625     }
2626
2627   /* Turn on popcnt instruction for -msse4.2 or -mabm.  */
2628   if (TARGET_SSE4_2 || TARGET_ABM)
2629     x86_popcnt = true;
2630
2631   /* Validate -mpreferred-stack-boundary= value, or provide default.
2632      The default of 128 bits is for Pentium III's SSE __m128.  We can't
2633      change it because of optimize_size.  Otherwise, we can't mix object
2634      files compiled with -Os and -On.  */
2635   ix86_preferred_stack_boundary = 128;
2636   if (ix86_preferred_stack_boundary_string)
2637     {
2638       i = atoi (ix86_preferred_stack_boundary_string);
2639       if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
2640         error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
2641                TARGET_64BIT ? 4 : 2);
2642       else
2643         ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
2644     }
2645
2646   /* Accept -msseregparm only if at least SSE support is enabled.  */
2647   if (TARGET_SSEREGPARM
2648       && ! TARGET_SSE)
2649     error ("-msseregparm used without SSE enabled");
2650
2651   ix86_fpmath = TARGET_FPMATH_DEFAULT;
2652   if (ix86_fpmath_string != 0)
2653     {
2654       if (! strcmp (ix86_fpmath_string, "387"))
2655         ix86_fpmath = FPMATH_387;
2656       else if (! strcmp (ix86_fpmath_string, "sse"))
2657         {
2658           if (!TARGET_SSE)
2659             {
2660               warning (0, "SSE instruction set disabled, using 387 arithmetics");
2661               ix86_fpmath = FPMATH_387;
2662             }
2663           else
2664             ix86_fpmath = FPMATH_SSE;
2665         }
2666       else if (! strcmp (ix86_fpmath_string, "387,sse")
2667                || ! strcmp (ix86_fpmath_string, "sse,387"))
2668         {
2669           if (!TARGET_SSE)
2670             {
2671               warning (0, "SSE instruction set disabled, using 387 arithmetics");
2672               ix86_fpmath = FPMATH_387;
2673             }
2674           else if (!TARGET_80387)
2675             {
2676               warning (0, "387 instruction set disabled, using SSE arithmetics");
2677               ix86_fpmath = FPMATH_SSE;
2678             }
2679           else
2680             ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
2681         }
2682       else
2683         error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
2684     }
2685
2686   /* If the i387 is disabled, then do not return values in it. */
2687   if (!TARGET_80387)
2688     target_flags &= ~MASK_FLOAT_RETURNS;
2689
2690   /* Use external vectorized library in vectorizing intrinsics.  */
2691   if (ix86_veclibabi_string)
2692     {
2693       if (strcmp (ix86_veclibabi_string, "svml") == 0)
2694         ix86_veclib_handler = ix86_veclibabi_svml;
2695       else if (strcmp (ix86_veclibabi_string, "acml") == 0)
2696         ix86_veclib_handler = ix86_veclibabi_acml;
2697       else
2698         error ("unknown vectorization library ABI type (%s) for "
2699                "-mveclibabi= switch", ix86_veclibabi_string);
2700     }
2701
2702   if ((x86_accumulate_outgoing_args & ix86_tune_mask)
2703       && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2704       && !optimize_size)
2705     target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2706
2707   /* ??? Unwind info is not correct around the CFG unless either a frame
2708      pointer is present or M_A_O_A is set.  Fixing this requires rewriting
2709      unwind info generation to be aware of the CFG and propagating states
2710      around edges.  */
2711   if ((flag_unwind_tables || flag_asynchronous_unwind_tables
2712        || flag_exceptions || flag_non_call_exceptions)
2713       && flag_omit_frame_pointer
2714       && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
2715     {
2716       if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2717         warning (0, "unwind tables currently require either a frame pointer "
2718                  "or -maccumulate-outgoing-args for correctness");
2719       target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2720     }
2721
2722   /* If stack probes are required, the space used for large function
2723      arguments on the stack must also be probed, so enable
2724      -maccumulate-outgoing-args so this happens in the prologue.  */
2725   if (TARGET_STACK_PROBE
2726       && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
2727     {
2728       if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2729         warning (0, "stack probing requires -maccumulate-outgoing-args "
2730                  "for correctness");
2731       target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2732     }
2733
2734   /* For sane SSE instruction set generation we need fcomi instruction.
2735      It is safe to enable all CMOVE instructions.  */
2736   if (TARGET_SSE)
2737     TARGET_CMOVE = 1;
2738
2739   /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix.  */
2740   {
2741     char *p;
2742     ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
2743     p = strchr (internal_label_prefix, 'X');
2744     internal_label_prefix_len = p - internal_label_prefix;
2745     *p = '\0';
2746   }
2747
2748   /* When scheduling description is not available, disable scheduler pass
2749      so it won't slow down the compilation and make x87 code slower.  */
2750   if (!TARGET_SCHEDULE)
2751     flag_schedule_insns_after_reload = flag_schedule_insns = 0;
2752
2753   if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
2754     set_param_value ("simultaneous-prefetches",
2755                      ix86_cost->simultaneous_prefetches);
2756   if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
2757     set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
2758   if (!PARAM_SET_P (PARAM_L1_CACHE_SIZE))
2759     set_param_value ("l1-cache-size", ix86_cost->l1_cache_size);
2760   if (!PARAM_SET_P (PARAM_L2_CACHE_SIZE))
2761     set_param_value ("l2-cache-size", ix86_cost->l2_cache_size);
2762
2763   /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
2764      can be optimized to ap = __builtin_next_arg (0).
2765      For abi switching it should be corrected.  */
2766   if (!TARGET_64BIT || DEFAULT_ABI == MS_ABI)
2767     targetm.expand_builtin_va_start = NULL;
2768
2769 #ifdef USE_IX86_CLD
2770   /* Use -mcld by default for 32-bit code if configured with --enable-cld.  */
2771   if (!TARGET_64BIT)
2772     target_flags |= MASK_CLD & ~target_flags_explicit;
2773 #endif
2774 }
2775 \f
2776 /* Return true if this goes in large data/bss.  */
2777
2778 static bool
2779 ix86_in_large_data_p (tree exp)
2780 {
2781   if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
2782     return false;
2783
2784   /* Functions are never large data.  */
2785   if (TREE_CODE (exp) == FUNCTION_DECL)
2786     return false;
2787
2788   if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
2789     {
2790       const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
2791       if (strcmp (section, ".ldata") == 0
2792           || strcmp (section, ".lbss") == 0)
2793         return true;
2794       return false;
2795     }
2796   else
2797     {
2798       HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
2799
2800       /* If this is an incomplete type with size 0, then we can't put it
2801          in data because it might be too big when completed.  */
2802       if (!size || size > ix86_section_threshold)
2803         return true;
2804     }
2805
2806   return false;
2807 }
2808
2809 /* Switch to the appropriate section for output of DECL.
2810    DECL is either a `VAR_DECL' node or a constant of some sort.
2811    RELOC indicates whether forming the initial value of DECL requires
2812    link-time relocations.  */
2813
2814 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
2815         ATTRIBUTE_UNUSED;
2816
2817 static section *
2818 x86_64_elf_select_section (tree decl, int reloc,
2819                            unsigned HOST_WIDE_INT align)
2820 {
2821   if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2822       && ix86_in_large_data_p (decl))
2823     {
2824       const char *sname = NULL;
2825       unsigned int flags = SECTION_WRITE;
2826       switch (categorize_decl_for_section (decl, reloc))
2827         {
2828         case SECCAT_DATA:
2829           sname = ".ldata";
2830           break;
2831         case SECCAT_DATA_REL:
2832           sname = ".ldata.rel";
2833           break;
2834         case SECCAT_DATA_REL_LOCAL:
2835           sname = ".ldata.rel.local";
2836           break;
2837         case SECCAT_DATA_REL_RO:
2838           sname = ".ldata.rel.ro";
2839           break;
2840         case SECCAT_DATA_REL_RO_LOCAL:
2841           sname = ".ldata.rel.ro.local";
2842           break;
2843         case SECCAT_BSS:
2844           sname = ".lbss";
2845           flags |= SECTION_BSS;
2846           break;
2847         case SECCAT_RODATA:
2848         case SECCAT_RODATA_MERGE_STR:
2849         case SECCAT_RODATA_MERGE_STR_INIT:
2850         case SECCAT_RODATA_MERGE_CONST:
2851           sname = ".lrodata";
2852           flags = 0;
2853           break;
2854         case SECCAT_SRODATA:
2855         case SECCAT_SDATA:
2856         case SECCAT_SBSS:
2857           gcc_unreachable ();
2858         case SECCAT_TEXT:
2859         case SECCAT_TDATA:
2860         case SECCAT_TBSS:
2861           /* We don't split these for medium model.  Place them into
2862              default sections and hope for best.  */
2863           break;
2864         case SECCAT_EMUTLS_VAR:
2865         case SECCAT_EMUTLS_TMPL:
2866           gcc_unreachable ();
2867         }
2868       if (sname)
2869         {
2870           /* We might get called with string constants, but get_named_section
2871              doesn't like them as they are not DECLs.  Also, we need to set
2872              flags in that case.  */
2873           if (!DECL_P (decl))
2874             return get_section (sname, flags, NULL);
2875           return get_named_section (decl, sname, reloc);
2876         }
2877     }
2878   return default_elf_select_section (decl, reloc, align);
2879 }
2880
2881 /* Build up a unique section name, expressed as a
2882    STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2883    RELOC indicates whether the initial value of EXP requires
2884    link-time relocations.  */
2885
2886 static void ATTRIBUTE_UNUSED
2887 x86_64_elf_unique_section (tree decl, int reloc)
2888 {
2889   if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2890       && ix86_in_large_data_p (decl))
2891     {
2892       const char *prefix = NULL;
2893       /* We only need to use .gnu.linkonce if we don't have COMDAT groups.  */
2894       bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
2895
2896       switch (categorize_decl_for_section (decl, reloc))
2897         {
2898         case SECCAT_DATA:
2899         case SECCAT_DATA_REL:
2900         case SECCAT_DATA_REL_LOCAL:
2901         case SECCAT_DATA_REL_RO:
2902         case SECCAT_DATA_REL_RO_LOCAL:
2903           prefix = one_only ? ".ld" : ".ldata";
2904           break;
2905         case SECCAT_BSS:
2906           prefix = one_only ? ".lb" : ".lbss";
2907           break;
2908         case SECCAT_RODATA:
2909         case SECCAT_RODATA_MERGE_STR:
2910         case SECCAT_RODATA_MERGE_STR_INIT:
2911         case SECCAT_RODATA_MERGE_CONST:
2912           prefix = one_only ? ".lr" : ".lrodata";
2913           break;
2914         case SECCAT_SRODATA:
2915         case SECCAT_SDATA:
2916         case SECCAT_SBSS:
2917           gcc_unreachable ();
2918         case SECCAT_TEXT:
2919         case SECCAT_TDATA:
2920         case SECCAT_TBSS:
2921           /* We don't split these for medium model.  Place them into
2922              default sections and hope for best.  */
2923           break;
2924         case SECCAT_EMUTLS_VAR:
2925           prefix = targetm.emutls.var_section;
2926           break;
2927         case SECCAT_EMUTLS_TMPL:
2928           prefix = targetm.emutls.tmpl_section;
2929           break;
2930         }
2931       if (prefix)
2932         {
2933           const char *name, *linkonce;
2934           char *string;
2935
2936           name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
2937           name = targetm.strip_name_encoding (name);
2938
2939           /* If we're using one_only, then there needs to be a .gnu.linkonce
2940              prefix to the section name.  */
2941           linkonce = one_only ? ".gnu.linkonce" : "";
2942
2943           string = ACONCAT ((linkonce, prefix, ".", name, NULL));
2944
2945           DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
2946           return;
2947         }
2948     }
2949   default_unique_section (decl, reloc);
2950 }
2951
#ifdef COMMON_ASM_OP
/* Write to FILE the assembler directive that declares NAME as an
   uninitialized external linkage data object.  SIZE and
   ALIGN / BITS_PER_UNIT are printed as the directive's operands.

   For medium model x86-64, an object whose SIZE exceeds
   ix86_section_threshold is declared with the .largecomm opcode;
   every other object uses COMMON_ASM_OP.  */
void
x86_elf_aligned_common (FILE *file,
                        const char *name, unsigned HOST_WIDE_INT size,
                        int align)
{
  const char *directive = COMMON_ASM_OP;

  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && size > (unsigned int)ix86_section_threshold)
    directive = ".largecomm\t";

  fprintf (file, "%s", directive);
  assemble_name (file, name);
  fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
           size, align / BITS_PER_UNIT);
}
#endif
2973
2974 /* Utility function for targets to use in implementing
2975    ASM_OUTPUT_ALIGNED_BSS.  */
2976
2977 void
2978 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
2979                         const char *name, unsigned HOST_WIDE_INT size,
2980                         int align)
2981 {
2982   if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2983       && size > (unsigned int)ix86_section_threshold)
2984     switch_to_section (get_named_section (decl, ".lbss", 0));
2985   else
2986     switch_to_section (bss_section);
2987   ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
2988 #ifdef ASM_DECLARE_OBJECT_NAME
2989   last_assemble_variable_decl = decl;
2990   ASM_DECLARE_OBJECT_NAME (file, name, decl);
2991 #else
2992   /* Standard thing is just output label for the object.  */
2993   ASM_OUTPUT_LABEL (file, name);
2994 #endif /* ASM_DECLARE_OBJECT_NAME */
2995   ASM_OUTPUT_SKIP (file, size ? size : 1);
2996 }
2997 \f
2998 void
2999 optimization_options (int level, int size ATTRIBUTE_UNUSED)
3000 {
3001   /* For -O2 and beyond, turn off -fschedule-insns by default.  It tends to
3002      make the problem with not enough registers even worse.  */
3003 #ifdef INSN_SCHEDULING
3004   if (level > 1)
3005     flag_schedule_insns = 0;
3006 #endif
3007
3008   if (TARGET_MACHO)
3009     /* The Darwin libraries never set errno, so we might as well
3010        avoid calling them when that's the only reason we would.  */
3011     flag_errno_math = 0;
3012
3013   /* The default values of these switches depend on the TARGET_64BIT
3014      that is not known at this moment.  Mark these values with 2 and
3015      let user the to override these.  In case there is no command line option
3016      specifying them, we will set the defaults in override_options.  */
3017   if (optimize >= 1)
3018     flag_omit_frame_pointer = 2;
3019   flag_pcc_struct_return = 2;
3020   flag_asynchronous_unwind_tables = 2;
3021   flag_vect_cost_model = 1;
3022 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
3023   SUBTARGET_OPTIMIZATION_OPTIONS;
3024 #endif
3025 }
3026 \f
3027 /* Decide whether we can make a sibling call to a function.  DECL is the
3028    declaration of the function being targeted by the call and EXP is the
3029    CALL_EXPR representing the call.  */
3030
3031 static bool
3032 ix86_function_ok_for_sibcall (tree decl, tree exp)
3033 {
3034   tree func;
3035   rtx a, b;
3036
3037   /* If we are generating position-independent code, we cannot sibcall
3038      optimize any indirect call, or a direct call to a global function,
3039      as the PLT requires %ebx be live.  */
3040   if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
3041     return false;
3042
3043   if (decl)
3044     func = decl;
3045   else
3046     {
3047       func = TREE_TYPE (CALL_EXPR_FN (exp));
3048       if (POINTER_TYPE_P (func))
3049         func = TREE_TYPE (func);
3050     }
3051
3052   /* Check that the return value locations are the same.  Like
3053      if we are returning floats on the 80387 register stack, we cannot
3054      make a sibcall from a function that doesn't return a float to a
3055      function that does or, conversely, from a function that does return
3056      a float to a function that doesn't; the necessary stack adjustment
3057      would not be executed.  This is also the place we notice
3058      differences in the return value ABI.  Note that it is ok for one
3059      of the functions to have void return type as long as the return
3060      value of the other is passed in a register.  */
3061   a = ix86_function_value (TREE_TYPE (exp), func, false);
3062   b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
3063                            cfun->decl, false);
3064   if (STACK_REG_P (a) || STACK_REG_P (b))
3065     {
3066       if (!rtx_equal_p (a, b))
3067         return false;
3068     }
3069   else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
3070     ;
3071   else if (!rtx_equal_p (a, b))
3072     return false;
3073
3074   /* If this call is indirect, we'll need to be able to use a call-clobbered
3075      register for the address of the target function.  Make sure that all
3076      such registers are not used for passing parameters.  */
3077   if (!decl && !TARGET_64BIT)
3078     {
3079       tree type;
3080
3081       /* We're looking at the CALL_EXPR, we need the type of the function.  */
3082       type = CALL_EXPR_FN (exp);                /* pointer expression */
3083       type = TREE_TYPE (type);                  /* pointer type */
3084       type = TREE_TYPE (type);                  /* function type */
3085
3086       if (ix86_function_regparm (type, NULL) >= 3)
3087         {
3088           /* ??? Need to count the actual number of registers to be used,
3089              not the possible number of registers.  Fix later.  */
3090           return false;
3091         }
3092     }
3093
3094   /* Dllimport'd functions are also called indirectly.  */
3095   if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
3096       && decl && DECL_DLLIMPORT_P (decl)
3097       && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
3098     return false;
3099
3100   /* If we forced aligned the stack, then sibcalling would unalign the
3101      stack, which may break the called function.  */
3102   if (cfun->machine->force_align_arg_pointer)
3103     return false;
3104
3105   /* Otherwise okay.  That also includes certain types of indirect calls.  */
3106   return true;
3107 }
3108
3109 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
3110    calling convention attributes;
3111    arguments as in struct attribute_spec.handler.  */
3112
3113 static tree
3114 ix86_handle_cconv_attribute (tree *node, tree name,
3115                                    tree args,
3116                                    int flags ATTRIBUTE_UNUSED,
3117                                    bool *no_add_attrs)
3118 {
3119   if (TREE_CODE (*node) != FUNCTION_TYPE
3120       && TREE_CODE (*node) != METHOD_TYPE
3121       && TREE_CODE (*node) != FIELD_DECL
3122       && TREE_CODE (*node) != TYPE_DECL)
3123     {
3124       warning (OPT_Wattributes, "%qs attribute only applies to functions",
3125                IDENTIFIER_POINTER (name));
3126       *no_add_attrs = true;
3127       return NULL_TREE;
3128     }
3129
3130   /* Can combine regparm with all attributes but fastcall.  */
3131   if (is_attribute_p ("regparm", name))
3132     {
3133       tree cst;
3134
3135       if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
3136         {
3137           error ("fastcall and regparm attributes are not compatible");
3138         }
3139
3140       cst = TREE_VALUE (args);
3141       if (TREE_CODE (cst) != INTEGER_CST)
3142         {
3143           warning (OPT_Wattributes,
3144                    "%qs attribute requires an integer constant argument",
3145                    IDENTIFIER_POINTER (name));
3146           *no_add_attrs = true;
3147         }
3148       else if (compare_tree_int (cst, REGPARM_MAX) > 0)
3149         {
3150           warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
3151                    IDENTIFIER_POINTER (name), REGPARM_MAX);
3152           *no_add_attrs = true;
3153         }
3154
3155       if (!TARGET_64BIT
3156           && lookup_attribute (ix86_force_align_arg_pointer_string,
3157                                TYPE_ATTRIBUTES (*node))
3158           && compare_tree_int (cst, REGPARM_MAX-1))
3159         {
3160           error ("%s functions limited to %d register parameters",
3161                  ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
3162         }
3163
3164       return NULL_TREE;
3165     }
3166
3167   if (TARGET_64BIT)
3168     {
3169       /* Do not warn when emulating the MS ABI.  */
3170       if (TREE_CODE (*node) != FUNCTION_TYPE || !ix86_function_type_abi (*node))
3171         warning (OPT_Wattributes, "%qs attribute ignored",
3172                  IDENTIFIER_POINTER (name));
3173       *no_add_attrs = true;
3174       return NULL_TREE;
3175     }
3176
3177   /* Can combine fastcall with stdcall (redundant) and sseregparm.  */
3178   if (is_attribute_p ("fastcall", name))
3179     {
3180       if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
3181         {
3182           error ("fastcall and cdecl attributes are not compatible");
3183         }
3184       if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
3185         {
3186           error ("fastcall and stdcall attributes are not compatible");
3187         }
3188       if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
3189         {
3190           error ("fastcall and regparm attributes are not compatible");
3191         }
3192     }
3193
3194   /* Can combine stdcall with fastcall (redundant), regparm and
3195      sseregparm.  */
3196   else if (is_attribute_p ("stdcall", name))
3197     {
3198       if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
3199         {
3200           error ("stdcall and cdecl attributes are not compatible");
3201         }
3202       if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
3203         {
3204           error ("stdcall and fastcall attributes are not compatible");
3205         }
3206     }
3207
3208   /* Can combine cdecl with regparm and sseregparm.  */
3209   else if (is_attribute_p ("cdecl", name))
3210     {
3211       if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
3212         {
3213           error ("stdcall and cdecl attributes are not compatible");
3214         }
3215       if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
3216         {
3217           error ("fastcall and cdecl attributes are not compatible");
3218         }
3219     }
3220
3221   /* Can combine sseregparm with all attributes.  */
3222
3223   return NULL_TREE;
3224 }
3225
3226 /* Return 0 if the attributes for two types are incompatible, 1 if they
3227    are compatible, and 2 if they are nearly compatible (which causes a
3228    warning to be generated).  */
3229
3230 static int
3231 ix86_comp_type_attributes (const_tree type1, const_tree type2)
3232 {
3233   /* Check for mismatch of non-default calling convention.  */
3234   const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
3235
3236   if (TREE_CODE (type1) != FUNCTION_TYPE
3237       && TREE_CODE (type1) != METHOD_TYPE)
3238     return 1;
3239
3240   /* Check for mismatched fastcall/regparm types.  */
3241   if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
3242        != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
3243       || (ix86_function_regparm (type1, NULL)
3244           != ix86_function_regparm (type2, NULL)))
3245     return 0;
3246
3247   /* Check for mismatched sseregparm types.  */
3248   if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
3249       != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
3250     return 0;
3251
3252   /* Check for mismatched return types (cdecl vs stdcall).  */
3253   if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
3254       != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
3255     return 0;
3256
3257   return 1;
3258 }
3259 \f
3260 /* Return the regparm value for a function with the indicated TYPE and DECL.
3261    DECL may be NULL when calling function indirectly
3262    or considering a libcall.  */
3263
3264 static int
3265 ix86_function_regparm (const_tree type, const_tree decl)
3266 {
3267   tree attr;
3268   int regparm = ix86_regparm;
3269
3270   static bool error_issued;
3271
3272   if (TARGET_64BIT)
3273     {
3274       if (ix86_function_type_abi (type) == DEFAULT_ABI)
3275         return regparm;
3276       return DEFAULT_ABI != SYSV_ABI ? X86_64_REGPARM_MAX : X64_REGPARM_MAX;
3277     }
3278
3279   attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
3280   if (attr)
3281     {
3282       regparm
3283         = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
3284
3285       if (decl && TREE_CODE (decl) == FUNCTION_DECL)
3286         {
3287           /* We can't use regparm(3) for nested functions because
3288              these pass static chain pointer in %ecx register.  */
3289           if (!error_issued && regparm == 3
3290               && decl_function_context (decl)
3291               && !DECL_NO_STATIC_CHAIN (decl))
3292             {
3293               error ("nested functions are limited to 2 register parameters");
3294               error_issued = true;
3295               return 0;
3296             }
3297         }
3298
3299       return regparm;
3300     }
3301
3302   if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
3303     return 2;
3304
3305   /* Use register calling convention for local functions when possible.  */
3306   if (decl && TREE_CODE (decl) == FUNCTION_DECL
3307       && flag_unit_at_a_time && !profile_flag)
3308     {
3309       /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified.  */
3310       struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
3311       if (i && i->local)
3312         {
3313           int local_regparm, globals = 0, regno;
3314           struct function *f;
3315
3316           /* Make sure no regparm register is taken by a
3317              fixed register variable.  */
3318           for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
3319             if (fixed_regs[local_regparm])
3320               break;
3321
3322           /* We can't use regparm(3) for nested functions as these use
3323              static chain pointer in third argument.  */
3324           if (local_regparm == 3
3325               && (decl_function_context (decl)
3326                   || ix86_force_align_arg_pointer)
3327               && !DECL_NO_STATIC_CHAIN (decl))
3328             local_regparm = 2;
3329
3330           /* If the function realigns its stackpointer, the prologue will
3331              clobber %ecx.  If we've already generated code for the callee,
3332              the callee DECL_STRUCT_FUNCTION is gone, so we fall back to
3333              scanning the attributes for the self-realigning property.  */
3334           f = DECL_STRUCT_FUNCTION (decl);
3335           if (local_regparm == 3
3336               && (f ? !!f->machine->force_align_arg_pointer
3337                   : !!lookup_attribute (ix86_force_align_arg_pointer_string,
3338                                         TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
3339             local_regparm = 2;
3340
3341           /* Each fixed register usage increases register pressure,
3342              so less registers should be used for argument passing.
3343              This functionality can be overriden by an explicit
3344              regparm value.  */
3345           for (regno = 0; regno <= DI_REG; regno++)
3346             if (fixed_regs[regno])
3347               globals++;
3348
3349           local_regparm
3350             = globals < local_regparm ? local_regparm - globals : 0;
3351
3352           if (local_regparm > regparm)
3353             regparm = local_regparm;
3354         }
3355     }
3356
3357   return regparm;
3358 }
3359
3360 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
3361    DFmode (2) arguments in SSE registers for a function with the
3362    indicated TYPE and DECL.  DECL may be NULL when calling function
3363    indirectly or considering a libcall.  Otherwise return 0.  */
3364
3365 static int
3366 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
3367 {
3368   gcc_assert (!TARGET_64BIT);
3369
3370   /* Use SSE registers to pass SFmode and DFmode arguments if requested
3371      by the sseregparm attribute.  */
3372   if (TARGET_SSEREGPARM
3373       || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
3374     {
3375       if (!TARGET_SSE)
3376         {
3377           if (warn)
3378             {
3379               if (decl)
3380                 error ("Calling %qD with attribute sseregparm without "
3381                        "SSE/SSE2 enabled", decl);
3382               else
3383                 error ("Calling %qT with attribute sseregparm without "
3384                        "SSE/SSE2 enabled", type);
3385             }
3386           return 0;
3387         }
3388
3389       return 2;
3390     }
3391
3392   /* For local functions, pass up to SSE_REGPARM_MAX SFmode
3393      (and DFmode for SSE2) arguments in SSE registers.  */
3394   if (decl && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
3395     {
3396       /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified.  */
3397       struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
3398       if (i && i->local)
3399         return TARGET_SSE2 ? 2 : 1;
3400     }
3401
3402   return 0;
3403 }
3404
3405 /* Return true if EAX is live at the start of the function.  Used by
3406    ix86_expand_prologue to determine if we need special help before
3407    calling allocate_stack_worker.  */
3408
3409 static bool
3410 ix86_eax_live_at_start_p (void)
3411 {
3412   /* Cheat.  Don't bother working forward from ix86_function_regparm
3413      to the function type to whether an actual argument is located in
3414      eax.  Instead just look at cfg info, which is still close enough
3415      to correct at this point.  This gives false positives for broken
3416      functions that might use uninitialized data that happens to be
3417      allocated in eax, but who cares?  */
3418   return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
3419 }
3420
3421 /* Value is the number of bytes of arguments automatically
3422    popped when returning from a subroutine call.
3423    FUNDECL is the declaration node of the function (as a tree),
3424    FUNTYPE is the data type of the function (as a tree),
3425    or for a library call it is an identifier node for the subroutine name.
3426    SIZE is the number of bytes of arguments passed on the stack.
3427
3428    On the 80386, the RTD insn may be used to pop them if the number
3429      of args is fixed, but if the number is variable then the caller
3430      must pop them all.  RTD can't be used for library calls now
3431      because the library is compiled with the Unix compiler.
3432    Use of RTD is a selectable option, since it is incompatible with
3433    standard Unix calling sequences.  If the option is not selected,
3434    the caller must always pop the args.
3435
3436    The attribute stdcall is equivalent to RTD on a per module basis.  */
3437
3438 int
3439 ix86_return_pops_args (tree fundecl, tree funtype, int size)
3440 {
3441   int rtd;
3442
3443   /* None of the 64-bit ABIs pop arguments.  */
3444   if (TARGET_64BIT)
3445     return 0;
3446
3447   rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
3448
3449   /* Cdecl functions override -mrtd, and never pop the stack.  */
3450   if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
3451     {
3452       /* Stdcall and fastcall functions will pop the stack if not
3453          variable args.  */
3454       if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
3455           || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
3456         rtd = 1;
3457
3458       if (rtd && ! stdarg_p (funtype))
3459         return size;
3460     }
3461
3462   /* Lose any fake structure return argument if it is passed on the stack.  */
3463   if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
3464       && !KEEP_AGGREGATE_RETURN_POINTER)
3465     {
3466       int nregs = ix86_function_regparm (funtype, fundecl);
3467       if (nregs == 0)
3468         return GET_MODE_SIZE (Pmode);
3469     }
3470
3471   return 0;
3472 }
3473 \f
3474 /* Argument support functions.  */
3475
3476 /* Return true when register may be used to pass function parameters.  */
3477 bool
3478 ix86_function_arg_regno_p (int regno)
3479 {
3480   int i;
3481   const int *parm_regs;
3482
3483   if (!TARGET_64BIT)
3484     {
3485       if (TARGET_MACHO)
3486         return (regno < REGPARM_MAX
3487                 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
3488       else
3489         return (regno < REGPARM_MAX
3490                 || (TARGET_MMX && MMX_REGNO_P (regno)
3491                     && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
3492                 || (TARGET_SSE && SSE_REGNO_P (regno)
3493                     && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
3494     }
3495
3496   if (TARGET_MACHO)
3497     {
3498       if (SSE_REGNO_P (regno) && TARGET_SSE)
3499         return true;
3500     }
3501   else
3502     {
3503       if (TARGET_SSE && SSE_REGNO_P (regno)
3504           && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
3505         return true;
3506     }
3507
3508   /* TODO: The function should depend on current function ABI but
3509      builtins.c would need updating then. Therefore we use the
3510      default ABI.  */
3511
3512   /* RAX is used as hidden argument to va_arg functions.  */
3513   if (DEFAULT_ABI == SYSV_ABI && regno == AX_REG)
3514     return true;
3515
3516   if (DEFAULT_ABI == MS_ABI)
3517     parm_regs = x86_64_ms_abi_int_parameter_registers;
3518   else
3519     parm_regs = x86_64_int_parameter_registers;
3520   for (i = 0; i < (DEFAULT_ABI == MS_ABI ? X64_REGPARM_MAX
3521                                          : X86_64_REGPARM_MAX); i++)
3522     if (regno == parm_regs[i])
3523       return true;
3524   return false;
3525 }
3526
3527 /* Return if we do not know how to pass TYPE solely in registers.  */
3528
3529 static bool
3530 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
3531 {
3532   if (must_pass_in_stack_var_size_or_pad (mode, type))
3533     return true;
3534
3535   /* For 32-bit, we want TImode aggregates to go on the stack.  But watch out!
3536      The layout_type routine is crafty and tries to trick us into passing
3537      currently unsupported vector types on the stack by using TImode.  */
3538   return (!TARGET_64BIT && mode == TImode
3539           && type && TREE_CODE (type) != VECTOR_TYPE);
3540 }
3541
3542 /* It returns the size, in bytes, of the area reserved for arguments passed
3543    in registers for the function represented by fndecl dependent to the used
3544    abi format.  */
3545 int
3546 ix86_reg_parm_stack_space (const_tree fndecl)
3547 {
3548   int call_abi = 0;
3549   /* For libcalls it is possible that there is no fndecl at hand.
3550      Therefore assume for this case the default abi of the target.  */
3551   if (!fndecl)
3552     call_abi = DEFAULT_ABI;
3553   else
3554     call_abi = ix86_function_abi (fndecl);
3555   if (call_abi == 1)
3556     return 32;
3557   return 0;
3558 }
3559
3560 /* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the
3561    call abi used.  */
3562 int
3563 ix86_function_type_abi (const_tree fntype)
3564 {
3565   if (TARGET_64BIT && fntype != NULL)
3566     {
3567       int abi;
3568       if (DEFAULT_ABI == SYSV_ABI)
3569         abi = lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)) ? MS_ABI : SYSV_ABI;
3570       else
3571         abi = lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)) ? SYSV_ABI : MS_ABI;
3572
3573       if (DEFAULT_ABI == MS_ABI && abi == SYSV_ABI)
3574         sorry ("using sysv calling convention on target w64 is not supported");
3575
3576       return abi;
3577     }
3578   return DEFAULT_ABI;
3579 }
3580
3581 int
3582 ix86_function_abi (const_tree fndecl)
3583 {
3584   if (! fndecl)
3585     return DEFAULT_ABI;
3586   return ix86_function_type_abi (TREE_TYPE (fndecl));
3587 }
3588
3589 /* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the
3590    call abi used.  */
3591 int
3592 ix86_cfun_abi (void)
3593 {
3594   if (! cfun || ! TARGET_64BIT)
3595     return DEFAULT_ABI;
3596   return cfun->machine->call_abi;
3597 }
3598
3599 /* regclass.c  */
3600 extern void init_regs (void);
3601
3602 /* Implementation of call abi switching target hook. Specific to FNDECL
3603    the specific call register sets are set. See also CONDITIONAL_REGISTER_USAGE
3604    for more details.
3605    To prevent redudant calls of costy function init_regs (), it checks not to
3606    reset register usage for default abi.  */
3607 void
3608 ix86_call_abi_override (const_tree fndecl)
3609 {
3610   if (fndecl == NULL_TREE)
3611     cfun->machine->call_abi = DEFAULT_ABI;
3612   else
3613     cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
3614   if (TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
3615     {
3616       if (call_used_regs[4 /*RSI*/] != 0 || call_used_regs[5 /*RDI*/] != 0)
3617         {
3618           call_used_regs[4 /*RSI*/] = 0;
3619           call_used_regs[5 /*RDI*/] = 0;
3620           init_regs ();
3621         }
3622     }
3623   else if (TARGET_64BIT)
3624     {
3625       if (call_used_regs[4 /*RSI*/] != 1 || call_used_regs[5 /*RDI*/] != 1)
3626         {
3627           call_used_regs[4 /*RSI*/] = 1;
3628           call_used_regs[5 /*RDI*/] = 1;
3629           init_regs ();
3630         }
3631     }
3632 }
3633
3634 /* Initialize a variable CUM of type CUMULATIVE_ARGS
3635    for a call to a function whose data type is FNTYPE.
3636    For a library call, FNTYPE is 0.  */
3637
3638 void
3639 init_cumulative_args (CUMULATIVE_ARGS *cum,  /* Argument info to initialize */
3640                       tree fntype,      /* tree ptr for function decl */
3641                       rtx libname,      /* SYMBOL_REF of library name or 0 */
3642                       tree fndecl)
3643 {
3644   struct cgraph_local_info *i = fndecl ? cgraph_local_info (fndecl) : NULL;
3645   memset (cum, 0, sizeof (*cum));
3646
3647   cum->call_abi = ix86_function_type_abi (fntype);
3648   /* Set up the number of registers to use for passing arguments.  */
3649   cum->nregs = ix86_regparm;
3650   if (TARGET_64BIT)
3651     {
3652       if (cum->call_abi != DEFAULT_ABI)
3653         cum->nregs = DEFAULT_ABI != SYSV_ABI ? X86_64_REGPARM_MAX
3654                                              : X64_REGPARM_MAX;
3655     }
3656   if (TARGET_SSE)
3657     {
3658       cum->sse_nregs = SSE_REGPARM_MAX;
3659       if (TARGET_64BIT)
3660         {
3661           if (cum->call_abi != DEFAULT_ABI)
3662             cum->sse_nregs = DEFAULT_ABI != SYSV_ABI ? X86_64_SSE_REGPARM_MAX
3663                                                      : X64_SSE_REGPARM_MAX;
3664         }
3665     }
3666   if (TARGET_MMX)
3667     cum->mmx_nregs = MMX_REGPARM_MAX;
3668   cum->warn_sse = true;
3669   cum->warn_mmx = true;
3670
3671   /* Because type might mismatch in between caller and callee, we need to
3672      use actual type of function for local calls.
3673      FIXME: cgraph_analyze can be told to actually record if function uses
3674      va_start so for local functions maybe_vaarg can be made aggressive
3675      helping K&R code.
3676      FIXME: once typesytem is fixed, we won't need this code anymore.  */
3677   if (i && i->local)
3678     fntype = TREE_TYPE (fndecl);
3679   cum->maybe_vaarg = (fntype
3680                       ? (!prototype_p (fntype) || stdarg_p (fntype))
3681                       : !libname);
3682
3683   if (!TARGET_64BIT)
3684     {
3685       /* If there are variable arguments, then we won't pass anything
3686          in registers in 32-bit mode. */
3687       if (stdarg_p (fntype))
3688         {
3689           cum->nregs = 0;
3690           cum->sse_nregs = 0;
3691           cum->mmx_nregs = 0;
3692           cum->warn_sse = 0;
3693           cum->warn_mmx = 0;
3694           return;
3695         }
3696
3697       /* Use ecx and edx registers if function has fastcall attribute,
3698          else look for regparm information.  */
3699       if (fntype)
3700         {
3701           if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
3702             {
3703               cum->nregs = 2;
3704               cum->fastcall = 1;
3705             }
3706           else
3707             cum->nregs = ix86_function_regparm (fntype, fndecl);
3708         }
3709
3710       /* Set up the number of SSE registers used for passing SFmode
3711          and DFmode arguments.  Warn for mismatching ABI.  */
3712       cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
3713     }
3714 }
3715
3716 /* Return the "natural" mode for TYPE.  In most cases, this is just TYPE_MODE.
3717    But in the case of vector types, it is some vector mode.
3718
3719    When we have only some of our vector isa extensions enabled, then there
3720    are some modes for which vector_mode_supported_p is false.  For these
3721    modes, the generic vector support in gcc will choose some non-vector mode
3722    in order to implement the type.  By computing the natural mode, we'll
3723    select the proper ABI location for the operand and not depend on whatever
3724    the middle-end decides to do with these vector types.  */
3725
3726 static enum machine_mode
3727 type_natural_mode (const_tree type)
3728 {
3729   enum machine_mode mode = TYPE_MODE (type);
3730
3731   if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
3732     {
3733       HOST_WIDE_INT size = int_size_in_bytes (type);
3734       if ((size == 8 || size == 16)
3735           /* ??? Generic code allows us to create width 1 vectors.  Ignore.  */
3736           && TYPE_VECTOR_SUBPARTS (type) > 1)
3737         {
3738           enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
3739
3740           if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
3741             mode = MIN_MODE_VECTOR_FLOAT;
3742           else
3743             mode = MIN_MODE_VECTOR_INT;
3744
3745           /* Get the mode which has this inner mode and number of units.  */
3746           for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
3747             if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
3748                 && GET_MODE_INNER (mode) == innermode)
3749               return mode;
3750
3751           gcc_unreachable ();
3752         }
3753     }
3754
3755   return mode;
3756 }
3757
3758 /* We want to pass a value in REGNO whose "natural" mode is MODE.  However,
3759    this may not agree with the mode that the type system has chosen for the
3760    register, which is ORIG_MODE.  If ORIG_MODE is not BLKmode, then we can
3761    go ahead and use it.  Otherwise we have to build a PARALLEL instead.  */
3762
3763 static rtx
3764 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
3765                      unsigned int regno)
3766 {
3767   rtx tmp;
3768
3769   if (orig_mode != BLKmode)
3770     tmp = gen_rtx_REG (orig_mode, regno);
3771   else
3772     {
3773       tmp = gen_rtx_REG (mode, regno);
3774       tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
3775       tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
3776     }
3777
3778   return tmp;
3779 }
3780
3781 /* x86-64 register passing implementation.  See x86-64 ABI for details.  Goal
3782    of this code is to classify each 8bytes of incoming argument by the register
3783    class and assign registers accordingly.  */
3784
3785 /* Return the union class of CLASS1 and CLASS2.
3786    See the x86-64 PS ABI for details.  */
3787
3788 static enum x86_64_reg_class
3789 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
3790 {
3791   /* Rule #1: If both classes are equal, this is the resulting class.  */
3792   if (class1 == class2)
3793     return class1;
3794
3795   /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
3796      the other class.  */
3797   if (class1 == X86_64_NO_CLASS)
3798     return class2;
3799   if (class2 == X86_64_NO_CLASS)
3800     return class1;
3801
3802   /* Rule #3: If one of the classes is MEMORY, the result is MEMORY.  */
3803   if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
3804     return X86_64_MEMORY_CLASS;
3805
3806   /* Rule #4: If one of the classes is INTEGER, the result is INTEGER.  */
3807   if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
3808       || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
3809     return X86_64_INTEGERSI_CLASS;
3810   if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
3811       || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
3812     return X86_64_INTEGER_CLASS;
3813
3814   /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
3815      MEMORY is used.  */
3816   if (class1 == X86_64_X87_CLASS
3817       || class1 == X86_64_X87UP_CLASS
3818       || class1 == X86_64_COMPLEX_X87_CLASS
3819       || class2 == X86_64_X87_CLASS
3820       || class2 == X86_64_X87UP_CLASS
3821       || class2 == X86_64_COMPLEX_X87_CLASS)
3822     return X86_64_MEMORY_CLASS;
3823
3824   /* Rule #6: Otherwise class SSE is used.  */
3825   return X86_64_SSE_CLASS;
3826 }
3827
3828 /* Classify the argument of type TYPE and mode MODE.
3829    CLASSES will be filled by the register class used to pass each word
3830    of the operand.  The number of words is returned.  In case the parameter
3831    should be passed in memory, 0 is returned. As a special case for zero
3832    sized containers, classes[0] will be NO_CLASS and 1 is returned.
3833
3834    BIT_OFFSET is used internally for handling records and specifies offset
3835    of the offset in bits modulo 256 to avoid overflow cases.
3836
3837    See the x86-64 PS ABI for details.
3838 */
3839
3840 static int
3841 classify_argument (enum machine_mode mode, const_tree type,
3842                    enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
3843 {
3844   HOST_WIDE_INT bytes =
3845     (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3846   int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3847
3848   /* Variable sized entities are always passed/returned in memory.  */
3849   if (bytes < 0)
3850     return 0;
3851
3852   if (mode != VOIDmode
3853       && targetm.calls.must_pass_in_stack (mode, type))
3854     return 0;
3855
3856   if (type && AGGREGATE_TYPE_P (type))
3857     {
3858       int i;
3859       tree field;
3860       enum x86_64_reg_class subclasses[MAX_CLASSES];
3861
3862       /* On x86-64 we pass structures larger than 16 bytes on the stack.  */
3863       if (bytes > 16)
3864         return 0;
3865
3866       for (i = 0; i < words; i++)
3867         classes[i] = X86_64_NO_CLASS;
3868
3869       /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
3870          signalize memory class, so handle it as special case.  */
3871       if (!words)
3872         {
3873           classes[0] = X86_64_NO_CLASS;
3874           return 1;
3875         }
3876
3877       /* Classify each field of record and merge classes.  */
3878       switch (TREE_CODE (type))
3879         {
3880         case RECORD_TYPE:
3881           /* And now merge the fields of structure.  */
3882           for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3883             {
3884               if (TREE_CODE (field) == FIELD_DECL)
3885                 {
3886                   int num;
3887
3888                   if (TREE_TYPE (field) == error_mark_node)
3889                     continue;
3890
3891                   /* Bitfields are always classified as integer.  Handle them
3892                      early, since later code would consider them to be
3893                      misaligned integers.  */
3894                   if (DECL_BIT_FIELD (field))
3895                     {
3896                       for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3897                            i < ((int_bit_position (field) + (bit_offset % 64))
3898                                 + tree_low_cst (DECL_SIZE (field), 0)
3899                                 + 63) / 8 / 8; i++)
3900                         classes[i] =
3901                           merge_classes (X86_64_INTEGER_CLASS,
3902                                          classes[i]);
3903                     }
3904                   else
3905                     {
3906                       num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3907                                                TREE_TYPE (field), subclasses,
3908                                                (int_bit_position (field)
3909                                                 + bit_offset) % 256);
3910                       if (!num)
3911                         return 0;
3912                       for (i = 0; i < num; i++)
3913                         {
3914                           int pos =
3915                             (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3916                           classes[i + pos] =
3917                             merge_classes (subclasses[i], classes[i + pos]);
3918                         }
3919                     }
3920                 }
3921             }
3922           break;
3923
3924         case ARRAY_TYPE:
3925           /* Arrays are handled as small records.  */
3926           {
3927             int num;
3928             num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
3929                                      TREE_TYPE (type), subclasses, bit_offset);
3930             if (!num)
3931               return 0;
3932
3933             /* The partial classes are now full classes.  */
3934             if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
3935               subclasses[0] = X86_64_SSE_CLASS;
3936             if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
3937               subclasses[0] = X86_64_INTEGER_CLASS;
3938
3939             for (i = 0; i < words; i++)
3940               classes[i] = subclasses[i % num];
3941
3942             break;
3943           }
3944         case UNION_TYPE:
3945         case QUAL_UNION_TYPE:
3946           /* Unions are similar to RECORD_TYPE but offset is always 0.
3947              */
3948           for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3949             {
3950               if (TREE_CODE (field) == FIELD_DECL)
3951                 {
3952                   int num;
3953
3954                   if (TREE_TYPE (field) == error_mark_node)
3955                     continue;
3956
3957                   num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3958                                            TREE_TYPE (field), subclasses,
3959                                            bit_offset);
3960                   if (!num)
3961                     return 0;
3962                   for (i = 0; i < num; i++)
3963                     classes[i] = merge_classes (subclasses[i], classes[i]);
3964                 }
3965             }
3966           break;
3967
3968         default:
3969           gcc_unreachable ();
3970         }
3971
3972       /* Final merger cleanup.  */
3973       for (i = 0; i < words; i++)
3974         {
3975           /* If one class is MEMORY, everything should be passed in
3976              memory.  */
3977           if (classes[i] == X86_64_MEMORY_CLASS)
3978             return 0;
3979
3980           /* The X86_64_SSEUP_CLASS should be always preceded by
3981              X86_64_SSE_CLASS.  */
3982           if (classes[i] == X86_64_SSEUP_CLASS
3983               && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
3984             classes[i] = X86_64_SSE_CLASS;
3985
3986           /*  X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS.  */
3987           if (classes[i] == X86_64_X87UP_CLASS
3988               && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
3989             classes[i] = X86_64_SSE_CLASS;
3990         }
3991       return words;
3992     }
3993
3994   /* Compute alignment needed.  We align all types to natural boundaries with
3995      exception of XFmode that is aligned to 64bits.  */
3996   if (mode != VOIDmode && mode != BLKmode)
3997     {
3998       int mode_alignment = GET_MODE_BITSIZE (mode);
3999
4000       if (mode == XFmode)
4001         mode_alignment = 128;
4002       else if (mode == XCmode)
4003         mode_alignment = 256;
4004       if (COMPLEX_MODE_P (mode))
4005         mode_alignment /= 2;
4006       /* Misaligned fields are always returned in memory.  */
4007       if (bit_offset % mode_alignment)
4008         return 0;
4009     }
4010
4011   /* for V1xx modes, just use the base mode */
4012   if (VECTOR_MODE_P (mode) && mode != V1DImode
4013       && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
4014     mode = GET_MODE_INNER (mode);
4015
4016   /* Classification of atomic types.  */
4017   switch (mode)
4018     {
4019     case SDmode:
4020     case DDmode:
4021       classes[0] = X86_64_SSE_CLASS;
4022       return 1;
4023     case TDmode:
4024       classes[0] = X86_64_SSE_CLASS;
4025       classes[1] = X86_64_SSEUP_CLASS;
4026       return 2;
4027     case DImode:
4028     case SImode:
4029     case HImode:
4030     case QImode:
4031     case CSImode:
4032     case CHImode:
4033     case CQImode:
4034       if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
4035         classes[0] = X86_64_INTEGERSI_CLASS;
4036       else
4037         classes[0] = X86_64_INTEGER_CLASS;
4038       return 1;
4039     case CDImode:
4040     case TImode:
4041       classes[0] = classes[1] = X86_64_INTEGER_CLASS;
4042       return 2;
4043     case CTImode:
4044       return 0;
4045     case SFmode:
4046       if (!(bit_offset % 64))
4047         classes[0] = X86_64_SSESF_CLASS;
4048       else
4049         classes[0] = X86_64_SSE_CLASS;
4050       return 1;
4051     case DFmode:
4052       classes[0] = X86_64_SSEDF_CLASS;
4053       return 1;
4054     case XFmode:
4055       classes[0] = X86_64_X87_CLASS;
4056       classes[1] = X86_64_X87UP_CLASS;
4057       return 2;
4058     case TFmode:
4059       classes[0] = X86_64_SSE_CLASS;
4060       classes[1] = X86_64_SSEUP_CLASS;
4061       return 2;
4062     case SCmode:
4063       classes[0] = X86_64_SSE_CLASS;
4064       return 1;
4065     case DCmode:
4066       classes[0] = X86_64_SSEDF_CLASS;
4067       classes[1] = X86_64_SSEDF_CLASS;
4068       return 2;
4069     case XCmode:
4070       classes[0] = X86_64_COMPLEX_X87_CLASS;
4071       return 1;
4072     case TCmode:
4073       /* This modes is larger than 16 bytes.  */
4074       return 0;
4075     case V4SFmode:
4076     case V4SImode:
4077     case V16QImode:
4078     case V8HImode:
4079     case V2DFmode:
4080     case V2DImode:
4081       classes[0] = X86_64_SSE_CLASS;
4082       classes[1] = X86_64_SSEUP_CLASS;
4083       return 2;
4084     case V1DImode:
4085     case V2SFmode:
4086     case V2SImode:
4087     case V4HImode:
4088     case V8QImode:
4089       classes[0] = X86_64_SSE_CLASS;
4090       return 1;
4091     case BLKmode:
4092     case VOIDmode:
4093       return 0;
4094     default:
4095       gcc_assert (VECTOR_MODE_P (mode));
4096
4097       if (bytes > 16)
4098         return 0;
4099
4100       gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
4101
4102       if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
4103         classes[0] = X86_64_INTEGERSI_CLASS;
4104       else
4105         classes[0] = X86_64_INTEGER_CLASS;
4106       classes[1] = X86_64_INTEGER_CLASS;
4107       return 1 + (bytes > 8);
4108     }
4109 }
4110
4111 /* Examine the argument and return set number of register required in each
4112    class.  Return 0 iff parameter should be passed in memory.  */
4113 static int
4114 examine_argument (enum machine_mode mode, const_tree type, int in_return,
4115                   int *int_nregs, int *sse_nregs)
4116 {
4117   enum x86_64_reg_class regclass[MAX_CLASSES];
4118   int n = classify_argument (mode, type, regclass, 0);
4119
4120   *int_nregs = 0;
4121   *sse_nregs = 0;
4122   if (!n)
4123     return 0;
4124   for (n--; n >= 0; n--)
4125     switch (regclass[n])
4126       {
4127       case X86_64_INTEGER_CLASS:
4128       case X86_64_INTEGERSI_CLASS:
4129         (*int_nregs)++;
4130         break;
4131       case X86_64_SSE_CLASS:
4132       case X86_64_SSESF_CLASS:
4133       case X86_64_SSEDF_CLASS:
4134         (*sse_nregs)++;
4135         break;
4136       case X86_64_NO_CLASS:
4137       case X86_64_SSEUP_CLASS:
4138         break;
4139       case X86_64_X87_CLASS:
4140       case X86_64_X87UP_CLASS:
4141         if (!in_return)
4142           return 0;
4143         break;
4144       case X86_64_COMPLEX_X87_CLASS:
4145         return in_return ? 2 : 0;
4146       case X86_64_MEMORY_CLASS:
4147         gcc_unreachable ();
4148       }
4149   return 1;
4150 }
4151
4152 /* Construct container for the argument used by GCC interface.  See
4153    FUNCTION_ARG for the detailed description.  */
4154
4155 static rtx
4156 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
4157                      const_tree type, int in_return, int nintregs, int nsseregs,
4158                      const int *intreg, int sse_regno)
4159 {
4160   /* The following variables hold the static issued_error state.  */
4161   static bool issued_sse_arg_error;
4162   static bool issued_sse_ret_error;
4163   static bool issued_x87_ret_error;
4164
4165   enum machine_mode tmpmode;
4166   int bytes =
4167     (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
4168   enum x86_64_reg_class regclass[MAX_CLASSES];
4169   int n;
4170   int i;
4171   int nexps = 0;
4172   int needed_sseregs, needed_intregs;
4173   rtx exp[MAX_CLASSES];
4174   rtx ret;
4175
4176   n = classify_argument (mode, type, regclass, 0);
4177   if (!n)
4178     return NULL;
4179   if (!examine_argument (mode, type, in_return, &needed_intregs,
4180                          &needed_sseregs))
4181     return NULL;
4182   if (needed_intregs > nintregs || needed_sseregs > nsseregs)
4183     return NULL;
4184
4185   /* We allowed the user to turn off SSE for kernel mode.  Don't crash if
4186      some less clueful developer tries to use floating-point anyway.  */
4187   if (needed_sseregs && !TARGET_SSE)
4188     {
4189       if (in_return)
4190         {
4191           if (!issued_sse_ret_error)
4192             {
4193               error ("SSE register return with SSE disabled");
4194               issued_sse_ret_error = true;
4195             }
4196         }
4197       else if (!issued_sse_arg_error)
4198         {
4199           error ("SSE register argument with SSE disabled");
4200           issued_sse_arg_error = true;
4201         }
4202       return NULL;
4203     }
4204
4205   /* Likewise, error if the ABI requires us to return values in the
4206      x87 registers and the user specified -mno-80387.  */
4207   if (!TARGET_80387 && in_return)
4208     for (i = 0; i < n; i++)
4209       if (regclass[i] == X86_64_X87_CLASS
4210           || regclass[i] == X86_64_X87UP_CLASS
4211           || regclass[i] == X86_64_COMPLEX_X87_CLASS)
4212         {
4213           if (!issued_x87_ret_error)
4214             {
4215               error ("x87 register return with x87 disabled");
4216               issued_x87_ret_error = true;
4217             }
4218           return NULL;
4219         }
4220
4221   /* First construct simple cases.  Avoid SCmode, since we want to use
4222      single register to pass this type.  */
4223   if (n == 1 && mode != SCmode)
4224     switch (regclass[0])
4225       {
4226       case X86_64_INTEGER_CLASS:
4227       case X86_64_INTEGERSI_CLASS:
4228         return gen_rtx_REG (mode, intreg[0]);
4229       case X86_64_SSE_CLASS:
4230       case X86_64_SSESF_CLASS:
4231       case X86_64_SSEDF_CLASS:
4232         return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
4233       case X86_64_X87_CLASS:
4234       case X86_64_COMPLEX_X87_CLASS:
4235         return gen_rtx_REG (mode, FIRST_STACK_REG);
4236       case X86_64_NO_CLASS:
4237         /* Zero sized array, struct or class.  */
4238         return NULL;
4239       default:
4240         gcc_unreachable ();
4241       }
4242   if (n == 2 && regclass[0] == X86_64_SSE_CLASS
4243       && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
4244     return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
4245
4246   if (n == 2
4247       && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
4248     return gen_rtx_REG (XFmode, FIRST_STACK_REG);
4249   if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
4250       && regclass[1] == X86_64_INTEGER_CLASS
4251       && (mode == CDImode || mode == TImode || mode == TFmode)
4252       && intreg[0] + 1 == intreg[1])
4253     return gen_rtx_REG (mode, intreg[0]);
4254
4255   /* Otherwise figure out the entries of the PARALLEL.  */
4256   for (i = 0; i < n; i++)
4257     {
4258       switch (regclass[i])
4259         {
4260           case X86_64_NO_CLASS:
4261             break;
4262           case X86_64_INTEGER_CLASS:
4263           case X86_64_INTEGERSI_CLASS:
4264             /* Merge TImodes on aligned occasions here too.  */
4265             if (i * 8 + 8 > bytes)
4266               tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
4267             else if (regclass[i] == X86_64_INTEGERSI_CLASS)
4268               tmpmode = SImode;
4269             else
4270               tmpmode = DImode;
4271             /* We've requested 24 bytes we don't have mode for.  Use DImode.  */
4272             if (tmpmode == BLKmode)
4273               tmpmode = DImode;
4274             exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
4275                                                gen_rtx_REG (tmpmode, *intreg),
4276                                                GEN_INT (i*8));
4277             intreg++;
4278             break;
4279           case X86_64_SSESF_CLASS:
4280             exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
4281                                                gen_rtx_REG (SFmode,
4282                                                             SSE_REGNO (sse_regno)),
4283                                                GEN_INT (i*8));
4284             sse_regno++;
4285             break;
4286           case X86_64_SSEDF_CLASS:
4287             exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
4288                                                gen_rtx_REG (DFmode,
4289                                                             SSE_REGNO (sse_regno)),
4290                                                GEN_INT (i*8));
4291             sse_regno++;
4292             break;
4293           case X86_64_SSE_CLASS:
4294             if (i < n - 1 && regclass[i + 1] == X86_64_SSEUP_CLASS)
4295               tmpmode = TImode;
4296             else
4297               tmpmode = DImode;
4298             exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
4299                                                gen_rtx_REG (tmpmode,
4300                                                             SSE_REGNO (sse_regno)),
4301                                                GEN_INT (i*8));
4302             if (tmpmode == TImode)
4303               i++;
4304             sse_regno++;
4305             break;
4306           default:
4307             gcc_unreachable ();
4308         }
4309     }
4310
4311   /* Empty aligned struct, union or class.  */
4312   if (nexps == 0)
4313     return NULL;
4314
4315   ret =  gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
4316   for (i = 0; i < nexps; i++)
4317     XVECEXP (ret, 0, i) = exp [i];
4318   return ret;
4319 }
4320
4321 /* Update the data in CUM to advance over an argument of mode MODE
4322    and data type TYPE.  (TYPE is null for libcalls where that information
4323    may not be available.)  */
4324
4325 static void
4326 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4327                          tree type, HOST_WIDE_INT bytes, HOST_WIDE_INT words)
4328 {
4329   switch (mode)
4330     {
4331     default:
4332       break;
4333
4334     case BLKmode:
4335       if (bytes < 0)
4336         break;
4337       /* FALLTHRU */
4338
4339     case DImode:
4340     case SImode:
4341     case HImode:
4342     case QImode:
4343       cum->words += words;
4344       cum->nregs -= words;
4345       cum->regno += words;
4346
4347       if (cum->nregs <= 0)
4348         {
4349           cum->nregs = 0;
4350           cum->regno = 0;
4351         }
4352       break;
4353
4354     case DFmode:
4355       if (cum->float_in_sse < 2)
4356         break;
4357     case SFmode:
4358       if (cum->float_in_sse < 1)
4359         break;
4360       /* FALLTHRU */
4361
4362     case TImode:
4363     case V16QImode:
4364     case V8HImode:
4365     case V4SImode:
4366     case V2DImode:
4367     case V4SFmode:
4368     case V2DFmode:
4369       if (!type || !AGGREGATE_TYPE_P (type))
4370         {
4371           cum->sse_words += words;
4372           cum->sse_nregs -= 1;
4373           cum->sse_regno += 1;
4374           if (cum->sse_nregs <= 0)
4375             {
4376               cum->sse_nregs = 0;
4377               cum->sse_regno = 0;
4378             }
4379         }
4380       break;
4381
4382     case V8QImode:
4383     case V4HImode:
4384     case V2SImode:
4385     case V2SFmode:
4386     case V1DImode:
4387       if (!type || !AGGREGATE_TYPE_P (type))
4388         {
4389           cum->mmx_words += words;
4390           cum->mmx_nregs -= 1;
4391           cum->mmx_regno += 1;
4392           if (cum->mmx_nregs <= 0)
4393             {
4394               cum->mmx_nregs = 0;
4395               cum->mmx_regno = 0;
4396             }
4397         }
4398       break;
4399     }
4400 }
4401
4402 static void
4403 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4404                          tree type, HOST_WIDE_INT words)
4405 {
4406   int int_nregs, sse_nregs;
4407
4408   if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
4409     cum->words += words;
4410   else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
4411     {
4412       cum->nregs -= int_nregs;
4413       cum->sse_nregs -= sse_nregs;
4414       cum->regno += int_nregs;
4415       cum->sse_regno += sse_nregs;
4416     }
4417   else
4418     cum->words += words;
4419 }
4420
4421 static void
4422 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
4423                             HOST_WIDE_INT words)
4424 {
4425   /* Otherwise, this should be passed indirect.  */
4426   gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
4427
4428   cum->words += words;
4429   if (cum->nregs > 0)
4430     {
4431       cum->nregs -= 1;
4432       cum->regno += 1;
4433     }
4434 }
4435
4436 void
4437 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4438                       tree type, int named ATTRIBUTE_UNUSED)
4439 {
4440   HOST_WIDE_INT bytes, words;
4441
4442   if (mode == BLKmode)
4443     bytes = int_size_in_bytes (type);
4444   else
4445     bytes = GET_MODE_SIZE (mode);
4446   words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4447
4448   if (type)
4449     mode = type_natural_mode (type);
4450
4451   if (TARGET_64BIT && (cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
4452     function_arg_advance_ms_64 (cum, bytes, words);
4453   else if (TARGET_64BIT)
4454     function_arg_advance_64 (cum, mode, type, words);
4455   else
4456     function_arg_advance_32 (cum, mode, type, bytes, words);
4457 }
4458
4459 /* Define where to put the arguments to a function.
4460    Value is zero to push the argument on the stack,
4461    or a hard register in which to store the argument.
4462
4463    MODE is the argument's machine mode.
4464    TYPE is the data type of the argument (as a tree).
4465     This is null for libcalls where that information may
4466     not be available.
4467    CUM is a variable of type CUMULATIVE_ARGS which gives info about
4468     the preceding args and about the function being called.
4469    NAMED is nonzero if this argument is a named parameter
4470     (otherwise it is an extra parameter matching an ellipsis).  */
4471
4472 static rtx
4473 function_arg_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4474                  enum machine_mode orig_mode, tree type,
4475                  HOST_WIDE_INT bytes, HOST_WIDE_INT words)
4476 {
4477   static bool warnedsse, warnedmmx;
4478
4479   /* Avoid the AL settings for the Unix64 ABI.  */
4480   if (mode == VOIDmode)
4481     return constm1_rtx;
4482
4483   switch (mode)
4484     {
4485     default:
4486       break;
4487
4488     case BLKmode:
4489       if (bytes < 0)
4490         break;
4491       /* FALLTHRU */
4492     case DImode:
4493     case SImode:
4494     case HImode:
4495     case QImode:
4496       if (words <= cum->nregs)
4497         {
4498           int regno = cum->regno;
4499
4500           /* Fastcall allocates the first two DWORD (SImode) or
4501             smaller arguments to ECX and EDX if it isn't an
4502             aggregate type .  */
4503           if (cum->fastcall)
4504             {
4505               if (mode == BLKmode
4506                   || mode == DImode
4507                   || (type && AGGREGATE_TYPE_P (type)))
4508                 break;
4509
4510               /* ECX not EAX is the first allocated register.  */
4511               if (regno == AX_REG)
4512                 regno = CX_REG;
4513             }
4514           return gen_rtx_REG (mode, regno);
4515         }
4516       break;
4517
4518     case DFmode:
4519       if (cum->float_in_sse < 2)
4520         break;
4521     case SFmode:
4522       if (cum->float_in_sse < 1)
4523         break;
4524       /* FALLTHRU */
4525     case TImode:
4526     case V16QImode:
4527     case V8HImode:
4528     case V4SImode:
4529     case V2DImode:
4530     case V4SFmode:
4531     case V2DFmode:
4532       if (!type || !AGGREGATE_TYPE_P (type))
4533         {
4534           if (!TARGET_SSE && !warnedsse && cum->warn_sse)
4535             {
4536               warnedsse = true;
4537               warning (0, "SSE vector argument without SSE enabled "
4538                        "changes the ABI");
4539             }
4540           if (cum->sse_nregs)
4541             return gen_reg_or_parallel (mode, orig_mode,
4542                                         cum->sse_regno + FIRST_SSE_REG);
4543         }
4544       break;
4545
4546     case V8QImode:
4547     case V4HImode:
4548     case V2SImode:
4549     case V2SFmode:
4550     case V1DImode:
4551       if (!type || !AGGREGATE_TYPE_P (type))
4552         {
4553           if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
4554             {
4555               warnedmmx = true;
4556               warning (0, "MMX vector argument without MMX enabled "
4557                        "changes the ABI");
4558             }
4559           if (cum->mmx_nregs)
4560             return gen_reg_or_parallel (mode, orig_mode,
4561                                         cum->mmx_regno + FIRST_MMX_REG);
4562         }
4563       break;
4564     }
4565
4566   return NULL_RTX;
4567 }
4568
4569 static rtx
4570 function_arg_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4571                  enum machine_mode orig_mode, tree type)
4572 {
4573   /* Handle a hidden AL argument containing number of registers
4574      for varargs x86-64 functions.  */
4575   if (mode == VOIDmode)
4576     return GEN_INT (cum->maybe_vaarg
4577                     ? (cum->sse_nregs < 0
4578                        ? (cum->call_abi == DEFAULT_ABI
4579                           ? SSE_REGPARM_MAX
4580                           : (DEFAULT_ABI != SYSV_ABI ? X86_64_SSE_REGPARM_MAX
4581                                                      : X64_SSE_REGPARM_MAX))
4582                : cum->sse_regno)
4583                     : -1);
4584
4585   return construct_container (mode, orig_mode, type, 0, cum->nregs,
4586                               cum->sse_nregs,
4587                               &x86_64_int_parameter_registers [cum->regno],
4588                               cum->sse_regno);
4589 }
4590
4591 static rtx
4592 function_arg_ms_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4593                     enum machine_mode orig_mode, int named,
4594                     HOST_WIDE_INT bytes)
4595 {
4596   unsigned int regno;
4597
4598   /* Avoid the AL settings for the Unix64 ABI.  */
4599   if (mode == VOIDmode)
4600     return constm1_rtx;
4601
4602   /* If we've run out of registers, it goes on the stack.  */
4603   if (cum->nregs == 0)
4604     return NULL_RTX;
4605
4606   regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
4607
4608   /* Only floating point modes are passed in anything but integer regs.  */
4609   if (TARGET_SSE && (mode == SFmode || mode == DFmode))
4610     {
4611       if (named)
4612         regno = cum->regno + FIRST_SSE_REG;
4613       else
4614         {
4615           rtx t1, t2;
4616
4617           /* Unnamed floating parameters are passed in both the
4618              SSE and integer registers.  */
4619           t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
4620           t2 = gen_rtx_REG (mode, regno);
4621           t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
4622           t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
4623           return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
4624         }
4625     }
4626   /* Handle aggregated types passed in register.  */
4627   if (orig_mode == BLKmode)
4628     {
4629       if (bytes > 0 && bytes <= 8)
4630         mode = (bytes > 4 ? DImode : SImode);
4631       if (mode == BLKmode)
4632         mode = DImode;
4633     }
4634
4635   return gen_reg_or_parallel (mode, orig_mode, regno);
4636 }
4637
4638 rtx
4639 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
4640               tree type, int named)
4641 {
4642   enum machine_mode mode = omode;
4643   HOST_WIDE_INT bytes, words;
4644
4645   if (mode == BLKmode)
4646     bytes = int_size_in_bytes (type);
4647   else
4648     bytes = GET_MODE_SIZE (mode);
4649   words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4650
4651   /* To simplify the code below, represent vector types with a vector mode
4652      even if MMX/SSE are not active.  */
4653   if (type && TREE_CODE (type) == VECTOR_TYPE)
4654     mode = type_natural_mode (type);
4655
4656   if (TARGET_64BIT && (cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
4657     return function_arg_ms_64 (cum, mode, omode, named, bytes);
4658   else if (TARGET_64BIT)
4659     return function_arg_64 (cum, mode, omode, type);
4660   else
4661     return function_arg_32 (cum, mode, omode, type, bytes, words);
4662 }
4663
4664 /* A C expression that indicates when an argument must be passed by
4665    reference.  If nonzero for an argument, a copy of that argument is
4666    made in memory and a pointer to the argument is passed instead of
4667    the argument itself.  The pointer is passed in whatever way is
4668    appropriate for passing a pointer to that type.  */
4669
4670 static bool
4671 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
4672                         enum machine_mode mode ATTRIBUTE_UNUSED,
4673                         const_tree type, bool named ATTRIBUTE_UNUSED)
4674 {
4675   /* See Windows x64 Software Convention.  */
4676   if (TARGET_64BIT && (cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
4677     {
4678       int msize = (int) GET_MODE_SIZE (mode);
4679       if (type)
4680         {
4681           /* Arrays are passed by reference.  */
4682           if (TREE_CODE (type) == ARRAY_TYPE)
4683             return true;
4684
4685           if (AGGREGATE_TYPE_P (type))
4686             {
4687               /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
4688                  are passed by reference.  */
4689               msize = int_size_in_bytes (type);
4690             }
4691         }
4692
4693       /* __m128 is passed by reference.  */
4694       switch (msize) {
4695       case 1: case 2: case 4: case 8:
4696         break;
4697       default:
4698         return true;
4699       }
4700     }
4701   else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
4702     return 1;
4703
4704   return 0;
4705 }
4706
4707 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
4708    ABI.  */
4709 static bool
4710 contains_aligned_value_p (tree type)
4711 {
4712   enum machine_mode mode = TYPE_MODE (type);
4713   if (((TARGET_SSE && SSE_REG_MODE_P (mode)) || mode == TDmode)
4714       && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
4715     return true;
4716   if (TYPE_ALIGN (type) < 128)
4717     return false;
4718
4719   if (AGGREGATE_TYPE_P (type))
4720     {
4721       /* Walk the aggregates recursively.  */
4722       switch (TREE_CODE (type))
4723         {
4724         case RECORD_TYPE:
4725         case UNION_TYPE:
4726         case QUAL_UNION_TYPE:
4727           {
4728             tree field;
4729
4730             /* Walk all the structure fields.  */
4731             for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4732               {
4733                 if (TREE_CODE (field) == FIELD_DECL
4734                     && contains_aligned_value_p (TREE_TYPE (field)))
4735                   return true;
4736               }
4737             break;
4738           }
4739
4740         case ARRAY_TYPE:
4741           /* Just for use if some languages passes arrays by value.  */
4742           if (contains_aligned_value_p (TREE_TYPE (type)))
4743             return true;
4744           break;
4745
4746         default:
4747           gcc_unreachable ();
4748         }
4749     }
4750   return false;
4751 }
4752
4753 /* Gives the alignment boundary, in bits, of an argument with the
4754    specified mode and type.  */
4755
4756 int
4757 ix86_function_arg_boundary (enum machine_mode mode, tree type)
4758 {
4759   int align;
4760   if (type)
4761     {
4762       /* Since canonical type is used for call, we convert it to
4763          canonical type if needed.  */
4764       if (!TYPE_STRUCTURAL_EQUALITY_P (type))
4765         type = TYPE_CANONICAL (type);
4766       align = TYPE_ALIGN (type);
4767     }
4768   else
4769     align = GET_MODE_ALIGNMENT (mode);
4770   if (align < PARM_BOUNDARY)
4771     align = PARM_BOUNDARY;
4772   /* In 32bit, only _Decimal128 is aligned to its natural boundary.  */
4773   if (!TARGET_64BIT && mode != TDmode)
4774     {
4775       /* i386 ABI defines all arguments to be 4 byte aligned.  We have to
4776          make an exception for SSE modes since these require 128bit
4777          alignment.
4778
4779          The handling here differs from field_alignment.  ICC aligns MMX
4780          arguments to 4 byte boundaries, while structure fields are aligned
4781          to 8 byte boundaries.  */
4782       if (!type)
4783         {
4784           if (!(TARGET_SSE && SSE_REG_MODE_P (mode)) && mode != TDmode)
4785             align = PARM_BOUNDARY;
4786         }
4787       else
4788         {
4789           if (!contains_aligned_value_p (type))
4790             align = PARM_BOUNDARY;
4791         }
4792     }
4793   if (align > BIGGEST_ALIGNMENT)
4794     align = BIGGEST_ALIGNMENT;
4795   return align;
4796 }
4797
4798 /* Return true if N is a possible register number of function value.  */
4799
4800 bool
4801 ix86_function_value_regno_p (int regno)
4802 {
4803   switch (regno)
4804     {
4805     case 0:
4806       return true;
4807
4808     case FIRST_FLOAT_REG:
4809       /* TODO: The function should depend on current function ABI but
4810        builtins.c would need updating then. Therefore we use the
4811        default ABI.  */
4812       if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
4813         return false;
4814       return TARGET_FLOAT_RETURNS_IN_80387;
4815
4816     case FIRST_SSE_REG:
4817       return TARGET_SSE;
4818
4819     case FIRST_MMX_REG:
4820       if (TARGET_MACHO || TARGET_64BIT)
4821         return false;
4822       return TARGET_MMX;
4823     }
4824
4825   return false;
4826 }
4827
4828 /* Define how to find the value returned by a function.
4829    VALTYPE is the data type of the value (as a tree).
4830    If the precise function being called is known, FUNC is its FUNCTION_DECL;
4831    otherwise, FUNC is 0.  */
4832
4833 static rtx
4834 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
4835                    const_tree fntype, const_tree fn)
4836 {
4837   unsigned int regno;
4838
4839   /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4840      we normally prevent this case when mmx is not available.  However
4841      some ABIs may require the result to be returned like DImode.  */
4842   if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4843     regno = TARGET_MMX ? FIRST_MMX_REG : 0;
4844
4845   /* 16-byte vector modes in %xmm0.  See ix86_return_in_memory for where
4846      we prevent this case when sse is not available.  However some ABIs
4847      may require the result to be returned like integer TImode.  */
4848   else if (mode == TImode
4849            || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4850     regno = TARGET_SSE ? FIRST_SSE_REG : 0;
4851
4852   /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387).  */
4853   else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
4854     regno = FIRST_FLOAT_REG;
4855   else
4856     /* Most things go in %eax.  */
4857     regno = AX_REG;
4858
4859   /* Override FP return register with %xmm0 for local functions when
4860      SSE math is enabled or for functions with sseregparm attribute.  */
4861   if ((fn || fntype) && (mode == SFmode || mode == DFmode))
4862     {
4863       int sse_level = ix86_function_sseregparm (fntype, fn, false);
4864       if ((sse_level >= 1 && mode == SFmode)
4865           || (sse_level == 2 && mode == DFmode))
4866         regno = FIRST_SSE_REG;
4867     }
4868
4869   return gen_rtx_REG (orig_mode, regno);
4870 }
4871
4872 static rtx
4873 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
4874                    const_tree valtype)
4875 {
4876   rtx ret;
4877
4878   /* Handle libcalls, which don't provide a type node.  */
4879   if (valtype == NULL)
4880     {
4881       switch (mode)
4882         {
4883         case SFmode:
4884         case SCmode:
4885         case DFmode:
4886         case DCmode:
4887         case TFmode:
4888         case SDmode:
4889         case DDmode:
4890         case TDmode:
4891           return gen_rtx_REG (mode, FIRST_SSE_REG);
4892         case XFmode:
4893         case XCmode:
4894           return gen_rtx_REG (mode, FIRST_FLOAT_REG);
4895         case TCmode:
4896           return NULL;
4897         default:
4898           return gen_rtx_REG (mode, AX_REG);
4899         }
4900     }
4901
4902   ret = construct_container (mode, orig_mode, valtype, 1,
4903                              X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
4904                              x86_64_int_return_registers, 0);
4905
4906   /* For zero sized structures, construct_container returns NULL, but we
4907      need to keep rest of compiler happy by returning meaningful value.  */
4908   if (!ret)
4909     ret = gen_rtx_REG (orig_mode, AX_REG);
4910
4911   return ret;
4912 }
4913
4914 static rtx
4915 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
4916 {
4917   unsigned int regno = AX_REG;
4918
4919   if (TARGET_SSE)
4920     {
4921       switch (GET_MODE_SIZE (mode))
4922         {
4923         case 16:
4924           if((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
4925              && !COMPLEX_MODE_P (mode))
4926             regno = FIRST_SSE_REG;
4927           break;
4928         case 8:
4929         case 4:
4930           if (mode == SFmode || mode == DFmode)
4931             regno = FIRST_SSE_REG;
4932           break;
4933         default:
4934           break;
4935         }
4936     }
4937   return gen_rtx_REG (orig_mode, regno);
4938 }
4939
4940 static rtx
4941 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
4942                        enum machine_mode orig_mode, enum machine_mode mode)
4943 {
4944   const_tree fn, fntype;
4945
4946   fn = NULL_TREE;
4947   if (fntype_or_decl && DECL_P (fntype_or_decl))
4948     fn = fntype_or_decl;
4949   fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
4950
4951   if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
4952     return function_value_ms_64 (orig_mode, mode);
4953   else if (TARGET_64BIT)
4954     return function_value_64 (orig_mode, mode, valtype);
4955   else
4956     return function_value_32 (orig_mode, mode, fntype, fn);
4957 }
4958
4959 static rtx
4960 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
4961                      bool outgoing ATTRIBUTE_UNUSED)
4962 {
4963   enum machine_mode mode, orig_mode;
4964
4965   orig_mode = TYPE_MODE (valtype);
4966   mode = type_natural_mode (valtype);
4967   return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
4968 }
4969
4970 rtx
4971 ix86_libcall_value (enum machine_mode mode)
4972 {
4973   return ix86_function_value_1 (NULL, NULL, mode, mode);
4974 }
4975
4976 /* Return true iff type is returned in memory.  */
4977
4978 static int ATTRIBUTE_UNUSED
4979 return_in_memory_32 (const_tree type, enum machine_mode mode)
4980 {
4981   HOST_WIDE_INT size;
4982
4983   if (mode == BLKmode)
4984     return 1;
4985
4986   size = int_size_in_bytes (type);
4987
4988   if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
4989     return 0;
4990
4991   if (VECTOR_MODE_P (mode) || mode == TImode)
4992     {
4993       /* User-created vectors small enough to fit in EAX.  */
4994       if (size < 8)
4995         return 0;
4996
4997       /* MMX/3dNow values are returned in MM0,
4998          except when it doesn't exits.  */
4999       if (size == 8)
5000         return (TARGET_MMX ? 0 : 1);
5001
5002       /* SSE values are returned in XMM0, except when it doesn't exist.  */
5003       if (size == 16)
5004         return (TARGET_SSE ? 0 : 1);
5005     }
5006
5007   if (mode == XFmode)
5008     return 0;
5009
5010   if (mode == TDmode)
5011     return 1;
5012
5013   if (size > 12)
5014     return 1;
5015   return 0;
5016 }
5017
5018 static int ATTRIBUTE_UNUSED
5019 return_in_memory_64 (const_tree type, enum machine_mode mode)
5020 {
5021   int needed_intregs, needed_sseregs;
5022   return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
5023 }
5024
5025 static int ATTRIBUTE_UNUSED
5026 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
5027 {
5028   HOST_WIDE_INT size = int_size_in_bytes (type);
5029
5030   /* __m128 is returned in xmm0.  */
5031   if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
5032       && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
5033     return 0;
5034
5035   /* Otherwise, the size must be exactly in [1248]. */
5036   return (size != 1 && size != 2 && size != 4 && size != 8);
5037 }
5038
5039 static bool
5040 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
5041 {
5042 #ifdef SUBTARGET_RETURN_IN_MEMORY
5043   return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
5044 #else
5045    const enum machine_mode mode = type_natural_mode (type);
5046
5047   if (TARGET_64BIT_MS_ABI)
5048      return return_in_memory_ms_64 (type, mode);
5049    else if (TARGET_64BIT)
5050      return return_in_memory_64 (type, mode);
5051    else
5052      return return_in_memory_32 (type, mode);
5053 #endif
5054 }
5055
5056 /* Return false iff TYPE is returned in memory.  This version is used
5057    on Solaris 10.  It is similar to the generic ix86_return_in_memory,
5058    but differs notably in that when MMX is available, 8-byte vectors
5059    are returned in memory, rather than in MMX registers.  */
5060
5061 bool
5062 ix86_sol10_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
5063 {
5064   int size;
5065   enum machine_mode mode = type_natural_mode (type);
5066
5067   if (TARGET_64BIT)
5068     return return_in_memory_64 (type, mode);
5069
5070   if (mode == BLKmode)
5071     return 1;
5072
5073   size = int_size_in_bytes (type);
5074
5075   if (VECTOR_MODE_P (mode))
5076     {
5077       /* Return in memory only if MMX registers *are* available.  This
5078          seems backwards, but it is consistent with the existing
5079          Solaris x86 ABI.  */
5080       if (size == 8)
5081         return TARGET_MMX;
5082       if (size == 16)
5083         return !TARGET_SSE;
5084     }
5085   else if (mode == TImode)
5086     return !TARGET_SSE;
5087   else if (mode == XFmode)
5088     return 0;
5089
5090   return size > 12;
5091 }
5092
5093 /* When returning SSE vector types, we have a choice of either
5094      (1) being abi incompatible with a -march switch, or
5095      (2) generating an error.
5096    Given no good solution, I think the safest thing is one warning.
5097    The user won't be able to use -Werror, but....
5098
5099    Choose the STRUCT_VALUE_RTX hook because that's (at present) only
5100    called in response to actually generating a caller or callee that
5101    uses such a type.  As opposed to TARGET_RETURN_IN_MEMORY, which is called
5102    via aggregate_value_p for general type probing from tree-ssa.  */
5103
5104 static rtx
5105 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
5106 {
5107   static bool warnedsse, warnedmmx;
5108
5109   if (!TARGET_64BIT && type)
5110     {
5111       /* Look at the return type of the function, not the function type.  */
5112       enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
5113
5114       if (!TARGET_SSE && !warnedsse)
5115         {
5116           if (mode == TImode
5117               || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
5118             {
5119               warnedsse = true;
5120               warning (0, "SSE vector return without SSE enabled "
5121                        "changes the ABI");
5122             }
5123         }
5124
5125       if (!TARGET_MMX && !warnedmmx)
5126         {
5127           if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
5128             {
5129               warnedmmx = true;
5130               warning (0, "MMX vector return without MMX enabled "
5131                        "changes the ABI");
5132             }
5133         }
5134     }
5135
5136   return NULL;
5137 }
5138
5139 \f
5140 /* Create the va_list data type.  */
5141
5142 static tree
5143 ix86_build_builtin_va_list (void)
5144 {
5145   tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
5146
5147   /* For i386 we use plain pointer to argument area.  */
5148   if (!TARGET_64BIT || ix86_cfun_abi () == MS_ABI)
5149     return build_pointer_type (char_type_node);
5150
5151   record = (*lang_hooks.types.make_type) (RECORD_TYPE);
5152   type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
5153
5154   f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
5155                       unsigned_type_node);
5156   f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
5157                       unsigned_type_node);
5158   f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
5159                       ptr_type_node);
5160   f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
5161                       ptr_type_node);
5162
5163   va_list_gpr_counter_field = f_gpr;
5164   va_list_fpr_counter_field = f_fpr;
5165
5166   DECL_FIELD_CONTEXT (f_gpr) = record;
5167   DECL_FIELD_CONTEXT (f_fpr) = record;
5168   DECL_FIELD_CONTEXT (f_ovf) = record;
5169   DECL_FIELD_CONTEXT (f_sav) = record;
5170
5171   TREE_CHAIN (record) = type_decl;
5172   TYPE_NAME (record) = type_decl;
5173   TYPE_FIELDS (record) = f_gpr;
5174   TREE_CHAIN (f_gpr) = f_fpr;
5175   TREE_CHAIN (f_fpr) = f_ovf;
5176   TREE_CHAIN (f_ovf) = f_sav;
5177
5178   layout_type (record);
5179
5180   /* The correct type is an array type of one element.  */
5181   return build_array_type (record, build_index_type (size_zero_node));
5182 }
5183
/* Worker function for TARGET_SETUP_INCOMING_VARARGS.

   Emit the stores that dump the argument registers not already
   consumed according to CUM into the register save area addressed
   from frame_pointer_rtx: first the remaining integer registers, then,
   through the sse_prologue_save pattern, the SSE registers.  Nothing
   is emitted when the current function uses neither the gpr nor the
   fpr part of its va_list.  */

static void
setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
{
  rtx save_area, mem;
  rtx label;
  rtx label_ref;
  rtx tmp_reg;
  rtx nsse_reg;
  alias_set_type set;
  int i;
  int regparm = ix86_regparm;

  /* If the ABI in effect is not DEFAULT_ABI, replace ix86_regparm by
     the register maximum of the other ABI.
     NOTE(review): CUM is tested for NULL here but dereferenced
     unconditionally once past the early return below.  */
  if((cum ? cum->call_abi : ix86_cfun_abi ()) != DEFAULT_ABI)
    regparm = DEFAULT_ABI != SYSV_ABI ? X86_64_REGPARM_MAX : X64_REGPARM_MAX;

  /* No part of the va_list is used: there is nothing to save.  */
  if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
    return;

  /* Indicate to allocate space on the stack for varargs save area.  */
  ix86_save_varrargs_registers = 1;
  /* We need 16-byte stack alignment to save SSE registers.  If user
     asked for lower preferred_stack_boundary, lets just hope that he knows
     what he is doing and won't varargs SSE values.

     We also may end up assuming that only 64bit values are stored in SSE
     register let some floating point program work.  */
  if (ix86_preferred_stack_boundary >= BIGGEST_ALIGNMENT)
    crtl->stack_alignment_needed = BIGGEST_ALIGNMENT;

  save_area = frame_pointer_rtx;
  set = get_varargs_alias_set ();

  /* Store each remaining integer argument register into its word-sized
     slot (slot I lives at save_area + I * UNITS_PER_WORD).  The loop
     stops at REGPARM or after va_list_gpr_size bytes worth of
     registers, whichever comes first.  */
  for (i = cum->regno;
       i < regparm
       && i < cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
       i++)
    {
      mem = gen_rtx_MEM (Pmode,
                         plus_constant (save_area, i * UNITS_PER_WORD));
      MEM_NOTRAP_P (mem) = 1;
      set_mem_alias_set (mem, set);
      emit_move_insn (mem, gen_rtx_REG (Pmode,
                                        x86_64_int_parameter_registers[i]));
    }

  if (cum->sse_nregs && cfun->va_list_fpr_size)
    {
      /* Now emit code to save SSE registers.  The AX parameter contains number
         of SSE parameter registers used to call this function.  We use
         sse_prologue_save insn template that produces computed jump across
         SSE saves.  We need some preparation work to get this working.  */

      label = gen_label_rtx ();
      label_ref = gen_rtx_LABEL_REF (Pmode, label);

      /* Compute address to jump to :
         label - 4*eax + nnamed_sse_arguments*4
         (the scale factor used by the code below is 4; compare the
         "instruction size limited by 4 bytes" remark further down).  */
      tmp_reg = gen_reg_rtx (Pmode);
      nsse_reg = gen_reg_rtx (Pmode);
      /* nsse_reg = zero-extended %al; tmp_reg = nsse_reg * 4.  */
      emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, AX_REG)));
      emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
                              gen_rtx_MULT (Pmode, nsse_reg,
                                            GEN_INT (4))));
      /* nsse_reg = label (+ sse_regno * 4 when some SSE registers are
         already taken), then subtract tmp_reg.  */
      if (cum->sse_regno)
        emit_move_insn
          (nsse_reg,
           gen_rtx_CONST (DImode,
                          gen_rtx_PLUS (DImode,
                                        label_ref,
                                        GEN_INT (cum->sse_regno * 4))));
      else
        emit_move_insn (nsse_reg, label_ref);
      emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));

      /* Compute address of memory block we save into.  We always use pointer
         pointing 127 bytes after first byte to store - this is needed to keep
         instruction size limited by 4 bytes.  The SSE block starts right
         after the 8 * X86_64_REGPARM_MAX bytes of integer slots.  */
      tmp_reg = gen_reg_rtx (Pmode);
      emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
                              plus_constant (save_area,
                                             8 * X86_64_REGPARM_MAX + 127)));
      mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
      MEM_NOTRAP_P (mem) = 1;
      set_mem_alias_set (mem, set);
      set_mem_align (mem, BITS_PER_WORD);

      /* And finally do the dirty job!  */
      emit_insn (gen_sse_prologue_save (mem, nsse_reg,
                                        GEN_INT (cum->sse_regno), label));
    }
}
5277
5278 static void
5279 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
5280 {
5281   alias_set_type set = get_varargs_alias_set ();
5282   int i;
5283
5284   for (i = cum->regno; i < X64_REGPARM_MAX; i++)
5285     {
5286       rtx reg, mem;
5287
5288       mem = gen_rtx_MEM (Pmode,
5289                          plus_constant (virtual_incoming_args_rtx,
5290                                         i * UNITS_PER_WORD));
5291       MEM_NOTRAP_P (mem) = 1;
5292       set_mem_alias_set (mem, set);
5293
5294       reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
5295       emit_move_insn (mem, reg);
5296     }
5297 }
5298
5299 static void
5300 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5301                              tree type, int *pretend_size ATTRIBUTE_UNUSED,
5302                              int no_rtl)
5303 {
5304   CUMULATIVE_ARGS next_cum;
5305   tree fntype;
5306
5307   /* This argument doesn't appear to be used anymore.  Which is good,
5308      because the old code here didn't suppress rtl generation.  */
5309   gcc_assert (!no_rtl);
5310
5311   if (!TARGET_64BIT)
5312     return;
5313
5314   fntype = TREE_TYPE (current_function_decl);
5315
5316   /* For varargs, we do not want to skip the dummy va_dcl argument.
5317      For stdargs, we do want to skip the last named argument.  */
5318   next_cum = *cum;
5319   if (stdarg_p (fntype))
5320     function_arg_advance (&next_cum, mode, type, 1);
5321
5322   if ((cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
5323     setup_incoming_varargs_ms_64 (&next_cum);
5324   else
5325     setup_incoming_varargs_64 (&next_cum);
5326 }
5327
5328 /* Implement va_start.  */
5329
5330 static void
5331 ix86_va_start (tree valist, rtx nextarg)
5332 {
5333   HOST_WIDE_INT words, n_gpr, n_fpr;
5334   tree f_gpr, f_fpr, f_ovf, f_sav;
5335   tree gpr, fpr, ovf, sav, t;
5336   tree type;
5337
5338   /* Only 64bit target needs something special.  */
5339   if (!TARGET_64BIT || cfun->machine->call_abi == MS_ABI)
5340     {
5341       std_expand_builtin_va_start (valist, nextarg);
5342       return;
5343     }
5344
5345   f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
5346   f_fpr = TREE_CHAIN (f_gpr);
5347   f_ovf = TREE_CHAIN (f_fpr);
5348   f_sav = TREE_CHAIN (f_ovf);
5349
5350   valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
5351   gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
5352   fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
5353   ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
5354   sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
5355
5356   /* Count number of gp and fp argument registers used.  */
5357   words = crtl->args.info.words;
5358   n_gpr = crtl->args.info.regno;
5359   n_fpr = crtl->args.info.sse_regno;
5360
5361   if (cfun->va_list_gpr_size)
5362     {
5363       type = TREE_TYPE (gpr);
5364       t = build2 (GIMPLE_MODIFY_STMT, type, gpr,
5365                   build_int_cst (type, n_gpr * 8));
5366       TREE_SIDE_EFFECTS (t) = 1;
5367       expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5368     }
5369
5370   if (cfun->va_list_fpr_size)
5371     {
5372       type = TREE_TYPE (fpr);
5373       t = build2 (GIMPLE_MODIFY_STMT, type, fpr,
5374                   build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
5375       TREE_SIDE_EFFECTS (t) = 1;
5376       expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5377     }
5378
5379   /* Find the overflow area.  */
5380   type = TREE_TYPE (ovf);
5381   t = make_tree (type, virtual_incoming_args_rtx);
5382   if (words != 0)
5383     t = build2 (POINTER_PLUS_EXPR, type, t,
5384                 size_int (words * UNITS_PER_WORD));
5385   t = build2 (GIMPLE_MODIFY_STMT, type, ovf, t);
5386   TREE_SIDE_EFFECTS (t) = 1;
5387   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5388
5389   if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
5390     {
5391       /* Find the register save area.
5392          Prologue of the function save it right above stack frame.  */
5393       type = TREE_TYPE (sav);
5394       t = make_tree (type, frame_pointer_rtx);
5395       t = build2 (GIMPLE_MODIFY_STMT, type, sav, t);
5396       TREE_SIDE_EFFECTS (t) = 1;
5397       expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5398     }
5399 }
5400
/* Implement va_arg.

   Gimplify the fetch of the next variadic argument of type TYPE from
   VALIST.  Statements are appended to *PRE_P; the return value is an
   expression that dereferences the computed argument address.

   For 32-bit code and for MS_ABI functions this defers to
   std_gimplify_va_arg_expr (the only place POST_P is used).  Otherwise,
   if construct_container says the argument can live in registers, code
   is emitted that checks the gp_offset/fp_offset counters, takes the
   value from the register save area when it still fits there, and
   branches to the overflow-area path when it does not.  Arguments with
   no register container always come from the overflow area.  */

static tree
ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
{
  /* Register table handed to construct_container; with it the integer
     REGNOs in the container are compared against slot indexes and
     scaled by 8 below.  */
  static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;
  int size, rsize;
  tree lab_false, lab_over = NULL_TREE;
  tree addr, t2;
  rtx container;
  int indirect_p = 0;
  tree ptrtype;
  enum machine_mode nat_mode;

  /* Only 64bit target needs something special.  */
  if (!TARGET_64BIT || cfun->machine->call_abi == MS_ABI)
    return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);

  /* The four fields of the va_list record, in chain order.  */
  f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
  f_fpr = TREE_CHAIN (f_gpr);
  f_ovf = TREE_CHAIN (f_fpr);
  f_sav = TREE_CHAIN (f_ovf);

  valist = build_va_arg_indirect_ref (valist);
  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);

  /* Arguments passed by reference are fetched as a pointer and
     dereferenced once more at the very end.  */
  indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
  if (indirect_p)
    type = build_pointer_type (type);
  size = int_size_in_bytes (type);
  /* Size of the argument rounded up to whole words.  */
  rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  nat_mode = type_natural_mode (type);
  container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
                                   X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
                                   intreg, 0);

  /* Pull the value out of the saved registers.  */

  addr = create_tmp_var (ptr_type_node, "addr");
  DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();

  if (container)
    {
      int needed_intregs, needed_sseregs;
      bool need_temp;
      tree int_addr, sse_addr;

      /* lab_false starts the overflow-area path; lab_over joins both
         paths.  */
      lab_false = create_artificial_label ();
      lab_over = create_artificial_label ();

      examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);

      /* A multi-register value whose alignment exceeds what the save
         area slots provide has to be assembled in a temporary.  */
      need_temp = (!REG_P (container)
                   && ((needed_intregs && TYPE_ALIGN (type) > 64)
                       || TYPE_ALIGN (type) > 128));

      /* In case we are passing structure, verify that it is consecutive block
         on the register save area.  If not we need to do moves.  */
      if (!need_temp && !REG_P (container))
        {
          /* Verify that all registers are strictly consecutive  */
          if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
            {
              int i;

              /* SSE case: piece I must be SSE register I at byte
                 offset I * 16.  */
              for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
                {
                  rtx slot = XVECEXP (container, 0, i);
                  if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
                      || INTVAL (XEXP (slot, 1)) != i * 16)
                    need_temp = 1;
                }
            }
          else
            {
              int i;

              /* Integer case: piece I must be register I at byte
                 offset I * 8.  */
              for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
                {
                  rtx slot = XVECEXP (container, 0, i);
                  if (REGNO (XEXP (slot, 0)) != (unsigned int) i
                      || INTVAL (XEXP (slot, 1)) != i * 8)
                    need_temp = 1;
                }
            }
        }
      if (!need_temp)
        {
          /* The value can be read in place: both addresses alias ADDR.  */
          int_addr = addr;
          sse_addr = addr;
        }
      else
        {
          int_addr = create_tmp_var (ptr_type_node, "int_addr");
          DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
          sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
          DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
        }

      /* First ensure that we fit completely in registers.  */
      if (needed_intregs)
        {
          /* if (gpr >= (X86_64_REGPARM_MAX - needed_intregs + 1) * 8)
               goto lab_false;  */
          t = build_int_cst (TREE_TYPE (gpr),
                             (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
          t = build2 (GE_EXPR, boolean_type_node, gpr, t);
          t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
          t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
          gimplify_and_add (t, pre_p);
        }
      if (needed_sseregs)
        {
          /* Same test for fpr, whose values start after the
             X86_64_REGPARM_MAX * 8 bytes of integer slots.  */
          t = build_int_cst (TREE_TYPE (fpr),
                             (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
                             + X86_64_REGPARM_MAX * 8);
          t = build2 (GE_EXPR, boolean_type_node, fpr, t);
          t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
          t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
          gimplify_and_add (t, pre_p);
        }

      /* Compute index to start of area used for integer regs.  */
      if (needed_intregs)
        {
          /* int_addr = gpr + sav; */
          t = fold_convert (sizetype, gpr);
          t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
          t = build2 (GIMPLE_MODIFY_STMT, void_type_node, int_addr, t);
          gimplify_and_add (t, pre_p);
        }
      if (needed_sseregs)
        {
          /* sse_addr = fpr + sav; */
          t = fold_convert (sizetype, fpr);
          t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
          t = build2 (GIMPLE_MODIFY_STMT, void_type_node, sse_addr, t);
          gimplify_and_add (t, pre_p);
        }
      if (need_temp)
        {
          int i;
          tree temp = create_tmp_var (type, "va_arg_tmp");

          /* addr = &temp; */
          t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
          t = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
          gimplify_and_add (t, pre_p);

          /* Copy the value piece by piece from the save area into the
             temporary, at the offsets recorded in the container.  */
          for (i = 0; i < XVECLEN (container, 0); i++)
            {
              rtx slot = XVECEXP (container, 0, i);
              rtx reg = XEXP (slot, 0);
              enum machine_mode mode = GET_MODE (reg);
              tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
              tree addr_type = build_pointer_type (piece_type);
              tree src_addr, src;
              int src_offset;
              tree dest_addr, dest;

              if (SSE_REGNO_P (REGNO (reg)))
                {
                  src_addr = sse_addr;
                  src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
                }
              else
                {
                  src_addr = int_addr;
                  src_offset = REGNO (reg) * 8;
                }
              src_addr = fold_convert (addr_type, src_addr);
              src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
                                      size_int (src_offset));
              src = build_va_arg_indirect_ref (src_addr);

              dest_addr = fold_convert (addr_type, addr);
              dest_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, dest_addr,
                                       size_int (INTVAL (XEXP (slot, 1))));
              dest = build_va_arg_indirect_ref (dest_addr);

              t = build2 (GIMPLE_MODIFY_STMT, void_type_node, dest, src);
              gimplify_and_add (t, pre_p);
            }
        }

      /* Advance the counters past the registers just consumed.  */
      if (needed_intregs)
        {
          t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
                      build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
          t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (gpr), gpr, t);
          gimplify_and_add (t, pre_p);
        }
      if (needed_sseregs)
        {
          t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
                      build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
          t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (fpr), fpr, t);
          gimplify_and_add (t, pre_p);
        }

      /* Register path done: skip the overflow-area code.  */
      t = build1 (GOTO_EXPR, void_type_node, lab_over);
      gimplify_and_add (t, pre_p);

      t = build1 (LABEL_EXPR, void_type_node, lab_false);
      append_to_statement_list (t, pre_p);
    }

  /* ... otherwise out of the overflow area.  */

  /* Care for on-stack alignment if needed.  */
  if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
      || integer_zerop (TYPE_SIZE (type)))
    t = ovf;
 else
    {
      /* t = (ovf + align - 1) & -align, with ALIGN in bytes.  */
      HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
      t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
                  size_int (align - 1));
      t = fold_convert (sizetype, t);
      t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
                  size_int (-align));
      t = fold_convert (TREE_TYPE (ovf), t);
    }
  gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);

  /* addr = (aligned) overflow pointer.  */
  t2 = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
  gimplify_and_add (t2, pre_p);

  /* ovf = that pointer + size of the argument rounded up to words.  */
  t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
              size_int (rsize * UNITS_PER_WORD));
  t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (ovf), ovf, t);
  gimplify_and_add (t, pre_p);

  if (container)
    {
      t = build1 (LABEL_EXPR, void_type_node, lab_over);
      append_to_statement_list (t, pre_p);
    }

  ptrtype = build_pointer_type (type);
  addr = fold_convert (ptrtype, addr);

  /* One extra dereference for arguments passed by reference.  */
  if (indirect_p)
    addr = build_va_arg_indirect_ref (addr);
  return build_va_arg_indirect_ref (addr);
}
5651 \f
5652 /* Return nonzero if OPNUM's MEM should be matched
5653    in movabs* patterns.  */
5654
5655 int
5656 ix86_check_movabs (rtx insn, int opnum)
5657 {
5658   rtx set, mem;
5659
5660   set = PATTERN (insn);
5661   if (GET_CODE (set) == PARALLEL)
5662     set = XVECEXP (set, 0, 0);
5663   gcc_assert (GET_CODE (set) == SET);
5664   mem = XEXP (set, opnum);
5665   while (GET_CODE (mem) == SUBREG)
5666     mem = SUBREG_REG (mem);
5667   gcc_assert (MEM_P (mem));
5668   return (volatile_ok || !MEM_VOLATILE_P (mem));
5669 }
5670 \f
5671 /* Initialize the table of extra 80387 mathematical constants.  */
5672
5673 static void
5674 init_ext_80387_constants (void)
5675 {
5676   static const char * cst[5] =
5677   {
5678     "0.3010299956639811952256464283594894482",  /* 0: fldlg2  */
5679     "0.6931471805599453094286904741849753009",  /* 1: fldln2  */
5680     "1.4426950408889634073876517827983434472",  /* 2: fldl2e  */
5681     "3.3219280948873623478083405569094566090",  /* 3: fldl2t  */
5682     "3.1415926535897932385128089594061862044",  /* 4: fldpi   */
5683   };
5684   int i;
5685
5686   for (i = 0; i < 5; i++)
5687     {
5688       real_from_string (&ext_80387_constants_table[i], cst[i]);
5689       /* Ensure each constant is rounded to XFmode precision.  */
5690       real_convert (&ext_80387_constants_table[i],
5691                     XFmode, &ext_80387_constants_table[i]);
5692     }
5693
5694   ext_80387_constants_init = 1;
5695 }
5696
5697 /* Return true if the constant is something that can be loaded with
5698    a special instruction.  */
5699
5700 int
5701 standard_80387_constant_p (rtx x)
5702 {
5703   enum machine_mode mode = GET_MODE (x);
5704
5705   REAL_VALUE_TYPE r;
5706
5707   if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
5708     return -1;
5709
5710   if (x == CONST0_RTX (mode))
5711     return 1;
5712   if (x == CONST1_RTX (mode))
5713     return 2;
5714
5715   REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5716
5717   /* For XFmode constants, try to find a special 80387 instruction when
5718      optimizing for size or on those CPUs that benefit from them.  */
5719   if (mode == XFmode
5720       && (optimize_size || TARGET_EXT_80387_CONSTANTS))
5721     {
5722       int i;
5723
5724       if (! ext_80387_constants_init)
5725         init_ext_80387_constants ();
5726
5727       for (i = 0; i < 5; i++)
5728         if (real_identical (&r, &ext_80387_constants_table[i]))
5729           return i + 3;
5730     }
5731
5732   /* Load of the constant -0.0 or -1.0 will be split as
5733      fldz;fchs or fld1;fchs sequence.  */
5734   if (real_isnegzero (&r))
5735     return 8;
5736   if (real_identical (&r, &dconstm1))
5737     return 9;
5738
5739   return 0;
5740 }
5741
5742 /* Return the opcode of the special instruction to be used to load
5743    the constant X.  */
5744
5745 const char *
5746 standard_80387_constant_opcode (rtx x)
5747 {
5748   switch (standard_80387_constant_p (x))
5749     {
5750     case 1:
5751       return "fldz";
5752     case 2:
5753       return "fld1";
5754     case 3:
5755       return "fldlg2";
5756     case 4:
5757       return "fldln2";
5758     case 5:
5759       return "fldl2e";
5760     case 6:
5761       return "fldl2t";
5762     case 7:
5763       return "fldpi";
5764     case 8:
5765     case 9:
5766       return "#";
5767     default:
5768       gcc_unreachable ();
5769     }
5770 }
5771
5772 /* Return the CONST_DOUBLE representing the 80387 constant that is
5773    loaded by the specified special instruction.  The argument IDX
5774    matches the return value from standard_80387_constant_p.  */
5775
5776 rtx
5777 standard_80387_constant_rtx (int idx)
5778 {
5779   int i;
5780
5781   if (! ext_80387_constants_init)
5782     init_ext_80387_constants ();
5783
5784   switch (idx)
5785     {
5786     case 3:
5787     case 4:
5788     case 5:
5789     case 6:
5790     case 7:
5791       i = idx - 3;
5792       break;
5793
5794     default:
5795       gcc_unreachable ();
5796     }
5797
5798   return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
5799                                        XFmode);
5800 }
5801
5802 /* Return 1 if mode is a valid mode for sse.  */
5803 static int
5804 standard_sse_mode_p (enum machine_mode mode)
5805 {
5806   switch (mode)
5807     {
5808     case V16QImode:
5809     case V8HImode:
5810     case V4SImode:
5811     case V2DImode:
5812     case V4SFmode:
5813     case V2DFmode:
5814       return 1;
5815
5816     default:
5817       return 0;
5818     }
5819 }
5820
5821 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
5822  */
5823 int
5824 standard_sse_constant_p (rtx x)
5825 {
5826   enum machine_mode mode = GET_MODE (x);
5827
5828   if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
5829     return 1;
5830   if (vector_all_ones_operand (x, mode)
5831       && standard_sse_mode_p (mode))
5832     return TARGET_SSE2 ? 2 : -1;
5833
5834   return 0;
5835 }
5836
5837 /* Return the opcode of the special instruction to be used to load
5838    the constant X.  */
5839
5840 const char *
5841 standard_sse_constant_opcode (rtx insn, rtx x)
5842 {
5843   switch (standard_sse_constant_p (x))
5844     {
5845     case 1:
5846       if (get_attr_mode (insn) == MODE_V4SF)
5847         return "xorps\t%0, %0";
5848       else if (get_attr_mode (insn) == MODE_V2DF)
5849         return "xorpd\t%0, %0";
5850       else
5851         return "pxor\t%0, %0";
5852     case 2:
5853       return "pcmpeqd\t%0, %0";
5854     }
5855   gcc_unreachable ();
5856 }
5857
5858 /* Returns 1 if OP contains a symbol reference */
5859
5860 int
5861 symbolic_reference_mentioned_p (rtx op)
5862 {
5863   const char *fmt;
5864   int i;
5865
5866   if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
5867     return 1;
5868
5869   fmt = GET_RTX_FORMAT (GET_CODE (op));
5870   for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
5871     {
5872       if (fmt[i] == 'E')
5873         {
5874           int j;
5875
5876           for (j = XVECLEN (op, i) - 1; j >= 0; j--)
5877             if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
5878               return 1;
5879         }
5880
5881       else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
5882         return 1;
5883     }
5884
5885   return 0;
5886 }
5887
5888 /* Return 1 if it is appropriate to emit `ret' instructions in the
5889    body of a function.  Do this only if the epilogue is simple, needing a
5890    couple of insns.  Prior to reloading, we can't tell how many registers
5891    must be saved, so return 0 then.  Return 0 if there is no frame
5892    marker to de-allocate.  */
5893
5894 int
5895 ix86_can_use_return_insn_p (void)
5896 {
5897   struct ix86_frame frame;
5898
5899   if (! reload_completed || frame_pointer_needed)
5900     return 0;
5901
5902   /* Don't allow more than 32 pop, since that's all we can do
5903      with one instruction.  */
5904   if (crtl->args.pops_args
5905       && crtl->args.size >= 32768)
5906     return 0;
5907
5908   ix86_compute_frame_layout (&frame);
5909   return frame.to_allocate == 0 && frame.nregs == 0;
5910 }
5911 \f
5912 /* Value should be nonzero if functions must have frame pointers.
5913    Zero means the frame pointer need not be set up (and parms may
5914    be accessed via the stack pointer) in functions that seem suitable.  */
5915
5916 int
5917 ix86_frame_pointer_required (void)
5918 {
5919   /* If we accessed previous frames, then the generated code expects
5920      to be able to access the saved ebp value in our frame.  */
5921   if (cfun->machine->accesses_prev_frame)
5922     return 1;
5923
5924   /* Several x86 os'es need a frame pointer for other reasons,
5925      usually pertaining to setjmp.  */
5926   if (SUBTARGET_FRAME_POINTER_REQUIRED)
5927     return 1;
5928
5929   /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
5930      the frame pointer by default.  Turn it back on now if we've not
5931      got a leaf function.  */
5932   if (TARGET_OMIT_LEAF_FRAME_POINTER
5933       && (!current_function_is_leaf
5934           || ix86_current_function_calls_tls_descriptor))
5935     return 1;
5936
5937   if (crtl->profile)
5938     return 1;
5939
5940   return 0;
5941 }
5942
5943 /* Record that the current function accesses previous call frames.  */
5944
5945 void
5946 ix86_setup_frame_addresses (void)
5947 {
5948   cfun->machine->accesses_prev_frame = 1;
5949 }
5950 \f
/* USE_HIDDEN_LINKONCE is 1 when HAVE_GAS_HIDDEN is defined and
   SUPPORTS_ONE_ONLY is nonzero, or when TARGET_MACHO is nonzero, and 0
   otherwise.  get_pc_thunk_name and ix86_file_end test it to choose
   between the named "__i686.get_pc_thunk.REG" thunks and thunks that
   carry an internal label.  */
#if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
# define USE_HIDDEN_LINKONCE 1
#else
# define USE_HIDDEN_LINKONCE 0
#endif

/* Bit mask indexed by hard register number.  output_set_got sets bit
   REGNO when it emits a call to the pc thunk for that register, and
   ix86_file_end emits a thunk for every bit that is set.  */
static int pic_labels_used;
5958
5959 /* Fills in the label name that should be used for a pc thunk for
5960    the given register.  */
5961
5962 static void
5963 get_pc_thunk_name (char name[32], unsigned int regno)
5964 {
5965   gcc_assert (!TARGET_64BIT);
5966
5967   if (USE_HIDDEN_LINKONCE)
5968     sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
5969   else
5970     ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
5971 }
5972
5973
5974 /* This function generates code for -fpic that loads %ebx with
5975    the return address of the caller and then returns.  */
5976
5977 void
5978 ix86_file_end (void)
5979 {
5980   rtx xops[2];
5981   int regno;
5982
5983   for (regno = 0; regno < 8; ++regno)
5984     {
5985       char name[32];
5986
5987       if (! ((pic_labels_used >> regno) & 1))
5988         continue;
5989
5990       get_pc_thunk_name (name, regno);
5991
5992 #if TARGET_MACHO
5993       if (TARGET_MACHO)
5994         {
5995           switch_to_section (darwin_sections[text_coal_section]);
5996           fputs ("\t.weak_definition\t", asm_out_file);
5997           assemble_name (asm_out_file, name);
5998           fputs ("\n\t.private_extern\t", asm_out_file);
5999           assemble_name (asm_out_file, name);
6000           fputs ("\n", asm_out_file);
6001           ASM_OUTPUT_LABEL (asm_out_file, name);
6002         }
6003       else
6004 #endif
6005       if (USE_HIDDEN_LINKONCE)
6006         {
6007           tree decl;
6008
6009           decl = build_decl (FUNCTION_DECL, get_identifier (name),
6010                              error_mark_node);
6011           TREE_PUBLIC (decl) = 1;
6012           TREE_STATIC (decl) = 1;
6013           DECL_ONE_ONLY (decl) = 1;
6014
6015           (*targetm.asm_out.unique_section) (decl, 0);
6016           switch_to_section (get_named_section (decl, NULL, 0));
6017
6018           (*targetm.asm_out.globalize_label) (asm_out_file, name);
6019           fputs ("\t.hidden\t", asm_out_file);
6020           assemble_name (asm_out_file, name);
6021           fputc ('\n', asm_out_file);
6022           ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
6023         }
6024       else
6025         {
6026           switch_to_section (text_section);
6027           ASM_OUTPUT_LABEL (asm_out_file, name);
6028         }
6029
6030       xops[0] = gen_rtx_REG (Pmode, regno);
6031       xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
6032       if (TARGET_64BIT)
6033         output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
6034       else
6035         output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
6036       output_asm_insn ("ret", xops);
6037     }
6038
6039   if (NEED_INDICATE_EXEC_STACK)
6040     file_end_indicate_exec_stack ();
6041 }
6042
6043 /* Emit code for the SET_GOT patterns.  */
6044
6045 const char *
6046 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
6047 {
6048   rtx xops[3];
6049
6050   xops[0] = dest;
6051
6052   if (TARGET_VXWORKS_RTP && flag_pic)
6053     {
6054       /* Load (*VXWORKS_GOTT_BASE) into the PIC register.  */
6055       xops[2] = gen_rtx_MEM (Pmode,
6056                              gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
6057       output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
6058
6059       /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
6060          Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
6061          an unadorned address.  */
6062       xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
6063       SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
6064       output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
6065       return "";
6066     }
6067
6068   xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
6069
6070   if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
6071     {
6072       xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
6073
6074       if (!flag_pic)
6075         {
6076           if (TARGET_64BIT)
6077             output_asm_insn ("mov{q}\t{%2, %0|%0, %2}", xops);
6078           else
6079             output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
6080         }
6081       else
6082         output_asm_insn ("call\t%a2", xops);
6083
6084 #if TARGET_MACHO
6085       /* Output the Mach-O "canonical" label name ("Lxx$pb") here too.  This
6086          is what will be referenced by the Mach-O PIC subsystem.  */
6087       if (!label)
6088         ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
6089 #endif
6090
6091       (*targetm.asm_out.internal_label) (asm_out_file, "L",
6092                                  CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
6093
6094       if (flag_pic)
6095         {
6096           if (TARGET_64BIT)
6097             output_asm_insn ("pop{q}\t%0", xops);
6098           else
6099             output_asm_insn ("pop{l}\t%0", xops);
6100         }
6101     }
6102   else
6103     {
6104       char name[32];
6105       get_pc_thunk_name (name, REGNO (dest));
6106       pic_labels_used |= 1 << REGNO (dest);
6107
6108       xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
6109       xops[2] = gen_rtx_MEM (QImode, xops[2]);
6110       output_asm_insn ("call\t%X2", xops);
6111       /* Output the Mach-O "canonical" label name ("Lxx$pb") here too.  This
6112          is what will be referenced by the Mach-O PIC subsystem.  */
6113 #if TARGET_MACHO
6114       if (!label)
6115         ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
6116       else
6117         targetm.asm_out.internal_label (asm_out_file, "L",
6118                                            CODE_LABEL_NUMBER (label));
6119 #endif
6120     }
6121
6122   if (TARGET_MACHO)
6123     return "";
6124
6125   if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
6126     {
6127       if (TARGET_64BIT)
6128         output_asm_insn ("add{q}\t{%1, %0|%0, %1}", xops);
6129       else
6130         output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
6131     }
6132   else
6133     {
6134       if (TARGET_64BIT)
6135         output_asm_insn ("add{q}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
6136       else
6137         output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
6138     }
6139
6140   return "";
6141 }
6142
6143 /* Generate an "push" pattern for input ARG.  */
6144
6145 static rtx
6146 gen_push (rtx arg)
6147 {
6148   return gen_rtx_SET (VOIDmode,
6149                       gen_rtx_MEM (Pmode,
6150                                    gen_rtx_PRE_DEC (Pmode,
6151                                                     stack_pointer_rtx)),
6152                       arg);
6153 }
6154
6155 /* Return >= 0 if there is an unused call-clobbered register available
6156    for the entire function.  */
6157
6158 static unsigned int
6159 ix86_select_alt_pic_regnum (void)
6160 {
6161   if (current_function_is_leaf && !crtl->profile
6162       && !ix86_current_function_calls_tls_descriptor)
6163     {
6164       int i;
6165       for (i = 2; i >= 0; --i)
6166         if (!df_regs_ever_live_p (i))
6167           return i;
6168     }
6169
6170   return INVALID_REGNUM;
6171 }
6172
6173 /* Return 1 if we need to save REGNO.  */
6174 static int
6175 ix86_save_reg (unsigned int regno, int maybe_eh_return)
6176 {
6177   if (pic_offset_table_rtx
6178       && regno == REAL_PIC_OFFSET_TABLE_REGNUM
6179       && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
6180           || crtl->profile
6181           || crtl->calls_eh_return
6182           || crtl->uses_const_pool))
6183     {
6184       if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
6185         return 0;
6186       return 1;
6187     }
6188
6189   if (crtl->calls_eh_return && maybe_eh_return)
6190     {
6191       unsigned i;
6192       for (i = 0; ; i++)
6193         {
6194           unsigned test = EH_RETURN_DATA_REGNO (i);
6195           if (test == INVALID_REGNUM)
6196             break;
6197           if (test == regno)
6198             return 1;
6199         }
6200     }
6201
6202   if (cfun->machine->force_align_arg_pointer
6203       && regno == REGNO (cfun->machine->force_align_arg_pointer))
6204     return 1;
6205
6206   return (df_regs_ever_live_p (regno)
6207           && !call_used_regs[regno]
6208           && !fixed_regs[regno]
6209           && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
6210 }
6211
6212 /* Return number of registers to be saved on the stack.  */
6213
6214 static int
6215 ix86_nsaved_regs (void)
6216 {
6217   int nregs = 0;
6218   int regno;
6219
6220   for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
6221     if (ix86_save_reg (regno, true))
6222       nregs++;
6223   return nregs;
6224 }
6225
6226 /* Return the offset between two registers, one to be eliminated, and the other
6227    its replacement, at the start of a routine.  */
6228
6229 HOST_WIDE_INT
6230 ix86_initial_elimination_offset (int from, int to)
6231 {
6232   struct ix86_frame frame;
6233   ix86_compute_frame_layout (&frame);
6234
6235   if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
6236     return frame.hard_frame_pointer_offset;
6237   else if (from == FRAME_POINTER_REGNUM
6238            && to == HARD_FRAME_POINTER_REGNUM)
6239     return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
6240   else
6241     {
6242       gcc_assert (to == STACK_POINTER_REGNUM);
6243
6244       if (from == ARG_POINTER_REGNUM)
6245         return frame.stack_pointer_offset;
6246
6247       gcc_assert (from == FRAME_POINTER_REGNUM);
6248       return frame.stack_pointer_offset - frame.frame_pointer_offset;
6249     }
6250 }
6251
6252 /* Fill structure ix86_frame about frame of currently computed function.  */
6253
6254 static void
6255 ix86_compute_frame_layout (struct ix86_frame *frame)
6256 {
6257   HOST_WIDE_INT total_size;
6258   unsigned int stack_alignment_needed;
6259   HOST_WIDE_INT offset;
6260   unsigned int preferred_alignment;
6261   HOST_WIDE_INT size = get_frame_size ();
6262
6263   frame->nregs = ix86_nsaved_regs ();
6264   total_size = size;
6265
6266   stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
6267   preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
6268
6269   /* During reload iteration the amount of registers saved can change.
6270      Recompute the value as needed.  Do not recompute when amount of registers
6271      didn't change as reload does multiple calls to the function and does not
6272      expect the decision to change within single iteration.  */
6273   if (!optimize_size
6274       && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
6275     {
6276       int count = frame->nregs;
6277
6278       cfun->machine->use_fast_prologue_epilogue_nregs = count;
6279       /* The fast prologue uses move instead of push to save registers.  This
6280          is significantly longer, but also executes faster as modern hardware
6281          can execute the moves in parallel, but can't do that for push/pop.
6282
6283          Be careful about choosing what prologue to emit:  When function takes
6284          many instructions to execute we may use slow version as well as in
6285          case function is known to be outside hot spot (this is known with
6286          feedback only).  Weight the size of function by number of registers
6287          to save as it is cheap to use one or two push instructions but very
6288          slow to use many of them.  */
6289       if (count)
6290         count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
6291       if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
6292           || (flag_branch_probabilities
6293               && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
6294         cfun->machine->use_fast_prologue_epilogue = false;
6295       else
6296         cfun->machine->use_fast_prologue_epilogue
6297            = !expensive_function_p (count);
6298     }
6299   if (TARGET_PROLOGUE_USING_MOVE
6300       && cfun->machine->use_fast_prologue_epilogue)
6301     frame->save_regs_using_mov = true;
6302   else
6303     frame->save_regs_using_mov = false;
6304
6305
6306   /* Skip return address and saved base pointer.  */
6307   offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
6308
6309   frame->hard_frame_pointer_offset = offset;
6310
6311   /* Do some sanity checking of stack_alignment_needed and
6312      preferred_alignment, since i386 port is the only using those features
6313      that may break easily.  */
6314
6315   gcc_assert (!size || stack_alignment_needed);
6316   gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
6317   gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
6318   gcc_assert (stack_alignment_needed
6319               <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
6320
6321   if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
6322     stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
6323
6324   /* Register save area */
6325   offset += frame->nregs * UNITS_PER_WORD;
6326
6327   /* Va-arg area */
6328   if (ix86_save_varrargs_registers)
6329     {
6330       offset += X86_64_VARARGS_SIZE;
6331       frame->va_arg_size = X86_64_VARARGS_SIZE;
6332     }
6333   else
6334     frame->va_arg_size = 0;
6335
6336   /* Align start of frame for local function.  */
6337   frame->padding1 = ((offset + stack_alignment_needed - 1)
6338                      & -stack_alignment_needed) - offset;
6339
6340   offset += frame->padding1;
6341
6342   /* Frame pointer points here.  */
6343   frame->frame_pointer_offset = offset;
6344
6345   offset += size;
6346
6347   /* Add outgoing arguments area.  Can be skipped if we eliminated
6348      all the function calls as dead code.
6349      Skipping is however impossible when function calls alloca.  Alloca
6350      expander assumes that last crtl->outgoing_args_size
6351      of stack frame are unused.  */
6352   if (ACCUMULATE_OUTGOING_ARGS
6353       && (!current_function_is_leaf || cfun->calls_alloca
6354           || ix86_current_function_calls_tls_descriptor))
6355     {
6356       offset += crtl->outgoing_args_size;
6357       frame->outgoing_arguments_size = crtl->outgoing_args_size;
6358     }
6359   else
6360     frame->outgoing_arguments_size = 0;
6361
6362   /* Align stack boundary.  Only needed if we're calling another function
6363      or using alloca.  */
6364   if (!current_function_is_leaf || cfun->calls_alloca
6365       || ix86_current_function_calls_tls_descriptor)
6366     frame->padding2 = ((offset + preferred_alignment - 1)
6367                        & -preferred_alignment) - offset;
6368   else
6369     frame->padding2 = 0;
6370
6371   offset += frame->padding2;
6372
6373   /* We've reached end of stack frame.  */
6374   frame->stack_pointer_offset = offset;
6375
6376   /* Size prologue needs to allocate.  */
6377   frame->to_allocate =
6378     (size + frame->padding1 + frame->padding2
6379      + frame->outgoing_arguments_size + frame->va_arg_size);
6380
6381   if ((!frame->to_allocate && frame->nregs <= 1)
6382       || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
6383     frame->save_regs_using_mov = false;
6384
6385   if (TARGET_RED_ZONE && current_function_sp_is_unchanging
6386       && current_function_is_leaf
6387       && !ix86_current_function_calls_tls_descriptor)
6388     {
6389       frame->red_zone_size = frame->to_allocate;
6390       if (frame->save_regs_using_mov)
6391         frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
6392       if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
6393         frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
6394     }
6395   else
6396     frame->red_zone_size = 0;
6397   frame->to_allocate -= frame->red_zone_size;
6398   frame->stack_pointer_offset -= frame->red_zone_size;
6399 #if 0
6400   fprintf (stderr, "\n");
6401   fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
6402   fprintf (stderr, "size: %ld\n", (long)size);
6403   fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed);
6404   fprintf (stderr, "padding1: %ld\n", (long)frame->padding1);
6405   fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size);
6406   fprintf (stderr, "padding2: %ld\n", (long)frame->padding2);
6407   fprintf (stderr, "to_allocate: %ld\n", (long)frame->to_allocate);
6408   fprintf (stderr, "red_zone_size: %ld\n", (long)frame->red_zone_size);
6409   fprintf (stderr, "frame_pointer_offset: %ld\n", (long)frame->frame_pointer_offset);
6410   fprintf (stderr, "hard_frame_pointer_offset: %ld\n",
6411            (long)frame->hard_frame_pointer_offset);
6412   fprintf (stderr, "stack_pointer_offset: %ld\n", (long)frame->stack_pointer_offset);
6413   fprintf (stderr, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf);
6414   fprintf (stderr, "cfun->calls_alloca: %ld\n", (long)cfun->calls_alloca);
6415   fprintf (stderr, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor);
6416 #endif
6417 }
6418
6419 /* Emit code to save registers in the prologue.  */
6420
6421 static void
6422 ix86_emit_save_regs (void)
6423 {
6424   unsigned int regno;
6425   rtx insn;
6426
6427   for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
6428     if (ix86_save_reg (regno, true))
6429       {
6430         insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
6431         RTX_FRAME_RELATED_P (insn) = 1;
6432       }
6433 }
6434
6435 /* Emit code to save registers using MOV insns.  First register
6436    is restored from POINTER + OFFSET.  */
6437 static void
6438 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
6439 {
6440   unsigned int regno;
6441   rtx insn;
6442
6443   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6444     if (ix86_save_reg (regno, true))
6445       {
6446         insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
6447                                                Pmode, offset),
6448                                gen_rtx_REG (Pmode, regno));
6449         RTX_FRAME_RELATED_P (insn) = 1;
6450         offset += UNITS_PER_WORD;
6451       }
6452 }
6453
6454 /* Expand prologue or epilogue stack adjustment.
6455    The pattern exist to put a dependency on all ebp-based memory accesses.
6456    STYLE should be negative if instructions should be marked as frame related,
6457    zero if %r11 register is live and cannot be freely used and positive
6458    otherwise.  */
6459
6460 static void
6461 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
6462 {
6463   rtx insn;
6464
6465   if (! TARGET_64BIT)
6466     insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
6467   else if (x86_64_immediate_operand (offset, DImode))
6468     insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
6469   else
6470     {
6471       rtx r11;
6472       /* r11 is used by indirect sibcall return as well, set before the
6473          epilogue and used after the epilogue.  ATM indirect sibcall
6474          shouldn't be used together with huge frame sizes in one
6475          function because of the frame_size check in sibcall.c.  */
6476       gcc_assert (style);
6477       r11 = gen_rtx_REG (DImode, R11_REG);
6478       insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
6479       if (style < 0)
6480         RTX_FRAME_RELATED_P (insn) = 1;
6481       insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
6482                                                                offset));
6483     }
6484   if (style < 0)
6485     RTX_FRAME_RELATED_P (insn) = 1;
6486 }
6487
6488 /* Handle the TARGET_INTERNAL_ARG_POINTER hook.  */
6489
6490 static rtx
6491 ix86_internal_arg_pointer (void)
6492 {
6493   bool has_force_align_arg_pointer =
6494     (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
6495                             TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
6496   if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
6497        && DECL_NAME (current_function_decl)
6498        && MAIN_NAME_P (DECL_NAME (current_function_decl))
6499        && DECL_FILE_SCOPE_P (current_function_decl))
6500       || ix86_force_align_arg_pointer
6501       || has_force_align_arg_pointer)
6502     {
6503       /* Nested functions can't realign the stack due to a register
6504          conflict.  */
6505       if (DECL_CONTEXT (current_function_decl)
6506           && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)
6507         {
6508           if (ix86_force_align_arg_pointer)
6509             warning (0, "-mstackrealign ignored for nested functions");
6510           if (has_force_align_arg_pointer)
6511             error ("%s not supported for nested functions",
6512                    ix86_force_align_arg_pointer_string);
6513           return virtual_incoming_args_rtx;
6514         }
6515       cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, CX_REG);
6516       return copy_to_reg (cfun->machine->force_align_arg_pointer);
6517     }
6518   else
6519     return virtual_incoming_args_rtx;
6520 }
6521
6522 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
6523    This is called from dwarf2out.c to emit call frame instructions
6524    for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
6525 static void
6526 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
6527 {
6528   rtx unspec = SET_SRC (pattern);
6529   gcc_assert (GET_CODE (unspec) == UNSPEC);
6530
6531   switch (index)
6532     {
6533     case UNSPEC_REG_SAVE:
6534       dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
6535                               SET_DEST (pattern));
6536       break;
6537     case UNSPEC_DEF_CFA:
6538       dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
6539                          INTVAL (XVECEXP (unspec, 0, 0)));
6540       break;
6541     default:
6542       gcc_unreachable ();
6543     }
6544 }
6545
6546 /* Expand the prologue into a bunch of separate insns.  */
6547
6548 void
6549 ix86_expand_prologue (void)
6550 {
6551   rtx insn;
6552   bool pic_reg_used;
6553   struct ix86_frame frame;
6554   HOST_WIDE_INT allocate;
6555
6556   ix86_compute_frame_layout (&frame);
6557
6558   if (cfun->machine->force_align_arg_pointer)
6559     {
6560       rtx x, y;
6561
6562       /* Grab the argument pointer.  */
6563       x = plus_constant (stack_pointer_rtx, 4);
6564       y = cfun->machine->force_align_arg_pointer;
6565       insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
6566       RTX_FRAME_RELATED_P (insn) = 1;
6567
6568       /* The unwind info consists of two parts: install the fafp as the cfa,
6569          and record the fafp as the "save register" of the stack pointer.
6570          The later is there in order that the unwinder can see where it
6571          should restore the stack pointer across the and insn.  */
6572       x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
6573       x = gen_rtx_SET (VOIDmode, y, x);
6574       RTX_FRAME_RELATED_P (x) = 1;
6575       y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
6576                           UNSPEC_REG_SAVE);
6577       y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
6578       RTX_FRAME_RELATED_P (y) = 1;
6579       x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
6580       x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
6581       REG_NOTES (insn) = x;
6582
6583       /* Align the stack.  */
6584       emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
6585                              GEN_INT (-16)));
6586
6587       /* And here we cheat like madmen with the unwind info.  We force the
6588          cfa register back to sp+4, which is exactly what it was at the
6589          start of the function.  Re-pushing the return address results in
6590          the return at the same spot relative to the cfa, and thus is
6591          correct wrt the unwind info.  */
6592       x = cfun->machine->force_align_arg_pointer;
6593       x = gen_frame_mem (Pmode, plus_constant (x, -4));
6594       insn = emit_insn (gen_push (x));
6595       RTX_FRAME_RELATED_P (insn) = 1;
6596
6597       x = GEN_INT (4);
6598       x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
6599       x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
6600       x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
6601       REG_NOTES (insn) = x;
6602     }
6603
6604   /* Note: AT&T enter does NOT have reversed args.  Enter is probably
6605      slower on all targets.  Also sdb doesn't like it.  */
6606
6607   if (frame_pointer_needed)
6608     {
6609       insn = emit_insn (gen_push (hard_frame_pointer_rtx));
6610       RTX_FRAME_RELATED_P (insn) = 1;
6611
6612       insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
6613       RTX_FRAME_RELATED_P (insn) = 1;
6614     }
6615
6616   allocate = frame.to_allocate;
6617
6618   if (!frame.save_regs_using_mov)
6619     ix86_emit_save_regs ();
6620   else
6621     allocate += frame.nregs * UNITS_PER_WORD;
6622
6623   /* When using red zone we may start register saving before allocating
6624      the stack frame saving one cycle of the prologue. However I will
6625      avoid doing this if I am going to have to probe the stack since
6626      at least on x86_64 the stack probe can turn into a call that clobbers
6627      a red zone location */
6628   if (TARGET_RED_ZONE && frame.save_regs_using_mov
6629       && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT))
6630     ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
6631                                    : stack_pointer_rtx,
6632                                    -frame.nregs * UNITS_PER_WORD);
6633
6634   if (allocate == 0)
6635     ;
6636   else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
6637     pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6638                                GEN_INT (-allocate), -1);
6639   else
6640     {
6641       /* Only valid for Win32.  */
6642       rtx eax = gen_rtx_REG (Pmode, AX_REG);
6643       bool eax_live;
6644       rtx t;
6645
6646       gcc_assert (!TARGET_64BIT || cfun->machine->call_abi == MS_ABI);
6647
6648       if (cfun->machine->call_abi == MS_ABI)
6649         eax_live = false;
6650       else
6651         eax_live = ix86_eax_live_at_start_p ();
6652
6653       if (eax_live)
6654         {
6655           emit_insn (gen_push (eax));
6656           allocate -= UNITS_PER_WORD;
6657         }
6658
6659       emit_move_insn (eax, GEN_INT (allocate));
6660
6661       if (TARGET_64BIT)
6662         insn = gen_allocate_stack_worker_64 (eax);
6663       else
6664         insn = gen_allocate_stack_worker_32 (eax);
6665       insn = emit_insn (insn);
6666       RTX_FRAME_RELATED_P (insn) = 1;
6667       t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
6668       t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
6669       REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
6670                                             t, REG_NOTES (insn));
6671
6672       if (eax_live)
6673         {
6674           if (frame_pointer_needed)
6675             t = plus_constant (hard_frame_pointer_rtx,
6676                                allocate
6677                                - frame.to_allocate
6678                                - frame.nregs * UNITS_PER_WORD);
6679           else
6680             t = plus_constant (stack_pointer_rtx, allocate);
6681           emit_move_insn (eax, gen_rtx_MEM (Pmode, t));
6682         }
6683     }
6684
6685   if (frame.save_regs_using_mov
6686       && !(TARGET_RED_ZONE
6687          && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)))
6688     {
6689       if (!frame_pointer_needed || !frame.to_allocate)
6690         ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
6691       else
6692         ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
6693                                        -frame.nregs * UNITS_PER_WORD);
6694     }
6695
6696   pic_reg_used = false;
6697   if (pic_offset_table_rtx
6698       && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
6699           || crtl->profile))
6700     {
6701       unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
6702
6703       if (alt_pic_reg_used != INVALID_REGNUM)
6704         SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
6705
6706       pic_reg_used = true;
6707     }
6708
6709   if (pic_reg_used)
6710     {
6711       if (TARGET_64BIT)
6712         {
6713           if (ix86_cmodel == CM_LARGE_PIC)
6714             {
6715               rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
6716               rtx label = gen_label_rtx ();
6717               emit_label (label);
6718               LABEL_PRESERVE_P (label) = 1;
6719               gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
6720               insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
6721               insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
6722               insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
6723                                             pic_offset_table_rtx, tmp_reg));
6724             }
6725           else
6726             insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
6727         }
6728       else
6729         insn = emit_insn (gen_set_got (pic_offset_table_rtx));
6730     }
6731
6732   /* Prevent function calls from being scheduled before the call to mcount.
6733      In the pic_reg_used case, make sure that the got load isn't deleted.  */
6734   if (crtl->profile)
6735     {
6736       if (pic_reg_used)
6737         emit_insn (gen_prologue_use (pic_offset_table_rtx));
6738       emit_insn (gen_blockage ());
6739     }
6740
6741   /* Emit cld instruction if stringops are used in the function.  */
6742   if (TARGET_CLD && ix86_current_function_needs_cld)
6743     emit_insn (gen_cld ());
6744 }
6745
6746 /* Emit code to restore saved registers using MOV insns.  First register
6747    is restored from POINTER + OFFSET.  */
6748 static void
6749 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
6750                                   int maybe_eh_return)
6751 {
6752   int regno;
6753   rtx base_address = gen_rtx_MEM (Pmode, pointer);
6754
6755   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6756     if (ix86_save_reg (regno, maybe_eh_return))
6757       {
6758         /* Ensure that adjust_address won't be forced to produce pointer
6759            out of range allowed by x86-64 instruction set.  */
6760         if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
6761           {
6762             rtx r11;
6763
6764             r11 = gen_rtx_REG (DImode, R11_REG);
6765             emit_move_insn (r11, GEN_INT (offset));
6766             emit_insn (gen_adddi3 (r11, r11, pointer));
6767             base_address = gen_rtx_MEM (Pmode, r11);
6768             offset = 0;
6769           }
6770         emit_move_insn (gen_rtx_REG (Pmode, regno),
6771                         adjust_address (base_address, Pmode, offset));
6772         offset += UNITS_PER_WORD;
6773       }
6774 }
6775
6776 /* Restore function stack, frame, and registers.  */
6777
6778 void
6779 ix86_expand_epilogue (int style)
6780 {
6781   int regno;
6782   int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
6783   struct ix86_frame frame;
6784   HOST_WIDE_INT offset;
6785
6786   ix86_compute_frame_layout (&frame);
6787
6788   /* Calculate start of saved registers relative to ebp.  Special care
6789      must be taken for the normal return case of a function using
6790      eh_return: the eax and edx registers are marked as saved, but not
6791      restored along this path.  */
6792   offset = frame.nregs;
6793   if (crtl->calls_eh_return && style != 2)
6794     offset -= 2;
6795   offset *= -UNITS_PER_WORD;
6796
6797   /* If we're only restoring one register and sp is not valid then
6798      using a move instruction to restore the register since it's
6799      less work than reloading sp and popping the register.
6800
6801      The default code result in stack adjustment using add/lea instruction,
6802      while this code results in LEAVE instruction (or discrete equivalent),
6803      so it is profitable in some other cases as well.  Especially when there
6804      are no registers to restore.  We also use this code when TARGET_USE_LEAVE
6805      and there is exactly one register to pop. This heuristic may need some
6806      tuning in future.  */
6807   if ((!sp_valid && frame.nregs <= 1)
6808       || (TARGET_EPILOGUE_USING_MOVE
6809           && cfun->machine->use_fast_prologue_epilogue
6810           && (frame.nregs > 1 || frame.to_allocate))
6811       || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
6812       || (frame_pointer_needed && TARGET_USE_LEAVE
6813           && cfun->machine->use_fast_prologue_epilogue
6814           && frame.nregs == 1)
6815       || crtl->calls_eh_return)
6816     {
6817       /* Restore registers.  We can use ebp or esp to address the memory
6818          locations.  If both are available, default to ebp, since offsets
6819          are known to be small.  Only exception is esp pointing directly to the
6820          end of block of saved registers, where we may simplify addressing
6821          mode.  */
6822
6823       if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
6824         ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
6825                                           frame.to_allocate, style == 2);
6826       else
6827         ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
6828                                           offset, style == 2);
6829
6830       /* eh_return epilogues need %ecx added to the stack pointer.  */
6831       if (style == 2)
6832         {
6833           rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
6834
6835           if (frame_pointer_needed)
6836             {
6837               tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
6838               tmp = plus_constant (tmp, UNITS_PER_WORD);
6839               emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
6840
6841               tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
6842               emit_move_insn (hard_frame_pointer_rtx, tmp);
6843
6844               pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
6845                                          const0_rtx, style);
6846             }
6847           else
6848             {
6849               tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
6850               tmp = plus_constant (tmp, (frame.to_allocate
6851                                          + frame.nregs * UNITS_PER_WORD));
6852               emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
6853             }
6854         }
6855       else if (!frame_pointer_needed)
6856         pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6857                                    GEN_INT (frame.to_allocate
6858                                             + frame.nregs * UNITS_PER_WORD),
6859                                    style);
6860       /* If not an i386, mov & pop is faster than "leave".  */
6861       else if (TARGET_USE_LEAVE || optimize_size
6862                || !cfun->machine->use_fast_prologue_epilogue)
6863         emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
6864       else
6865         {
6866           pro_epilogue_adjust_stack (stack_pointer_rtx,
6867                                      hard_frame_pointer_rtx,
6868                                      const0_rtx, style);
6869           if (TARGET_64BIT)
6870             emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6871           else
6872             emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
6873         }
6874     }
6875   else
6876     {
6877       /* First step is to deallocate the stack frame so that we can
6878          pop the registers.  */
6879       if (!sp_valid)
6880         {
6881           gcc_assert (frame_pointer_needed);
6882           pro_epilogue_adjust_stack (stack_pointer_rtx,
6883                                      hard_frame_pointer_rtx,
6884                                      GEN_INT (offset), style);
6885         }
6886       else if (frame.to_allocate)
6887         pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6888                                    GEN_INT (frame.to_allocate), style);
6889
6890       for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6891         if (ix86_save_reg (regno, false))
6892           {
6893             if (TARGET_64BIT)
6894               emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
6895             else
6896               emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
6897           }
6898       if (frame_pointer_needed)
6899         {
6900           /* Leave results in shorter dependency chains on CPUs that are
6901              able to grok it fast.  */
6902           if (TARGET_USE_LEAVE)
6903             emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
6904           else if (TARGET_64BIT)
6905             emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6906           else
6907             emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
6908         }
6909     }
6910
6911   if (cfun->machine->force_align_arg_pointer)
6912     {
6913       emit_insn (gen_addsi3 (stack_pointer_rtx,
6914                              cfun->machine->force_align_arg_pointer,
6915                              GEN_INT (-4)));
6916     }
6917
6918   /* Sibcall epilogues don't want a return instruction.  */
6919   if (style == 0)
6920     return;
6921
6922   if (crtl->args.pops_args && crtl->args.size)
6923     {
6924       rtx popc = GEN_INT (crtl->args.pops_args);
6925
6926       /* i386 can only pop 64K bytes.  If asked to pop more, pop
6927          return address, do explicit add, and jump indirectly to the
6928          caller.  */
6929
6930       if (crtl->args.pops_args >= 65536)
6931         {
6932           rtx ecx = gen_rtx_REG (SImode, CX_REG);
6933
6934           /* There is no "pascal" calling convention in any 64bit ABI.  */
6935           gcc_assert (!TARGET_64BIT);
6936
6937           emit_insn (gen_popsi1 (ecx));
6938           emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
6939           emit_jump_insn (gen_return_indirect_internal (ecx));
6940         }
6941       else
6942         emit_jump_insn (gen_return_pop_internal (popc));
6943     }
6944   else
6945     emit_jump_insn (gen_return_internal ());
6946 }
6947
6948 /* Reset from the function's potential modifications.  */
6949
6950 static void
6951 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6952                                HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6953 {
6954   if (pic_offset_table_rtx)
6955     SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
6956 #if TARGET_MACHO
6957   /* Mach-O doesn't support labels at the end of objects, so if
6958      it looks like we might want one, insert a NOP.  */
6959   {
6960     rtx insn = get_last_insn ();
6961     while (insn
6962            && NOTE_P (insn)
6963            && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
6964       insn = PREV_INSN (insn);
6965     if (insn
6966         && (LABEL_P (insn)
6967             || (NOTE_P (insn)
6968                 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
6969       fputs ("\tnop\n", file);
6970   }
6971 #endif
6972
6973 }
6974 \f
6975 /* Extract the parts of an RTL expression that is a valid memory address
6976    for an instruction.  Return 0 if the structure of the address is
6977    grossly off.  Return -1 if the address contains ASHIFT, so it is not
6978    strictly valid, but still used for computing length of lea instruction.  */
6979
6980 int
6981 ix86_decompose_address (rtx addr, struct ix86_address *out)
6982 {
6983   rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
6984   rtx base_reg, index_reg;
6985   HOST_WIDE_INT scale = 1;
6986   rtx scale_rtx = NULL_RTX;
6987   int retval = 1;
6988   enum ix86_address_seg seg = SEG_DEFAULT;
6989
6990   if (REG_P (addr) || GET_CODE (addr) == SUBREG)
6991     base = addr;
6992   else if (GET_CODE (addr) == PLUS)
6993     {
6994       rtx addends[4], op;
6995       int n = 0, i;
6996
6997       op = addr;
6998       do
6999         {
7000           if (n >= 4)
7001             return 0;
7002           addends[n++] = XEXP (op, 1);
7003           op = XEXP (op, 0);
7004         }
7005       while (GET_CODE (op) == PLUS);
7006       if (n >= 4)
7007         return 0;
7008       addends[n] = op;
7009
7010       for (i = n; i >= 0; --i)
7011         {
7012           op = addends[i];
7013           switch (GET_CODE (op))
7014             {
7015             case MULT:
7016               if (index)
7017                 return 0;
7018               index = XEXP (op, 0);
7019               scale_rtx = XEXP (op, 1);
7020               break;
7021
7022             case UNSPEC:
7023               if (XINT (op, 1) == UNSPEC_TP
7024                   && TARGET_TLS_DIRECT_SEG_REFS
7025                   && seg == SEG_DEFAULT)
7026                 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
7027               else
7028                 return 0;
7029               break;
7030
7031             case REG:
7032             case SUBREG:
7033               if (!base)
7034                 base = op;
7035               else if (!index)
7036                 index = op;
7037               else
7038                 return 0;
7039               break;
7040
7041             case CONST:
7042             case CONST_INT:
7043             case SYMBOL_REF:
7044             case LABEL_REF:
7045               if (disp)
7046                 return 0;
7047               disp = op;
7048               break;
7049
7050             default:
7051               return 0;
7052             }
7053         }
7054     }
7055   else if (GET_CODE (addr) == MULT)
7056     {
7057       index = XEXP (addr, 0);           /* index*scale */
7058       scale_rtx = XEXP (addr, 1);
7059     }
7060   else if (GET_CODE (addr) == ASHIFT)
7061     {
7062       rtx tmp;
7063
7064       /* We're called for lea too, which implements ashift on occasion.  */
7065       index = XEXP (addr, 0);
7066       tmp = XEXP (addr, 1);
7067       if (!CONST_INT_P (tmp))
7068         return 0;
7069       scale = INTVAL (tmp);
7070       if ((unsigned HOST_WIDE_INT) scale > 3)
7071         return 0;
7072       scale = 1 << scale;
7073       retval = -1;
7074     }
7075   else
7076     disp = addr;                        /* displacement */
7077
7078   /* Extract the integral value of scale.  */
7079   if (scale_rtx)
7080     {
7081       if (!CONST_INT_P (scale_rtx))
7082         return 0;
7083       scale = INTVAL (scale_rtx);
7084     }
7085
7086   base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
7087   index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
7088
7089   /* Allow arg pointer and stack pointer as index if there is not scaling.  */
7090   if (base_reg && index_reg && scale == 1
7091       && (index_reg == arg_pointer_rtx
7092           || index_reg == frame_pointer_rtx
7093           || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
7094     {
7095       rtx tmp;
7096       tmp = base, base = index, index = tmp;
7097       tmp = base_reg, base_reg = index_reg, index_reg = tmp;
7098     }
7099
7100   /* Special case: %ebp cannot be encoded as a base without a displacement.  */
7101   if ((base_reg == hard_frame_pointer_rtx
7102        || base_reg == frame_pointer_rtx
7103        || base_reg == arg_pointer_rtx) && !disp)
7104     disp = const0_rtx;
7105
7106   /* Special case: on K6, [%esi] makes the instruction vector decoded.
7107      Avoid this by transforming to [%esi+0].  */
7108   if (TARGET_K6 && !optimize_size
7109       && base_reg && !index_reg && !disp
7110       && REG_P (base_reg)
7111       && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
7112     disp = const0_rtx;
7113
7114   /* Special case: encode reg+reg instead of reg*2.  */
7115   if (!base && index && scale && scale == 2)
7116     base = index, base_reg = index_reg, scale = 1;
7117
7118   /* Special case: scaling cannot be encoded without base or displacement.  */
7119   if (!base && !disp && index && scale != 1)
7120     disp = const0_rtx;
7121
7122   out->base = base;
7123   out->index = index;
7124   out->disp = disp;
7125   out->scale = scale;
7126   out->seg = seg;
7127
7128   return retval;
7129 }
7130 \f
7131 /* Return cost of the memory address x.
7132    For i386, it is better to use a complex address than let gcc copy
7133    the address into a reg and make a new pseudo.  But not if the address
7134    requires to two regs - that would mean more pseudos with longer
7135    lifetimes.  */
7136 static int
7137 ix86_address_cost (rtx x)
7138 {
7139   struct ix86_address parts;
7140   int cost = 1;
7141   int ok = ix86_decompose_address (x, &parts);
7142
7143   gcc_assert (ok);
7144
7145   if (parts.base && GET_CODE (parts.base) == SUBREG)
7146     parts.base = SUBREG_REG (parts.base);
7147   if (parts.index && GET_CODE (parts.index) == SUBREG)
7148     parts.index = SUBREG_REG (parts.index);
7149
7150   /* Attempt to minimize number of registers in the address.  */
7151   if ((parts.base
7152        && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
7153       || (parts.index
7154           && (!REG_P (parts.index)
7155               || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
7156     cost++;
7157
7158   if (parts.base
7159       && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
7160       && parts.index
7161       && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
7162       && parts.base != parts.index)
7163     cost++;
7164
7165   /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
7166      since it's predecode logic can't detect the length of instructions
7167      and it degenerates to vector decoded.  Increase cost of such
7168      addresses here.  The penalty is minimally 2 cycles.  It may be worthwhile
7169      to split such addresses or even refuse such addresses at all.
7170
7171      Following addressing modes are affected:
7172       [base+scale*index]
7173       [scale*index+disp]
7174       [base+index]
7175
7176      The first and last case  may be avoidable by explicitly coding the zero in
7177      memory address, but I don't have AMD-K6 machine handy to check this
7178      theory.  */
7179
7180   if (TARGET_K6
7181       && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
7182           || (parts.disp && !parts.base && parts.index && parts.scale != 1)
7183           || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
7184     cost += 10;
7185
7186   return cost;
7187 }
7188 \f
7189 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
7190    this is used for to form addresses to local data when -fPIC is in
7191    use.  */
7192
7193 static bool
7194 darwin_local_data_pic (rtx disp)
7195 {
7196   if (GET_CODE (disp) == MINUS)
7197     {
7198       if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
7199           || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
7200         if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
7201           {
7202             const char *sym_name = XSTR (XEXP (disp, 1), 0);
7203             if (! strcmp (sym_name, "<pic base>"))
7204               return true;
7205           }
7206     }
7207
7208   return false;
7209 }
7210
7211 /* Determine if a given RTX is a valid constant.  We already know this
7212    satisfies CONSTANT_P.  */
7213
7214 bool
7215 legitimate_constant_p (rtx x)
7216 {
7217   switch (GET_CODE (x))
7218     {
7219     case CONST:
7220       x = XEXP (x, 0);
7221
7222       if (GET_CODE (x) == PLUS)
7223         {
7224           if (!CONST_INT_P (XEXP (x, 1)))
7225             return false;
7226           x = XEXP (x, 0);
7227         }
7228
7229       if (TARGET_MACHO && darwin_local_data_pic (x))
7230         return true;
7231
7232       /* Only some unspecs are valid as "constants".  */
7233       if (GET_CODE (x) == UNSPEC)
7234         switch (XINT (x, 1))
7235           {
7236           case UNSPEC_GOT:
7237           case UNSPEC_GOTOFF:
7238           case UNSPEC_PLTOFF:
7239             return TARGET_64BIT;
7240           case UNSPEC_TPOFF:
7241           case UNSPEC_NTPOFF:
7242             x = XVECEXP (x, 0, 0);
7243             return (GET_CODE (x) == SYMBOL_REF
7244                     && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
7245           case UNSPEC_DTPOFF:
7246             x = XVECEXP (x, 0, 0);
7247             return (GET_CODE (x) == SYMBOL_REF
7248                     && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
7249           default:
7250             return false;
7251           }
7252
7253       /* We must have drilled down to a symbol.  */
7254       if (GET_CODE (x) == LABEL_REF)
7255         return true;
7256       if (GET_CODE (x) != SYMBOL_REF)
7257         return false;
7258       /* FALLTHRU */
7259
7260     case SYMBOL_REF:
7261       /* TLS symbols are never valid.  */
7262       if (SYMBOL_REF_TLS_MODEL (x))
7263         return false;
7264
7265       /* DLLIMPORT symbols are never valid.  */
7266       if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
7267           && SYMBOL_REF_DLLIMPORT_P (x))
7268         return false;
7269       break;
7270
7271     case CONST_DOUBLE:
7272       if (GET_MODE (x) == TImode
7273           && x != CONST0_RTX (TImode)
7274           && !TARGET_64BIT)
7275         return false;
7276       break;
7277
7278     case CONST_VECTOR:
7279       if (x == CONST0_RTX (GET_MODE (x)))
7280         return true;
7281       return false;
7282
7283     default:
7284       break;
7285     }
7286
7287   /* Otherwise we handle everything else in the move patterns.  */
7288   return true;
7289 }
7290
7291 /* Determine if it's legal to put X into the constant pool.  This
7292    is not possible for the address of thread-local symbols, which
7293    is checked above.  */
7294
7295 static bool
7296 ix86_cannot_force_const_mem (rtx x)
7297 {
7298   /* We can always put integral constants and vectors in memory.  */
7299   switch (GET_CODE (x))
7300     {
7301     case CONST_INT:
7302     case CONST_DOUBLE:
7303     case CONST_VECTOR:
7304       return false;
7305
7306     default:
7307       break;
7308     }
7309   return !legitimate_constant_p (x);
7310 }
7311
7312 /* Determine if a given RTX is a valid constant address.  */
7313
7314 bool
7315 constant_address_p (rtx x)
7316 {
7317   return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
7318 }
7319
7320 /* Nonzero if the constant value X is a legitimate general operand
7321    when generating PIC code.  It is given that flag_pic is on and
7322    that X satisfies CONSTANT_P or is a CONST_DOUBLE.  */
7323
7324 bool
7325 legitimate_pic_operand_p (rtx x)
7326 {
7327   rtx inner;
7328
7329   switch (GET_CODE (x))
7330     {
7331     case CONST:
7332       inner = XEXP (x, 0);
7333       if (GET_CODE (inner) == PLUS
7334           && CONST_INT_P (XEXP (inner, 1)))
7335         inner = XEXP (inner, 0);
7336
7337       /* Only some unspecs are valid as "constants".  */
7338       if (GET_CODE (inner) == UNSPEC)
7339         switch (XINT (inner, 1))
7340           {
7341           case UNSPEC_GOT:
7342           case UNSPEC_GOTOFF:
7343           case UNSPEC_PLTOFF:
7344             return TARGET_64BIT;
7345           case UNSPEC_TPOFF:
7346             x = XVECEXP (inner, 0, 0);
7347             return (GET_CODE (x) == SYMBOL_REF
7348                     && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
7349           default:
7350             return false;
7351           }
7352       /* FALLTHRU */
7353
7354     case SYMBOL_REF:
7355     case LABEL_REF:
7356       return legitimate_pic_address_disp_p (x);
7357
7358     default:
7359       return true;
7360     }
7361 }
7362
7363 /* Determine if a given CONST RTX is a valid memory displacement
7364    in PIC mode.  */
7365
7366 int
7367 legitimate_pic_address_disp_p (rtx disp)
7368 {
7369   bool saw_plus;
7370
7371   /* In 64bit mode we can allow direct addresses of symbols and labels
7372      when they are not dynamic symbols.  */
7373   if (TARGET_64BIT)
7374     {
7375       rtx op0 = disp, op1;
7376
7377       switch (GET_CODE (disp))
7378         {
7379         case LABEL_REF:
7380           return true;
7381
7382         case CONST:
7383           if (GET_CODE (XEXP (disp, 0)) != PLUS)
7384             break;
7385           op0 = XEXP (XEXP (disp, 0), 0);
7386           op1 = XEXP (XEXP (disp, 0), 1);
7387           if (!CONST_INT_P (op1)
7388               || INTVAL (op1) >= 16*1024*1024
7389               || INTVAL (op1) < -16*1024*1024)
7390             break;
7391           if (GET_CODE (op0) == LABEL_REF)
7392             return true;
7393           if (GET_CODE (op0) != SYMBOL_REF)
7394             break;
7395           /* FALLTHRU */
7396
7397         case SYMBOL_REF:
7398           /* TLS references should always be enclosed in UNSPEC.  */
7399           if (SYMBOL_REF_TLS_MODEL (op0))
7400             return false;
7401           if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
7402               && ix86_cmodel != CM_LARGE_PIC)
7403             return true;
7404           break;
7405
7406         default:
7407           break;
7408         }
7409     }
7410   if (GET_CODE (disp) != CONST)
7411     return 0;
7412   disp = XEXP (disp, 0);
7413
7414   if (TARGET_64BIT)
7415     {
7416       /* We are unsafe to allow PLUS expressions.  This limit allowed distance
7417          of GOT tables.  We should not need these anyway.  */
7418       if (GET_CODE (disp) != UNSPEC
7419           || (XINT (disp, 1) != UNSPEC_GOTPCREL
7420               && XINT (disp, 1) != UNSPEC_GOTOFF
7421               && XINT (disp, 1) != UNSPEC_PLTOFF))
7422         return 0;
7423
7424       if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
7425           && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
7426         return 0;
7427       return 1;
7428     }
7429
7430   saw_plus = false;
7431   if (GET_CODE (disp) == PLUS)
7432     {
7433       if (!CONST_INT_P (XEXP (disp, 1)))
7434         return 0;
7435       disp = XEXP (disp, 0);
7436       saw_plus = true;
7437     }
7438
7439   if (TARGET_MACHO && darwin_local_data_pic (disp))
7440     return 1;
7441
7442   if (GET_CODE (disp) != UNSPEC)
7443     return 0;
7444
7445   switch (XINT (disp, 1))
7446     {
7447     case UNSPEC_GOT:
7448       if (saw_plus)
7449         return false;
7450       /* We need to check for both symbols and labels because VxWorks loads
7451          text labels with @GOT rather than @GOTOFF.  See gotoff_operand for
7452          details.  */
7453       return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
7454               || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
7455     case UNSPEC_GOTOFF:
7456       /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
7457          While ABI specify also 32bit relocation but we don't produce it in
7458          small PIC model at all.  */
7459       if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
7460            || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
7461           && !TARGET_64BIT)
7462         return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
7463       return false;
7464     case UNSPEC_GOTTPOFF:
7465     case UNSPEC_GOTNTPOFF:
7466     case UNSPEC_INDNTPOFF:
7467       if (saw_plus)
7468         return false;
7469       disp = XVECEXP (disp, 0, 0);
7470       return (GET_CODE (disp) == SYMBOL_REF
7471               && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
7472     case UNSPEC_NTPOFF:
7473       disp = XVECEXP (disp, 0, 0);
7474       return (GET_CODE (disp) == SYMBOL_REF
7475               && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
7476     case UNSPEC_DTPOFF:
7477       disp = XVECEXP (disp, 0, 0);
7478       return (GET_CODE (disp) == SYMBOL_REF
7479               && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
7480     }
7481
7482   return 0;
7483 }
7484
7485 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
7486    memory address for an instruction.  The MODE argument is the machine mode
7487    for the MEM expression that wants to use this address.
7488
7489    It only recognizes address in canonical form.  LEGITIMIZE_ADDRESS should
7490    convert common non-canonical forms to canonical form so that they will
7491    be recognized.  */
7492
7493 int
7494 legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
7495                       rtx addr, int strict)
7496 {
7497   struct ix86_address parts;
7498   rtx base, index, disp;
7499   HOST_WIDE_INT scale;
7500   const char *reason = NULL;
7501   rtx reason_rtx = NULL_RTX;
7502
7503   if (ix86_decompose_address (addr, &parts) <= 0)
7504     {
7505       reason = "decomposition failed";
7506       goto report_error;
7507     }
7508
7509   base = parts.base;
7510   index = parts.index;
7511   disp = parts.disp;
7512   scale = parts.scale;
7513
7514   /* Validate base register.
7515
7516      Don't allow SUBREG's that span more than a word here.  It can lead to spill
7517      failures when the base is one word out of a two word structure, which is
7518      represented internally as a DImode int.  */
7519
7520   if (base)
7521     {
7522       rtx reg;
7523       reason_rtx = base;
7524
7525       if (REG_P (base))
7526         reg = base;
7527       else if (GET_CODE (base) == SUBREG
7528                && REG_P (SUBREG_REG (base))
7529                && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
7530                   <= UNITS_PER_WORD)
7531         reg = SUBREG_REG (base);
7532       else
7533         {
7534           reason = "base is not a register";
7535           goto report_error;
7536         }
7537
7538       if (GET_MODE (base) != Pmode)
7539         {
7540           reason = "base is not in Pmode";
7541           goto report_error;
7542         }
7543
7544       if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
7545           || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
7546         {
7547           reason = "base is not valid";
7548           goto report_error;
7549         }
7550     }
7551
7552   /* Validate index register.
7553
7554      Don't allow SUBREG's that span more than a word here -- same as above.  */
7555
7556   if (index)
7557     {
7558       rtx reg;
7559       reason_rtx = index;
7560
7561       if (REG_P (index))
7562         reg = index;
7563       else if (GET_CODE (index) == SUBREG
7564                && REG_P (SUBREG_REG (index))
7565                && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
7566                   <= UNITS_PER_WORD)
7567         reg = SUBREG_REG (index);
7568       else
7569         {
7570           reason = "index is not a register";
7571           goto report_error;
7572         }
7573
7574       if (GET_MODE (index) != Pmode)
7575         {
7576           reason = "index is not in Pmode";
7577           goto report_error;
7578         }
7579
7580       if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
7581           || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
7582         {
7583           reason = "index is not valid";
7584           goto report_error;
7585         }
7586     }
7587
7588   /* Validate scale factor.  */
7589   if (scale != 1)
7590     {
7591       reason_rtx = GEN_INT (scale);
7592       if (!index)
7593         {
7594           reason = "scale without index";
7595           goto report_error;
7596         }
7597
7598       if (scale != 2 && scale != 4 && scale != 8)
7599         {
7600           reason = "scale is not a valid multiplier";
7601           goto report_error;
7602         }
7603     }
7604
7605   /* Validate displacement.  */
7606   if (disp)
7607     {
7608       reason_rtx = disp;
7609
7610       if (GET_CODE (disp) == CONST
7611           && GET_CODE (XEXP (disp, 0)) == UNSPEC)
7612         switch (XINT (XEXP (disp, 0), 1))
7613           {
7614           /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
7615              used.  While ABI specify also 32bit relocations, we don't produce
7616              them at all and use IP relative instead.  */
7617           case UNSPEC_GOT:
7618           case UNSPEC_GOTOFF:
7619             gcc_assert (flag_pic);
7620             if (!TARGET_64BIT)
7621               goto is_legitimate_pic;
7622             reason = "64bit address unspec";
7623             goto report_error;
7624
7625           case UNSPEC_GOTPCREL:
7626             gcc_assert (flag_pic);
7627             goto is_legitimate_pic;
7628
7629           case UNSPEC_GOTTPOFF:
7630           case UNSPEC_GOTNTPOFF:
7631           case UNSPEC_INDNTPOFF:
7632           case UNSPEC_NTPOFF:
7633           case UNSPEC_DTPOFF:
7634             break;
7635
7636           default:
7637             reason = "invalid address unspec";
7638             goto report_error;
7639           }
7640
7641       else if (SYMBOLIC_CONST (disp)
7642                && (flag_pic
7643                    || (TARGET_MACHO
7644 #if TARGET_MACHO
7645                        && MACHOPIC_INDIRECT
7646                        && !machopic_operand_p (disp)
7647 #endif
7648                )))
7649         {
7650
7651         is_legitimate_pic:
7652           if (TARGET_64BIT && (index || base))
7653             {
7654               /* foo@dtpoff(%rX) is ok.  */
7655               if (GET_CODE (disp) != CONST
7656                   || GET_CODE (XEXP (disp, 0)) != PLUS
7657                   || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
7658                   || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
7659                   || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
7660                       && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
7661                 {
7662                   reason = "non-constant pic memory reference";
7663                   goto report_error;
7664                 }
7665             }
7666           else if (! legitimate_pic_address_disp_p (disp))
7667             {
7668               reason = "displacement is an invalid pic construct";
7669               goto report_error;
7670             }
7671
7672           /* This code used to verify that a symbolic pic displacement
7673              includes the pic_offset_table_rtx register.
7674
7675              While this is good idea, unfortunately these constructs may
7676              be created by "adds using lea" optimization for incorrect
7677              code like:
7678
7679              int a;
7680              int foo(int i)
7681                {
7682                  return *(&a+i);
7683                }
7684
7685              This code is nonsensical, but results in addressing
7686              GOT table with pic_offset_table_rtx base.  We can't
7687              just refuse it easily, since it gets matched by
7688              "addsi3" pattern, that later gets split to lea in the
7689              case output register differs from input.  While this
7690              can be handled by separate addsi pattern for this case
7691              that never results in lea, this seems to be easier and
7692              correct fix for crash to disable this test.  */
7693         }
7694       else if (GET_CODE (disp) != LABEL_REF
7695                && !CONST_INT_P (disp)
7696                && (GET_CODE (disp) != CONST
7697                    || !legitimate_constant_p (disp))
7698                && (GET_CODE (disp) != SYMBOL_REF
7699                    || !legitimate_constant_p (disp)))
7700         {
7701           reason = "displacement is not constant";
7702           goto report_error;
7703         }
7704       else if (TARGET_64BIT
7705                && !x86_64_immediate_operand (disp, VOIDmode))
7706         {
7707           reason = "displacement is out of range";
7708           goto report_error;
7709         }
7710     }
7711
7712   /* Everything looks valid.  */
7713   return TRUE;
7714
7715  report_error:
7716   return FALSE;
7717 }
7718 \f
7719 /* Return a unique alias set for the GOT.  */
7720
7721 static alias_set_type
7722 ix86_GOT_alias_set (void)
7723 {
7724   static alias_set_type set = -1;
7725   if (set == -1)
7726     set = new_alias_set ();
7727   return set;
7728 }
7729
7730 /* Return a legitimate reference for ORIG (an address) using the
7731    register REG.  If REG is 0, a new pseudo is generated.
7732
7733    There are two types of references that must be handled:
7734
7735    1. Global data references must load the address from the GOT, via
7736       the PIC reg.  An insn is emitted to do this load, and the reg is
7737       returned.
7738
7739    2. Static data references, constant pool addresses, and code labels
7740       compute the address as an offset from the GOT, whose base is in
7741       the PIC reg.  Static data objects have SYMBOL_FLAG_LOCAL set to
7742       differentiate them from global data objects.  The returned
7743       address is the PIC reg + an unspec constant.
7744
7745    GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
7746    reg also appears in the address.  */
7747
7748 static rtx
7749 legitimize_pic_address (rtx orig, rtx reg)
7750 {
7751   rtx addr = orig;
7752   rtx new_rtx = orig;
7753   rtx base;
7754
7755 #if TARGET_MACHO
7756   if (TARGET_MACHO && !TARGET_64BIT)
7757     {
7758       if (reg == 0)
7759         reg = gen_reg_rtx (Pmode);
7760       /* Use the generic Mach-O PIC machinery.  */
7761       return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
7762     }
7763 #endif
7764
7765   if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
7766     new_rtx = addr;
7767   else if (TARGET_64BIT
7768            && ix86_cmodel != CM_SMALL_PIC
7769            && gotoff_operand (addr, Pmode))
7770     {
7771       rtx tmpreg;
7772       /* This symbol may be referenced via a displacement from the PIC
7773          base address (@GOTOFF).  */
7774
7775       if (reload_in_progress)
7776         df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7777       if (GET_CODE (addr) == CONST)
7778         addr = XEXP (addr, 0);
7779       if (GET_CODE (addr) == PLUS)
7780           {
7781             new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
7782                                       UNSPEC_GOTOFF);
7783             new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
7784           }
7785         else
7786           new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7787       new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7788       if (!reg)
7789         tmpreg = gen_reg_rtx (Pmode);
7790       else
7791         tmpreg = reg;
7792       emit_move_insn (tmpreg, new_rtx);
7793
7794       if (reg != 0)
7795         {
7796           new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
7797                                          tmpreg, 1, OPTAB_DIRECT);
7798           new_rtx = reg;
7799         }
7800       else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
7801     }
7802   else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
7803     {
7804       /* This symbol may be referenced via a displacement from the PIC
7805          base address (@GOTOFF).  */
7806
7807       if (reload_in_progress)
7808         df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7809       if (GET_CODE (addr) == CONST)
7810         addr = XEXP (addr, 0);
7811       if (GET_CODE (addr) == PLUS)
7812           {
7813             new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
7814                                       UNSPEC_GOTOFF);
7815             new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
7816           }
7817         else
7818           new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7819       new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7820       new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
7821
7822       if (reg != 0)
7823         {
7824           emit_move_insn (reg, new_rtx);
7825           new_rtx = reg;
7826         }
7827     }
7828   else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
7829            /* We can't use @GOTOFF for text labels on VxWorks;
7830               see gotoff_operand.  */
7831            || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
7832     {
7833       if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
7834         {
7835           if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
7836             return legitimize_dllimport_symbol (addr, true);
7837           if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
7838               && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
7839               && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
7840             {
7841               rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
7842               return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
7843             }
7844         }
7845
7846       if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
7847         {
7848           new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
7849           new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7850           new_rtx = gen_const_mem (Pmode, new_rtx);
7851           set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
7852
7853           if (reg == 0)
7854             reg = gen_reg_rtx (Pmode);
7855           /* Use directly gen_movsi, otherwise the address is loaded
7856              into register for CSE.  We don't want to CSE this addresses,
7857              instead we CSE addresses from the GOT table, so skip this.  */
7858           emit_insn (gen_movsi (reg, new_rtx));
7859           new_rtx = reg;
7860         }
7861       else
7862         {
7863           /* This symbol must be referenced via a load from the
7864              Global Offset Table (@GOT).  */
7865
7866           if (reload_in_progress)
7867             df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7868           new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
7869           new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7870           if (TARGET_64BIT)
7871             new_rtx = force_reg (Pmode, new_rtx);
7872           new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
7873           new_rtx = gen_const_mem (Pmode, new_rtx);
7874           set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
7875
7876           if (reg == 0)
7877             reg = gen_reg_rtx (Pmode);
7878           emit_move_insn (reg, new_rtx);
7879           new_rtx = reg;
7880         }
7881     }
7882   else
7883     {
7884       if (CONST_INT_P (addr)
7885           && !x86_64_immediate_operand (addr, VOIDmode))
7886         {
7887           if (reg)
7888             {
7889               emit_move_insn (reg, addr);
7890               new_rtx = reg;
7891             }
7892           else
7893             new_rtx = force_reg (Pmode, addr);
7894         }
7895       else if (GET_CODE (addr) == CONST)
7896         {
7897           addr = XEXP (addr, 0);
7898
7899           /* We must match stuff we generate before.  Assume the only
7900              unspecs that can get here are ours.  Not that we could do
7901              anything with them anyway....  */
7902           if (GET_CODE (addr) == UNSPEC
7903               || (GET_CODE (addr) == PLUS
7904                   && GET_CODE (XEXP (addr, 0)) == UNSPEC))
7905             return orig;
7906           gcc_assert (GET_CODE (addr) == PLUS);
7907         }
7908       if (GET_CODE (addr) == PLUS)
7909         {
7910           rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
7911
7912           /* Check first to see if this is a constant offset from a @GOTOFF
7913              symbol reference.  */
7914           if (gotoff_operand (op0, Pmode)
7915               && CONST_INT_P (op1))
7916             {
7917               if (!TARGET_64BIT)
7918                 {
7919                   if (reload_in_progress)
7920                     df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7921                   new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
7922                                             UNSPEC_GOTOFF);
7923                   new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
7924                   new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7925                   new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
7926
7927                   if (reg != 0)
7928                     {
7929                       emit_move_insn (reg, new_rtx);
7930                       new_rtx = reg;
7931                     }
7932                 }
7933               else
7934                 {
7935                   if (INTVAL (op1) < -16*1024*1024
7936                       || INTVAL (op1) >= 16*1024*1024)
7937                     {
7938                       if (!x86_64_immediate_operand (op1, Pmode))
7939                         op1 = force_reg (Pmode, op1);
7940                       new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
7941                     }
7942                 }
7943             }
7944           else
7945             {
7946               base = legitimize_pic_address (XEXP (addr, 0), reg);
7947               new_rtx  = legitimize_pic_address (XEXP (addr, 1),
7948                                                  base == reg ? NULL_RTX : reg);
7949
7950               if (CONST_INT_P (new_rtx))
7951                 new_rtx = plus_constant (base, INTVAL (new_rtx));
7952               else
7953                 {
7954                   if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
7955                     {
7956                       base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
7957                       new_rtx = XEXP (new_rtx, 1);
7958                     }
7959                   new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
7960                 }
7961             }
7962         }
7963     }
7964   return new_rtx;
7965 }
7966 \f
7967 /* Load the thread pointer.  If TO_REG is true, force it into a register.  */
7968
7969 static rtx
7970 get_thread_pointer (int to_reg)
7971 {
7972   rtx tp, reg, insn;
7973
7974   tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
7975   if (!to_reg)
7976     return tp;
7977
7978   reg = gen_reg_rtx (Pmode);
7979   insn = gen_rtx_SET (VOIDmode, reg, tp);
7980   insn = emit_insn (insn);
7981
7982   return reg;
7983 }
7984
7985 /* A subroutine of legitimize_address and ix86_expand_move.  FOR_MOV is
7986    false if we expect this to be used for a memory address and true if
7987    we expect to load the address into a register.  */
7988
7989 static rtx
7990 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
7991 {
7992   rtx dest, base, off, pic, tp;
7993   int type;
7994
7995   switch (model)
7996     {
7997     case TLS_MODEL_GLOBAL_DYNAMIC:
7998       dest = gen_reg_rtx (Pmode);
7999       tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
8000
8001       if (TARGET_64BIT && ! TARGET_GNU2_TLS)
8002         {
8003           rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
8004
8005           start_sequence ();
8006           emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
8007           insns = get_insns ();
8008           end_sequence ();
8009
8010           RTL_CONST_CALL_P (insns) = 1;
8011           emit_libcall_block (insns, dest, rax, x);
8012         }
8013       else if (TARGET_64BIT && TARGET_GNU2_TLS)
8014         emit_insn (gen_tls_global_dynamic_64 (dest, x));
8015       else
8016         emit_insn (gen_tls_global_dynamic_32 (dest, x));
8017
8018       if (TARGET_GNU2_TLS)
8019         {
8020           dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
8021
8022           set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
8023         }
8024       break;
8025
8026     case TLS_MODEL_LOCAL_DYNAMIC:
8027       base = gen_reg_rtx (Pmode);
8028       tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
8029
8030       if (TARGET_64BIT && ! TARGET_GNU2_TLS)
8031         {
8032           rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, note;
8033
8034           start_sequence ();
8035           emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
8036           insns = get_insns ();
8037           end_sequence ();
8038
8039           note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
8040           note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
8041           RTL_CONST_CALL_P (insns) = 1;
8042           emit_libcall_block (insns, base, rax, note);
8043         }
8044       else if (TARGET_64BIT && TARGET_GNU2_TLS)
8045         emit_insn (gen_tls_local_dynamic_base_64 (base));
8046       else
8047         emit_insn (gen_tls_local_dynamic_base_32 (base));
8048
8049       if (TARGET_GNU2_TLS)
8050         {
8051           rtx x = ix86_tls_module_base ();
8052
8053           set_unique_reg_note (get_last_insn (), REG_EQUIV,
8054                                gen_rtx_MINUS (Pmode, x, tp));
8055         }
8056
8057       off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
8058       off = gen_rtx_CONST (Pmode, off);
8059
8060       dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
8061
8062       if (TARGET_GNU2_TLS)
8063         {
8064           dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
8065
8066           set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
8067         }
8068
8069       break;
8070
8071     case TLS_MODEL_INITIAL_EXEC:
8072       if (TARGET_64BIT)
8073         {
8074           pic = NULL;
8075           type = UNSPEC_GOTNTPOFF;
8076         }
8077       else if (flag_pic)
8078         {
8079           if (reload_in_progress)
8080             df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
8081           pic = pic_offset_table_rtx;
8082           type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
8083         }
8084       else if (!TARGET_ANY_GNU_TLS)
8085         {
8086           pic = gen_reg_rtx (Pmode);
8087           emit_insn (gen_set_got (pic));
8088           type = UNSPEC_GOTTPOFF;
8089         }
8090       else
8091         {
8092           pic = NULL;
8093           type = UNSPEC_INDNTPOFF;
8094         }
8095
8096       off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
8097       off = gen_rtx_CONST (Pmode, off);
8098       if (pic)
8099         off = gen_rtx_PLUS (Pmode, pic, off);
8100       off = gen_const_mem (Pmode, off);
8101       set_mem_alias_set (off, ix86_GOT_alias_set ());
8102
8103       if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
8104         {
8105           base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
8106           off = force_reg (Pmode, off);
8107           return gen_rtx_PLUS (Pmode, base, off);
8108         }
8109       else
8110         {
8111           base = get_thread_pointer (true);
8112           dest = gen_reg_rtx (Pmode);
8113           emit_insn (gen_subsi3 (dest, base, off));
8114         }
8115       break;
8116
8117     case TLS_MODEL_LOCAL_EXEC:
8118       off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
8119                             (TARGET_64BIT || TARGET_ANY_GNU_TLS)
8120                             ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
8121       off = gen_rtx_CONST (Pmode, off);
8122
8123       if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
8124         {
8125           base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
8126           return gen_rtx_PLUS (Pmode, base, off);
8127         }
8128       else
8129         {
8130           base = get_thread_pointer (true);
8131           dest = gen_reg_rtx (Pmode);
8132           emit_insn (gen_subsi3 (dest, base, off));
8133         }
8134       break;
8135
8136     default:
8137       gcc_unreachable ();
8138     }
8139
8140   return dest;
8141 }
8142
8143 /* Create or return the unique __imp_DECL dllimport symbol corresponding
8144    to symbol DECL.  */
8145
8146 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
8147   htab_t dllimport_map;
8148
8149 static tree
8150 get_dllimport_decl (tree decl)
8151 {
8152   struct tree_map *h, in;
8153   void **loc;
8154   const char *name;
8155   const char *prefix;
8156   size_t namelen, prefixlen;
8157   char *imp_name;
8158   tree to;
8159   rtx rtl;
8160
8161   if (!dllimport_map)
8162     dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
8163
8164   in.hash = htab_hash_pointer (decl);
8165   in.base.from = decl;
8166   loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
8167   h = (struct tree_map *) *loc;
8168   if (h)
8169     return h->to;
8170
8171   *loc = h = GGC_NEW (struct tree_map);
8172   h->hash = in.hash;
8173   h->base.from = decl;
8174   h->to = to = build_decl (VAR_DECL, NULL, ptr_type_node);
8175   DECL_ARTIFICIAL (to) = 1;
8176   DECL_IGNORED_P (to) = 1;
8177   DECL_EXTERNAL (to) = 1;
8178   TREE_READONLY (to) = 1;
8179
8180   name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
8181   name = targetm.strip_name_encoding (name);
8182   prefix = name[0] == FASTCALL_PREFIX  ?  "*__imp_": "*__imp__";
8183   namelen = strlen (name);
8184   prefixlen = strlen (prefix);
8185   imp_name = (char *) alloca (namelen + prefixlen + 1);
8186   memcpy (imp_name, prefix, prefixlen);
8187   memcpy (imp_name + prefixlen, name, namelen + 1);
8188
8189   name = ggc_alloc_string (imp_name, namelen + prefixlen);
8190   rtl = gen_rtx_SYMBOL_REF (Pmode, name);
8191   SET_SYMBOL_REF_DECL (rtl, to);
8192   SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
8193
8194   rtl = gen_const_mem (Pmode, rtl);
8195   set_mem_alias_set (rtl, ix86_GOT_alias_set ());
8196
8197   SET_DECL_RTL (to, rtl);
8198   SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
8199
8200   return to;
8201 }
8202
8203 /* Expand SYMBOL into its corresponding dllimport symbol.  WANT_REG is
8204    true if we require the result be a register.  */
8205
8206 static rtx
8207 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
8208 {
8209   tree imp_decl;
8210   rtx x;
8211
8212   gcc_assert (SYMBOL_REF_DECL (symbol));
8213   imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
8214
8215   x = DECL_RTL (imp_decl);
8216   if (want_reg)
8217     x = force_reg (Pmode, x);
8218   return x;
8219 }
8220
8221 /* Try machine-dependent ways of modifying an illegitimate address
8222    to be legitimate.  If we find one, return the new, valid address.
8223    This macro is used in only one place: `memory_address' in explow.c.
8224
8225    OLDX is the address as it was before break_out_memory_refs was called.
8226    In some cases it is useful to look at this to decide what needs to be done.
8227
8228    MODE and WIN are passed so that this macro can use
8229    GO_IF_LEGITIMATE_ADDRESS.
8230
8231    It is always safe for this macro to do nothing.  It exists to recognize
8232    opportunities to optimize the output.
8233
8234    For the 80386, we handle X+REG by loading X into a register R and
8235    using R+REG.  R will go in a general reg and indexing will be used.
8236    However, if REG is a broken-out memory address or multiplication,
8237    nothing needs to be done because REG can certainly go in a general reg.
8238
8239    When -fpic is used, special handling is needed for symbolic references.
8240    See comments by legitimize_pic_address in i386.c for details.  */
8241
8242 rtx
8243 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
8244 {
8245   int changed = 0;
8246   unsigned log;
8247
8248   log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
8249   if (log)
8250     return legitimize_tls_address (x, (enum tls_model) log, false);
8251   if (GET_CODE (x) == CONST
8252       && GET_CODE (XEXP (x, 0)) == PLUS
8253       && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
8254       && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
8255     {
8256       rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
8257                                       (enum tls_model) log, false);
8258       return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
8259     }
8260
8261   if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
8262     {
8263       if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
8264         return legitimize_dllimport_symbol (x, true);
8265       if (GET_CODE (x) == CONST
8266           && GET_CODE (XEXP (x, 0)) == PLUS
8267           && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
8268           && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
8269         {
8270           rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
8271           return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
8272         }
8273     }
8274
8275   if (flag_pic && SYMBOLIC_CONST (x))
8276     return legitimize_pic_address (x, 0);
8277
8278   /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
8279   if (GET_CODE (x) == ASHIFT
8280       && CONST_INT_P (XEXP (x, 1))
8281       && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
8282     {
8283       changed = 1;
8284       log = INTVAL (XEXP (x, 1));
8285       x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
8286                         GEN_INT (1 << log));
8287     }
8288
8289   if (GET_CODE (x) == PLUS)
8290     {
8291       /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
8292
8293       if (GET_CODE (XEXP (x, 0)) == ASHIFT
8294           && CONST_INT_P (XEXP (XEXP (x, 0), 1))
8295           && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
8296         {
8297           changed = 1;
8298           log = INTVAL (XEXP (XEXP (x, 0), 1));
8299           XEXP (x, 0) = gen_rtx_MULT (Pmode,
8300                                       force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
8301                                       GEN_INT (1 << log));
8302         }
8303
8304       if (GET_CODE (XEXP (x, 1)) == ASHIFT
8305           && CONST_INT_P (XEXP (XEXP (x, 1), 1))
8306           && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
8307         {
8308           changed = 1;
8309           log = INTVAL (XEXP (XEXP (x, 1), 1));
8310           XEXP (x, 1) = gen_rtx_MULT (Pmode,
8311                                       force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
8312                                       GEN_INT (1 << log));
8313         }
8314
8315       /* Put multiply first if it isn't already.  */
8316       if (GET_CODE (XEXP (x, 1)) == MULT)
8317         {
8318           rtx tmp = XEXP (x, 0);
8319           XEXP (x, 0) = XEXP (x, 1);
8320           XEXP (x, 1) = tmp;
8321           changed = 1;
8322         }
8323
8324       /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
8325          into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
8326          created by virtual register instantiation, register elimination, and
8327          similar optimizations.  */
8328       if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
8329         {
8330           changed = 1;
8331           x = gen_rtx_PLUS (Pmode,
8332                             gen_rtx_PLUS (Pmode, XEXP (x, 0),
8333                                           XEXP (XEXP (x, 1), 0)),
8334                             XEXP (XEXP (x, 1), 1));
8335         }
8336
8337       /* Canonicalize
8338          (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
8339          into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
8340       else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
8341                && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
8342                && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
8343                && CONSTANT_P (XEXP (x, 1)))
8344         {
8345           rtx constant;
8346           rtx other = NULL_RTX;
8347
8348           if (CONST_INT_P (XEXP (x, 1)))
8349             {
8350               constant = XEXP (x, 1);
8351               other = XEXP (XEXP (XEXP (x, 0), 1), 1);
8352             }
8353           else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
8354             {
8355               constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
8356               other = XEXP (x, 1);
8357             }
8358           else
8359             constant = 0;
8360
8361           if (constant)
8362             {
8363               changed = 1;
8364               x = gen_rtx_PLUS (Pmode,
8365                                 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
8366                                               XEXP (XEXP (XEXP (x, 0), 1), 0)),
8367                                 plus_constant (other, INTVAL (constant)));
8368             }
8369         }
8370
8371       if (changed && legitimate_address_p (mode, x, FALSE))
8372         return x;
8373
8374       if (GET_CODE (XEXP (x, 0)) == MULT)
8375         {
8376           changed = 1;
8377           XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
8378         }
8379
8380       if (GET_CODE (XEXP (x, 1)) == MULT)
8381         {
8382           changed = 1;
8383           XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
8384         }
8385
8386       if (changed
8387           && REG_P (XEXP (x, 1))
8388           && REG_P (XEXP (x, 0)))
8389         return x;
8390
8391       if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
8392         {
8393           changed = 1;
8394           x = legitimize_pic_address (x, 0);
8395         }
8396
8397       if (changed && legitimate_address_p (mode, x, FALSE))
8398         return x;
8399
8400       if (REG_P (XEXP (x, 0)))
8401         {
8402           rtx temp = gen_reg_rtx (Pmode);
8403           rtx val  = force_operand (XEXP (x, 1), temp);
8404           if (val != temp)
8405             emit_move_insn (temp, val);
8406
8407           XEXP (x, 1) = temp;
8408           return x;
8409         }
8410
8411       else if (REG_P (XEXP (x, 1)))
8412         {
8413           rtx temp = gen_reg_rtx (Pmode);
8414           rtx val  = force_operand (XEXP (x, 0), temp);
8415           if (val != temp)
8416             emit_move_insn (temp, val);
8417
8418           XEXP (x, 0) = temp;
8419           return x;
8420         }
8421     }
8422
8423   return x;
8424 }
8425 \f
8426 /* Print an integer constant expression in assembler syntax.  Addition
8427    and subtraction are the only arithmetic that may appear in these
8428    expressions.  FILE is the stdio stream to write to, X is the rtx, and
8429    CODE is the operand print code from the output string.  */
8430
8431 static void
8432 output_pic_addr_const (FILE *file, rtx x, int code)
8433 {
8434   char buf[256];
8435
8436   switch (GET_CODE (x))
8437     {
8438     case PC:
8439       gcc_assert (flag_pic);
8440       putc ('.', file);
8441       break;
8442
8443     case SYMBOL_REF:
8444       if (! TARGET_MACHO || TARGET_64BIT)
8445         output_addr_const (file, x);
8446       else
8447         {
8448           const char *name = XSTR (x, 0);
8449
8450           /* Mark the decl as referenced so that cgraph will
8451              output the function.  */
8452           if (SYMBOL_REF_DECL (x))
8453             mark_decl_referenced (SYMBOL_REF_DECL (x));
8454
8455 #if TARGET_MACHO
8456           if (MACHOPIC_INDIRECT
8457               && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
8458             name = machopic_indirection_name (x, /*stub_p=*/true);
8459 #endif
8460           assemble_name (file, name);
8461         }
8462       if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
8463           && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
8464         fputs ("@PLT", file);
8465       break;
8466
8467     case LABEL_REF:
8468       x = XEXP (x, 0);
8469       /* FALLTHRU */
8470     case CODE_LABEL:
8471       ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
8472       assemble_name (asm_out_file, buf);
8473       break;
8474
8475     case CONST_INT:
8476       fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
8477       break;
8478
8479     case CONST:
8480       /* This used to output parentheses around the expression,
8481          but that does not work on the 386 (either ATT or BSD assembler).  */
8482       output_pic_addr_const (file, XEXP (x, 0), code);
8483       break;
8484
8485     case CONST_DOUBLE:
8486       if (GET_MODE (x) == VOIDmode)
8487         {
8488           /* We can use %d if the number is <32 bits and positive.  */
8489           if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
8490             fprintf (file, "0x%lx%08lx",
8491                      (unsigned long) CONST_DOUBLE_HIGH (x),
8492                      (unsigned long) CONST_DOUBLE_LOW (x));
8493           else
8494             fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
8495         }
8496       else
8497         /* We can't handle floating point constants;
8498            PRINT_OPERAND must handle them.  */
8499         output_operand_lossage ("floating constant misused");
8500       break;
8501
8502     case PLUS:
8503       /* Some assemblers need integer constants to appear first.  */
8504       if (CONST_INT_P (XEXP (x, 0)))
8505         {
8506           output_pic_addr_const (file, XEXP (x, 0), code);
8507           putc ('+', file);
8508           output_pic_addr_const (file, XEXP (x, 1), code);
8509         }
8510       else
8511         {
8512           gcc_assert (CONST_INT_P (XEXP (x, 1)));
8513           output_pic_addr_const (file, XEXP (x, 1), code);
8514           putc ('+', file);
8515           output_pic_addr_const (file, XEXP (x, 0), code);
8516         }
8517       break;
8518
8519     case MINUS:
8520       if (!TARGET_MACHO)
8521         putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
8522       output_pic_addr_const (file, XEXP (x, 0), code);
8523       putc ('-', file);
8524       output_pic_addr_const (file, XEXP (x, 1), code);
8525       if (!TARGET_MACHO)
8526         putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
8527       break;
8528
8529      case UNSPEC:
8530        gcc_assert (XVECLEN (x, 0) == 1);
8531        output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
8532        switch (XINT (x, 1))
8533         {
8534         case UNSPEC_GOT:
8535           fputs ("@GOT", file);
8536           break;
8537         case UNSPEC_GOTOFF:
8538           fputs ("@GOTOFF", file);
8539           break;
8540         case UNSPEC_PLTOFF:
8541           fputs ("@PLTOFF", file);
8542           break;
8543         case UNSPEC_GOTPCREL:
8544           fputs (ASSEMBLER_DIALECT == ASM_ATT ?
8545                  "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
8546           break;
8547         case UNSPEC_GOTTPOFF:
8548           /* FIXME: This might be @TPOFF in Sun ld too.  */
8549           fputs ("@GOTTPOFF", file);
8550           break;
8551         case UNSPEC_TPOFF:
8552           fputs ("@TPOFF", file);
8553           break;
8554         case UNSPEC_NTPOFF:
8555           if (TARGET_64BIT)
8556             fputs ("@TPOFF", file);
8557           else
8558             fputs ("@NTPOFF", file);
8559           break;
8560         case UNSPEC_DTPOFF:
8561           fputs ("@DTPOFF", file);
8562           break;
8563         case UNSPEC_GOTNTPOFF:
8564           if (TARGET_64BIT)
8565             fputs (ASSEMBLER_DIALECT == ASM_ATT ?
8566                    "@GOTTPOFF(%rip)": "@GOTTPOFF[rip]", file);
8567           else
8568             fputs ("@GOTNTPOFF", file);
8569           break;
8570         case UNSPEC_INDNTPOFF:
8571           fputs ("@INDNTPOFF", file);
8572           break;
8573         default:
8574           output_operand_lossage ("invalid UNSPEC as operand");
8575           break;
8576         }
8577        break;
8578
8579     default:
8580       output_operand_lossage ("invalid expression as operand");
8581     }
8582 }
8583
8584 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
8585    We need to emit DTP-relative relocations.  */
8586
8587 static void ATTRIBUTE_UNUSED
8588 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
8589 {
8590   fputs (ASM_LONG, file);
8591   output_addr_const (file, x);
8592   fputs ("@DTPOFF", file);
8593   switch (size)
8594     {
8595     case 4:
8596       break;
8597     case 8:
8598       fputs (", 0", file);
8599       break;
8600     default:
8601       gcc_unreachable ();
8602    }
8603 }
8604
8605 /* In the name of slightly smaller debug output, and to cater to
8606    general assembler lossage, recognize PIC+GOTOFF and turn it back
8607    into a direct symbol reference.
8608
8609    On Darwin, this is necessary to avoid a crash, because Darwin
8610    has a different PIC label for each routine but the DWARF debugging
8611    information is not associated with any particular routine, so it's
8612    necessary to remove references to the PIC label from RTL stored by
8613    the DWARF output code.  */
8614
8615 static rtx
8616 ix86_delegitimize_address (rtx orig_x)
8617 {
8618   rtx x = orig_x;
8619   /* reg_addend is NULL or a multiple of some register.  */
8620   rtx reg_addend = NULL_RTX;
8621   /* const_addend is NULL or a const_int.  */
8622   rtx const_addend = NULL_RTX;
8623   /* This is the result, or NULL.  */
8624   rtx result = NULL_RTX;
8625
8626   if (MEM_P (x))
8627     x = XEXP (x, 0);
8628
8629   if (TARGET_64BIT)
8630     {
8631       if (GET_CODE (x) != CONST
8632           || GET_CODE (XEXP (x, 0)) != UNSPEC
8633           || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
8634           || !MEM_P (orig_x))
8635         return orig_x;
8636       return XVECEXP (XEXP (x, 0), 0, 0);
8637     }
8638
8639   if (GET_CODE (x) != PLUS
8640       || GET_CODE (XEXP (x, 1)) != CONST)
8641     return orig_x;
8642
8643   if (REG_P (XEXP (x, 0))
8644       && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
8645     /* %ebx + GOT/GOTOFF */
8646     ;
8647   else if (GET_CODE (XEXP (x, 0)) == PLUS)
8648     {
8649       /* %ebx + %reg * scale + GOT/GOTOFF */
8650       reg_addend = XEXP (x, 0);
8651       if (REG_P (XEXP (reg_addend, 0))
8652           && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
8653         reg_addend = XEXP (reg_addend, 1);
8654       else if (REG_P (XEXP (reg_addend, 1))
8655                && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
8656         reg_addend = XEXP (reg_addend, 0);
8657       else
8658         return orig_x;
8659       if (!REG_P (reg_addend)
8660           && GET_CODE (reg_addend) != MULT
8661           && GET_CODE (reg_addend) != ASHIFT)
8662         return orig_x;
8663     }
8664   else
8665     return orig_x;
8666
8667   x = XEXP (XEXP (x, 1), 0);
8668   if (GET_CODE (x) == PLUS
8669       && CONST_INT_P (XEXP (x, 1)))
8670     {
8671       const_addend = XEXP (x, 1);
8672       x = XEXP (x, 0);
8673     }
8674
8675   if (GET_CODE (x) == UNSPEC
8676       && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x))
8677           || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
8678     result = XVECEXP (x, 0, 0);
8679
8680   if (TARGET_MACHO && darwin_local_data_pic (x)
8681       && !MEM_P (orig_x))
8682     result = XEXP (x, 0);
8683
8684   if (! result)
8685     return orig_x;
8686
8687   if (const_addend)
8688     result = gen_rtx_PLUS (Pmode, result, const_addend);
8689   if (reg_addend)
8690     result = gen_rtx_PLUS (Pmode, reg_addend, result);
8691   return result;
8692 }
8693
8694 /* If X is a machine specific address (i.e. a symbol or label being
8695    referenced as a displacement from the GOT implemented using an
8696    UNSPEC), then return the base term.  Otherwise return X.  */
8697
8698 rtx
8699 ix86_find_base_term (rtx x)
8700 {
8701   rtx term;
8702
8703   if (TARGET_64BIT)
8704     {
8705       if (GET_CODE (x) != CONST)
8706         return x;
8707       term = XEXP (x, 0);
8708       if (GET_CODE (term) == PLUS
8709           && (CONST_INT_P (XEXP (term, 1))
8710               || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
8711         term = XEXP (term, 0);
8712       if (GET_CODE (term) != UNSPEC
8713           || XINT (term, 1) != UNSPEC_GOTPCREL)
8714         return x;
8715
8716       term = XVECEXP (term, 0, 0);
8717
8718       if (GET_CODE (term) != SYMBOL_REF
8719           && GET_CODE (term) != LABEL_REF)
8720         return x;
8721
8722       return term;
8723     }
8724
8725   term = ix86_delegitimize_address (x);
8726
8727   if (GET_CODE (term) != SYMBOL_REF
8728       && GET_CODE (term) != LABEL_REF)
8729     return x;
8730
8731   return term;
8732 }
8733 \f
8734 static void
8735 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
8736                     int fp, FILE *file)
8737 {
8738   const char *suffix;
8739
8740   if (mode == CCFPmode || mode == CCFPUmode)
8741     {
8742       enum rtx_code second_code, bypass_code;
8743       ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
8744       gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
8745       code = ix86_fp_compare_code_to_integer (code);
8746       mode = CCmode;
8747     }
8748   if (reverse)
8749     code = reverse_condition (code);
8750
8751   switch (code)
8752     {
8753     case EQ:
8754       switch (mode)
8755         {
8756         case CCAmode:
8757           suffix = "a";
8758           break;
8759
8760         case CCCmode:
8761           suffix = "c";
8762           break;
8763
8764         case CCOmode:
8765           suffix = "o";
8766           break;
8767
8768         case CCSmode:
8769           suffix = "s";
8770           break;
8771
8772         default:
8773           suffix = "e";
8774         }
8775       break;
8776     case NE:
8777       switch (mode)
8778         {
8779         case CCAmode:
8780           suffix = "na";
8781           break;
8782
8783         case CCCmode:
8784           suffix = "nc";
8785           break;
8786
8787         case CCOmode:
8788           suffix = "no";
8789           break;
8790
8791         case CCSmode:
8792           suffix = "ns";
8793           break;
8794
8795         default:
8796           suffix = "ne";
8797         }
8798       break;
8799     case GT:
8800       gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
8801       suffix = "g";
8802       break;
8803     case GTU:
8804       /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
8805          Those same assemblers have the same but opposite lossage on cmov.  */
8806       if (mode == CCmode)
8807         suffix = fp ? "nbe" : "a";
8808       else if (mode == CCCmode)
8809         suffix = "b";
8810       else
8811         gcc_unreachable ();
8812       break;
8813     case LT:
8814       switch (mode)
8815         {
8816         case CCNOmode:
8817         case CCGOCmode:
8818           suffix = "s";
8819           break;
8820
8821         case CCmode:
8822         case CCGCmode:
8823           suffix = "l";
8824           break;
8825
8826         default:
8827           gcc_unreachable ();
8828         }
8829       break;
8830     case LTU:
8831       gcc_assert (mode == CCmode || mode == CCCmode);
8832       suffix = "b";
8833       break;
8834     case GE:
8835       switch (mode)
8836         {
8837         case CCNOmode:
8838         case CCGOCmode:
8839           suffix = "ns";
8840           break;
8841
8842         case CCmode:
8843         case CCGCmode:
8844           suffix = "ge";
8845           break;
8846
8847         default:
8848           gcc_unreachable ();
8849         }
8850       break;
8851     case GEU:
8852       /* ??? As above.  */
8853       gcc_assert (mode == CCmode || mode == CCCmode);
8854       suffix = fp ? "nb" : "ae";
8855       break;
8856     case LE:
8857       gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
8858       suffix = "le";
8859       break;
8860     case LEU:
8861       /* ??? As above.  */
8862       if (mode == CCmode)
8863         suffix = "be";
8864       else if (mode == CCCmode)
8865         suffix = fp ? "nb" : "ae";
8866       else
8867         gcc_unreachable ();
8868       break;
8869     case UNORDERED:
8870       suffix = fp ? "u" : "p";
8871       break;
8872     case ORDERED:
8873       suffix = fp ? "nu" : "np";
8874       break;
8875     default:
8876       gcc_unreachable ();
8877     }
8878   fputs (suffix, file);
8879 }
8880
8881 /* Print the name of register X to FILE based on its machine mode and number.
8882    If CODE is 'w', pretend the mode is HImode.
8883    If CODE is 'b', pretend the mode is QImode.
8884    If CODE is 'k', pretend the mode is SImode.
8885    If CODE is 'q', pretend the mode is DImode.
8886    If CODE is 'h', pretend the reg is the 'high' byte register.
8887    If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.  */
8888
8889 void
8890 print_reg (rtx x, int code, FILE *file)
8891 {
8892   gcc_assert (x == pc_rtx
8893               || (REGNO (x) != ARG_POINTER_REGNUM
8894                   && REGNO (x) != FRAME_POINTER_REGNUM
8895                   && REGNO (x) != FLAGS_REG
8896                   && REGNO (x) != FPSR_REG
8897                   && REGNO (x) != FPCR_REG));
8898
8899   if (ASSEMBLER_DIALECT == ASM_ATT)
8900     putc ('%', file);
8901
8902   if (x == pc_rtx)
8903     {
8904       gcc_assert (TARGET_64BIT);
8905       fputs ("rip", file);
8906       return;
8907     }
8908
8909   if (code == 'w' || MMX_REG_P (x))
8910     code = 2;
8911   else if (code == 'b')
8912     code = 1;
8913   else if (code == 'k')
8914     code = 4;
8915   else if (code == 'q')
8916     code = 8;
8917   else if (code == 'y')
8918     code = 3;
8919   else if (code == 'h')
8920     code = 0;
8921   else
8922     code = GET_MODE_SIZE (GET_MODE (x));
8923
8924   /* Irritatingly, AMD extended registers use different naming convention
8925      from the normal registers.  */
8926   if (REX_INT_REG_P (x))
8927     {
8928       gcc_assert (TARGET_64BIT);
8929       switch (code)
8930         {
8931           case 0:
8932             error ("extended registers have no high halves");
8933             break;
8934           case 1:
8935             fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
8936             break;
8937           case 2:
8938             fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
8939             break;
8940           case 4:
8941             fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
8942             break;
8943           case 8:
8944             fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
8945             break;
8946           default:
8947             error ("unsupported operand size for extended register");
8948             break;
8949         }
8950       return;
8951     }
8952   switch (code)
8953     {
8954     case 3:
8955       if (STACK_TOP_P (x))
8956         {
8957           fputs ("st(0)", file);
8958           break;
8959         }
8960       /* FALLTHRU */
8961     case 8:
8962     case 4:
8963     case 12:
8964       if (! ANY_FP_REG_P (x))
8965         putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
8966       /* FALLTHRU */
8967     case 16:
8968     case 2:
8969     normal:
8970       fputs (hi_reg_name[REGNO (x)], file);
8971       break;
8972     case 1:
8973       if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
8974         goto normal;
8975       fputs (qi_reg_name[REGNO (x)], file);
8976       break;
8977     case 0:
8978       if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
8979         goto normal;
8980       fputs (qi_high_reg_name[REGNO (x)], file);
8981       break;
8982     default:
8983       gcc_unreachable ();
8984     }
8985 }
8986
8987 /* Locate some local-dynamic symbol still in use by this function
8988    so that we can print its name in some tls_local_dynamic_base
8989    pattern.  */
8990
8991 static int
8992 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
8993 {
8994   rtx x = *px;
8995
8996   if (GET_CODE (x) == SYMBOL_REF
8997       && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
8998     {
8999       cfun->machine->some_ld_name = XSTR (x, 0);
9000       return 1;
9001     }
9002
9003   return 0;
9004 }
9005
9006 static const char *
9007 get_some_local_dynamic_name (void)
9008 {
9009   rtx insn;
9010
9011   if (cfun->machine->some_ld_name)
9012     return cfun->machine->some_ld_name;
9013
9014   for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
9015     if (INSN_P (insn)
9016         && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
9017       return cfun->machine->some_ld_name;
9018
9019   gcc_unreachable ();
9020 }
9021
9022 /* Meaning of CODE:
9023    L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
9024    C -- print opcode suffix for set/cmov insn.
9025    c -- like C, but print reversed condition
9026    F,f -- likewise, but for floating-point.
9027    O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
9028         otherwise nothing
9029    R -- print the prefix for register names.
9030    z -- print the opcode suffix for the size of the current operand.
9031    * -- print a star (in certain assembler syntax)
9032    A -- print an absolute memory reference.
9033    w -- print the operand as if it's a "word" (HImode) even if it isn't.
9034    s -- print a shift double count, followed by the assemblers argument
9035         delimiter.
9036    b -- print the QImode name of the register for the indicated operand.
9037         %b0 would print %al if operands[0] is reg 0.
9038    w --  likewise, print the HImode name of the register.
9039    k --  likewise, print the SImode name of the register.
9040    q --  likewise, print the DImode name of the register.
9041    h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
9042    y -- print "st(0)" instead of "st" as a register.
9043    D -- print condition for SSE cmp instruction.
9044    P -- if PIC, print an @PLT suffix.
9045    X -- don't print any sort of PIC '@' suffix for a symbol.
9046    & -- print some in-use local-dynamic symbol name.
9047    H -- print a memory address offset by 8; used for sse high-parts
9048    Y -- print condition for SSE5 com* instruction.
9049    + -- print a branch hint as 'cs' or 'ds' prefix
9050    ; -- print a semicolon (after prefixes due to bug in older gas).
9051  */
9052
9053 void
9054 print_operand (FILE *file, rtx x, int code)
9055 {
9056   if (code)
9057     {
9058       switch (code)
9059         {
9060         case '*':
9061           if (ASSEMBLER_DIALECT == ASM_ATT)
9062             putc ('*', file);
9063           return;
9064
9065         case '&':
9066           assemble_name (file, get_some_local_dynamic_name ());
9067           return;
9068
9069         case 'A':
9070           switch (ASSEMBLER_DIALECT)
9071             {
9072             case ASM_ATT:
9073               putc ('*', file);
9074               break;
9075
9076             case ASM_INTEL:
9077               /* Intel syntax. For absolute addresses, registers should not
9078                  be surrounded by braces.  */
9079               if (!REG_P (x))
9080                 {
9081                   putc ('[', file);
9082                   PRINT_OPERAND (file, x, 0);
9083                   putc (']', file);
9084                   return;
9085                 }
9086               break;
9087
9088             default:
9089               gcc_unreachable ();
9090             }
9091
9092           PRINT_OPERAND (file, x, 0);
9093           return;
9094
9095
9096         case 'L':
9097           if (ASSEMBLER_DIALECT == ASM_ATT)
9098             putc ('l', file);
9099           return;
9100
9101         case 'W':
9102           if (ASSEMBLER_DIALECT == ASM_ATT)
9103             putc ('w', file);
9104           return;
9105
9106         case 'B':
9107           if (ASSEMBLER_DIALECT == ASM_ATT)
9108             putc ('b', file);
9109           return;
9110
9111         case 'Q':
9112           if (ASSEMBLER_DIALECT == ASM_ATT)
9113             putc ('l', file);
9114           return;
9115
9116         case 'S':
9117           if (ASSEMBLER_DIALECT == ASM_ATT)
9118             putc ('s', file);
9119           return;
9120
9121         case 'T':
9122           if (ASSEMBLER_DIALECT == ASM_ATT)
9123             putc ('t', file);
9124           return;
9125
9126         case 'z':
9127           /* 387 opcodes don't get size suffixes if the operands are
9128              registers.  */
9129           if (STACK_REG_P (x))
9130             return;
9131
9132           /* Likewise if using Intel opcodes.  */
9133           if (ASSEMBLER_DIALECT == ASM_INTEL)
9134             return;
9135
9136           /* This is the size of op from size of operand.  */
9137           switch (GET_MODE_SIZE (GET_MODE (x)))
9138             {
9139             case 1:
9140               putc ('b', file);
9141               return;
9142
9143             case 2:
9144               if (MEM_P (x))
9145                 {
9146 #ifdef HAVE_GAS_FILDS_FISTS
9147                   putc ('s', file);
9148 #endif
9149                   return;
9150                 }
9151               else
9152                 putc ('w', file);
9153               return;
9154
9155             case 4:
9156               if (GET_MODE (x) == SFmode)
9157                 {
9158                   putc ('s', file);
9159                   return;
9160                 }
9161               else
9162                 putc ('l', file);
9163               return;
9164
9165             case 12:
9166             case 16:
9167               putc ('t', file);
9168               return;
9169
9170             case 8:
9171               if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
9172                 {
9173 #ifdef GAS_MNEMONICS
9174                   putc ('q', file);
9175 #else
9176                   putc ('l', file);
9177                   putc ('l', file);
9178 #endif
9179                 }
9180               else
9181                 putc ('l', file);
9182               return;
9183
9184             default:
9185               gcc_unreachable ();
9186             }
9187
9188         case 'b':
9189         case 'w':
9190         case 'k':
9191         case 'q':
9192         case 'h':
9193         case 'y':
9194         case 'X':
9195         case 'P':
9196           break;
9197
9198         case 's':
9199           if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
9200             {
9201               PRINT_OPERAND (file, x, 0);
9202               putc (',', file);
9203             }
9204           return;
9205
9206         case 'D':
9207           /* Little bit of braindamage here.  The SSE compare instructions
9208              does use completely different names for the comparisons that the
9209              fp conditional moves.  */
9210           switch (GET_CODE (x))
9211             {
9212             case EQ:
9213             case UNEQ:
9214               fputs ("eq", file);
9215               break;
9216             case LT:
9217             case UNLT:
9218               fputs ("lt", file);
9219               break;
9220             case LE:
9221             case UNLE:
9222               fputs ("le", file);
9223               break;
9224             case UNORDERED:
9225               fputs ("unord", file);
9226               break;
9227             case NE:
9228             case LTGT:
9229               fputs ("neq", file);
9230               break;
9231             case UNGE:
9232             case GE:
9233               fputs ("nlt", file);
9234               break;
9235             case UNGT:
9236             case GT:
9237               fputs ("nle", file);
9238               break;
9239             case ORDERED:
9240               fputs ("ord", file);
9241               break;
9242             default:
9243               gcc_unreachable ();
9244             }
9245           return;
9246         case 'O':
9247 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
9248           if (ASSEMBLER_DIALECT == ASM_ATT)
9249             {
9250               switch (GET_MODE (x))
9251                 {
9252                 case HImode: putc ('w', file); break;
9253                 case SImode:
9254                 case SFmode: putc ('l', file); break;
9255                 case DImode:
9256                 case DFmode: putc ('q', file); break;
9257                 default: gcc_unreachable ();
9258                 }
9259               putc ('.', file);
9260             }
9261 #endif
9262           return;
9263         case 'C':
9264           put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
9265           return;
9266         case 'F':
9267 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
9268           if (ASSEMBLER_DIALECT == ASM_ATT)
9269             putc ('.', file);
9270 #endif
9271           put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
9272           return;
9273
9274           /* Like above, but reverse condition */
9275         case 'c':
9276           /* Check to see if argument to %c is really a constant
9277              and not a condition code which needs to be reversed.  */
9278           if (!COMPARISON_P (x))
9279           {
9280             output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
9281              return;
9282           }
9283           put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
9284           return;
9285         case 'f':
9286 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
9287           if (ASSEMBLER_DIALECT == ASM_ATT)
9288             putc ('.', file);
9289 #endif
9290           put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
9291           return;
9292
9293         case 'H':
9294           /* It doesn't actually matter what mode we use here, as we're
9295              only going to use this for printing.  */
9296           x = adjust_address_nv (x, DImode, 8);
9297           break;
9298
9299         case '+':
9300           {
9301             rtx x;
9302
9303             if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
9304               return;
9305
9306             x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
9307             if (x)
9308               {
9309                 int pred_val = INTVAL (XEXP (x, 0));
9310
9311                 if (pred_val < REG_BR_PROB_BASE * 45 / 100
9312                     || pred_val > REG_BR_PROB_BASE * 55 / 100)
9313                   {
9314                     int taken = pred_val > REG_BR_PROB_BASE / 2;
9315                     int cputaken = final_forward_branch_p (current_output_insn) == 0;
9316
9317                     /* Emit hints only in the case default branch prediction
9318                        heuristics would fail.  */
9319                     if (taken != cputaken)
9320                       {
9321                         /* We use 3e (DS) prefix for taken branches and
9322                            2e (CS) prefix for not taken branches.  */
9323                         if (taken)
9324                           fputs ("ds ; ", file);
9325                         else
9326                           fputs ("cs ; ", file);
9327                       }
9328                   }
9329               }
9330             return;
9331           }
9332
9333         case 'Y':
9334           switch (GET_CODE (x))
9335             {
9336             case NE:
9337               fputs ("neq", file);
9338               break;
9339             case EQ:
9340               fputs ("eq", file);
9341               break;
9342             case GE:
9343             case GEU:
9344               fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
9345               break;
9346             case GT:
9347             case GTU:
9348               fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
9349               break;
9350             case LE:
9351             case LEU:
9352               fputs ("le", file);
9353               break;
9354             case LT:
9355             case LTU:
9356               fputs ("lt", file);
9357               break;
9358             case UNORDERED:
9359               fputs ("unord", file);
9360               break;
9361             case ORDERED:
9362               fputs ("ord", file);
9363               break;
9364             case UNEQ:
9365               fputs ("ueq", file);
9366               break;
9367             case UNGE:
9368               fputs ("nlt", file);
9369               break;
9370             case UNGT:
9371               fputs ("nle", file);
9372               break;
9373             case UNLE:
9374               fputs ("ule", file);
9375               break;
9376             case UNLT:
9377               fputs ("ult", file);
9378               break;
9379             case LTGT:
9380               fputs ("une", file);
9381               break;
9382             default:
9383               gcc_unreachable ();
9384             }
9385           return;
9386
9387         case ';':
9388 #if TARGET_MACHO
9389           fputs (" ; ", file);
9390 #else
9391           fputc (' ', file);
9392 #endif
9393           return;
9394
9395         default:
9396             output_operand_lossage ("invalid operand code '%c'", code);
9397         }
9398     }
9399
9400   if (REG_P (x))
9401     print_reg (x, code, file);
9402
9403   else if (MEM_P (x))
9404     {
9405       /* No `byte ptr' prefix for call instructions or BLKmode operands.  */
9406       if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
9407           && GET_MODE (x) != BLKmode)
9408         {
9409           const char * size;
9410           switch (GET_MODE_SIZE (GET_MODE (x)))
9411             {
9412             case 1: size = "BYTE"; break;
9413             case 2: size = "WORD"; break;
9414             case 4: size = "DWORD"; break;
9415             case 8: size = "QWORD"; break;
9416             case 12: size = "XWORD"; break;
9417             case 16:
9418               if (GET_MODE (x) == XFmode)
9419                 size = "XWORD";
9420               else
9421                 size = "XMMWORD";
9422               break;
9423             default:
9424               gcc_unreachable ();
9425             }
9426
9427           /* Check for explicit size override (codes 'b', 'w' and 'k')  */
9428           if (code == 'b')
9429             size = "BYTE";
9430           else if (code == 'w')
9431             size = "WORD";
9432           else if (code == 'k')
9433             size = "DWORD";
9434
9435           fputs (size, file);
9436           fputs (" PTR ", file);
9437         }
9438
9439       x = XEXP (x, 0);
9440       /* Avoid (%rip) for call operands.  */
9441       if (CONSTANT_ADDRESS_P (x) && code == 'P'
9442           && !CONST_INT_P (x))
9443         output_addr_const (file, x);
9444       else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
9445         output_operand_lossage ("invalid constraints for operand");
9446       else
9447         output_address (x);
9448     }
9449
9450   else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
9451     {
9452       REAL_VALUE_TYPE r;
9453       long l;
9454
9455       REAL_VALUE_FROM_CONST_DOUBLE (r, x);
9456       REAL_VALUE_TO_TARGET_SINGLE (r, l);
9457
9458       if (ASSEMBLER_DIALECT == ASM_ATT)
9459         putc ('$', file);
9460       fprintf (file, "0x%08lx", (long unsigned int) l);
9461     }
9462
9463   /* These float cases don't actually occur as immediate operands.  */
9464   else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
9465     {
9466       char dstr[30];
9467
9468       real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
9469       fprintf (file, "%s", dstr);
9470     }
9471
9472   else if (GET_CODE (x) == CONST_DOUBLE
9473            && GET_MODE (x) == XFmode)
9474     {
9475       char dstr[30];
9476
9477       real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
9478       fprintf (file, "%s", dstr);
9479     }
9480
9481   else
9482     {
9483       /* We have patterns that allow zero sets of memory, for instance.
9484          In 64-bit mode, we should probably support all 8-byte vectors,
9485          since we can in fact encode that into an immediate.  */
9486       if (GET_CODE (x) == CONST_VECTOR)
9487         {
9488           gcc_assert (x == CONST0_RTX (GET_MODE (x)));
9489           x = const0_rtx;
9490         }
9491
9492       if (code != 'P')
9493         {
9494           if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
9495             {
9496               if (ASSEMBLER_DIALECT == ASM_ATT)
9497                 putc ('$', file);
9498             }
9499           else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
9500                    || GET_CODE (x) == LABEL_REF)
9501             {
9502               if (ASSEMBLER_DIALECT == ASM_ATT)
9503                 putc ('$', file);
9504               else
9505                 fputs ("OFFSET FLAT:", file);
9506             }
9507         }
9508       if (CONST_INT_P (x))
9509         fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
9510       else if (flag_pic)
9511         output_pic_addr_const (file, x, code);
9512       else
9513         output_addr_const (file, x);
9514     }
9515 }
9516 \f
9517 /* Print a memory operand whose address is ADDR.  */
9518
9519 void
9520 print_operand_address (FILE *file, rtx addr)
9521 {
9522   struct ix86_address parts;
9523   rtx base, index, disp;
9524   int scale;
9525   int ok = ix86_decompose_address (addr, &parts);
9526
9527   gcc_assert (ok);
9528
9529   base = parts.base;
9530   index = parts.index;
9531   disp = parts.disp;
9532   scale = parts.scale;
9533
9534   switch (parts.seg)
9535     {
9536     case SEG_DEFAULT:
9537       break;
9538     case SEG_FS:
9539     case SEG_GS:
9540       if (ASSEMBLER_DIALECT == ASM_ATT)
9541         putc ('%', file);
9542       fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
9543       break;
9544     default:
9545       gcc_unreachable ();
9546     }
9547
9548   /* Use one byte shorter RIP relative addressing for 64bit mode.  */
9549   if (TARGET_64BIT && !base && !index)
9550     {
9551       rtx symbol = disp;
9552
9553       if (GET_CODE (disp) == CONST
9554           && GET_CODE (XEXP (disp, 0)) == PLUS
9555           && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
9556         symbol = XEXP (XEXP (disp, 0), 0);
9557
9558       if (GET_CODE (symbol) == LABEL_REF
9559           || (GET_CODE (symbol) == SYMBOL_REF
9560               && SYMBOL_REF_TLS_MODEL (symbol) == 0))
9561         base = pc_rtx;
9562     }
9563   if (!base && !index)
9564     {
9565       /* Displacement only requires special attention.  */
9566
9567       if (CONST_INT_P (disp))
9568         {
9569           if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
9570             fputs ("ds:", file);
9571           fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
9572         }
9573       else if (flag_pic)
9574         output_pic_addr_const (file, disp, 0);
9575       else
9576         output_addr_const (file, disp);
9577     }
9578   else
9579     {
9580       if (ASSEMBLER_DIALECT == ASM_ATT)
9581         {
9582           if (disp)
9583             {
9584               if (flag_pic)
9585                 output_pic_addr_const (file, disp, 0);
9586               else if (GET_CODE (disp) == LABEL_REF)
9587                 output_asm_label (disp);
9588               else
9589                 output_addr_const (file, disp);
9590             }
9591
9592           putc ('(', file);
9593           if (base)
9594             print_reg (base, 0, file);
9595           if (index)
9596             {
9597               putc (',', file);
9598               print_reg (index, 0, file);
9599               if (scale != 1)
9600                 fprintf (file, ",%d", scale);
9601             }
9602           putc (')', file);
9603         }
9604       else
9605         {
9606           rtx offset = NULL_RTX;
9607
9608           if (disp)
9609             {
9610               /* Pull out the offset of a symbol; print any symbol itself.  */
9611               if (GET_CODE (disp) == CONST
9612                   && GET_CODE (XEXP (disp, 0)) == PLUS
9613                   && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
9614                 {
9615                   offset = XEXP (XEXP (disp, 0), 1);
9616                   disp = gen_rtx_CONST (VOIDmode,
9617                                         XEXP (XEXP (disp, 0), 0));
9618                 }
9619
9620               if (flag_pic)
9621                 output_pic_addr_const (file, disp, 0);
9622               else if (GET_CODE (disp) == LABEL_REF)
9623                 output_asm_label (disp);
9624               else if (CONST_INT_P (disp))
9625                 offset = disp;
9626               else
9627                 output_addr_const (file, disp);
9628             }
9629
9630           putc ('[', file);
9631           if (base)
9632             {
9633               print_reg (base, 0, file);
9634               if (offset)
9635                 {
9636                   if (INTVAL (offset) >= 0)
9637                     putc ('+', file);
9638                   fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
9639                 }
9640             }
9641           else if (offset)
9642             fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
9643           else
9644             putc ('0', file);
9645
9646           if (index)
9647             {
9648               putc ('+', file);
9649               print_reg (index, 0, file);
9650               if (scale != 1)
9651                 fprintf (file, "*%d", scale);
9652             }
9653           putc (']', file);
9654         }
9655     }
9656 }
9657
9658 bool
9659 output_addr_const_extra (FILE *file, rtx x)
9660 {
9661   rtx op;
9662
9663   if (GET_CODE (x) != UNSPEC)
9664     return false;
9665
9666   op = XVECEXP (x, 0, 0);
9667   switch (XINT (x, 1))
9668     {
9669     case UNSPEC_GOTTPOFF:
9670       output_addr_const (file, op);
9671       /* FIXME: This might be @TPOFF in Sun ld.  */
9672       fputs ("@GOTTPOFF", file);
9673       break;
9674     case UNSPEC_TPOFF:
9675       output_addr_const (file, op);
9676       fputs ("@TPOFF", file);
9677       break;
9678     case UNSPEC_NTPOFF:
9679       output_addr_const (file, op);
9680       if (TARGET_64BIT)
9681         fputs ("@TPOFF", file);
9682       else
9683         fputs ("@NTPOFF", file);
9684       break;
9685     case UNSPEC_DTPOFF:
9686       output_addr_const (file, op);
9687       fputs ("@DTPOFF", file);
9688       break;
9689     case UNSPEC_GOTNTPOFF:
9690       output_addr_const (file, op);
9691       if (TARGET_64BIT)
9692         fputs (ASSEMBLER_DIALECT == ASM_ATT ?
9693                "@GOTTPOFF(%rip)" : "@GOTTPOFF[rip]", file);
9694       else
9695         fputs ("@GOTNTPOFF", file);
9696       break;
9697     case UNSPEC_INDNTPOFF:
9698       output_addr_const (file, op);
9699       fputs ("@INDNTPOFF", file);
9700       break;
9701
9702     default:
9703       return false;
9704     }
9705
9706   return true;
9707 }
9708 \f
9709 /* Split one or more DImode RTL references into pairs of SImode
9710    references.  The RTL can be REG, offsettable MEM, integer constant, or
9711    CONST_DOUBLE.  "operands" is a pointer to an array of DImode RTL to
9712    split and "num" is its length.  lo_half and hi_half are output arrays
9713    that parallel "operands".  */
9714
9715 void
9716 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
9717 {
9718   while (num--)
9719     {
9720       rtx op = operands[num];
9721
9722       /* simplify_subreg refuse to split volatile memory addresses,
9723          but we still have to handle it.  */
9724       if (MEM_P (op))
9725         {
9726           lo_half[num] = adjust_address (op, SImode, 0);
9727           hi_half[num] = adjust_address (op, SImode, 4);
9728         }
9729       else
9730         {
9731           lo_half[num] = simplify_gen_subreg (SImode, op,
9732                                               GET_MODE (op) == VOIDmode
9733                                               ? DImode : GET_MODE (op), 0);
9734           hi_half[num] = simplify_gen_subreg (SImode, op,
9735                                               GET_MODE (op) == VOIDmode
9736                                               ? DImode : GET_MODE (op), 4);
9737         }
9738     }
9739 }
9740 /* Split one or more TImode RTL references into pairs of DImode
9741    references.  The RTL can be REG, offsettable MEM, integer constant, or
9742    CONST_DOUBLE.  "operands" is a pointer to an array of DImode RTL to
9743    split and "num" is its length.  lo_half and hi_half are output arrays
9744    that parallel "operands".  */
9745
9746 void
9747 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
9748 {
9749   while (num--)
9750     {
9751       rtx op = operands[num];
9752
9753       /* simplify_subreg refuse to split volatile memory addresses, but we
9754          still have to handle it.  */
9755       if (MEM_P (op))
9756         {
9757           lo_half[num] = adjust_address (op, DImode, 0);
9758           hi_half[num] = adjust_address (op, DImode, 8);
9759         }
9760       else
9761         {
9762           lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
9763           hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
9764         }
9765     }
9766 }
9767 \f
9768 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
9769    MINUS, MULT or DIV.  OPERANDS are the insn operands, where operands[3]
9770    is the expression of the binary operation.  The output may either be
9771    emitted here, or returned to the caller, like all output_* functions.
9772
9773    There is no guarantee that the operands are the same mode, as they
9774    might be within FLOAT or FLOAT_EXTEND expressions.  */
9775
9776 #ifndef SYSV386_COMPAT
9777 /* Set to 1 for compatibility with brain-damaged assemblers.  No-one
9778    wants to fix the assemblers because that causes incompatibility
9779    with gcc.  No-one wants to fix gcc because that causes
9780    incompatibility with assemblers...  You can use the option of
9781    -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way.  */
9782 #define SYSV386_COMPAT 1
9783 #endif
9784
9785 const char *
9786 output_387_binary_op (rtx insn, rtx *operands)
9787 {
9788   static char buf[30];
9789   const char *p;
9790   const char *ssep;
9791   int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
9792
9793 #ifdef ENABLE_CHECKING
9794   /* Even if we do not want to check the inputs, this documents input
9795      constraints.  Which helps in understanding the following code.  */
9796   if (STACK_REG_P (operands[0])
9797       && ((REG_P (operands[1])
9798            && REGNO (operands[0]) == REGNO (operands[1])
9799            && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
9800           || (REG_P (operands[2])
9801               && REGNO (operands[0]) == REGNO (operands[2])
9802               && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
9803       && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
9804     ; /* ok */
9805   else
9806     gcc_assert (is_sse);
9807 #endif
9808
9809   switch (GET_CODE (operands[3]))
9810     {
9811     case PLUS:
9812       if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9813           || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9814         p = "fiadd";
9815       else
9816         p = "fadd";
9817       ssep = "add";
9818       break;
9819
9820     case MINUS:
9821       if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9822           || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9823         p = "fisub";
9824       else
9825         p = "fsub";
9826       ssep = "sub";
9827       break;
9828
9829     case MULT:
9830       if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9831           || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9832         p = "fimul";
9833       else
9834         p = "fmul";
9835       ssep = "mul";
9836       break;
9837
9838     case DIV:
9839       if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9840           || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9841         p = "fidiv";
9842       else
9843         p = "fdiv";
9844       ssep = "div";
9845       break;
9846
9847     default:
9848       gcc_unreachable ();
9849     }
9850
9851   if (is_sse)
9852    {
9853       strcpy (buf, ssep);
9854       if (GET_MODE (operands[0]) == SFmode)
9855         strcat (buf, "ss\t{%2, %0|%0, %2}");
9856       else
9857         strcat (buf, "sd\t{%2, %0|%0, %2}");
9858       return buf;
9859    }
9860   strcpy (buf, p);
9861
9862   switch (GET_CODE (operands[3]))
9863     {
9864     case MULT:
9865     case PLUS:
9866       if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
9867         {
9868           rtx temp = operands[2];
9869           operands[2] = operands[1];
9870           operands[1] = temp;
9871         }
9872
9873       /* know operands[0] == operands[1].  */
9874
9875       if (MEM_P (operands[2]))
9876         {
9877           p = "%z2\t%2";
9878           break;
9879         }
9880
9881       if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
9882         {
9883           if (STACK_TOP_P (operands[0]))
9884             /* How is it that we are storing to a dead operand[2]?
9885                Well, presumably operands[1] is dead too.  We can't
9886                store the result to st(0) as st(0) gets popped on this
9887                instruction.  Instead store to operands[2] (which I
9888                think has to be st(1)).  st(1) will be popped later.
9889                gcc <= 2.8.1 didn't have this check and generated
9890                assembly code that the Unixware assembler rejected.  */
9891             p = "p\t{%0, %2|%2, %0}";   /* st(1) = st(0) op st(1); pop */
9892           else
9893             p = "p\t{%2, %0|%0, %2}";   /* st(r1) = st(r1) op st(0); pop */
9894           break;
9895         }
9896
9897       if (STACK_TOP_P (operands[0]))
9898         p = "\t{%y2, %0|%0, %y2}";      /* st(0) = st(0) op st(r2) */
9899       else
9900         p = "\t{%2, %0|%0, %2}";        /* st(r1) = st(r1) op st(0) */
9901       break;
9902
9903     case MINUS:
9904     case DIV:
9905       if (MEM_P (operands[1]))
9906         {
9907           p = "r%z1\t%1";
9908           break;
9909         }
9910
9911       if (MEM_P (operands[2]))
9912         {
9913           p = "%z2\t%2";
9914           break;
9915         }
9916
9917       if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
9918         {
9919 #if SYSV386_COMPAT
9920           /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
9921              derived assemblers, confusingly reverse the direction of
9922              the operation for fsub{r} and fdiv{r} when the
9923              destination register is not st(0).  The Intel assembler
9924              doesn't have this brain damage.  Read !SYSV386_COMPAT to
9925              figure out what the hardware really does.  */
9926           if (STACK_TOP_P (operands[0]))
9927             p = "{p\t%0, %2|rp\t%2, %0}";
9928           else
9929             p = "{rp\t%2, %0|p\t%0, %2}";
9930 #else
9931           if (STACK_TOP_P (operands[0]))
9932             /* As above for fmul/fadd, we can't store to st(0).  */
9933             p = "rp\t{%0, %2|%2, %0}";  /* st(1) = st(0) op st(1); pop */
9934           else
9935             p = "p\t{%2, %0|%0, %2}";   /* st(r1) = st(r1) op st(0); pop */
9936 #endif
9937           break;
9938         }
9939
9940       if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
9941         {
9942 #if SYSV386_COMPAT
9943           if (STACK_TOP_P (operands[0]))
9944             p = "{rp\t%0, %1|p\t%1, %0}";
9945           else
9946             p = "{p\t%1, %0|rp\t%0, %1}";
9947 #else
9948           if (STACK_TOP_P (operands[0]))
9949             p = "p\t{%0, %1|%1, %0}";   /* st(1) = st(1) op st(0); pop */
9950           else
9951             p = "rp\t{%1, %0|%0, %1}";  /* st(r2) = st(0) op st(r2); pop */
9952 #endif
9953           break;
9954         }
9955
9956       if (STACK_TOP_P (operands[0]))
9957         {
9958           if (STACK_TOP_P (operands[1]))
9959             p = "\t{%y2, %0|%0, %y2}";  /* st(0) = st(0) op st(r2) */
9960           else
9961             p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
9962           break;
9963         }
9964       else if (STACK_TOP_P (operands[1]))
9965         {
9966 #if SYSV386_COMPAT
9967           p = "{\t%1, %0|r\t%0, %1}";
9968 #else
9969           p = "r\t{%1, %0|%0, %1}";     /* st(r2) = st(0) op st(r2) */
9970 #endif
9971         }
9972       else
9973         {
9974 #if SYSV386_COMPAT
9975           p = "{r\t%2, %0|\t%0, %2}";
9976 #else
9977           p = "\t{%2, %0|%0, %2}";      /* st(r1) = st(r1) op st(0) */
9978 #endif
9979         }
9980       break;
9981
9982     default:
9983       gcc_unreachable ();
9984     }
9985
9986   strcat (buf, p);
9987   return buf;
9988 }
9989
9990 /* Return needed mode for entity in optimize_mode_switching pass.  */
9991
9992 int
9993 ix86_mode_needed (int entity, rtx insn)
9994 {
9995   enum attr_i387_cw mode;
9996
9997   /* The mode UNINITIALIZED is used to store control word after a
9998      function call or ASM pattern.  The mode ANY specify that function
9999      has no requirements on the control word and make no changes in the
10000      bits we are interested in.  */
10001
10002   if (CALL_P (insn)
10003       || (NONJUMP_INSN_P (insn)
10004           && (asm_noperands (PATTERN (insn)) >= 0
10005               || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
10006     return I387_CW_UNINITIALIZED;
10007
10008   if (recog_memoized (insn) < 0)
10009     return I387_CW_ANY;
10010
10011   mode = get_attr_i387_cw (insn);
10012
10013   switch (entity)
10014     {
10015     case I387_TRUNC:
10016       if (mode == I387_CW_TRUNC)
10017         return mode;
10018       break;
10019
10020     case I387_FLOOR:
10021       if (mode == I387_CW_FLOOR)
10022         return mode;
10023       break;
10024
10025     case I387_CEIL:
10026       if (mode == I387_CW_CEIL)
10027         return mode;
10028       break;
10029
10030     case I387_MASK_PM:
10031       if (mode == I387_CW_MASK_PM)
10032         return mode;
10033       break;
10034
10035     default:
10036       gcc_unreachable ();
10037     }
10038
10039   return I387_CW_ANY;
10040 }
10041
10042 /* Output code to initialize control word copies used by trunc?f?i and
10043    rounding patterns.  CURRENT_MODE is set to current control word,
10044    while NEW_MODE is set to new control word.  */
10045
10046 void
10047 emit_i387_cw_initialization (int mode)
10048 {
10049   rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
10050   rtx new_mode;
10051
10052   enum ix86_stack_slot slot;
10053
10054   rtx reg = gen_reg_rtx (HImode);
10055
10056   emit_insn (gen_x86_fnstcw_1 (stored_mode));
10057   emit_move_insn (reg, copy_rtx (stored_mode));
10058
10059   if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
10060     {
10061       switch (mode)
10062         {
10063         case I387_CW_TRUNC:
10064           /* round toward zero (truncate) */
10065           emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
10066           slot = SLOT_CW_TRUNC;
10067           break;
10068
10069         case I387_CW_FLOOR:
10070           /* round down toward -oo */
10071           emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
10072           emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
10073           slot = SLOT_CW_FLOOR;
10074           break;
10075
10076         case I387_CW_CEIL:
10077           /* round up toward +oo */
10078           emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
10079           emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
10080           slot = SLOT_CW_CEIL;
10081           break;
10082
10083         case I387_CW_MASK_PM:
10084           /* mask precision exception for nearbyint() */
10085           emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
10086           slot = SLOT_CW_MASK_PM;
10087           break;
10088
10089         default:
10090           gcc_unreachable ();
10091         }
10092     }
10093   else
10094     {
10095       switch (mode)
10096         {
10097         case I387_CW_TRUNC:
10098           /* round toward zero (truncate) */
10099           emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
10100           slot = SLOT_CW_TRUNC;
10101           break;
10102
10103         case I387_CW_FLOOR:
10104           /* round down toward -oo */
10105           emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
10106           slot = SLOT_CW_FLOOR;
10107           break;
10108
10109         case I387_CW_CEIL:
10110           /* round up toward +oo */
10111           emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
10112           slot = SLOT_CW_CEIL;
10113           break;
10114
10115         case I387_CW_MASK_PM:
10116           /* mask precision exception for nearbyint() */
10117           emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
10118           slot = SLOT_CW_MASK_PM;
10119           break;
10120
10121         default:
10122           gcc_unreachable ();
10123         }
10124     }
10125
10126   gcc_assert (slot < MAX_386_STACK_LOCALS);
10127
10128   new_mode = assign_386_stack_local (HImode, slot);
10129   emit_move_insn (new_mode, reg);
10130 }
10131
10132 /* Output code for INSN to convert a float to a signed int.  OPERANDS
10133    are the insn operands.  The output may be [HSD]Imode and the input
10134    operand may be [SDX]Fmode.  */
10135
10136 const char *
10137 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
10138 {
10139   int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
10140   int dimode_p = GET_MODE (operands[0]) == DImode;
10141   int round_mode = get_attr_i387_cw (insn);
10142
10143   /* Jump through a hoop or two for DImode, since the hardware has no
10144      non-popping instruction.  We used to do this a different way, but
10145      that was somewhat fragile and broke with post-reload splitters.  */
10146   if ((dimode_p || fisttp) && !stack_top_dies)
10147     output_asm_insn ("fld\t%y1", operands);
10148
10149   gcc_assert (STACK_TOP_P (operands[1]));
10150   gcc_assert (MEM_P (operands[0]));
10151   gcc_assert (GET_MODE (operands[1]) != TFmode);
10152
10153   if (fisttp)
10154       output_asm_insn ("fisttp%z0\t%0", operands);
10155   else
10156     {
10157       if (round_mode != I387_CW_ANY)
10158         output_asm_insn ("fldcw\t%3", operands);
10159       if (stack_top_dies || dimode_p)
10160         output_asm_insn ("fistp%z0\t%0", operands);
10161       else
10162         output_asm_insn ("fist%z0\t%0", operands);
10163       if (round_mode != I387_CW_ANY)
10164         output_asm_insn ("fldcw\t%2", operands);
10165     }
10166
10167   return "";
10168 }
10169
10170 /* Output code for x87 ffreep insn.  The OPNO argument, which may only
10171    have the values zero or one, indicates the ffreep insn's operand
10172    from the OPERANDS array.  */
10173
10174 static const char *
10175 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
10176 {
10177   if (TARGET_USE_FFREEP)
10178 #if HAVE_AS_IX86_FFREEP
10179     return opno ? "ffreep\t%y1" : "ffreep\t%y0";
10180 #else
10181     {
10182       static char retval[] = ".word\t0xc_df";
10183       int regno = REGNO (operands[opno]);
10184
10185       gcc_assert (FP_REGNO_P (regno));
10186
10187       retval[9] = '0' + (regno - FIRST_STACK_REG);
10188       return retval;
10189     }
10190 #endif
10191
10192   return opno ? "fstp\t%y1" : "fstp\t%y0";
10193 }
10194
10195
10196 /* Output code for INSN to compare OPERANDS.  EFLAGS_P is 1 when fcomi
10197    should be used.  UNORDERED_P is true when fucom should be used.  */
10198
10199 const char *
10200 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
10201 {
10202   int stack_top_dies;
10203   rtx cmp_op0, cmp_op1;
10204   int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
10205
10206   if (eflags_p)
10207     {
10208       cmp_op0 = operands[0];
10209       cmp_op1 = operands[1];
10210     }
10211   else
10212     {
10213       cmp_op0 = operands[1];
10214       cmp_op1 = operands[2];
10215     }
10216
10217   if (is_sse)
10218     {
10219       if (GET_MODE (operands[0]) == SFmode)
10220         if (unordered_p)
10221           return "ucomiss\t{%1, %0|%0, %1}";
10222         else
10223           return "comiss\t{%1, %0|%0, %1}";
10224       else
10225         if (unordered_p)
10226           return "ucomisd\t{%1, %0|%0, %1}";
10227         else
10228           return "comisd\t{%1, %0|%0, %1}";
10229     }
10230
10231   gcc_assert (STACK_TOP_P (cmp_op0));
10232
10233   stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
10234
10235   if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
10236     {
10237       if (stack_top_dies)
10238         {
10239           output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
10240           return output_387_ffreep (operands, 1);
10241         }
10242       else
10243         return "ftst\n\tfnstsw\t%0";
10244     }
10245
10246   if (STACK_REG_P (cmp_op1)
10247       && stack_top_dies
10248       && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
10249       && REGNO (cmp_op1) != FIRST_STACK_REG)
10250     {
10251       /* If both the top of the 387 stack dies, and the other operand
10252          is also a stack register that dies, then this must be a
10253          `fcompp' float compare */
10254
10255       if (eflags_p)
10256         {
10257           /* There is no double popping fcomi variant.  Fortunately,
10258              eflags is immune from the fstp's cc clobbering.  */
10259           if (unordered_p)
10260             output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
10261           else
10262             output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
10263           return output_387_ffreep (operands, 0);
10264         }
10265       else
10266         {
10267           if (unordered_p)
10268             return "fucompp\n\tfnstsw\t%0";
10269           else
10270             return "fcompp\n\tfnstsw\t%0";
10271         }
10272     }
10273   else
10274     {
10275       /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies.  */
10276
10277       static const char * const alt[16] =
10278       {
10279         "fcom%z2\t%y2\n\tfnstsw\t%0",
10280         "fcomp%z2\t%y2\n\tfnstsw\t%0",
10281         "fucom%z2\t%y2\n\tfnstsw\t%0",
10282         "fucomp%z2\t%y2\n\tfnstsw\t%0",
10283
10284         "ficom%z2\t%y2\n\tfnstsw\t%0",
10285         "ficomp%z2\t%y2\n\tfnstsw\t%0",
10286         NULL,
10287         NULL,
10288
10289         "fcomi\t{%y1, %0|%0, %y1}",
10290         "fcomip\t{%y1, %0|%0, %y1}",
10291         "fucomi\t{%y1, %0|%0, %y1}",
10292         "fucomip\t{%y1, %0|%0, %y1}",
10293
10294         NULL,
10295         NULL,
10296         NULL,
10297         NULL
10298       };
10299
10300       int mask;
10301       const char *ret;
10302
10303       mask  = eflags_p << 3;
10304       mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
10305       mask |= unordered_p << 1;
10306       mask |= stack_top_dies;
10307
10308       gcc_assert (mask < 16);
10309       ret = alt[mask];
10310       gcc_assert (ret);
10311
10312       return ret;
10313     }
10314 }
10315
10316 void
10317 ix86_output_addr_vec_elt (FILE *file, int value)
10318 {
10319   const char *directive = ASM_LONG;
10320
10321 #ifdef ASM_QUAD
10322   if (TARGET_64BIT)
10323     directive = ASM_QUAD;
10324 #else
10325   gcc_assert (!TARGET_64BIT);
10326 #endif
10327
10328   fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
10329 }
10330
10331 void
10332 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
10333 {
10334   const char *directive = ASM_LONG;
10335
10336 #ifdef ASM_QUAD
10337   if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
10338     directive = ASM_QUAD;
10339 #else
10340   gcc_assert (!TARGET_64BIT);
10341 #endif
10342   /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand.  */
10343   if (TARGET_64BIT || TARGET_VXWORKS_RTP)
10344     fprintf (file, "%s%s%d-%s%d\n",
10345              directive, LPREFIX, value, LPREFIX, rel);
10346   else if (HAVE_AS_GOTOFF_IN_DATA)
10347     fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
10348 #if TARGET_MACHO
10349   else if (TARGET_MACHO)
10350     {
10351       fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
10352       machopic_output_function_base_name (file);
10353       fprintf(file, "\n");
10354     }
10355 #endif
10356   else
10357     asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
10358                  ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
10359 }
10360 \f
10361 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
10362    for the target.  */
10363
10364 void
10365 ix86_expand_clear (rtx dest)
10366 {
10367   rtx tmp;
10368
10369   /* We play register width games, which are only valid after reload.  */
10370   gcc_assert (reload_completed);
10371
10372   /* Avoid HImode and its attendant prefix byte.  */
10373   if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
10374     dest = gen_rtx_REG (SImode, REGNO (dest));
10375   tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
10376
10377   /* This predicate should match that for movsi_xor and movdi_xor_rex64.  */
10378   if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
10379     {
10380       rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10381       tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
10382     }
10383
10384   emit_insn (tmp);
10385 }
10386
10387 /* Given X, a MEM that never changes, look at its delegitimized address:
10388    return the constant pool entry it names, or NULL_RTX if it names none.  */
10389
10390 rtx
10391 maybe_get_pool_constant (rtx x)
10392 {
10393   rtx addr = ix86_delegitimize_address (XEXP (x, 0));
10394
10395   if (GET_CODE (addr) != SYMBOL_REF || !CONSTANT_POOL_ADDRESS_P (addr))
10396     return NULL_RTX;
10397
10398   return get_pool_constant (addr);
10399 }
10400
10401 void
10402 ix86_expand_move (enum machine_mode mode, rtx operands[])
10403 {
10404   rtx op0, op1;
10405   enum tls_model model;
10406
10407   op0 = operands[0];
10408   op1 = operands[1];
10409
10410   if (GET_CODE (op1) == SYMBOL_REF)
10411     {
10412       model = SYMBOL_REF_TLS_MODEL (op1);
10413       if (model)
10414         {
10415           op1 = legitimize_tls_address (op1, model, true);
10416           op1 = force_operand (op1, op0);
10417           if (op1 == op0)
10418             return;
10419         }
10420       else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
10421                && SYMBOL_REF_DLLIMPORT_P (op1))
10422         op1 = legitimize_dllimport_symbol (op1, false);
10423     }
10424   else if (GET_CODE (op1) == CONST
10425            && GET_CODE (XEXP (op1, 0)) == PLUS
10426            && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
10427     {
10428       rtx addend = XEXP (XEXP (op1, 0), 1);
10429       rtx symbol = XEXP (XEXP (op1, 0), 0);
10430       rtx tmp = NULL;
10431
10432       model = SYMBOL_REF_TLS_MODEL (symbol);
10433       if (model)
10434         tmp = legitimize_tls_address (symbol, model, true);
10435       else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
10436                && SYMBOL_REF_DLLIMPORT_P (symbol))
10437         tmp = legitimize_dllimport_symbol (symbol, true);
10438
10439       if (tmp)
10440         {
10441           tmp = force_operand (tmp, NULL);
10442           tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
10443                                      op0, 1, OPTAB_DIRECT);
10444           if (tmp == op0)
10445             return;
10446         }
10447     }
10448
10449   if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
10450     {
10451       if (TARGET_MACHO && !TARGET_64BIT)
10452         {
10453 #if TARGET_MACHO
10454           if (MACHOPIC_PURE)
10455             {
10456               rtx temp = ((reload_in_progress
10457                            || ((op0 && REG_P (op0))
10458                                && mode == Pmode))
10459                           ? op0 : gen_reg_rtx (Pmode));
10460               op1 = machopic_indirect_data_reference (op1, temp);
10461               op1 = machopic_legitimize_pic_address (op1, mode,
10462                                                      temp == op1 ? 0 : temp);
10463             }
10464           else if (MACHOPIC_INDIRECT)
10465             op1 = machopic_indirect_data_reference (op1, 0);
10466           if (op0 == op1)
10467             return;
10468 #endif
10469         }
10470       else
10471         {
10472           if (MEM_P (op0))
10473             op1 = force_reg (Pmode, op1);
10474           else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
10475             {
10476               rtx reg = !can_create_pseudo_p () ? op0 : NULL_RTX;
10477               op1 = legitimize_pic_address (op1, reg);
10478               if (op0 == op1)
10479                 return;
10480             }
10481         }
10482     }
10483   else
10484     {
10485       if (MEM_P (op0)
10486           && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
10487               || !push_operand (op0, mode))
10488           && MEM_P (op1))
10489         op1 = force_reg (mode, op1);
10490
10491       if (push_operand (op0, mode)
10492           && ! general_no_elim_operand (op1, mode))
10493         op1 = copy_to_mode_reg (mode, op1);
10494
10495       /* Force large constants in 64bit compilation into register
10496          to get them CSEed.  */
10497       if (can_create_pseudo_p ()
10498           && (mode == DImode) && TARGET_64BIT
10499           && immediate_operand (op1, mode)
10500           && !x86_64_zext_immediate_operand (op1, VOIDmode)
10501           && !register_operand (op0, mode)
10502           && optimize)
10503         op1 = copy_to_mode_reg (mode, op1);
10504
10505       if (can_create_pseudo_p ()
10506           && FLOAT_MODE_P (mode)
10507           && GET_CODE (op1) == CONST_DOUBLE)
10508         {
10509           /* If we are loading a floating point constant to a register,
10510              force the value to memory now, since we'll get better code
10511              out the back end.  */
10512
10513           op1 = validize_mem (force_const_mem (mode, op1));
10514           if (!register_operand (op0, mode))
10515             {
10516               rtx temp = gen_reg_rtx (mode);
10517               emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
10518               emit_move_insn (op0, temp);
10519               return;
10520             }
10521         }
10522     }
10523
10524   emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
10525 }
10526 /* Expand a MODE vector move of operands[1] into operands[0].  */
10527 void
10528 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
10529 {
10530   rtx op0 = operands[0], op1 = operands[1];
10531   unsigned int align = GET_MODE_ALIGNMENT (mode);  /* Compared to MEM_ALIGN below.  */
10532
10533   /* Force constants that are not standard SSE constants into memory when
10534      the destination is a register.  We do not know how the instructions
10535      used to build constants modify the upper 64 bits of the register; once
10536      we have that information we may be able to handle some more efficiently.  */
10537   if (can_create_pseudo_p ()
10538       && register_operand (op0, mode)
10539       && (CONSTANT_P (op1)
10540           || (GET_CODE (op1) == SUBREG
10541               && CONSTANT_P (SUBREG_REG (op1))))
10542       && standard_sse_constant_p (op1) <= 0)
10543     op1 = validize_mem (force_const_mem (mode, op1));
10544
10545   /* We need to check memory alignment for SSE modes, since attributes can
10546      leave a MEM under-aligned; such moves go to the misaligned expander.  */
10547   if (can_create_pseudo_p ()
10548       && SSE_REG_MODE_P (mode)
10549       && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
10550           || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
10551     {
10552       rtx tmp[2];  /* Operand array handed to the misaligned expander.  */
10553
10554       /* ix86_expand_vector_move_misalign() does not like constants ... */
10555       if (CONSTANT_P (op1)
10556           || (GET_CODE (op1) == SUBREG
10557               && CONSTANT_P (SUBREG_REG (op1))))
10558         op1 = validize_mem (force_const_mem (mode, op1));
10559
10560       /* ... nor both arguments in memory.  */
10561       if (!register_operand (op0, mode)
10562           && !register_operand (op1, mode))
10563         op1 = force_reg (mode, op1);
10564
10565       tmp[0] = op0; tmp[1] = op1;
10566       ix86_expand_vector_move_misalign (mode, tmp);
10567       return;
10568     }
10569
10570   /* Neither operand is a register: go through a fresh register.  */
10571   if (can_create_pseudo_p ()
10572       && !register_operand (op0, mode)
10573       && !register_operand (op1, mode))
10574     {
10575       emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
10576       return;
10577     }
10578
10579   emit_insn (gen_rtx_SET (VOIDmode, op0, op1));  /* The ordinary case: a plain SET.  */
10580 }
10581
10582 /* Implement the movmisalign patterns for SSE.  Non-SSE modes go
10583    straight to ix86_expand_vector_move.  One of the operands must be a MEM.  */
10584 /* Code generation for scalar reg-reg moves of single and double precision data:
10585      if (x86_sse_partial_reg_dependency == true || x86_sse_split_regs == true)
10586        movaps reg, reg
10587      else
10588        movss reg, reg
10589      if (x86_sse_partial_reg_dependency == true)
10590        movapd reg, reg
10591      else
10592        movsd reg, reg
10593
10594    Code generation for scalar loads of double precision data:
10595      if (x86_sse_split_regs == true)
10596        movlpd mem, reg      (gas syntax)
10597      else
10598        movsd mem, reg
10599
10600    Code generation for unaligned packed loads of single precision data
10601    (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
10602      if (x86_sse_unaligned_move_optimal)
10603        movups mem, reg
10604
10605      else if (x86_sse_partial_reg_dependency == true)
10606        {
10607          xorps  reg, reg
10608          movlps mem, reg
10609          movhps mem+8, reg
10610        }
10611      else
10612        {
10613          movlps mem, reg
10614          movhps mem+8, reg
10615        }
10616
10617    Code generation for unaligned packed loads of double precision data
10618    (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
10619      if (x86_sse_unaligned_move_optimal)
10620        movupd mem, reg
10621
10622      else if (x86_sse_split_regs == true)
10623        {
10624          movlpd mem, reg
10625          movhpd mem+8, reg
10626        }
10627      else
10628        {
10629          movsd  mem, reg
10630          movhpd mem+8, reg
10631        }
10632  */
10633
10634 void
10635 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
10636 {
10637   rtx op0, op1, m;
10638
10639   op0 = operands[0];
10640   op1 = operands[1];
10641
10642   if (MEM_P (op1))  /* Source is memory: an unaligned load.  */
10643     {
10644       /* If we're optimizing for size, movups is the smallest.  */
10645       if (optimize_size)
10646         {
10647           op0 = gen_lowpart (V4SFmode, op0);
10648           op1 = gen_lowpart (V4SFmode, op1);
10649           emit_insn (gen_sse_movups (op0, op1));
10650           return;
10651         }
10652
10653       /* ??? If we have typed data, then it would appear that using
10654          movdqu is the only way to get unaligned data loaded with
10655          integer type.  */
10656       if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
10657         {
10658           op0 = gen_lowpart (V16QImode, op0);
10659           op1 = gen_lowpart (V16QImode, op1);
10660           emit_insn (gen_sse2_movdqu (op0, op1));
10661           return;
10662         }
10663
10664       if (TARGET_SSE2 && mode == V2DFmode)
10665         {
10666           rtx zero;  /* Second operand of the low-half load below.  */
10667
10668           if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
10669             {
10670               op0 = gen_lowpart (V2DFmode, op0);
10671               op1 = gen_lowpart (V2DFmode, op1);
10672               emit_insn (gen_sse2_movupd (op0, op1));
10673               return;
10674             }
10675
10676           /* When SSE registers are split into halves, we can avoid
10677              writing to the top half twice.  */
10678           if (TARGET_SSE_SPLIT_REGS)
10679             {
10680               emit_clobber (op0);
10681               zero = op0;
10682             }
10683           else
10684             {
10685               /* ??? Not sure about the best option for the Intel chips.
10686                  The following would seem to satisfy; the register is
10687                  entirely cleared, breaking the dependency chain.  We
10688                  then store to the upper half, with a dependency depth
10689                  of one.  A rumor has it that Intel recommends two movsd
10690                  followed by an unpacklpd, but this is unconfirmed.  And
10691                  given that the dependency depth of the unpacklpd would
10692                  still be one, I'm not sure why this would be better.  */
10693               zero = CONST0_RTX (V2DFmode);
10694             }
10695           /* Load the two DFmode halves separately, from offsets 0 and 8.  */
10696           m = adjust_address (op1, DFmode, 0);
10697           emit_insn (gen_sse2_loadlpd (op0, zero, m));
10698           m = adjust_address (op1, DFmode, 8);
10699           emit_insn (gen_sse2_loadhpd (op0, op0, m));
10700         }
10701       else
10702         {
10703           if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
10704             {
10705               op0 = gen_lowpart (V4SFmode, op0);
10706               op1 = gen_lowpart (V4SFmode, op1);
10707               emit_insn (gen_sse_movups (op0, op1));
10708               return;
10709             }
10710           /* Either zero OP0 first or merely mark it as clobbered.  */
10711           if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
10712             emit_move_insn (op0, CONST0_RTX (mode));
10713           else
10714             emit_clobber (op0);
10715           /* Load as two V2SFmode halves, from offsets 0 and 8.  */
10716           if (mode != V4SFmode)
10717             op0 = gen_lowpart (V4SFmode, op0);
10718           m = adjust_address (op1, V2SFmode, 0);
10719           emit_insn (gen_sse_loadlps (op0, op0, m));
10720           m = adjust_address (op1, V2SFmode, 8);
10721           emit_insn (gen_sse_loadhps (op0, op0, m));
10722         }
10723     }
10724   else if (MEM_P (op0))  /* Destination is memory: an unaligned store.  */
10725     {
10726       /* If we're optimizing for size, movups is the smallest.  */
10727       if (optimize_size)
10728         {
10729           op0 = gen_lowpart (V4SFmode, op0);
10730           op1 = gen_lowpart (V4SFmode, op1);
10731           emit_insn (gen_sse_movups (op0, op1));
10732           return;
10733         }
10734
10735       /* ??? Similar to above, only less clear because of quote
10736          typeless stores unquote.  */
10737       if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
10738           && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
10739         {
10740           op0 = gen_lowpart (V16QImode, op0);
10741           op1 = gen_lowpart (V16QImode, op1);
10742           emit_insn (gen_sse2_movdqu (op0, op1));
10743           return;
10744         }
10745       /* Otherwise store the two halves separately, at offsets 0 and 8.  */
10746       if (TARGET_SSE2 && mode == V2DFmode)
10747         {
10748           m = adjust_address (op0, DFmode, 0);
10749           emit_insn (gen_sse2_storelpd (m, op1));
10750           m = adjust_address (op0, DFmode, 8);
10751           emit_insn (gen_sse2_storehpd (m, op1));
10752         }
10753       else
10754         {
10755           if (mode != V4SFmode)
10756             op1 = gen_lowpart (V4SFmode, op1);
10757           m = adjust_address (op0, V2SFmode, 0);
10758           emit_insn (gen_sse_storelps (m, op1));
10759           m = adjust_address (op0, V2SFmode, 8);
10760           emit_insn (gen_sse_storehps (m, op1));
10761         }
10762     }
10763   else
10764     gcc_unreachable ();  /* Neither operand is a MEM.  */
10765 }
10766
10767 /* Push X, a value of mode MODE, without a push instruction: lower the
10768    stack pointer by the size of MODE and store X at the new stack top.
10769    Meant for modes we cannot push directly from where the value lives.  */
10770
10771 void
10772 ix86_expand_push (enum machine_mode mode, rtx x)
10773 {
10774   rtx new_sp;
10775
10776   /* Decrement the stack pointer, asking for the result in place.  */
10777   new_sp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
10778                                 GEN_INT (-GET_MODE_SIZE (mode)),
10779                                 stack_pointer_rtx, 1, OPTAB_DIRECT);
10780   if (new_sp != stack_pointer_rtx)
10781     emit_move_insn (stack_pointer_rtx, new_sp);
10782
10783   emit_move_insn (gen_rtx_MEM (mode, stack_pointer_rtx), x);
10784 }
10785
10786 /* Helper for ix86_fixup_binary_operands and ix86_binary_operator_ok.  Return
10787    true if operands[1] and operands[2] should trade places for CODE.  */
10788
10789 static bool
10790 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
10791                              rtx operands[])
10792 {
10793   rtx dest = operands[0];
10794   rtx first = operands[1];
10795   rtx second = operands[2];
10796
10797   /* Only commutative operations may have their inputs exchanged.  */
10798   if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
10799     return false;
10800
10801   /* Rule 1: an input identical to the destination goes first.  */
10802   if (rtx_equal_p (dest, first))
10803     return false;
10804   if (rtx_equal_p (dest, second))
10805     return true;
10806
10807   /* Rule 2: an immediate goes second.  */
10808   if (immediate_operand (second, mode))
10809     return false;
10810   if (immediate_operand (first, mode))
10811     return true;
10812
10813   /* Rule 3: a memory reference goes second.  So swap only when the first
10814      input is memory and the second is not; if no rule fires at all, the
10815      operands are left as they are.  */
10816   if (MEM_P (second))
10817     return false;
10818
10819   return MEM_P (first);
10820 }
10821
10822
10823 /* Adjust OPERANDS (destination, first input, second input) so that they
10824    satisfy ix86_binary_operator_ok.  The inputs are rewritten in place; the
10825    result is the destination to compute into, which the caller must copy to
10826    operands[0] afterwards whenever the two differ.  */
10827
10828 rtx
10829 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
10830                             rtx operands[])
10831 {
10832   rtx dest = operands[0];
10833   rtx lhs = operands[1];
10834   rtx rhs = operands[2];
10835
10836   /* Start by putting the inputs into canonical order.  */
10837   if (ix86_swap_binary_operands_p (code, mode, operands))
10838     {
10839       /* Exchanging inputs whose modes differ would be invalid.  */
10840       gcc_assert (GET_MODE (lhs) == GET_MODE (rhs));
10841
10842       lhs = operands[2];
10843       rhs = operands[1];
10844     }
10845
10846   /* The two inputs may not both stay in memory: the second one is loaded
10847      into a register.  */
10848   if (MEM_P (lhs) && MEM_P (rhs))
10849     {
10850       /* If both name the same location, read it once and share the
10851          register between the two inputs.  */
10852       bool same_location = rtx_equal_p (lhs, rhs);
10853
10854       rhs = force_reg (mode, rhs);
10855       if (same_location)
10856         lhs = rhs;
10857     }
10858
10859   /* A memory destination is usable only when it is also the first input;
10860      failing that, compute into a fresh register.  */
10861   if (MEM_P (dest) && !rtx_equal_p (dest, lhs))
10862     dest = gen_reg_rtx (mode);
10863
10864   /* The first input may be neither a constant ...  */
10865   if (CONSTANT_P (lhs))
10866     lhs = force_reg (mode, lhs);
10867
10868   /* ... nor a memory reference other than the destination itself.  */
10869   if (MEM_P (lhs) && !rtx_equal_p (dest, lhs))
10870     lhs = force_reg (mode, lhs);
10871
10872   /* Publish the adjusted inputs; operands[0] is deliberately untouched.  */
10873   operands[1] = lhs;
10874   operands[2] = rhs;
10875
10876   return dest;
10877 }
10878
10879 /* Like ix86_fixup_binary_operands, but for callers whose destination is
10880    already set up properly: the destination coming back must be operands[0].  */
10881
10882 void
10883 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
10884                                     enum machine_mode mode, rtx operands[])
10885 {
10886   rtx dst = ix86_fixup_binary_operands (code, mode, operands);  /* Rewrites operands[1..2].  */
10887   gcc_assert (dst == operands[0]);  /* NOTE(review): call presumably kept outside the assert on purpose.  */
10888 }
10889
10890 /* Attempt to expand a binary operator.  Make the expansion closer to the
10891    actual machine than just general_operand, which would allow 3 separate
10892    memory references (one output, two input) in a single insn.  */
10893
10894 void
10895 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
10896                              rtx operands[])
10897 {
10898   rtx src1, src2, dst, op, clob;
10899
10900   dst = ix86_fixup_binary_operands (code, mode, operands);  /* May differ from operands[0].  */
10901   src1 = operands[1];
10902   src2 = operands[2];
10903
10904   /* Emit the instruction: a bare SET during reload, else SET + flags clobber.  */
10905
10906   op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
10907   if (reload_in_progress)
10908     {
10909       /* Reload doesn't know about the flags register, and doesn't know that
10910          it doesn't want to clobber it.  We can only do this with PLUS.  */
10911       gcc_assert (code == PLUS);
10912       emit_insn (op);
10913     }
10914   else
10915     {
10916       clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10917       emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
10918     }
10919
10920   /* Copy the result back if the fixup substituted another destination.  */
10921   if (dst != operands[0])
10922     emit_move_insn (operands[0], dst);
10923 }
10924
10925 /* Predicate counterpart of ix86_fixup_binary_operands: return 1 if OPERANDS
10926    (destination, first input, second input) are acceptable for CODE, else 0.  */
10927
10928 int
10929 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
10930                          rtx operands[3])
10931 {
10932   rtx dest = operands[0];
10933   rtx lhs = operands[1];
10934   rtx rhs = operands[2];
10935
10936   /* At most one input may live in memory.  */
10937   if (MEM_P (lhs) && MEM_P (rhs))
10938     return 0;
10939
10940   /* Judge the inputs in canonical order: when the swap helper asks for an
10941      exchange, the second input is the one that acts as the first.  Only
10942      the first input is examined from here on.  */
10943   if (ix86_swap_binary_operands_p (code, mode, operands))
10944     lhs = rhs;
10945
10946   /* The first input can never be a constant.  */
10947   if (CONSTANT_P (lhs))
10948     return 0;
10949
10950   /* If either the destination or the first input is memory, the two must
10951      be the very same location.  This covers both "a memory destination
10952      needs a matching source" and "the first source may not be a
10953      non-matching memory".  */
10954   if (MEM_P (dest) || MEM_P (lhs))
10955     {
10956       if (!rtx_equal_p (dest, lhs))
10957         return 0;
10958     }
10959
10960   return 1;
10961 }
10962
10963 /* Attempt to expand a unary operator.  Make the expansion closer to the
10964    actual machine than just general_operand, which would allow 2 separate
10965    memory references (one output, one input) in a single insn.  */
10966
10967 void
10968 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
10969                             rtx operands[])
10970 {
10971   int matching_memory;  /* Nonzero when DST is a MEM equal to SRC.  */
10972   rtx src, dst, op, clob;
10973
10974   dst = operands[0];
10975   src = operands[1];
10976
10977   /* If the destination is memory, and we do not have matching source
10978      operands, do things in registers.  */
10979   matching_memory = 0;
10980   if (MEM_P (dst))
10981     {
10982       if (rtx_equal_p (dst, src))
10983         matching_memory = 1;
10984       else
10985         dst = gen_reg_rtx (mode);
10986     }
10987
10988   /* When source operand is memory, destination must match.  */
10989   if (MEM_P (src) && !matching_memory)
10990     src = force_reg (mode, src);
10991
10992   /* Emit the instruction: a bare SET for NOT, else SET + flags clobber.  */
10993
10994   op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
10995   if (reload_in_progress || code == NOT)
10996     {
10997       /* Reload doesn't know about the flags register, and doesn't know that
10998          it doesn't want to clobber it.  So during reload only NOT is allowed.  */
10999       gcc_assert (code == NOT);
11000       emit_insn (op);
11001     }
11002   else
11003     {
11004       clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
11005       emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
11006     }
11007
11008   /* Copy the result back if a register stood in for the destination.  */
11009   if (dst != operands[0])
11010     emit_move_insn (operands[0], dst);
11011 }
11012
11013 /* Return TRUE if OPERANDS (destination, source) are acceptable for a unary
11014    operator, FALSE otherwise.  CODE and MODE play no part in the decision.  */
11015
11016 int
11017 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
11018                         enum machine_mode mode ATTRIBUTE_UNUSED,
11019                         rtx operands[2] ATTRIBUTE_UNUSED)
11020 {
11021   rtx dst = operands[0], src = operands[1];
11022
11023   /* With no memory operand anything goes; otherwise DST must equal SRC.  */
11024   if (!MEM_P (dst) && !MEM_P (src))
11025     return TRUE;
11026   return rtx_equal_p (dst, src) ? TRUE : FALSE;
11027 }
11028
11029 /* Post-reload splitter for converting an SF or DFmode value in an
11030    SSE register into an unsigned SImode.  operands[0] receives the result.  */
11031
11032 void
11033 ix86_split_convert_uns_si_sse (rtx operands[])
11034 {
11035   enum machine_mode vecmode;
11036   rtx value, large, zero_or_two31, input, two31, x;
11037
11038   large = operands[1];
11039   zero_or_two31 = operands[2];
11040   input = operands[3];
11041   two31 = operands[4];
11042   vecmode = GET_MODE (large);  /* V4SFmode, else treated as V2DFmode below.  */
11043   value = gen_rtx_REG (vecmode, REGNO (operands[0]));  /* Result reg in VECMODE.  */
11044
11045   /* Load up the value into the low element.  We must ensure that the other
11046      elements are valid floats -- zero is the easiest such value.  */
11047   if (MEM_P (input))
11048     {
11049       if (vecmode == V4SFmode)
11050         emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
11051       else
11052         emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
11053     }
11054   else
11055     {
11056       input = gen_rtx_REG (vecmode, REGNO (input));
11057       emit_move_insn (value, CONST0_RTX (vecmode));
11058       if (vecmode == V4SFmode)
11059         emit_insn (gen_sse_movss (value, value, input));
11060       else
11061         emit_insn (gen_sse2_movsd (value, value, input));
11062     }
11063   /* Both LARGE and ZERO_OR_TWO31 start out holding TWO31.  */
11064   emit_move_insn (large, two31);
11065   emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
11066   /* LARGE = result of comparing LARGE (i.e. TWO31) <= VALUE.  */
11067   x = gen_rtx_fmt_ee (LE, vecmode, large, value);
11068   emit_insn (gen_rtx_SET (VOIDmode, large, x));
11069   /* ZERO_OR_TWO31 &= LARGE, as the name says leaving zero or TWO31.  */
11070   x = gen_rtx_AND (vecmode, zero_or_two31, large);
11071   emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
11072   /* VALUE -= ZERO_OR_TWO31.  */
11073   x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
11074   emit_insn (gen_rtx_SET (VOIDmode, value, x));
11075   /* View LARGE as V4SImode and shift each element left by 31.  */
11076   large = gen_rtx_REG (V4SImode, REGNO (large));
11077   emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
11078   /* Truncating conversion of VALUE to integers, in the same register.  */
11079   x = gen_rtx_REG (V4SImode, REGNO (value));
11080   if (vecmode == V4SFmode)
11081     emit_insn (gen_sse2_cvttps2dq (x, value));
11082   else
11083     emit_insn (gen_sse2_cvttpd2dq (x, value));
11084   value = x;
11085   /* Finally XOR the shifted LARGE into the converted value.  */
11086   emit_insn (gen_xorv4si3 (value, value, large));
11087 }
11088
11089 /* Convert an unsigned DImode value INPUT into a DFmode TARGET, using only
11090    SSE.  Expects the 64-bit DImode to be supplied in a pair of integral
11091    registers.  Requires SSE2; will use SSE3 if available.  For x86_32,
11092    -mfpmath=sse, !optimize_size only.  */
11093
11094 void
11095 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
11096 {
11097   REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
11098   rtx int_xmm, fp_xmm;
11099   rtx biases, exponents;
11100   rtx x;
11101   /* Step 1: get INPUT into the low DImode part of a V4SImode register.  */
11102   int_xmm = gen_reg_rtx (V4SImode);
11103   if (TARGET_INTER_UNIT_MOVES)
11104     emit_insn (gen_movdi_to_sse (int_xmm, input));
11105   else if (TARGET_SSE_SPLIT_REGS)
11106     {
11107       emit_clobber (int_xmm);
11108       emit_move_insn (gen_lowpart (DImode, int_xmm), input);
11109     }
11110   else
11111     {
11112       x = gen_reg_rtx (V2DImode);
11113       ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
11114       emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
11115     }
11116   /* Step 2: the two exponent words, placed in the constant pool.  */
11117   x = gen_rtx_CONST_VECTOR (V4SImode,
11118                             gen_rtvec (4, GEN_INT (0x43300000UL),
11119                                        GEN_INT (0x45300000UL),
11120                                        const0_rtx, const0_rtx));
11121   exponents = validize_mem (force_const_mem (V4SImode, x));
11122
11123   /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
11124   emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents));
11125
11126   /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_lo_xmm)
11127      yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
11128      Similarly (0x45300000UL ## fp_value_hi_xmm) yields
11129      (0x1.0p84 + double(fp_value_hi_xmm)).
11130      Note these exponents differ by 32.  */
11131
11132   fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
11133
11134   /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
11135      in [0,2**32-1] and [0]+[2**32,2**64-1] respectively.  */
11136   real_ldexp (&bias_lo_rvt, &dconst1, 52);
11137   real_ldexp (&bias_hi_rvt, &dconst1, 84);
11138   biases = const_double_from_real_value (bias_lo_rvt, DFmode);
11139   x = const_double_from_real_value (bias_hi_rvt, DFmode);
11140   biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
11141   biases = validize_mem (force_const_mem (V2DFmode, biases));
11142   emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
11143
11144   /* Add the upper and lower DFmode values together.  */
11145   if (TARGET_SSE3)
11146     emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
11147   else
11148     {
11149       x = copy_to_mode_reg (V2DFmode, fp_xmm);  /* Keep a copy of both halves.  */
11150       emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm));
11151       emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
11152     }
11153   /* Element 0 of FP_XMM is extracted into TARGET.  */
11154   ix86_expand_vector_extract (false, target, fp_xmm, 0);
11155 }
11156
11157 /* Placeholder, never meant to be called; it only eases macroization of patterns.  */
11158 void
11159 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
11160                                   rtx input ATTRIBUTE_UNUSED)
11161 {
11162   gcc_unreachable ();  /* Getting here at all is a bug.  */
11163 }
11164
11165 /* Convert the unsigned SImode value INPUT to DFmode in TARGET by way of the
11166    signed conversion: add -2**31, convert, then add 2**31 back as a double.  */
11167
11168 void
11169 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
11170 {
11171   REAL_VALUE_TYPE two31_real;
11172   rtx adj, dbl, two31, sum;
11173
11174   /* Integer side: bias INPUT, then do the signed SImode -> DFmode convert.  */
11175   adj = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
11176                              NULL, 1, OPTAB_DIRECT);
11177   dbl = gen_reg_rtx (DFmode);
11178   emit_insn (gen_floatsidf2 (dbl, adj));
11179
11180   /* Floating side: undo the bias, aiming the result at TARGET.  */
11181   real_ldexp (&two31_real, &dconst1, 31);
11182   two31 = const_double_from_real_value (two31_real, DFmode);
11183   sum = expand_simple_binop (DFmode, PLUS, dbl, two31, target, 0, OPTAB_DIRECT);
11184   if (sum != target)
11185     emit_move_insn (target, sum);
11186 }
11187
11188 /* Convert a signed DImode value INPUT into a DFmode TARGET.  Only used for SSE
11189    in 32-bit mode; otherwise we have a direct convert instruction.  */
11190
11191 void
11192 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
11193 {
11194   REAL_VALUE_TYPE TWO32r;
11195   rtx fp_lo, fp_hi, x;
11196
11197   fp_lo = gen_reg_rtx (DFmode);
11198   fp_hi = gen_reg_rtx (DFmode);
11199   /* High SImode word: signed convert, then multiply by 2**32.  */
11200   emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
11201
11202   real_ldexp (&TWO32r, &dconst1, 32);
11203   x = const_double_from_real_value (TWO32r, DFmode);
11204   fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
11205   /* Low SImode word: converted as an unsigned value.  */
11206   ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
11207   /* TARGET = high part + low part.  */
11208   x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
11209                            0, OPTAB_DIRECT);
11210   if (x != target)
11211     emit_move_insn (target, x);
11212 }
11213
11214 /* Convert an unsigned SImode value into a SFmode, using only SSE, as
11215    (hi16 * 2**16 + lo16).  For x86_32, -mfpmath=sse, !optimize_size only.  */
11216 void
11217 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
11218 {
11219   REAL_VALUE_TYPE ONE16r;
11220   rtx fp_hi, fp_lo, int_hi, int_lo, x;
11221
11222   real_ldexp (&ONE16r, &dconst1, 16);
11223   x = const_double_from_real_value (ONE16r, SFmode);  /* 2**16 as an SFmode constant.  */
11224   int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
11225                                       NULL, 0, OPTAB_DIRECT);
11226   int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
11227                                       NULL, 0, OPTAB_DIRECT);
11228   fp_hi = gen_reg_rtx (SFmode);
11229   fp_lo = gen_reg_rtx (SFmode);
11230   emit_insn (gen_floatsisf2 (fp_hi, int_hi));  /* Each half fits in 16 bits.  */
11231   emit_insn (gen_floatsisf2 (fp_lo, int_lo));
11232   fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
11233                                0, OPTAB_DIRECT);
11234   fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
11235                                0, OPTAB_DIRECT);
11236   if (!rtx_equal_p (target, fp_hi))  /* Copy only if the sum landed elsewhere.  */
11237     emit_move_insn (target, fp_hi);
11238 }
11239
11240 /* Build a CONST_VECTOR with elements of mode MODE (SImode, DImode, SFmode
11241    or DFmode).  Element 0 is VALUE; the remaining elements repeat VALUE when
11242    VECT is true and are zero otherwise.  The integer modes require VECT.  */
11243
11244 rtx
11245 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
11246 {
11247   /* What goes into every element after the first.  */
11248   rtx rest = value;
11249
11250   switch (mode)
11251     {
11252     case SImode:
11253     case DImode:
11254       /* Integer vectors are only built fully replicated.  */
11255       gcc_assert (vect);
11256       break;
11257
11258     case SFmode:
11259     case DFmode:
11260       if (!vect)
11261         rest = CONST0_RTX (mode);
11262       break;
11263
11264     default:
11265       gcc_unreachable ();
11266     }
11267
11268   /* SImode and SFmode give four-element vectors ...  */
11269   if (mode == SImode)
11270     return gen_rtx_CONST_VECTOR (V4SImode,
11271                                  gen_rtvec (4, value, rest, rest, rest));
11272   if (mode == SFmode)
11273     return gen_rtx_CONST_VECTOR (V4SFmode,
11274                                  gen_rtvec (4, value, rest, rest, rest));
11275   /* ... DImode and DFmode two-element ones.  */
11276   return gen_rtx_CONST_VECTOR (mode == DImode ? V2DImode : V2DFmode,
11277                                gen_rtvec (2, value, rest));
11278 }
11279
11280 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
11281    and ix86_expand_int_vcond.  Create a mask for the sign bit in MODE
11282    for an SSE register.  If VECT is true, then replicate the mask for
11283    all elements of the vector register.  If INVERT is true, then create
11284    a mask excluding the sign bit.  */
11285
11286 rtx
11287 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
11288 {
11289   enum machine_mode vec_mode, imode;
11290   HOST_WIDE_INT hi, lo;
11291   int shift = 63;
11292   rtx v;
11293   rtx mask;
11294
11295   /* Find the sign bit, sign extended to 2*HWI.  */
11296   switch (mode)
11297     {
11298     case SImode:
11299     case SFmode:
11300       imode = SImode;
11301       vec_mode = (mode == SImode) ? V4SImode : V4SFmode;
11302       lo = 0x80000000, hi = lo < 0;
11303       break;
11304
11305     case DImode:
11306     case DFmode:
11307       imode = DImode;
11308       vec_mode = (mode == DImode) ? V2DImode : V2DFmode;
11309       if (HOST_BITS_PER_WIDE_INT >= 64)
11310         lo = (HOST_WIDE_INT)1 << shift, hi = -1;
11311       else
11312         lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
11313       break;
11314
11315     case TImode:
11316     case TFmode:
11317       imode = TImode;
11318       vec_mode = VOIDmode;
11319       gcc_assert (HOST_BITS_PER_WIDE_INT >= 64);
11320       lo = 0, hi = (HOST_WIDE_INT)1 << shift;
11321      break;
11322
11323     default:
11324       gcc_unreachable ();
11325     }
11326
11327   if (invert)
11328     lo = ~lo, hi = ~hi;
11329
11330   /* Force this value into the low part of a fp vector constant.  */
11331   mask = immed_double_const (lo, hi, imode);
11332   mask = gen_lowpart (mode, mask);
11333
11334   if (vec_mode == VOIDmode)
11335     return force_reg (mode, mask);
11336
11337   v = ix86_build_const_vector (mode, vect, mask);
11338   return force_reg (vec_mode, v);
11339 }
11340
11341 /* Generate code for floating point ABS or NEG.  */
11342
11343 void
11344 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
11345                                 rtx operands[])
11346 {
11347   rtx mask, set, use, clob, dst, src;
11348   bool use_sse = false;
11349   bool vector_mode = VECTOR_MODE_P (mode);
11350   enum machine_mode elt_mode = mode;
11351
11352   if (vector_mode)
11353     {
11354       elt_mode = GET_MODE_INNER (mode);
11355       use_sse = true;
11356     }
11357   else if (mode == TFmode)
11358     use_sse = true;
11359   else if (TARGET_SSE_MATH)
11360     use_sse = SSE_FLOAT_MODE_P (mode);
11361
11362   /* NEG and ABS performed with SSE use bitwise mask operations.
11363      Create the appropriate mask now.  */
11364   if (use_sse)
11365     mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
11366   else
11367     mask = NULL_RTX;
11368
11369   dst = operands[0];
11370   src = operands[1];
11371
11372   if (vector_mode)
11373     {
11374       set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
11375       set = gen_rtx_SET (VOIDmode, dst, set);
11376       emit_insn (set);
11377     }
11378   else
11379     {
11380       set = gen_rtx_fmt_e (code, mode, src);
11381       set = gen_rtx_SET (VOIDmode, dst, set);
11382       if (mask)
11383         {
11384           use = gen_rtx_USE (VOIDmode, mask);
11385           clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
11386           emit_insn (gen_rtx_PARALLEL (VOIDmode,
11387                                        gen_rtvec (3, set, use, clob)));
11388         }
11389       else
11390         emit_insn (set);
11391     }
11392 }
11393
11394 /* Expand a copysign operation.  Special case operand 0 being a constant.  */
11395
11396 void
11397 ix86_expand_copysign (rtx operands[])
11398 {
11399   enum machine_mode mode;
11400   rtx dest, op0, op1, mask, nmask;
11401
11402   dest = operands[0];
11403   op0 = operands[1];
11404   op1 = operands[2];
11405
11406   mode = GET_MODE (dest);
11407
11408   if (GET_CODE (op0) == CONST_DOUBLE)
11409     {
11410       rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
11411
11412       if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
11413         op0 = simplify_unary_operation (ABS, mode, op0, mode);
11414
11415       if (mode == SFmode || mode == DFmode)
11416         {
11417           enum machine_mode vmode;
11418
11419           vmode = mode == SFmode ? V4SFmode : V2DFmode;
11420
11421           if (op0 == CONST0_RTX (mode))
11422             op0 = CONST0_RTX (vmode);
11423           else
11424             {
11425               rtvec v;
11426
11427               if (mode == SFmode)
11428                 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
11429                                CONST0_RTX (SFmode), CONST0_RTX (SFmode));
11430               else
11431                 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
11432
11433               op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
11434             }
11435         }
11436       else if (op0 != CONST0_RTX (mode))
11437         op0 = force_reg (mode, op0);
11438
11439       mask = ix86_build_signbit_mask (mode, 0, 0);
11440
11441       if (mode == SFmode)
11442         copysign_insn = gen_copysignsf3_const;
11443       else if (mode == DFmode)
11444         copysign_insn = gen_copysigndf3_const;
11445       else
11446         copysign_insn = gen_copysigntf3_const;
11447
11448         emit_insn (copysign_insn (dest, op0, op1, mask));
11449     }
11450   else
11451     {
11452       rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
11453
11454       nmask = ix86_build_signbit_mask (mode, 0, 1);
11455       mask = ix86_build_signbit_mask (mode, 0, 0);
11456
11457       if (mode == SFmode)
11458         copysign_insn = gen_copysignsf3_var;
11459       else if (mode == DFmode)
11460         copysign_insn = gen_copysigndf3_var;
11461       else
11462         copysign_insn = gen_copysigntf3_var;
11463
11464       emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
11465     }
11466 }
11467
11468 /* Deconstruct a copysign operation into bit masks.  Operand 0 is known to
11469    be a constant, and so has already been expanded into a vector constant.  */
11470
11471 void
11472 ix86_split_copysign_const (rtx operands[])
11473 {
11474   enum machine_mode mode, vmode;
11475   rtx dest, op0, op1, mask, x;
11476
11477   dest = operands[0];
11478   op0 = operands[1];
11479   op1 = operands[2];
11480   mask = operands[3];
11481
11482   mode = GET_MODE (dest);
11483   vmode = GET_MODE (mask);
11484
11485   dest = simplify_gen_subreg (vmode, dest, mode, 0);
11486   x = gen_rtx_AND (vmode, dest, mask);
11487   emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11488
11489   if (op0 != CONST0_RTX (vmode))
11490     {
11491       x = gen_rtx_IOR (vmode, dest, op0);
11492       emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11493     }
11494 }
11495
11496 /* Deconstruct a copysign operation into bit masks.  Operand 0 is variable,
11497    so we have to do two masks.  */
11498
11499 void
11500 ix86_split_copysign_var (rtx operands[])
11501 {
11502   enum machine_mode mode, vmode;
11503   rtx dest, scratch, op0, op1, mask, nmask, x;
11504
11505   dest = operands[0];
11506   scratch = operands[1];
11507   op0 = operands[2];
11508   op1 = operands[3];
11509   nmask = operands[4];
11510   mask = operands[5];
11511
11512   mode = GET_MODE (dest);
11513   vmode = GET_MODE (mask);
11514
11515   if (rtx_equal_p (op0, op1))
11516     {
11517       /* Shouldn't happen often (it's useless, obviously), but when it does
11518          we'd generate incorrect code if we continue below.  */
11519       emit_move_insn (dest, op0);
11520       return;
11521     }
11522
11523   if (REG_P (mask) && REGNO (dest) == REGNO (mask))     /* alternative 0 */
11524     {
11525       gcc_assert (REGNO (op1) == REGNO (scratch));
11526
11527       x = gen_rtx_AND (vmode, scratch, mask);
11528       emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
11529
11530       dest = mask;
11531       op0 = simplify_gen_subreg (vmode, op0, mode, 0);
11532       x = gen_rtx_NOT (vmode, dest);
11533       x = gen_rtx_AND (vmode, x, op0);
11534       emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11535     }
11536   else
11537     {
11538       if (REGNO (op1) == REGNO (scratch))               /* alternative 1,3 */
11539         {
11540           x = gen_rtx_AND (vmode, scratch, mask);
11541         }
11542       else                                              /* alternative 2,4 */
11543         {
11544           gcc_assert (REGNO (mask) == REGNO (scratch));
11545           op1 = simplify_gen_subreg (vmode, op1, mode, 0);
11546           x = gen_rtx_AND (vmode, scratch, op1);
11547         }
11548       emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
11549
11550       if (REGNO (op0) == REGNO (dest))                  /* alternative 1,2 */
11551         {
11552           dest = simplify_gen_subreg (vmode, op0, mode, 0);
11553           x = gen_rtx_AND (vmode, dest, nmask);
11554         }
11555       else                                              /* alternative 3,4 */
11556         {
11557           gcc_assert (REGNO (nmask) == REGNO (dest));
11558           dest = nmask;
11559           op0 = simplify_gen_subreg (vmode, op0, mode, 0);
11560           x = gen_rtx_AND (vmode, dest, op0);
11561         }
11562       emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11563     }
11564
11565   x = gen_rtx_IOR (vmode, dest, scratch);
11566   emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11567 }
11568
11569 /* Return TRUE or FALSE depending on whether the first SET in INSN
11570    has source and destination with matching CC modes, and that the
11571    CC mode is at least as constrained as REQ_MODE.  */
11572
11573 int
11574 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
11575 {
11576   rtx set;
11577   enum machine_mode set_mode;
11578
11579   set = PATTERN (insn);
11580   if (GET_CODE (set) == PARALLEL)
11581     set = XVECEXP (set, 0, 0);
11582   gcc_assert (GET_CODE (set) == SET);
11583   gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
11584
11585   set_mode = GET_MODE (SET_DEST (set));
11586   switch (set_mode)
11587     {
11588     case CCNOmode:
11589       if (req_mode != CCNOmode
11590           && (req_mode != CCmode
11591               || XEXP (SET_SRC (set), 1) != const0_rtx))
11592         return 0;
11593       break;
11594     case CCmode:
11595       if (req_mode == CCGCmode)
11596         return 0;
11597       /* FALLTHRU */
11598     case CCGCmode:
11599       if (req_mode == CCGOCmode || req_mode == CCNOmode)
11600         return 0;
11601       /* FALLTHRU */
11602     case CCGOCmode:
11603       if (req_mode == CCZmode)
11604         return 0;
11605       /* FALLTHRU */
11606     case CCZmode:
11607       break;
11608
11609     default:
11610       gcc_unreachable ();
11611     }
11612
11613   return (GET_MODE (SET_SRC (set)) == set_mode);
11614 }
11615
11616 /* Generate insn patterns to do an integer compare of OPERANDS.  */
11617
11618 static rtx
11619 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
11620 {
11621   enum machine_mode cmpmode;
11622   rtx tmp, flags;
11623
11624   cmpmode = SELECT_CC_MODE (code, op0, op1);
11625   flags = gen_rtx_REG (cmpmode, FLAGS_REG);
11626
11627   /* This is very simple, but making the interface the same as in the
11628      FP case makes the rest of the code easier.  */
11629   tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
11630   emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
11631
11632   /* Return the test that should be put into the flags user, i.e.
11633      the bcc, scc, or cmov instruction.  */
11634   return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
11635 }
11636
11637 /* Figure out whether to use ordered or unordered fp comparisons.
11638    Return the appropriate mode to use.  */
11639
11640 enum machine_mode
11641 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
11642 {
11643   /* ??? In order to make all comparisons reversible, we do all comparisons
11644      non-trapping when compiling for IEEE.  Once gcc is able to distinguish
11645      all forms trapping and nontrapping comparisons, we can make inequality
11646      comparisons trapping again, since it results in better code when using
11647      FCOM based compares.  */
11648   return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
11649 }
11650
11651 enum machine_mode
11652 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
11653 {
11654   enum machine_mode mode = GET_MODE (op0);
11655
11656   if (SCALAR_FLOAT_MODE_P (mode))
11657     {
11658       gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
11659       return ix86_fp_compare_mode (code);
11660     }
11661
11662   switch (code)
11663     {
11664       /* Only zero flag is needed.  */
11665     case EQ:                    /* ZF=0 */
11666     case NE:                    /* ZF!=0 */
11667       return CCZmode;
11668       /* Codes needing carry flag.  */
11669     case GEU:                   /* CF=0 */
11670     case LTU:                   /* CF=1 */
11671       /* Detect overflow checks.  They need just the carry flag.  */
11672       if (GET_CODE (op0) == PLUS
11673           && rtx_equal_p (op1, XEXP (op0, 0)))
11674         return CCCmode;
11675       else
11676         return CCmode;
11677     case GTU:                   /* CF=0 & ZF=0 */
11678     case LEU:                   /* CF=1 | ZF=1 */
11679       /* Detect overflow checks.  They need just the carry flag.  */
11680       if (GET_CODE (op0) == MINUS
11681           && rtx_equal_p (op1, XEXP (op0, 0)))
11682         return CCCmode;
11683       else
11684         return CCmode;
11685       /* Codes possibly doable only with sign flag when
11686          comparing against zero.  */
11687     case GE:                    /* SF=OF   or   SF=0 */
11688     case LT:                    /* SF<>OF  or   SF=1 */
11689       if (op1 == const0_rtx)
11690         return CCGOCmode;
11691       else
11692         /* For other cases Carry flag is not required.  */
11693         return CCGCmode;
11694       /* Codes doable only with sign flag when comparing
11695          against zero, but we miss jump instruction for it
11696          so we need to use relational tests against overflow
11697          that thus needs to be zero.  */
11698     case GT:                    /* ZF=0 & SF=OF */
11699     case LE:                    /* ZF=1 | SF<>OF */
11700       if (op1 == const0_rtx)
11701         return CCNOmode;
11702       else
11703         return CCGCmode;
11704       /* strcmp pattern do (use flags) and combine may ask us for proper
11705          mode.  */
11706     case USE:
11707       return CCmode;
11708     default:
11709       gcc_unreachable ();
11710     }
11711 }
11712
11713 /* Return the fixed registers used for condition codes.  */
11714
11715 static bool
11716 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
11717 {
11718   *p1 = FLAGS_REG;
11719   *p2 = FPSR_REG;
11720   return true;
11721 }
11722
11723 /* If two condition code modes are compatible, return a condition code
11724    mode which is compatible with both.  Otherwise, return
11725    VOIDmode.  */
11726
11727 static enum machine_mode
11728 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
11729 {
11730   if (m1 == m2)
11731     return m1;
11732
11733   if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
11734     return VOIDmode;
11735
11736   if ((m1 == CCGCmode && m2 == CCGOCmode)
11737       || (m1 == CCGOCmode && m2 == CCGCmode))
11738     return CCGCmode;
11739
11740   switch (m1)
11741     {
11742     default:
11743       gcc_unreachable ();
11744
11745     case CCmode:
11746     case CCGCmode:
11747     case CCGOCmode:
11748     case CCNOmode:
11749     case CCAmode:
11750     case CCCmode:
11751     case CCOmode:
11752     case CCSmode:
11753     case CCZmode:
11754       switch (m2)
11755         {
11756         default:
11757           return VOIDmode;
11758
11759         case CCmode:
11760         case CCGCmode:
11761         case CCGOCmode:
11762         case CCNOmode:
11763         case CCAmode:
11764         case CCCmode:
11765         case CCOmode:
11766         case CCSmode:
11767         case CCZmode:
11768           return CCmode;
11769         }
11770
11771     case CCFPmode:
11772     case CCFPUmode:
11773       /* These are only compatible with themselves, which we already
11774          checked above.  */
11775       return VOIDmode;
11776     }
11777 }
11778
11779 /* Split comparison code CODE into comparisons we can do using branch
11780    instructions.  BYPASS_CODE is comparison code for branch that will
11781    branch around FIRST_CODE and SECOND_CODE.  If some of branches
11782    is not required, set value to UNKNOWN.
11783    We never require more than two branches.  */
11784
11785 void
11786 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
11787                           enum rtx_code *first_code,
11788                           enum rtx_code *second_code)
11789 {
11790   *first_code = code;
11791   *bypass_code = UNKNOWN;
11792   *second_code = UNKNOWN;
11793
11794   /* The fcomi comparison sets flags as follows:
11795
11796      cmp    ZF PF CF
11797      >      0  0  0
11798      <      0  0  1
11799      =      1  0  0
11800      un     1  1  1 */
11801
11802   switch (code)
11803     {
11804     case GT:                    /* GTU - CF=0 & ZF=0 */
11805     case GE:                    /* GEU - CF=0 */
11806     case ORDERED:               /* PF=0 */
11807     case UNORDERED:             /* PF=1 */
11808     case UNEQ:                  /* EQ - ZF=1 */
11809     case UNLT:                  /* LTU - CF=1 */
11810     case UNLE:                  /* LEU - CF=1 | ZF=1 */
11811     case LTGT:                  /* EQ - ZF=0 */
11812       break;
11813     case LT:                    /* LTU - CF=1 - fails on unordered */
11814       *first_code = UNLT;
11815       *bypass_code = UNORDERED;
11816       break;
11817     case LE:                    /* LEU - CF=1 | ZF=1 - fails on unordered */
11818       *first_code = UNLE;
11819       *bypass_code = UNORDERED;
11820       break;
11821     case EQ:                    /* EQ - ZF=1 - fails on unordered */
11822       *first_code = UNEQ;
11823       *bypass_code = UNORDERED;
11824       break;
11825     case NE:                    /* NE - ZF=0 - fails on unordered */
11826       *first_code = LTGT;
11827       *second_code = UNORDERED;
11828       break;
11829     case UNGE:                  /* GEU - CF=0 - fails on unordered */
11830       *first_code = GE;
11831       *second_code = UNORDERED;
11832       break;
11833     case UNGT:                  /* GTU - CF=0 & ZF=0 - fails on unordered */
11834       *first_code = GT;
11835       *second_code = UNORDERED;
11836       break;
11837     default:
11838       gcc_unreachable ();
11839     }
11840   if (!TARGET_IEEE_FP)
11841     {
11842       *second_code = UNKNOWN;
11843       *bypass_code = UNKNOWN;
11844     }
11845 }
11846
11847 /* Return cost of comparison done fcom + arithmetics operations on AX.
11848    All following functions do use number of instructions as a cost metrics.
11849    In future this should be tweaked to compute bytes for optimize_size and
11850    take into account performance of various instructions on various CPUs.  */
11851 static int
11852 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
11853 {
11854   if (!TARGET_IEEE_FP)
11855     return 4;
11856   /* The cost of code output by ix86_expand_fp_compare.  */
11857   switch (code)
11858     {
11859     case UNLE:
11860     case UNLT:
11861     case LTGT:
11862     case GT:
11863     case GE:
11864     case UNORDERED:
11865     case ORDERED:
11866     case UNEQ:
11867       return 4;
11868       break;
11869     case LT:
11870     case NE:
11871     case EQ:
11872     case UNGE:
11873       return 5;
11874       break;
11875     case LE:
11876     case UNGT:
11877       return 6;
11878       break;
11879     default:
11880       gcc_unreachable ();
11881     }
11882 }
11883
11884 /* Return cost of comparison done using fcomi operation.
11885    See ix86_fp_comparison_arithmetics_cost for the metrics.  */
11886 static int
11887 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
11888 {
11889   enum rtx_code bypass_code, first_code, second_code;
11890   /* Return arbitrarily high cost when instruction is not supported - this
11891      prevents gcc from using it.  */
11892   if (!TARGET_CMOVE)
11893     return 1024;
11894   ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11895   return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
11896 }
11897
11898 /* Return cost of comparison done using sahf operation.
11899    See ix86_fp_comparison_arithmetics_cost for the metrics.  */
11900 static int
11901 ix86_fp_comparison_sahf_cost (enum rtx_code code)
11902 {
11903   enum rtx_code bypass_code, first_code, second_code;
11904   /* Return arbitrarily high cost when instruction is not preferred - this
11905      avoids gcc from using it.  */
11906   if (!(TARGET_SAHF && (TARGET_USE_SAHF || optimize_size)))
11907     return 1024;
11908   ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11909   return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
11910 }
11911
11912 /* Compute cost of the comparison done using any method.
11913    See ix86_fp_comparison_arithmetics_cost for the metrics.  */
11914 static int
11915 ix86_fp_comparison_cost (enum rtx_code code)
11916 {
11917   int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
11918   int min;
11919
11920   fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
11921   sahf_cost = ix86_fp_comparison_sahf_cost (code);
11922
11923   min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
11924   if (min > sahf_cost)
11925     min = sahf_cost;
11926   if (min > fcomi_cost)
11927     min = fcomi_cost;
11928   return min;
11929 }
11930
11931 /* Return true if we should use an FCOMI instruction for this
11932    fp comparison.  */
11933
11934 int
11935 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
11936 {
11937   enum rtx_code swapped_code = swap_condition (code);
11938
11939   return ((ix86_fp_comparison_cost (code)
11940            == ix86_fp_comparison_fcomi_cost (code))
11941           || (ix86_fp_comparison_cost (swapped_code)
11942               == ix86_fp_comparison_fcomi_cost (swapped_code)));
11943 }
11944
11945 /* Swap, force into registers, or otherwise massage the two operands
11946    to a fp comparison.  The operands are updated in place; the new
11947    comparison code is returned.  */
11948
11949 static enum rtx_code
11950 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
11951 {
11952   enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
11953   rtx op0 = *pop0, op1 = *pop1;
11954   enum machine_mode op_mode = GET_MODE (op0);
11955   int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
11956
11957   /* All of the unordered compare instructions only work on registers.
11958      The same is true of the fcomi compare instructions.  The XFmode
11959      compare instructions require registers except when comparing
11960      against zero or when converting operand 1 from fixed point to
11961      floating point.  */
11962
11963   if (!is_sse
11964       && (fpcmp_mode == CCFPUmode
11965           || (op_mode == XFmode
11966               && ! (standard_80387_constant_p (op0) == 1
11967                     || standard_80387_constant_p (op1) == 1)
11968               && GET_CODE (op1) != FLOAT)
11969           || ix86_use_fcomi_compare (code)))
11970     {
11971       op0 = force_reg (op_mode, op0);
11972       op1 = force_reg (op_mode, op1);
11973     }
11974   else
11975     {
11976       /* %%% We only allow op1 in memory; op0 must be st(0).  So swap
11977          things around if they appear profitable, otherwise force op0
11978          into a register.  */
11979
11980       if (standard_80387_constant_p (op0) == 0
11981           || (MEM_P (op0)
11982               && ! (standard_80387_constant_p (op1) == 0
11983                     || MEM_P (op1))))
11984         {
11985           rtx tmp;
11986           tmp = op0, op0 = op1, op1 = tmp;
11987           code = swap_condition (code);
11988         }
11989
11990       if (!REG_P (op0))
11991         op0 = force_reg (op_mode, op0);
11992
11993       if (CONSTANT_P (op1))
11994         {
11995           int tmp = standard_80387_constant_p (op1);
11996           if (tmp == 0)
11997             op1 = validize_mem (force_const_mem (op_mode, op1));
11998           else if (tmp == 1)
11999             {
12000               if (TARGET_CMOVE)
12001                 op1 = force_reg (op_mode, op1);
12002             }
12003           else
12004             op1 = force_reg (op_mode, op1);
12005         }
12006     }
12007
12008   /* Try to rearrange the comparison to make it cheaper.  */
12009   if (ix86_fp_comparison_cost (code)
12010       > ix86_fp_comparison_cost (swap_condition (code))
12011       && (REG_P (op1) || can_create_pseudo_p ()))
12012     {
12013       rtx tmp;
12014       tmp = op0, op0 = op1, op1 = tmp;
12015       code = swap_condition (code);
12016       if (!REG_P (op0))
12017         op0 = force_reg (op_mode, op0);
12018     }
12019
12020   *pop0 = op0;
12021   *pop1 = op1;
12022   return code;
12023 }
12024
12025 /* Convert comparison codes we use to represent FP comparison to integer
12026    code that will result in proper branch.  Return UNKNOWN if no such code
12027    is available.  */
12028
12029 enum rtx_code
12030 ix86_fp_compare_code_to_integer (enum rtx_code code)
12031 {
12032   switch (code)
12033     {
12034     case GT:
12035       return GTU;
12036     case GE:
12037       return GEU;
12038     case ORDERED:
12039     case UNORDERED:
12040       return code;
12041       break;
12042     case UNEQ:
12043       return EQ;
12044       break;
12045     case UNLT:
12046       return LTU;
12047       break;
12048     case UNLE:
12049       return LEU;
12050       break;
12051     case LTGT:
12052       return NE;
12053       break;
12054     default:
12055       return UNKNOWN;
12056     }
12057 }
12058
12059 /* Generate insn patterns to do a floating point compare of OPERANDS.  */
12060
12061 static rtx
12062 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
12063                         rtx *second_test, rtx *bypass_test)
12064 {
12065   enum machine_mode fpcmp_mode, intcmp_mode;
12066   rtx tmp, tmp2;
12067   int cost = ix86_fp_comparison_cost (code);
12068   enum rtx_code bypass_code, first_code, second_code;
12069
12070   fpcmp_mode = ix86_fp_compare_mode (code);
12071   code = ix86_prepare_fp_compare_args (code, &op0, &op1);
12072
12073   if (second_test)
12074     *second_test = NULL_RTX;
12075   if (bypass_test)
12076     *bypass_test = NULL_RTX;
12077
12078   ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
12079
12080   /* Do fcomi/sahf based test when profitable.  */
12081   if (ix86_fp_comparison_arithmetics_cost (code) > cost
12082       && (bypass_code == UNKNOWN || bypass_test)
12083       && (second_code == UNKNOWN || second_test))
12084     {
12085       tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
12086       tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
12087                          tmp);
12088       if (TARGET_CMOVE)
12089         emit_insn (tmp);
12090       else
12091         {
12092           gcc_assert (TARGET_SAHF);
12093
12094           if (!scratch)
12095             scratch = gen_reg_rtx (HImode);
12096           tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
12097
12098           emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
12099         }
12100
12101       /* The FP codes work out to act like unsigned.  */
12102       intcmp_mode = fpcmp_mode;
12103       code = first_code;
12104       if (bypass_code != UNKNOWN)
12105         *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
12106                                        gen_rtx_REG (intcmp_mode, FLAGS_REG),
12107                                        const0_rtx);
12108       if (second_code != UNKNOWN)
12109         *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
12110                                        gen_rtx_REG (intcmp_mode, FLAGS_REG),
12111                                        const0_rtx);
12112     }
12113   else
12114     {
12115       /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first.  */
12116       tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
12117       tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
12118       if (!scratch)
12119         scratch = gen_reg_rtx (HImode);
12120       emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
12121
12122       /* In the unordered case, we have to check C2 for NaN's, which
12123          doesn't happen to work out to anything nice combination-wise.
12124          So do some bit twiddling on the value we've got in AH to come
12125          up with an appropriate set of condition codes.  */
12126
12127       intcmp_mode = CCNOmode;
12128       switch (code)
12129         {
12130         case GT:
12131         case UNGT:
12132           if (code == GT || !TARGET_IEEE_FP)
12133             {
12134               emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
12135               code = EQ;
12136             }
12137           else
12138             {
12139               emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
12140               emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
12141               emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
12142               intcmp_mode = CCmode;
12143               code = GEU;
12144             }
12145           break;
12146         case LT:
12147         case UNLT:
12148           if (code == LT && TARGET_IEEE_FP)
12149             {
12150               emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
12151               emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
12152               intcmp_mode = CCmode;
12153               code = EQ;
12154             }
12155           else
12156             {
12157               emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
12158               code = NE;
12159             }
12160           break;
12161         case GE:
12162         case UNGE:
12163           if (code == GE || !TARGET_IEEE_FP)
12164             {
12165               emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
12166               code = EQ;
12167             }
12168           else
12169             {
12170               emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
12171               emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
12172                                              GEN_INT (0x01)));
12173               code = NE;
12174             }
12175           break;
12176         case LE:
12177         case UNLE:
12178           if (code == LE && TARGET_IEEE_FP)
12179             {
12180               emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
12181               emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
12182               emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
12183               intcmp_mode = CCmode;
12184               code = LTU;
12185             }
12186           else
12187             {
12188               emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
12189               code = NE;
12190             }
12191           break;
12192         case EQ:
12193         case UNEQ:
12194           if (code == EQ && TARGET_IEEE_FP)
12195             {
12196               emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
12197               emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
12198               intcmp_mode = CCmode;
12199               code = EQ;
12200             }
12201           else
12202             {
12203               emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
12204               code = NE;
12205               break;
12206             }
12207           break;
12208         case NE:
12209         case LTGT:
12210           if (code == NE && TARGET_IEEE_FP)
12211             {
12212               emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
12213               emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
12214                                              GEN_INT (0x40)));
12215               code = NE;
12216             }
12217           else
12218             {
12219               emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
12220               code = EQ;
12221             }
12222           break;
12223
12224         case UNORDERED:
12225           emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
12226           code = NE;
12227           break;
12228         case ORDERED:
12229           emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
12230           code = EQ;
12231           break;
12232
12233         default:
12234           gcc_unreachable ();
12235         }
12236     }
12237
12238   /* Return the test that should be put into the flags user, i.e.
12239      the bcc, scc, or cmov instruction.  */
12240   return gen_rtx_fmt_ee (code, VOIDmode,
12241                          gen_rtx_REG (intcmp_mode, FLAGS_REG),
12242                          const0_rtx);
12243 }
12244
12245 rtx
12246 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
12247 {
12248   rtx op0, op1, ret;
12249   op0 = ix86_compare_op0;
12250   op1 = ix86_compare_op1;
12251
12252   if (second_test)
12253     *second_test = NULL_RTX;
12254   if (bypass_test)
12255     *bypass_test = NULL_RTX;
12256
12257   if (ix86_compare_emitted)
12258     {
12259       ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
12260       ix86_compare_emitted = NULL_RTX;
12261     }
12262   else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
12263     {
12264       gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
12265       ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
12266                                     second_test, bypass_test);
12267     }
12268   else
12269     ret = ix86_expand_int_compare (code, op0, op1);
12270
12271   return ret;
12272 }
12273
12274 /* Return true if the CODE will result in nontrivial jump sequence.  */
12275 bool
12276 ix86_fp_jump_nontrivial_p (enum rtx_code code)
12277 {
12278   enum rtx_code bypass_code, first_code, second_code;
12279   if (!TARGET_CMOVE)
12280     return true;
12281   ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
12282   return bypass_code != UNKNOWN || second_code != UNKNOWN;
12283 }
12284
12285 void
12286 ix86_expand_branch (enum rtx_code code, rtx label)
12287 {
12288   rtx tmp;
12289
12290   /* If we have emitted a compare insn, go straight to simple.
12291      ix86_expand_compare won't emit anything if ix86_compare_emitted
12292      is non NULL.  */
12293   if (ix86_compare_emitted)
12294     goto simple;
12295
12296   switch (GET_MODE (ix86_compare_op0))
12297     {
12298     case QImode:
12299     case HImode:
12300     case SImode:
12301       simple:
12302       tmp = ix86_expand_compare (code, NULL, NULL);
12303       tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
12304                                   gen_rtx_LABEL_REF (VOIDmode, label),
12305                                   pc_rtx);
12306       emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
12307       return;
12308
12309     case SFmode:
12310     case DFmode:
12311     case XFmode:
12312       {
12313         rtvec vec;
12314         int use_fcomi;
12315         enum rtx_code bypass_code, first_code, second_code;
12316
12317         code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
12318                                              &ix86_compare_op1);
12319
12320         ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
12321
12322         /* Check whether we will use the natural sequence with one jump.  If
12323            so, we can expand jump early.  Otherwise delay expansion by
12324            creating compound insn to not confuse optimizers.  */
12325         if (bypass_code == UNKNOWN && second_code == UNKNOWN)
12326           {
12327             ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
12328                                   gen_rtx_LABEL_REF (VOIDmode, label),
12329                                   pc_rtx, NULL_RTX, NULL_RTX);
12330           }
12331         else
12332           {
12333             tmp = gen_rtx_fmt_ee (code, VOIDmode,
12334                                   ix86_compare_op0, ix86_compare_op1);
12335             tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
12336                                         gen_rtx_LABEL_REF (VOIDmode, label),
12337                                         pc_rtx);
12338             tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
12339
12340             use_fcomi = ix86_use_fcomi_compare (code);
12341             vec = rtvec_alloc (3 + !use_fcomi);
12342             RTVEC_ELT (vec, 0) = tmp;
12343             RTVEC_ELT (vec, 1)
12344               = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FPSR_REG));
12345             RTVEC_ELT (vec, 2)
12346               = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FLAGS_REG));
12347             if (! use_fcomi)
12348               RTVEC_ELT (vec, 3)
12349                 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
12350
12351             emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
12352           }
12353         return;
12354       }
12355
12356     case DImode:
12357       if (TARGET_64BIT)
12358         goto simple;
12359     case TImode:
12360       /* Expand DImode branch into multiple compare+branch.  */
12361       {
12362         rtx lo[2], hi[2], label2;
12363         enum rtx_code code1, code2, code3;
12364         enum machine_mode submode;
12365
12366         if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
12367           {
12368             tmp = ix86_compare_op0;
12369             ix86_compare_op0 = ix86_compare_op1;
12370             ix86_compare_op1 = tmp;
12371             code = swap_condition (code);
12372           }
12373         if (GET_MODE (ix86_compare_op0) == DImode)
12374           {
12375             split_di (&ix86_compare_op0, 1, lo+0, hi+0);
12376             split_di (&ix86_compare_op1, 1, lo+1, hi+1);
12377             submode = SImode;
12378           }
12379         else
12380           {
12381             split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
12382             split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
12383             submode = DImode;
12384           }
12385
12386         /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
12387            avoid two branches.  This costs one extra insn, so disable when
12388            optimizing for size.  */
12389
12390         if ((code == EQ || code == NE)
12391             && (!optimize_size
12392                 || hi[1] == const0_rtx || lo[1] == const0_rtx))
12393           {
12394             rtx xor0, xor1;
12395
12396             xor1 = hi[0];
12397             if (hi[1] != const0_rtx)
12398               xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
12399                                    NULL_RTX, 0, OPTAB_WIDEN);
12400
12401             xor0 = lo[0];
12402             if (lo[1] != const0_rtx)
12403               xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
12404                                    NULL_RTX, 0, OPTAB_WIDEN);
12405
12406             tmp = expand_binop (submode, ior_optab, xor1, xor0,
12407                                 NULL_RTX, 0, OPTAB_WIDEN);
12408
12409             ix86_compare_op0 = tmp;
12410             ix86_compare_op1 = const0_rtx;
12411             ix86_expand_branch (code, label);
12412             return;
12413           }
12414
12415         /* Otherwise, if we are doing less-than or greater-or-equal-than,
12416            op1 is a constant and the low word is zero, then we can just
12417            examine the high word.  Similarly for low word -1 and
12418            less-or-equal-than or greater-than.  */
12419
12420         if (CONST_INT_P (hi[1]))
12421           switch (code)
12422             {
12423             case LT: case LTU: case GE: case GEU:
12424               if (lo[1] == const0_rtx)
12425                 {
12426                   ix86_compare_op0 = hi[0];
12427                   ix86_compare_op1 = hi[1];
12428                   ix86_expand_branch (code, label);
12429                   return;
12430                 }
12431               break;
12432             case LE: case LEU: case GT: case GTU:
12433               if (lo[1] == constm1_rtx)
12434                 {
12435                   ix86_compare_op0 = hi[0];
12436                   ix86_compare_op1 = hi[1];
12437                   ix86_expand_branch (code, label);
12438                   return;
12439                 }
12440               break;
12441             default:
12442               break;
12443             }
12444
12445         /* Otherwise, we need two or three jumps.  */
12446
12447         label2 = gen_label_rtx ();
12448
12449         code1 = code;
12450         code2 = swap_condition (code);
12451         code3 = unsigned_condition (code);
12452
12453         switch (code)
12454           {
12455           case LT: case GT: case LTU: case GTU:
12456             break;
12457
12458           case LE:   code1 = LT;  code2 = GT;  break;
12459           case GE:   code1 = GT;  code2 = LT;  break;
12460           case LEU:  code1 = LTU; code2 = GTU; break;
12461           case GEU:  code1 = GTU; code2 = LTU; break;
12462
12463           case EQ:   code1 = UNKNOWN; code2 = NE;  break;
12464           case NE:   code2 = UNKNOWN; break;
12465
12466           default:
12467             gcc_unreachable ();
12468           }
12469
12470         /*
12471          * a < b =>
12472          *    if (hi(a) < hi(b)) goto true;
12473          *    if (hi(a) > hi(b)) goto false;
12474          *    if (lo(a) < lo(b)) goto true;
12475          *  false:
12476          */
12477
12478         ix86_compare_op0 = hi[0];
12479         ix86_compare_op1 = hi[1];
12480
12481         if (code1 != UNKNOWN)
12482           ix86_expand_branch (code1, label);
12483         if (code2 != UNKNOWN)
12484           ix86_expand_branch (code2, label2);
12485
12486         ix86_compare_op0 = lo[0];
12487         ix86_compare_op1 = lo[1];
12488         ix86_expand_branch (code3, label);
12489
12490         if (code2 != UNKNOWN)
12491           emit_label (label2);
12492         return;
12493       }
12494
12495     default:
12496       gcc_unreachable ();
12497     }
12498 }
12499
12500 /* Split branch based on floating point condition.  */
12501 void
12502 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
12503                       rtx target1, rtx target2, rtx tmp, rtx pushed)
12504 {
12505   rtx second, bypass;
12506   rtx label = NULL_RTX;
12507   rtx condition;
12508   int bypass_probability = -1, second_probability = -1, probability = -1;
12509   rtx i;
12510
12511   if (target2 != pc_rtx)
12512     {
12513       rtx tmp = target2;
12514       code = reverse_condition_maybe_unordered (code);
12515       target2 = target1;
12516       target1 = tmp;
12517     }
12518
12519   condition = ix86_expand_fp_compare (code, op1, op2,
12520                                       tmp, &second, &bypass);
12521
12522   /* Remove pushed operand from stack.  */
12523   if (pushed)
12524     ix86_free_from_memory (GET_MODE (pushed));
12525
12526   if (split_branch_probability >= 0)
12527     {
12528       /* Distribute the probabilities across the jumps.
12529          Assume the BYPASS and SECOND to be always test
12530          for UNORDERED.  */
12531       probability = split_branch_probability;
12532
12533       /* Value of 1 is low enough to make no need for probability
12534          to be updated.  Later we may run some experiments and see
12535          if unordered values are more frequent in practice.  */
12536       if (bypass)
12537         bypass_probability = 1;
12538       if (second)
12539         second_probability = 1;
12540     }
12541   if (bypass != NULL_RTX)
12542     {
12543       label = gen_label_rtx ();
12544       i = emit_jump_insn (gen_rtx_SET
12545                           (VOIDmode, pc_rtx,
12546                            gen_rtx_IF_THEN_ELSE (VOIDmode,
12547                                                  bypass,
12548                                                  gen_rtx_LABEL_REF (VOIDmode,
12549                                                                     label),
12550                                                  pc_rtx)));
12551       if (bypass_probability >= 0)
12552         REG_NOTES (i)
12553           = gen_rtx_EXPR_LIST (REG_BR_PROB,
12554                                GEN_INT (bypass_probability),
12555                                REG_NOTES (i));
12556     }
12557   i = emit_jump_insn (gen_rtx_SET
12558                       (VOIDmode, pc_rtx,
12559                        gen_rtx_IF_THEN_ELSE (VOIDmode,
12560                                              condition, target1, target2)));
12561   if (probability >= 0)
12562     REG_NOTES (i)
12563       = gen_rtx_EXPR_LIST (REG_BR_PROB,
12564                            GEN_INT (probability),
12565                            REG_NOTES (i));
12566   if (second != NULL_RTX)
12567     {
12568       i = emit_jump_insn (gen_rtx_SET
12569                           (VOIDmode, pc_rtx,
12570                            gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
12571                                                  target2)));
12572       if (second_probability >= 0)
12573         REG_NOTES (i)
12574           = gen_rtx_EXPR_LIST (REG_BR_PROB,
12575                                GEN_INT (second_probability),
12576                                REG_NOTES (i));
12577     }
12578   if (label != NULL_RTX)
12579     emit_label (label);
12580 }
12581
12582 int
12583 ix86_expand_setcc (enum rtx_code code, rtx dest)
12584 {
12585   rtx ret, tmp, tmpreg, equiv;
12586   rtx second_test, bypass_test;
12587
12588   if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
12589     return 0; /* FAIL */
12590
12591   gcc_assert (GET_MODE (dest) == QImode);
12592
12593   ret = ix86_expand_compare (code, &second_test, &bypass_test);
12594   PUT_MODE (ret, QImode);
12595
12596   tmp = dest;
12597   tmpreg = dest;
12598
12599   emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
12600   if (bypass_test || second_test)
12601     {
12602       rtx test = second_test;
12603       int bypass = 0;
12604       rtx tmp2 = gen_reg_rtx (QImode);
12605       if (bypass_test)
12606         {
12607           gcc_assert (!second_test);
12608           test = bypass_test;
12609           bypass = 1;
12610           PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
12611         }
12612       PUT_MODE (test, QImode);
12613       emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
12614
12615       if (bypass)
12616         emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
12617       else
12618         emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
12619     }
12620
12621   /* Attach a REG_EQUAL note describing the comparison result.  */
12622   if (ix86_compare_op0 && ix86_compare_op1)
12623     {
12624       equiv = simplify_gen_relational (code, QImode,
12625                                        GET_MODE (ix86_compare_op0),
12626                                        ix86_compare_op0, ix86_compare_op1);
12627       set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
12628     }
12629
12630   return 1; /* DONE */
12631 }
12632
12633 /* Expand comparison setting or clearing carry flag.  Return true when
12634    successful and set pop for the operation.  */
12635 static bool
12636 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
12637 {
12638   enum machine_mode mode =
12639     GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
12640
12641   /* Do not handle DImode compares that go through special path.  */
12642   if (mode == (TARGET_64BIT ? TImode : DImode))
12643     return false;
12644
12645   if (SCALAR_FLOAT_MODE_P (mode))
12646     {
12647       rtx second_test = NULL, bypass_test = NULL;
12648       rtx compare_op, compare_seq;
12649
12650       gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
12651
12652       /* Shortcut:  following common codes never translate
12653          into carry flag compares.  */
12654       if (code == EQ || code == NE || code == UNEQ || code == LTGT
12655           || code == ORDERED || code == UNORDERED)
12656         return false;
12657
12658       /* These comparisons require zero flag; swap operands so they won't.  */
12659       if ((code == GT || code == UNLE || code == LE || code == UNGT)
12660           && !TARGET_IEEE_FP)
12661         {
12662           rtx tmp = op0;
12663           op0 = op1;
12664           op1 = tmp;
12665           code = swap_condition (code);
12666         }
12667
12668       /* Try to expand the comparison and verify that we end up with
12669          carry flag based comparison.  This fails to be true only when
12670          we decide to expand comparison using arithmetic that is not
12671          too common scenario.  */
12672       start_sequence ();
12673       compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
12674                                            &second_test, &bypass_test);
12675       compare_seq = get_insns ();
12676       end_sequence ();
12677
12678       if (second_test || bypass_test)
12679         return false;
12680
12681       if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
12682           || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
12683         code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
12684       else
12685         code = GET_CODE (compare_op);
12686
12687       if (code != LTU && code != GEU)
12688         return false;
12689
12690       emit_insn (compare_seq);
12691       *pop = compare_op;
12692       return true;
12693     }
12694
12695   if (!INTEGRAL_MODE_P (mode))
12696     return false;
12697
12698   switch (code)
12699     {
12700     case LTU:
12701     case GEU:
12702       break;
12703
12704     /* Convert a==0 into (unsigned)a<1.  */
12705     case EQ:
12706     case NE:
12707       if (op1 != const0_rtx)
12708         return false;
12709       op1 = const1_rtx;
12710       code = (code == EQ ? LTU : GEU);
12711       break;
12712
12713     /* Convert a>b into b<a or a>=b-1.  */
12714     case GTU:
12715     case LEU:
12716       if (CONST_INT_P (op1))
12717         {
12718           op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
12719           /* Bail out on overflow.  We still can swap operands but that
12720              would force loading of the constant into register.  */
12721           if (op1 == const0_rtx
12722               || !x86_64_immediate_operand (op1, GET_MODE (op1)))
12723             return false;
12724           code = (code == GTU ? GEU : LTU);
12725         }
12726       else
12727         {
12728           rtx tmp = op1;
12729           op1 = op0;
12730           op0 = tmp;
12731           code = (code == GTU ? LTU : GEU);
12732         }
12733       break;
12734
12735     /* Convert a>=0 into (unsigned)a<0x80000000.  */
12736     case LT:
12737     case GE:
12738       if (mode == DImode || op1 != const0_rtx)
12739         return false;
12740       op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
12741       code = (code == LT ? GEU : LTU);
12742       break;
12743     case LE:
12744     case GT:
12745       if (mode == DImode || op1 != constm1_rtx)
12746         return false;
12747       op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
12748       code = (code == LE ? GEU : LTU);
12749       break;
12750
12751     default:
12752       return false;
12753     }
12754   /* Swapping operands may cause constant to appear as first operand.  */
12755   if (!nonimmediate_operand (op0, VOIDmode))
12756     {
12757       if (!can_create_pseudo_p ())
12758         return false;
12759       op0 = force_reg (mode, op0);
12760     }
12761   ix86_compare_op0 = op0;
12762   ix86_compare_op1 = op1;
12763   *pop = ix86_expand_compare (code, NULL, NULL);
12764   gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
12765   return true;
12766 }
12767
12768 int
12769 ix86_expand_int_movcc (rtx operands[])
12770 {
12771   enum rtx_code code = GET_CODE (operands[1]), compare_code;
12772   rtx compare_seq, compare_op;
12773   rtx second_test, bypass_test;
12774   enum machine_mode mode = GET_MODE (operands[0]);
12775   bool sign_bit_compare_p = false;;
12776
12777   start_sequence ();
12778   compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
12779   compare_seq = get_insns ();
12780   end_sequence ();
12781
12782   compare_code = GET_CODE (compare_op);
12783
12784   if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
12785       || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
12786     sign_bit_compare_p = true;
12787
12788   /* Don't attempt mode expansion here -- if we had to expand 5 or 6
12789      HImode insns, we'd be swallowed in word prefix ops.  */
12790
12791   if ((mode != HImode || TARGET_FAST_PREFIX)
12792       && (mode != (TARGET_64BIT ? TImode : DImode))
12793       && CONST_INT_P (operands[2])
12794       && CONST_INT_P (operands[3]))
12795     {
12796       rtx out = operands[0];
12797       HOST_WIDE_INT ct = INTVAL (operands[2]);
12798       HOST_WIDE_INT cf = INTVAL (operands[3]);
12799       HOST_WIDE_INT diff;
12800
12801       diff = ct - cf;
12802       /*  Sign bit compares are better done using shifts than we do by using
12803           sbb.  */
12804       if (sign_bit_compare_p
12805           || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
12806                                              ix86_compare_op1, &compare_op))
12807         {
12808           /* Detect overlap between destination and compare sources.  */
12809           rtx tmp = out;
12810
12811           if (!sign_bit_compare_p)
12812             {
12813               bool fpcmp = false;
12814
12815               compare_code = GET_CODE (compare_op);
12816
12817               if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
12818                   || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
12819                 {
12820                   fpcmp = true;
12821                   compare_code = ix86_fp_compare_code_to_integer (compare_code);
12822                 }
12823
12824               /* To simplify rest of code, restrict to the GEU case.  */
12825               if (compare_code == LTU)
12826                 {
12827                   HOST_WIDE_INT tmp = ct;
12828                   ct = cf;
12829                   cf = tmp;
12830                   compare_code = reverse_condition (compare_code);
12831                   code = reverse_condition (code);
12832                 }
12833               else
12834                 {
12835                   if (fpcmp)
12836                     PUT_CODE (compare_op,
12837                               reverse_condition_maybe_unordered
12838                                 (GET_CODE (compare_op)));
12839                   else
12840                     PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
12841                 }
12842               diff = ct - cf;
12843
12844               if (reg_overlap_mentioned_p (out, ix86_compare_op0)
12845                   || reg_overlap_mentioned_p (out, ix86_compare_op1))
12846                 tmp = gen_reg_rtx (mode);
12847
12848               if (mode == DImode)
12849                 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
12850               else
12851                 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
12852             }
12853           else
12854             {
12855               if (code == GT || code == GE)
12856                 code = reverse_condition (code);
12857               else
12858                 {
12859                   HOST_WIDE_INT tmp = ct;
12860                   ct = cf;
12861                   cf = tmp;
12862                   diff = ct - cf;
12863                 }
12864               tmp = emit_store_flag (tmp, code, ix86_compare_op0,
12865                                      ix86_compare_op1, VOIDmode, 0, -1);
12866             }
12867
12868           if (diff == 1)
12869             {
12870               /*
12871                * cmpl op0,op1
12872                * sbbl dest,dest
12873                * [addl dest, ct]
12874                *
12875                * Size 5 - 8.
12876                */
12877               if (ct)
12878                 tmp = expand_simple_binop (mode, PLUS,
12879                                            tmp, GEN_INT (ct),
12880                                            copy_rtx (tmp), 1, OPTAB_DIRECT);
12881             }
12882           else if (cf == -1)
12883             {
12884               /*
12885                * cmpl op0,op1
12886                * sbbl dest,dest
12887                * orl $ct, dest
12888                *
12889                * Size 8.
12890                */
12891               tmp = expand_simple_binop (mode, IOR,
12892                                          tmp, GEN_INT (ct),
12893                                          copy_rtx (tmp), 1, OPTAB_DIRECT);
12894             }
12895           else if (diff == -1 && ct)
12896             {
12897               /*
12898                * cmpl op0,op1
12899                * sbbl dest,dest
12900                * notl dest
12901                * [addl dest, cf]
12902                *
12903                * Size 8 - 11.
12904                */
12905               tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
12906               if (cf)
12907                 tmp = expand_simple_binop (mode, PLUS,
12908                                            copy_rtx (tmp), GEN_INT (cf),
12909                                            copy_rtx (tmp), 1, OPTAB_DIRECT);
12910             }
12911           else
12912             {
12913               /*
12914                * cmpl op0,op1
12915                * sbbl dest,dest
12916                * [notl dest]
12917                * andl cf - ct, dest
12918                * [addl dest, ct]
12919                *
12920                * Size 8 - 11.
12921                */
12922
12923               if (cf == 0)
12924                 {
12925                   cf = ct;
12926                   ct = 0;
12927                   tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
12928                 }
12929
12930               tmp = expand_simple_binop (mode, AND,
12931                                          copy_rtx (tmp),
12932                                          gen_int_mode (cf - ct, mode),
12933                                          copy_rtx (tmp), 1, OPTAB_DIRECT);
12934               if (ct)
12935                 tmp = expand_simple_binop (mode, PLUS,
12936                                            copy_rtx (tmp), GEN_INT (ct),
12937                                            copy_rtx (tmp), 1, OPTAB_DIRECT);
12938             }
12939
12940           if (!rtx_equal_p (tmp, out))
12941             emit_move_insn (copy_rtx (out), copy_rtx (tmp));
12942
12943           return 1; /* DONE */
12944         }
12945
12946       if (diff < 0)
12947         {
12948           enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
12949
12950           HOST_WIDE_INT tmp;
12951           tmp = ct, ct = cf, cf = tmp;
12952           diff = -diff;
12953
12954           if (SCALAR_FLOAT_MODE_P (cmp_mode))
12955             {
12956               gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
12957
12958               /* We may be reversing unordered compare to normal compare, that
12959                  is not valid in general (we may convert non-trapping condition
12960                  to trapping one), however on i386 we currently emit all
12961                  comparisons unordered.  */
12962               compare_code = reverse_condition_maybe_unordered (compare_code);
12963               code = reverse_condition_maybe_unordered (code);
12964             }
12965           else
12966             {
12967               compare_code = reverse_condition (compare_code);
12968               code = reverse_condition (code);
12969             }
12970         }
12971
12972       compare_code = UNKNOWN;
12973       if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
12974           && CONST_INT_P (ix86_compare_op1))
12975         {
12976           if (ix86_compare_op1 == const0_rtx
12977               && (code == LT || code == GE))
12978             compare_code = code;
12979           else if (ix86_compare_op1 == constm1_rtx)
12980             {
12981               if (code == LE)
12982                 compare_code = LT;
12983               else if (code == GT)
12984                 compare_code = GE;
12985             }
12986         }
12987
12988       /* Optimize dest = (op0 < 0) ? -1 : cf.  */
12989       if (compare_code != UNKNOWN
12990           && GET_MODE (ix86_compare_op0) == GET_MODE (out)
12991           && (cf == -1 || ct == -1))
12992         {
12993           /* If lea code below could be used, only optimize
12994              if it results in a 2 insn sequence.  */
12995
12996           if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
12997                  || diff == 3 || diff == 5 || diff == 9)
12998               || (compare_code == LT && ct == -1)
12999               || (compare_code == GE && cf == -1))
13000             {
13001               /*
13002                * notl op1       (if necessary)
13003                * sarl $31, op1
13004                * orl cf, op1
13005                */
13006               if (ct != -1)
13007                 {
13008                   cf = ct;
13009                   ct = -1;
13010                   code = reverse_condition (code);
13011                 }
13012
13013               out = emit_store_flag (out, code, ix86_compare_op0,
13014                                      ix86_compare_op1, VOIDmode, 0, -1);
13015
13016               out = expand_simple_binop (mode, IOR,
13017                                          out, GEN_INT (cf),
13018                                          out, 1, OPTAB_DIRECT);
13019               if (out != operands[0])
13020                 emit_move_insn (operands[0], out);
13021
13022               return 1; /* DONE */
13023             }
13024         }
13025
13026
13027       if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
13028            || diff == 3 || diff == 5 || diff == 9)
13029           && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
13030           && (mode != DImode
13031               || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
13032         {
13033           /*
13034            * xorl dest,dest
13035            * cmpl op1,op2
13036            * setcc dest
13037            * lea cf(dest*(ct-cf)),dest
13038            *
13039            * Size 14.
13040            *
13041            * This also catches the degenerate setcc-only case.
13042            */
13043
13044           rtx tmp;
13045           int nops;
13046
13047           out = emit_store_flag (out, code, ix86_compare_op0,
13048                                  ix86_compare_op1, VOIDmode, 0, 1);
13049
13050           nops = 0;
13051           /* On x86_64 the lea instruction operates on Pmode, so we need
13052              to get arithmetics done in proper mode to match.  */
13053           if (diff == 1)
13054             tmp = copy_rtx (out);
13055           else
13056             {
13057               rtx out1;
13058               out1 = copy_rtx (out);
13059               tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
13060               nops++;
13061               if (diff & 1)
13062                 {
13063                   tmp = gen_rtx_PLUS (mode, tmp, out1);
13064                   nops++;
13065                 }
13066             }
13067           if (cf != 0)
13068             {
13069               tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
13070               nops++;
13071             }
13072           if (!rtx_equal_p (tmp, out))
13073             {
13074               if (nops == 1)
13075                 out = force_operand (tmp, copy_rtx (out));
13076               else
13077                 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
13078             }
13079           if (!rtx_equal_p (out, operands[0]))
13080             emit_move_insn (operands[0], copy_rtx (out));
13081
13082           return 1; /* DONE */
13083         }
13084
13085       /*
13086        * General case:                  Jumpful:
13087        *   xorl dest,dest               cmpl op1, op2
13088        *   cmpl op1, op2                movl ct, dest
13089        *   setcc dest                   jcc 1f
13090        *   decl dest                    movl cf, dest
13091        *   andl (cf-ct),dest            1:
13092        *   addl ct,dest
13093        *
13094        * Size 20.                       Size 14.
13095        *
13096        * This is reasonably steep, but branch mispredict costs are
13097        * high on modern cpus, so consider failing only if optimizing
13098        * for space.
13099        */
13100
13101       if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
13102           && BRANCH_COST >= 2)
13103         {
13104           if (cf == 0)
13105             {
13106               enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
13107
13108               cf = ct;
13109               ct = 0;
13110
13111               if (SCALAR_FLOAT_MODE_P (cmp_mode))
13112                 {
13113                   gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
13114
13115                   /* We may be reversing unordered compare to normal compare,
13116                      that is not valid in general (we may convert non-trapping
13117                      condition to trapping one), however on i386 we currently
13118                      emit all comparisons unordered.  */
13119                   code = reverse_condition_maybe_unordered (code);
13120                 }
13121               else
13122                 {
13123                   code = reverse_condition (code);
13124                   if (compare_code != UNKNOWN)
13125                     compare_code = reverse_condition (compare_code);
13126                 }
13127             }
13128
13129           if (compare_code != UNKNOWN)
13130             {
13131               /* notl op1       (if needed)
13132                  sarl $31, op1
13133                  andl (cf-ct), op1
13134                  addl ct, op1
13135
13136                  For x < 0 (resp. x <= -1) there will be no notl,
13137                  so if possible swap the constants to get rid of the
13138                  complement.
13139                  True/false will be -1/0 while code below (store flag
13140                  followed by decrement) is 0/-1, so the constants need
13141                  to be exchanged once more.  */
13142
13143               if (compare_code == GE || !cf)
13144                 {
13145                   code = reverse_condition (code);
13146                   compare_code = LT;
13147                 }
13148               else
13149                 {
13150                   HOST_WIDE_INT tmp = cf;
13151                   cf = ct;
13152                   ct = tmp;
13153                 }
13154
13155               out = emit_store_flag (out, code, ix86_compare_op0,
13156                                      ix86_compare_op1, VOIDmode, 0, -1);
13157             }
13158           else
13159             {
13160               out = emit_store_flag (out, code, ix86_compare_op0,
13161                                      ix86_compare_op1, VOIDmode, 0, 1);
13162
13163               out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
13164                                          copy_rtx (out), 1, OPTAB_DIRECT);
13165             }
13166
13167           out = expand_simple_binop (mode, AND, copy_rtx (out),
13168                                      gen_int_mode (cf - ct, mode),
13169                                      copy_rtx (out), 1, OPTAB_DIRECT);
13170           if (ct)
13171             out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
13172                                        copy_rtx (out), 1, OPTAB_DIRECT);
13173           if (!rtx_equal_p (out, operands[0]))
13174             emit_move_insn (operands[0], copy_rtx (out));
13175
13176           return 1; /* DONE */
13177         }
13178     }
13179
13180   if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
13181     {
13182       /* Try a few things more with specific constants and a variable.  */
13183
13184       optab op;
13185       rtx var, orig_out, out, tmp;
13186
13187       if (BRANCH_COST <= 2)
13188         return 0; /* FAIL */
13189
13190       /* If one of the two operands is an interesting constant, load a
13191          constant with the above and mask it in with a logical operation.  */
13192
13193       if (CONST_INT_P (operands[2]))
13194         {
13195           var = operands[3];
13196           if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
13197             operands[3] = constm1_rtx, op = and_optab;
13198           else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
13199             operands[3] = const0_rtx, op = ior_optab;
13200           else
13201             return 0; /* FAIL */
13202         }
13203       else if (CONST_INT_P (operands[3]))
13204         {
13205           var = operands[2];
13206           if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
13207             operands[2] = constm1_rtx, op = and_optab;
13208           else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
13209             operands[2] = const0_rtx, op = ior_optab;
13210           else
13211             return 0; /* FAIL */
13212         }
13213       else
13214         return 0; /* FAIL */
13215
13216       orig_out = operands[0];
13217       tmp = gen_reg_rtx (mode);
13218       operands[0] = tmp;
13219
13220       /* Recurse to get the constant loaded.  */
13221       if (ix86_expand_int_movcc (operands) == 0)
13222         return 0; /* FAIL */
13223
13224       /* Mask in the interesting variable.  */
13225       out = expand_binop (mode, op, var, tmp, orig_out, 0,
13226                           OPTAB_WIDEN);
13227       if (!rtx_equal_p (out, orig_out))
13228         emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
13229
13230       return 1; /* DONE */
13231     }
13232
13233   /*
13234    * For comparison with above,
13235    *
13236    * movl cf,dest
13237    * movl ct,tmp
13238    * cmpl op1,op2
13239    * cmovcc tmp,dest
13240    *
13241    * Size 15.
13242    */
13243
13244   if (! nonimmediate_operand (operands[2], mode))
13245     operands[2] = force_reg (mode, operands[2]);
13246   if (! nonimmediate_operand (operands[3], mode))
13247     operands[3] = force_reg (mode, operands[3]);
13248
13249   if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
13250     {
13251       rtx tmp = gen_reg_rtx (mode);
13252       emit_move_insn (tmp, operands[3]);
13253       operands[3] = tmp;
13254     }
13255   if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
13256     {
13257       rtx tmp = gen_reg_rtx (mode);
13258       emit_move_insn (tmp, operands[2]);
13259       operands[2] = tmp;
13260     }
13261
13262   if (! register_operand (operands[2], VOIDmode)
13263       && (mode == QImode
13264           || ! register_operand (operands[3], VOIDmode)))
13265     operands[2] = force_reg (mode, operands[2]);
13266
13267   if (mode == QImode
13268       && ! register_operand (operands[3], VOIDmode))
13269     operands[3] = force_reg (mode, operands[3]);
13270
13271   emit_insn (compare_seq);
13272   emit_insn (gen_rtx_SET (VOIDmode, operands[0],
13273                           gen_rtx_IF_THEN_ELSE (mode,
13274                                                 compare_op, operands[2],
13275                                                 operands[3])));
13276   if (bypass_test)
13277     emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
13278                             gen_rtx_IF_THEN_ELSE (mode,
13279                                   bypass_test,
13280                                   copy_rtx (operands[3]),
13281                                   copy_rtx (operands[0]))));
13282   if (second_test)
13283     emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
13284                             gen_rtx_IF_THEN_ELSE (mode,
13285                                   second_test,
13286                                   copy_rtx (operands[2]),
13287                                   copy_rtx (operands[0]))));
13288
13289   return 1; /* DONE */
13290 }
13291
13292 /* Swap, force into registers, or otherwise massage the two operands
13293    to an sse comparison with a mask result.  Thus we differ a bit from
13294    ix86_prepare_fp_compare_args which expects to produce a flags result.
13295
13296    The DEST operand exists to help determine whether to commute commutative
13297    operators.  The POP0/POP1 operands are updated in place.  The new
13298    comparison code is returned, or UNKNOWN if not implementable.  */
13299
13300 static enum rtx_code
13301 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
13302                                   rtx *pop0, rtx *pop1)
13303 {
13304   rtx tmp;
13305
13306   switch (code)
13307     {
13308     case LTGT:
13309     case UNEQ:
13310       /* We have no LTGT as an operator.  We could implement it with
13311          NE & ORDERED, but this requires an extra temporary.  It's
13312          not clear that it's worth it.  */
13313       return UNKNOWN;
13314
13315     case LT:
13316     case LE:
13317     case UNGT:
13318     case UNGE:
13319       /* These are supported directly.  */
13320       break;
13321
13322     case EQ:
13323     case NE:
13324     case UNORDERED:
13325     case ORDERED:
13326       /* For commutative operators, try to canonicalize the destination
13327          operand to be first in the comparison - this helps reload to
13328          avoid extra moves.  */
13329       if (!dest || !rtx_equal_p (dest, *pop1))
13330         break;
13331       /* FALLTHRU */
13332
13333     case GE:
13334     case GT:
13335     case UNLE:
13336     case UNLT:
13337       /* These are not supported directly.  Swap the comparison operands
13338          to transform into something that is supported.  */
13339       tmp = *pop0;
13340       *pop0 = *pop1;
13341       *pop1 = tmp;
13342       code = swap_condition (code);
13343       break;
13344
13345     default:
13346       gcc_unreachable ();
13347     }
13348
13349   return code;
13350 }
13351
13352 /* Detect conditional moves that exactly match min/max operational
13353    semantics.  Note that this is IEEE safe, as long as we don't
13354    interchange the operands.
13355
13356    Returns FALSE if this conditional move doesn't match a MIN/MAX,
13357    and TRUE if the operation is successful and instructions are emitted.  */
13358
13359 static bool
13360 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
13361                            rtx cmp_op1, rtx if_true, rtx if_false)
13362 {
13363   enum machine_mode mode;
13364   bool is_min;
13365   rtx tmp;
13366
13367   if (code == LT)
13368     ;
13369   else if (code == UNGE)
13370     {
13371       tmp = if_true;
13372       if_true = if_false;
13373       if_false = tmp;
13374     }
13375   else
13376     return false;
13377
13378   if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
13379     is_min = true;
13380   else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
13381     is_min = false;
13382   else
13383     return false;
13384
13385   mode = GET_MODE (dest);
13386
13387   /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
13388      but MODE may be a vector mode and thus not appropriate.  */
13389   if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
13390     {
13391       int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
13392       rtvec v;
13393
13394       if_true = force_reg (mode, if_true);
13395       v = gen_rtvec (2, if_true, if_false);
13396       tmp = gen_rtx_UNSPEC (mode, v, u);
13397     }
13398   else
13399     {
13400       code = is_min ? SMIN : SMAX;
13401       tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
13402     }
13403
13404   emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
13405   return true;
13406 }
13407
13408 /* Expand an sse vector comparison.  Return the register with the result.  */
13409
13410 static rtx
13411 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
13412                      rtx op_true, rtx op_false)
13413 {
13414   enum machine_mode mode = GET_MODE (dest);
13415   rtx x;
13416
13417   cmp_op0 = force_reg (mode, cmp_op0);
13418   if (!nonimmediate_operand (cmp_op1, mode))
13419     cmp_op1 = force_reg (mode, cmp_op1);
13420
13421   if (optimize
13422       || reg_overlap_mentioned_p (dest, op_true)
13423       || reg_overlap_mentioned_p (dest, op_false))
13424     dest = gen_reg_rtx (mode);
13425
13426   x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
13427   emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13428
13429   return dest;
13430 }
13431
13432 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
13433    operations.  This is used for both scalar and vector conditional moves.  */
13434
13435 static void
13436 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
13437 {
13438   enum machine_mode mode = GET_MODE (dest);
13439   rtx t2, t3, x;
13440
13441   if (op_false == CONST0_RTX (mode))
13442     {
13443       op_true = force_reg (mode, op_true);
13444       x = gen_rtx_AND (mode, cmp, op_true);
13445       emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13446     }
13447   else if (op_true == CONST0_RTX (mode))
13448     {
13449       op_false = force_reg (mode, op_false);
13450       x = gen_rtx_NOT (mode, cmp);
13451       x = gen_rtx_AND (mode, x, op_false);
13452       emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13453     }
13454   else if (TARGET_SSE5)
13455     {
13456       rtx pcmov = gen_rtx_SET (mode, dest,
13457                                gen_rtx_IF_THEN_ELSE (mode, cmp,
13458                                                      op_true,
13459                                                      op_false));
13460       emit_insn (pcmov);
13461     }
13462   else
13463     {
13464       op_true = force_reg (mode, op_true);
13465       op_false = force_reg (mode, op_false);
13466
13467       t2 = gen_reg_rtx (mode);
13468       if (optimize)
13469         t3 = gen_reg_rtx (mode);
13470       else
13471         t3 = dest;
13472
13473       x = gen_rtx_AND (mode, op_true, cmp);
13474       emit_insn (gen_rtx_SET (VOIDmode, t2, x));
13475
13476       x = gen_rtx_NOT (mode, cmp);
13477       x = gen_rtx_AND (mode, x, op_false);
13478       emit_insn (gen_rtx_SET (VOIDmode, t3, x));
13479
13480       x = gen_rtx_IOR (mode, t3, t2);
13481       emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13482     }
13483 }
13484
13485 /* Expand a floating-point conditional move.  Return true if successful.  */
13486
13487 int
13488 ix86_expand_fp_movcc (rtx operands[])
13489 {
13490   enum machine_mode mode = GET_MODE (operands[0]);
13491   enum rtx_code code = GET_CODE (operands[1]);
13492   rtx tmp, compare_op, second_test, bypass_test;
13493
13494   if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
13495     {
13496       enum machine_mode cmode;
13497
13498       /* Since we've no cmove for sse registers, don't force bad register
13499          allocation just to gain access to it.  Deny movcc when the
13500          comparison mode doesn't match the move mode.  */
13501       cmode = GET_MODE (ix86_compare_op0);
13502       if (cmode == VOIDmode)
13503         cmode = GET_MODE (ix86_compare_op1);
13504       if (cmode != mode)
13505         return 0;
13506
13507       code = ix86_prepare_sse_fp_compare_args (operands[0], code,
13508                                                &ix86_compare_op0,
13509                                                &ix86_compare_op1);
13510       if (code == UNKNOWN)
13511         return 0;
13512
13513       if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
13514                                      ix86_compare_op1, operands[2],
13515                                      operands[3]))
13516         return 1;
13517
13518       tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
13519                                  ix86_compare_op1, operands[2], operands[3]);
13520       ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
13521       return 1;
13522     }
13523
13524   /* The floating point conditional move instructions don't directly
13525      support conditions resulting from a signed integer comparison.  */
13526
13527   compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
13528
13529   /* The floating point conditional move instructions don't directly
13530      support signed integer comparisons.  */
13531
13532   if (!fcmov_comparison_operator (compare_op, VOIDmode))
13533     {
13534       gcc_assert (!second_test && !bypass_test);
13535       tmp = gen_reg_rtx (QImode);
13536       ix86_expand_setcc (code, tmp);
13537       code = NE;
13538       ix86_compare_op0 = tmp;
13539       ix86_compare_op1 = const0_rtx;
13540       compare_op = ix86_expand_compare (code,  &second_test, &bypass_test);
13541     }
13542   if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
13543     {
13544       tmp = gen_reg_rtx (mode);
13545       emit_move_insn (tmp, operands[3]);
13546       operands[3] = tmp;
13547     }
13548   if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
13549     {
13550       tmp = gen_reg_rtx (mode);
13551       emit_move_insn (tmp, operands[2]);
13552       operands[2] = tmp;
13553     }
13554
13555   emit_insn (gen_rtx_SET (VOIDmode, operands[0],
13556                           gen_rtx_IF_THEN_ELSE (mode, compare_op,
13557                                                 operands[2], operands[3])));
13558   if (bypass_test)
13559     emit_insn (gen_rtx_SET (VOIDmode, operands[0],
13560                             gen_rtx_IF_THEN_ELSE (mode, bypass_test,
13561                                                   operands[3], operands[0])));
13562   if (second_test)
13563     emit_insn (gen_rtx_SET (VOIDmode, operands[0],
13564                             gen_rtx_IF_THEN_ELSE (mode, second_test,
13565                                                   operands[2], operands[0])));
13566
13567   return 1;
13568 }
13569
13570 /* Expand a floating-point vector conditional move; a vcond operation
13571    rather than a movcc operation.  */
13572
13573 bool
13574 ix86_expand_fp_vcond (rtx operands[])
13575 {
13576   enum rtx_code code = GET_CODE (operands[3]);
13577   rtx cmp;
13578
13579   code = ix86_prepare_sse_fp_compare_args (operands[0], code,
13580                                            &operands[4], &operands[5]);
13581   if (code == UNKNOWN)
13582     return false;
13583
13584   if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
13585                                  operands[5], operands[1], operands[2]))
13586     return true;
13587
13588   cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
13589                              operands[1], operands[2]);
13590   ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
13591   return true;
13592 }
13593
13594 /* Expand a signed/unsigned integral vector conditional move.  */
13595
13596 bool
13597 ix86_expand_int_vcond (rtx operands[])
13598 {
13599   enum machine_mode mode = GET_MODE (operands[0]);
13600   enum rtx_code code = GET_CODE (operands[3]);
13601   bool negate = false;
13602   rtx x, cop0, cop1;
13603
13604   cop0 = operands[4];
13605   cop1 = operands[5];
13606
13607   /* SSE5 supports all of the comparisons on all vector int types.  */
13608   if (!TARGET_SSE5)
13609     {
13610       /* Canonicalize the comparison to EQ, GT, GTU.  */
13611       switch (code)
13612         {
13613         case EQ:
13614         case GT:
13615         case GTU:
13616           break;
13617
13618         case NE:
13619         case LE:
13620         case LEU:
13621           code = reverse_condition (code);
13622           negate = true;
13623           break;
13624
13625         case GE:
13626         case GEU:
13627           code = reverse_condition (code);
13628           negate = true;
13629           /* FALLTHRU */
13630
13631         case LT:
13632         case LTU:
13633           code = swap_condition (code);
13634           x = cop0, cop0 = cop1, cop1 = x;
13635           break;
13636
13637         default:
13638           gcc_unreachable ();
13639         }
13640
13641       /* Only SSE4.1/SSE4.2 supports V2DImode.  */
13642       if (mode == V2DImode)
13643         {
13644           switch (code)
13645             {
13646             case EQ:
13647               /* SSE4.1 supports EQ.  */
13648               if (!TARGET_SSE4_1)
13649                 return false;
13650               break;
13651
13652             case GT:
13653             case GTU:
13654               /* SSE4.2 supports GT/GTU.  */
13655               if (!TARGET_SSE4_2)
13656                 return false;
13657               break;
13658
13659             default:
13660               gcc_unreachable ();
13661             }
13662         }
13663
13664       /* Unsigned parallel compare is not supported by the hardware.  Play some
13665          tricks to turn this into a signed comparison against 0.  */
13666       if (code == GTU)
13667         {
13668           cop0 = force_reg (mode, cop0);
13669
13670           switch (mode)
13671             {
13672             case V4SImode:
13673             case V2DImode:
13674               {
13675                 rtx t1, t2, mask;
13676
13677                 /* Perform a parallel modulo subtraction.  */
13678                 t1 = gen_reg_rtx (mode);
13679                 emit_insn ((mode == V4SImode
13680                             ? gen_subv4si3
13681                             : gen_subv2di3) (t1, cop0, cop1));
13682
13683                 /* Extract the original sign bit of op0.  */
13684                 mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
13685                                                 true, false);
13686                 t2 = gen_reg_rtx (mode);
13687                 emit_insn ((mode == V4SImode
13688                             ? gen_andv4si3
13689                             : gen_andv2di3) (t2, cop0, mask));
13690
13691                 /* XOR it back into the result of the subtraction.  This results
13692                    in the sign bit set iff we saw unsigned underflow.  */
13693                 x = gen_reg_rtx (mode);
13694                 emit_insn ((mode == V4SImode
13695                             ? gen_xorv4si3
13696                             : gen_xorv2di3) (x, t1, t2));
13697
13698                 code = GT;
13699               }
13700               break;
13701
13702             case V16QImode:
13703             case V8HImode:
13704               /* Perform a parallel unsigned saturating subtraction.  */
13705               x = gen_reg_rtx (mode);
13706               emit_insn (gen_rtx_SET (VOIDmode, x,
13707                                       gen_rtx_US_MINUS (mode, cop0, cop1)));
13708
13709               code = EQ;
13710               negate = !negate;
13711               break;
13712
13713             default:
13714               gcc_unreachable ();
13715             }
13716
13717           cop0 = x;
13718           cop1 = CONST0_RTX (mode);
13719         }
13720     }
13721
13722   x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
13723                            operands[1+negate], operands[2-negate]);
13724
13725   ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
13726                          operands[2-negate]);
13727   return true;
13728 }
13729
13730 /* Unpack OP[1] into the next wider integer vector type.  UNSIGNED_P is
13731    true if we should do zero extension, else sign extension.  HIGH_P is
13732    true if we want the N/2 high elements, else the low elements.  */
13733
13734 void
13735 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
13736 {
13737   enum machine_mode imode = GET_MODE (operands[1]);
13738   rtx (*unpack)(rtx, rtx, rtx);
13739   rtx se, dest;
13740
13741   switch (imode)
13742     {
13743     case V16QImode:
13744       if (high_p)
13745         unpack = gen_vec_interleave_highv16qi;
13746       else
13747         unpack = gen_vec_interleave_lowv16qi;
13748       break;
13749     case V8HImode:
13750       if (high_p)
13751         unpack = gen_vec_interleave_highv8hi;
13752       else
13753         unpack = gen_vec_interleave_lowv8hi;
13754       break;
13755     case V4SImode:
13756       if (high_p)
13757         unpack = gen_vec_interleave_highv4si;
13758       else
13759         unpack = gen_vec_interleave_lowv4si;
13760       break;
13761     default:
13762       gcc_unreachable ();
13763     }
13764
13765   dest = gen_lowpart (imode, operands[0]);
13766
13767   if (unsigned_p)
13768     se = force_reg (imode, CONST0_RTX (imode));
13769   else
13770     se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
13771                               operands[1], pc_rtx, pc_rtx);
13772
13773   emit_insn (unpack (dest, operands[1], se));
13774 }
13775
13776 /* This function performs the same task as ix86_expand_sse_unpack,
13777    but with SSE4.1 instructions.  */
13778
13779 void
13780 ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
13781 {
13782   enum machine_mode imode = GET_MODE (operands[1]);
13783   rtx (*unpack)(rtx, rtx);
13784   rtx src, dest;
13785
13786   switch (imode)
13787     {
13788     case V16QImode:
13789       if (unsigned_p)
13790         unpack = gen_sse4_1_zero_extendv8qiv8hi2;
13791       else
13792         unpack = gen_sse4_1_extendv8qiv8hi2;
13793       break;
13794     case V8HImode:
13795       if (unsigned_p)
13796         unpack = gen_sse4_1_zero_extendv4hiv4si2;
13797       else
13798         unpack = gen_sse4_1_extendv4hiv4si2;
13799       break;
13800     case V4SImode:
13801       if (unsigned_p)
13802         unpack = gen_sse4_1_zero_extendv2siv2di2;
13803       else
13804         unpack = gen_sse4_1_extendv2siv2di2;
13805       break;
13806     default:
13807       gcc_unreachable ();
13808     }
13809
13810   dest = operands[0];
13811   if (high_p)
13812     {
13813       /* Shift higher 8 bytes to lower 8 bytes.  */
13814       src = gen_reg_rtx (imode);
13815       emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, src),
13816                                    gen_lowpart (TImode, operands[1]),
13817                                    GEN_INT (64)));
13818     }
13819   else
13820     src = operands[1];
13821
13822   emit_insn (unpack (dest, src));
13823 }
13824
13825 /* This function performs the same task as ix86_expand_sse_unpack,
13826    but with sse5 instructions.  */
13827
13828 void
13829 ix86_expand_sse5_unpack (rtx operands[2], bool unsigned_p, bool high_p)
13830 {
13831   enum machine_mode imode = GET_MODE (operands[1]);
13832   int pperm_bytes[16];
13833   int i;
13834   int h = (high_p) ? 8 : 0;
13835   int h2;
13836   int sign_extend;
13837   rtvec v = rtvec_alloc (16);
13838   rtvec vs;
13839   rtx x, p;
13840   rtx op0 = operands[0], op1 = operands[1];
13841
13842   switch (imode)
13843     {
13844     case V16QImode:
13845       vs = rtvec_alloc (8);
13846       h2 = (high_p) ? 8 : 0;
13847       for (i = 0; i < 8; i++)
13848         {
13849           pperm_bytes[2*i+0] = PPERM_SRC | PPERM_SRC2 | i | h;
13850           pperm_bytes[2*i+1] = ((unsigned_p)
13851                                 ? PPERM_ZERO
13852                                 : PPERM_SIGN | PPERM_SRC2 | i | h);
13853         }
13854
13855       for (i = 0; i < 16; i++)
13856         RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13857
13858       for (i = 0; i < 8; i++)
13859         RTVEC_ELT (vs, i) = GEN_INT (i + h2);
13860
13861       p = gen_rtx_PARALLEL (VOIDmode, vs);
13862       x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13863       if (unsigned_p)
13864         emit_insn (gen_sse5_pperm_zero_v16qi_v8hi (op0, op1, p, x));
13865       else
13866         emit_insn (gen_sse5_pperm_sign_v16qi_v8hi (op0, op1, p, x));
13867       break;
13868
13869     case V8HImode:
13870       vs = rtvec_alloc (4);
13871       h2 = (high_p) ? 4 : 0;
13872       for (i = 0; i < 4; i++)
13873         {
13874           sign_extend = ((unsigned_p)
13875                          ? PPERM_ZERO
13876                          : PPERM_SIGN | PPERM_SRC2 | ((2*i) + 1 + h));
13877           pperm_bytes[4*i+0] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 0 + h);
13878           pperm_bytes[4*i+1] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 1 + h);
13879           pperm_bytes[4*i+2] = sign_extend;
13880           pperm_bytes[4*i+3] = sign_extend;
13881         }
13882
13883       for (i = 0; i < 16; i++)
13884         RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13885
13886       for (i = 0; i < 4; i++)
13887         RTVEC_ELT (vs, i) = GEN_INT (i + h2);
13888
13889       p = gen_rtx_PARALLEL (VOIDmode, vs);
13890       x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13891       if (unsigned_p)
13892         emit_insn (gen_sse5_pperm_zero_v8hi_v4si (op0, op1, p, x));
13893       else
13894         emit_insn (gen_sse5_pperm_sign_v8hi_v4si (op0, op1, p, x));
13895       break;
13896
13897     case V4SImode:
13898       vs = rtvec_alloc (2);
13899       h2 = (high_p) ? 2 : 0;
13900       for (i = 0; i < 2; i++)
13901         {
13902           sign_extend = ((unsigned_p)
13903                          ? PPERM_ZERO
13904                          : PPERM_SIGN | PPERM_SRC2 | ((4*i) + 3 + h));
13905           pperm_bytes[8*i+0] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 0 + h);
13906           pperm_bytes[8*i+1] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 1 + h);
13907           pperm_bytes[8*i+2] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 2 + h);
13908           pperm_bytes[8*i+3] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 3 + h);
13909           pperm_bytes[8*i+4] = sign_extend;
13910           pperm_bytes[8*i+5] = sign_extend;
13911           pperm_bytes[8*i+6] = sign_extend;
13912           pperm_bytes[8*i+7] = sign_extend;
13913         }
13914
13915       for (i = 0; i < 16; i++)
13916         RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13917
13918       for (i = 0; i < 2; i++)
13919         RTVEC_ELT (vs, i) = GEN_INT (i + h2);
13920
13921       p = gen_rtx_PARALLEL (VOIDmode, vs);
13922       x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13923       if (unsigned_p)
13924         emit_insn (gen_sse5_pperm_zero_v4si_v2di (op0, op1, p, x));
13925       else
13926         emit_insn (gen_sse5_pperm_sign_v4si_v2di (op0, op1, p, x));
13927       break;
13928
13929     default:
13930       gcc_unreachable ();
13931     }
13932
13933   return;
13934 }
13935
13936 /* Pack the high bits from OPERANDS[1] and low bits from OPERANDS[2] into the
13937    next narrower integer vector type */
13938 void
13939 ix86_expand_sse5_pack (rtx operands[3])
13940 {
13941   enum machine_mode imode = GET_MODE (operands[0]);
13942   int pperm_bytes[16];
13943   int i;
13944   rtvec v = rtvec_alloc (16);
13945   rtx x;
13946   rtx op0 = operands[0];
13947   rtx op1 = operands[1];
13948   rtx op2 = operands[2];
13949
13950   switch (imode)
13951     {
13952     case V16QImode:
13953       for (i = 0; i < 8; i++)
13954         {
13955           pperm_bytes[i+0] = PPERM_SRC | PPERM_SRC1 | (i*2);
13956           pperm_bytes[i+8] = PPERM_SRC | PPERM_SRC2 | (i*2);
13957         }
13958
13959       for (i = 0; i < 16; i++)
13960         RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13961
13962       x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13963       emit_insn (gen_sse5_pperm_pack_v8hi_v16qi (op0, op1, op2, x));
13964       break;
13965
13966     case V8HImode:
13967       for (i = 0; i < 4; i++)
13968         {
13969           pperm_bytes[(2*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 0);
13970           pperm_bytes[(2*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 1);
13971           pperm_bytes[(2*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 0);
13972           pperm_bytes[(2*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 1);
13973         }
13974
13975       for (i = 0; i < 16; i++)
13976         RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13977
13978       x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13979       emit_insn (gen_sse5_pperm_pack_v4si_v8hi (op0, op1, op2, x));
13980       break;
13981
13982     case V4SImode:
13983       for (i = 0; i < 2; i++)
13984         {
13985           pperm_bytes[(4*i)+0]  = PPERM_SRC | PPERM_SRC1 | ((i*8) + 0);
13986           pperm_bytes[(4*i)+1]  = PPERM_SRC | PPERM_SRC1 | ((i*8) + 1);
13987           pperm_bytes[(4*i)+2]  = PPERM_SRC | PPERM_SRC1 | ((i*8) + 2);
13988           pperm_bytes[(4*i)+3]  = PPERM_SRC | PPERM_SRC1 | ((i*8) + 3);
13989           pperm_bytes[(4*i)+8]  = PPERM_SRC | PPERM_SRC2 | ((i*8) + 0);
13990           pperm_bytes[(4*i)+9]  = PPERM_SRC | PPERM_SRC2 | ((i*8) + 1);
13991           pperm_bytes[(4*i)+10] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 2);
13992           pperm_bytes[(4*i)+11] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 3);
13993         }
13994
13995       for (i = 0; i < 16; i++)
13996         RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13997
13998       x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13999       emit_insn (gen_sse5_pperm_pack_v2di_v4si (op0, op1, op2, x));
14000       break;
14001
14002     default:
14003       gcc_unreachable ();
14004     }
14005
14006   return;
14007 }
14008
14009 /* Expand conditional increment or decrement using adb/sbb instructions.
14010    The default case using setcc followed by the conditional move can be
14011    done by generic code.  */
14012 int
14013 ix86_expand_int_addcc (rtx operands[])
14014 {
14015   enum rtx_code code = GET_CODE (operands[1]);
14016   rtx compare_op;
14017   rtx val = const0_rtx;
14018   bool fpcmp = false;
14019   enum machine_mode mode = GET_MODE (operands[0]);
14020
14021   if (operands[3] != const1_rtx
14022       && operands[3] != constm1_rtx)
14023     return 0;
14024   if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
14025                                        ix86_compare_op1, &compare_op))
14026      return 0;
14027   code = GET_CODE (compare_op);
14028
14029   if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
14030       || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
14031     {
14032       fpcmp = true;
14033       code = ix86_fp_compare_code_to_integer (code);
14034     }
14035
14036   if (code != LTU)
14037     {
14038       val = constm1_rtx;
14039       if (fpcmp)
14040         PUT_CODE (compare_op,
14041                   reverse_condition_maybe_unordered
14042                     (GET_CODE (compare_op)));
14043       else
14044         PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
14045     }
14046   PUT_MODE (compare_op, mode);
14047
14048   /* Construct either adc or sbb insn.  */
14049   if ((code == LTU) == (operands[3] == constm1_rtx))
14050     {
14051       switch (GET_MODE (operands[0]))
14052         {
14053           case QImode:
14054             emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
14055             break;
14056           case HImode:
14057             emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
14058             break;
14059           case SImode:
14060             emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
14061             break;
14062           case DImode:
14063             emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
14064             break;
14065           default:
14066             gcc_unreachable ();
14067         }
14068     }
14069   else
14070     {
14071       switch (GET_MODE (operands[0]))
14072         {
14073           case QImode:
14074             emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
14075             break;
14076           case HImode:
14077             emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
14078             break;
14079           case SImode:
14080             emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
14081             break;
14082           case DImode:
14083             emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
14084             break;
14085           default:
14086             gcc_unreachable ();
14087         }
14088     }
14089   return 1; /* DONE */
14090 }
14091
14092
14093 /* Split operands 0 and 1 into SImode parts.  Similar to split_di, but
14094    works for floating pointer parameters and nonoffsetable memories.
14095    For pushes, it returns just stack offsets; the values will be saved
14096    in the right order.  Maximally three parts are generated.  */
14097
14098 static int
14099 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
14100 {
14101   int size;
14102
14103   if (!TARGET_64BIT)
14104     size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
14105   else
14106     size = (GET_MODE_SIZE (mode) + 4) / 8;
14107
14108   gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
14109   gcc_assert (size >= 2 && size <= 3);
14110
14111   /* Optimize constant pool reference to immediates.  This is used by fp
14112      moves, that force all constants to memory to allow combining.  */
14113   if (MEM_P (operand) && MEM_READONLY_P (operand))
14114     {
14115       rtx tmp = maybe_get_pool_constant (operand);
14116       if (tmp)
14117         operand = tmp;
14118     }
14119
14120   if (MEM_P (operand) && !offsettable_memref_p (operand))
14121     {
14122       /* The only non-offsetable memories we handle are pushes.  */
14123       int ok = push_operand (operand, VOIDmode);
14124
14125       gcc_assert (ok);
14126
14127       operand = copy_rtx (operand);
14128       PUT_MODE (operand, Pmode);
14129       parts[0] = parts[1] = parts[2] = operand;
14130       return size;
14131     }
14132
14133   if (GET_CODE (operand) == CONST_VECTOR)
14134     {
14135       enum machine_mode imode = int_mode_for_mode (mode);
14136       /* Caution: if we looked through a constant pool memory above,
14137          the operand may actually have a different mode now.  That's
14138          ok, since we want to pun this all the way back to an integer.  */
14139       operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
14140       gcc_assert (operand != NULL);
14141       mode = imode;
14142     }
14143
14144   if (!TARGET_64BIT)
14145     {
14146       if (mode == DImode)
14147         split_di (&operand, 1, &parts[0], &parts[1]);
14148       else
14149         {
14150           if (REG_P (operand))
14151             {
14152               gcc_assert (reload_completed);
14153               parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
14154               parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
14155               if (size == 3)
14156                 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
14157             }
14158           else if (offsettable_memref_p (operand))
14159             {
14160               operand = adjust_address (operand, SImode, 0);
14161               parts[0] = operand;
14162               parts[1] = adjust_address (operand, SImode, 4);
14163               if (size == 3)
14164                 parts[2] = adjust_address (operand, SImode, 8);
14165             }
14166           else if (GET_CODE (operand) == CONST_DOUBLE)
14167             {
14168               REAL_VALUE_TYPE r;
14169               long l[4];
14170
14171               REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
14172               switch (mode)
14173                 {
14174                 case XFmode:
14175                   REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
14176                   parts[2] = gen_int_mode (l[2], SImode);
14177                   break;
14178                 case DFmode:
14179                   REAL_VALUE_TO_TARGET_DOUBLE (r, l);
14180                   break;
14181                 default:
14182                   gcc_unreachable ();
14183                 }
14184               parts[1] = gen_int_mode (l[1], SImode);
14185               parts[0] = gen_int_mode (l[0], SImode);
14186             }
14187           else
14188             gcc_unreachable ();
14189         }
14190     }
14191   else
14192     {
14193       if (mode == TImode)
14194         split_ti (&operand, 1, &parts[0], &parts[1]);
14195       if (mode == XFmode || mode == TFmode)
14196         {
14197           enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
14198           if (REG_P (operand))
14199             {
14200               gcc_assert (reload_completed);
14201               parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
14202               parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
14203             }
14204           else if (offsettable_memref_p (operand))
14205             {
14206               operand = adjust_address (operand, DImode, 0);
14207               parts[0] = operand;
14208               parts[1] = adjust_address (operand, upper_mode, 8);
14209             }
14210           else if (GET_CODE (operand) == CONST_DOUBLE)
14211             {
14212               REAL_VALUE_TYPE r;
14213               long l[4];
14214
14215               REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
14216               real_to_target (l, &r, mode);
14217
14218               /* Do not use shift by 32 to avoid warning on 32bit systems.  */
14219               if (HOST_BITS_PER_WIDE_INT >= 64)
14220                 parts[0]
14221                   = gen_int_mode
14222                       ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
14223                        + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
14224                        DImode);
14225               else
14226                 parts[0] = immed_double_const (l[0], l[1], DImode);
14227
14228               if (upper_mode == SImode)
14229                 parts[1] = gen_int_mode (l[2], SImode);
14230               else if (HOST_BITS_PER_WIDE_INT >= 64)
14231                 parts[1]
14232                   = gen_int_mode
14233                       ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
14234                        + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
14235                        DImode);
14236               else
14237                 parts[1] = immed_double_const (l[2], l[3], DImode);
14238             }
14239           else
14240             gcc_unreachable ();
14241         }
14242     }
14243
14244   return size;
14245 }
14246
14247 /* Emit insns to perform a move or push of DI, DF, and XF values.
14248    Return false when normal moves are needed; true when all required
14249    insns have been emitted.  Operands 2-4 contain the input values
14250    int the correct order; operands 5-7 contain the output values.  */
14251
14252 void
14253 ix86_split_long_move (rtx operands[])
14254 {
14255   rtx part[2][3];
14256   int nparts;
14257   int push = 0;
14258   int collisions = 0;
14259   enum machine_mode mode = GET_MODE (operands[0]);
14260
14261   /* The DFmode expanders may ask us to move double.
14262      For 64bit target this is single move.  By hiding the fact
14263      here we simplify i386.md splitters.  */
14264   if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
14265     {
14266       /* Optimize constant pool reference to immediates.  This is used by
14267          fp moves, that force all constants to memory to allow combining.  */
14268
14269       if (MEM_P (operands[1])
14270           && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
14271           && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
14272         operands[1] = get_pool_constant (XEXP (operands[1], 0));
14273       if (push_operand (operands[0], VOIDmode))
14274         {
14275           operands[0] = copy_rtx (operands[0]);
14276           PUT_MODE (operands[0], Pmode);
14277         }
14278       else
14279         operands[0] = gen_lowpart (DImode, operands[0]);
14280       operands[1] = gen_lowpart (DImode, operands[1]);
14281       emit_move_insn (operands[0], operands[1]);
14282       return;
14283     }
14284
14285   /* The only non-offsettable memory we handle is push.  */
14286   if (push_operand (operands[0], VOIDmode))
14287     push = 1;
14288   else
14289     gcc_assert (!MEM_P (operands[0])
14290                 || offsettable_memref_p (operands[0]));
14291
14292   nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
14293   ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
14294
14295   /* When emitting push, take care for source operands on the stack.  */
14296   if (push && MEM_P (operands[1])
14297       && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
14298     {
14299       if (nparts == 3)
14300         part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
14301                                      XEXP (part[1][2], 0));
14302       part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
14303                                    XEXP (part[1][1], 0));
14304     }
14305
14306   /* We need to do copy in the right order in case an address register
14307      of the source overlaps the destination.  */
14308   if (REG_P (part[0][0]) && MEM_P (part[1][0]))
14309     {
14310       if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
14311         collisions++;
14312       if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
14313         collisions++;
14314       if (nparts == 3
14315           && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
14316         collisions++;
14317
14318       /* Collision in the middle part can be handled by reordering.  */
14319       if (collisions == 1 && nparts == 3
14320           && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
14321         {
14322           rtx tmp;
14323           tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
14324           tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
14325         }
14326
14327       /* If there are more collisions, we can't handle it by reordering.
14328          Do an lea to the last part and use only one colliding move.  */
14329       else if (collisions > 1)
14330         {
14331           rtx base;
14332
14333           collisions = 1;
14334
14335           base = part[0][nparts - 1];
14336
14337           /* Handle the case when the last part isn't valid for lea.
14338              Happens in 64-bit mode storing the 12-byte XFmode.  */
14339           if (GET_MODE (base) != Pmode)
14340             base = gen_rtx_REG (Pmode, REGNO (base));
14341
14342           emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
14343           part[1][0] = replace_equiv_address (part[1][0], base);
14344           part[1][1] = replace_equiv_address (part[1][1],
14345                                       plus_constant (base, UNITS_PER_WORD));
14346           if (nparts == 3)
14347             part[1][2] = replace_equiv_address (part[1][2],
14348                                       plus_constant (base, 8));
14349         }
14350     }
14351
14352   if (push)
14353     {
14354       if (!TARGET_64BIT)
14355         {
14356           if (nparts == 3)
14357             {
14358               if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
14359                 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
14360               emit_move_insn (part[0][2], part[1][2]);
14361             }
14362         }
14363       else
14364         {
14365           /* In 64bit mode we don't have 32bit push available.  In case this is
14366              register, it is OK - we will just use larger counterpart.  We also
14367              retype memory - these comes from attempt to avoid REX prefix on
14368              moving of second half of TFmode value.  */
14369           if (GET_MODE (part[1][1]) == SImode)
14370             {
14371               switch (GET_CODE (part[1][1]))
14372                 {
14373                 case MEM:
14374                   part[1][1] = adjust_address (part[1][1], DImode, 0);
14375                   break;
14376
14377                 case REG:
14378                   part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
14379                   break;
14380
14381                 default:
14382                   gcc_unreachable ();
14383                 }
14384
14385               if (GET_MODE (part[1][0]) == SImode)
14386                 part[1][0] = part[1][1];
14387             }
14388         }
14389       emit_move_insn (part[0][1], part[1][1]);
14390       emit_move_insn (part[0][0], part[1][0]);
14391       return;
14392     }
14393
14394   /* Choose correct order to not overwrite the source before it is copied.  */
14395   if ((REG_P (part[0][0])
14396        && REG_P (part[1][1])
14397        && (REGNO (part[0][0]) == REGNO (part[1][1])
14398            || (nparts == 3
14399                && REGNO (part[0][0]) == REGNO (part[1][2]))))
14400       || (collisions > 0
14401           && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
14402     {
14403       if (nparts == 3)
14404         {
14405           operands[2] = part[0][2];
14406           operands[3] = part[0][1];
14407           operands[4] = part[0][0];
14408           operands[5] = part[1][2];
14409           operands[6] = part[1][1];
14410           operands[7] = part[1][0];
14411         }
14412       else
14413         {
14414           operands[2] = part[0][1];
14415           operands[3] = part[0][0];
14416           operands[5] = part[1][1];
14417           operands[6] = part[1][0];
14418         }
14419     }
14420   else
14421     {
14422       if (nparts == 3)
14423         {
14424           operands[2] = part[0][0];
14425           operands[3] = part[0][1];
14426           operands[4] = part[0][2];
14427           operands[5] = part[1][0];
14428           operands[6] = part[1][1];
14429           operands[7] = part[1][2];
14430         }
14431       else
14432         {
14433           operands[2] = part[0][0];
14434           operands[3] = part[0][1];
14435           operands[5] = part[1][0];
14436           operands[6] = part[1][1];
14437         }
14438     }
14439
14440   /* If optimizing for size, attempt to locally unCSE nonzero constants.  */
14441   if (optimize_size)
14442     {
14443       if (CONST_INT_P (operands[5])
14444           && operands[5] != const0_rtx
14445           && REG_P (operands[2]))
14446         {
14447           if (CONST_INT_P (operands[6])
14448               && INTVAL (operands[6]) == INTVAL (operands[5]))
14449             operands[6] = operands[2];
14450
14451           if (nparts == 3
14452               && CONST_INT_P (operands[7])
14453               && INTVAL (operands[7]) == INTVAL (operands[5]))
14454             operands[7] = operands[2];
14455         }
14456
14457       if (nparts == 3
14458           && CONST_INT_P (operands[6])
14459           && operands[6] != const0_rtx
14460           && REG_P (operands[3])
14461           && CONST_INT_P (operands[7])
14462           && INTVAL (operands[7]) == INTVAL (operands[6]))
14463         operands[7] = operands[3];
14464     }
14465
14466   emit_move_insn (operands[2], operands[5]);
14467   emit_move_insn (operands[3], operands[6]);
14468   if (nparts == 3)
14469     emit_move_insn (operands[4], operands[7]);
14470
14471   return;
14472 }
14473
14474 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
14475    left shift by a constant, either using a single shift or
14476    a sequence of add instructions.  */
14477
14478 static void
14479 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
14480 {
14481   if (count == 1)
14482     {
14483       emit_insn ((mode == DImode
14484                   ? gen_addsi3
14485                   : gen_adddi3) (operand, operand, operand));
14486     }
14487   else if (!optimize_size
14488            && count * ix86_cost->add <= ix86_cost->shift_const)
14489     {
14490       int i;
14491       for (i=0; i<count; i++)
14492         {
14493           emit_insn ((mode == DImode
14494                       ? gen_addsi3
14495                       : gen_adddi3) (operand, operand, operand));
14496         }
14497     }
14498   else
14499     emit_insn ((mode == DImode
14500                 ? gen_ashlsi3
14501                 : gen_ashldi3) (operand, operand, GEN_INT (count)));
14502 }
14503
14504 void
14505 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
14506 {
14507   rtx low[2], high[2];
14508   int count;
14509   const int single_width = mode == DImode ? 32 : 64;
14510
14511   if (CONST_INT_P (operands[2]))
14512     {
14513       (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
14514       count = INTVAL (operands[2]) & (single_width * 2 - 1);
14515
14516       if (count >= single_width)
14517         {
14518           emit_move_insn (high[0], low[1]);
14519           emit_move_insn (low[0], const0_rtx);
14520
14521           if (count > single_width)
14522             ix86_expand_ashl_const (high[0], count - single_width, mode);
14523         }
14524       else
14525         {
14526           if (!rtx_equal_p (operands[0], operands[1]))
14527             emit_move_insn (operands[0], operands[1]);
14528           emit_insn ((mode == DImode
14529                      ? gen_x86_shld_1
14530                      : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
14531           ix86_expand_ashl_const (low[0], count, mode);
14532         }
14533       return;
14534     }
14535
14536   (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
14537
14538   if (operands[1] == const1_rtx)
14539     {
14540       /* Assuming we've chosen a QImode capable registers, then 1 << N
14541          can be done with two 32/64-bit shifts, no branches, no cmoves.  */
14542       if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
14543         {
14544           rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
14545
14546           ix86_expand_clear (low[0]);
14547           ix86_expand_clear (high[0]);
14548           emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
14549
14550           d = gen_lowpart (QImode, low[0]);
14551           d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
14552           s = gen_rtx_EQ (QImode, flags, const0_rtx);
14553           emit_insn (gen_rtx_SET (VOIDmode, d, s));
14554
14555           d = gen_lowpart (QImode, high[0]);
14556           d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
14557           s = gen_rtx_NE (QImode, flags, const0_rtx);
14558           emit_insn (gen_rtx_SET (VOIDmode, d, s));
14559         }
14560
14561       /* Otherwise, we can get the same results by manually performing
14562          a bit extract operation on bit 5/6, and then performing the two
14563          shifts.  The two methods of getting 0/1 into low/high are exactly
14564          the same size.  Avoiding the shift in the bit extract case helps
14565          pentium4 a bit; no one else seems to care much either way.  */
14566       else
14567         {
14568           rtx x;
14569
14570           if (TARGET_PARTIAL_REG_STALL && !optimize_size)
14571             x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
14572           else
14573             x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
14574           emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
14575
14576           emit_insn ((mode == DImode
14577                       ? gen_lshrsi3
14578                       : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
14579           emit_insn ((mode == DImode
14580                       ? gen_andsi3
14581                       : gen_anddi3) (high[0], high[0], GEN_INT (1)));
14582           emit_move_insn (low[0], high[0]);
14583           emit_insn ((mode == DImode
14584                       ? gen_xorsi3
14585                       : gen_xordi3) (low[0], low[0], GEN_INT (1)));
14586         }
14587
14588       emit_insn ((mode == DImode
14589                     ? gen_ashlsi3
14590                     : gen_ashldi3) (low[0], low[0], operands[2]));
14591       emit_insn ((mode == DImode
14592                     ? gen_ashlsi3
14593                     : gen_ashldi3) (high[0], high[0], operands[2]));
14594       return;
14595     }
14596
14597   if (operands[1] == constm1_rtx)
14598     {
14599       /* For -1 << N, we can avoid the shld instruction, because we
14600          know that we're shifting 0...31/63 ones into a -1.  */
14601       emit_move_insn (low[0], constm1_rtx);
14602       if (optimize_size)
14603         emit_move_insn (high[0], low[0]);
14604       else
14605         emit_move_insn (high[0], constm1_rtx);
14606     }
14607   else
14608     {
14609       if (!rtx_equal_p (operands[0], operands[1]))
14610         emit_move_insn (operands[0], operands[1]);
14611
14612       (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
14613       emit_insn ((mode == DImode
14614                   ? gen_x86_shld_1
14615                   : gen_x86_64_shld) (high[0], low[0], operands[2]));
14616     }
14617
14618   emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
14619
14620   if (TARGET_CMOVE && scratch)
14621     {
14622       ix86_expand_clear (scratch);
14623       emit_insn ((mode == DImode
14624                   ? gen_x86_shift_adj_1
14625                   : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
14626     }
14627   else
14628     emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
14629 }
14630
14631 void
14632 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
14633 {
14634   rtx low[2], high[2];
14635   int count;
14636   const int single_width = mode == DImode ? 32 : 64;
14637
14638   if (CONST_INT_P (operands[2]))
14639     {
14640       (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
14641       count = INTVAL (operands[2]) & (single_width * 2 - 1);
14642
14643       if (count == single_width * 2 - 1)
14644         {
14645           emit_move_insn (high[0], high[1]);
14646           emit_insn ((mode == DImode
14647                       ? gen_ashrsi3
14648                       : gen_ashrdi3) (high[0], high[0],
14649                                       GEN_INT (single_width - 1)));
14650           emit_move_insn (low[0], high[0]);
14651
14652         }
14653       else if (count >= single_width)
14654         {
14655           emit_move_insn (low[0], high[1]);
14656           emit_move_insn (high[0], low[0]);
14657           emit_insn ((mode == DImode
14658                       ? gen_ashrsi3
14659                       : gen_ashrdi3) (high[0], high[0],
14660                                       GEN_INT (single_width - 1)));
14661           if (count > single_width)
14662             emit_insn ((mode == DImode
14663                         ? gen_ashrsi3
14664                         : gen_ashrdi3) (low[0], low[0],
14665                                         GEN_INT (count - single_width)));
14666         }
14667       else
14668         {
14669           if (!rtx_equal_p (operands[0], operands[1]))
14670             emit_move_insn (operands[0], operands[1]);
14671           emit_insn ((mode == DImode
14672                       ? gen_x86_shrd_1
14673                       : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
14674           emit_insn ((mode == DImode
14675                       ? gen_ashrsi3
14676                       : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
14677         }
14678     }
14679   else
14680     {
14681       if (!rtx_equal_p (operands[0], operands[1]))
14682         emit_move_insn (operands[0], operands[1]);
14683
14684       (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
14685
14686       emit_insn ((mode == DImode
14687                   ? gen_x86_shrd_1
14688                   : gen_x86_64_shrd) (low[0], high[0], operands[2]));
14689       emit_insn ((mode == DImode
14690                   ? gen_ashrsi3
14691                   : gen_ashrdi3)  (high[0], high[0], operands[2]));
14692
14693       if (TARGET_CMOVE && scratch)
14694         {
14695           emit_move_insn (scratch, high[0]);
14696           emit_insn ((mode == DImode
14697                       ? gen_ashrsi3
14698                       : gen_ashrdi3) (scratch, scratch,
14699                                       GEN_INT (single_width - 1)));
14700           emit_insn ((mode == DImode
14701                       ? gen_x86_shift_adj_1
14702                       : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
14703                                          scratch));
14704         }
14705       else
14706         emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
14707     }
14708 }
14709
      /* Split a double-word logical right shift into single-word insns.
         When MODE is DImode the halves are 32 bits wide and the SImode
         generators are used; otherwise the halves are 64 bits wide and the
         DImode generators are used.  OPERANDS[0] is the destination,
         OPERANDS[1] the source and OPERANDS[2] the shift count.  SCRATCH is
         only used for a non-constant count, and only when it is non-null and
         TARGET_CMOVE is set.  */
14710 void
14711 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
14712 {
14713   rtx low[2], high[2];
14714   int count;
14715   const int single_width = mode == DImode ? 32 : 64;
14716
14717   if (CONST_INT_P (operands[2]))
14718     {
14719       (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
            /* Reduce the constant count modulo the double-word width.  */
14720       count = INTVAL (operands[2]) & (single_width * 2 - 1);
14721
14722       if (count >= single_width)
14723         {
                /* Shift by at least one whole word: the low result word is
                   taken from the high source word, the high result word is
                   cleared, and any remaining bits are shifted out of the
                   low word only.  */
14724           emit_move_insn (low[0], high[1]);
14725           ix86_expand_clear (high[0]);
14726
14727           if (count > single_width)
14728             emit_insn ((mode == DImode
14729                         ? gen_lshrsi3
14730                         : gen_lshrdi3) (low[0], low[0],
14731                                         GEN_INT (count - single_width)));
14732         }
14733       else
14734         {
                /* Shift by less than a word: operate in place on the
                   destination with a shrd on the low word followed by a
                   plain shift of the high word.  */
14735           if (!rtx_equal_p (operands[0], operands[1]))
14736             emit_move_insn (operands[0], operands[1]);
14737           emit_insn ((mode == DImode
14738                       ? gen_x86_shrd_1
14739                       : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
14740           emit_insn ((mode == DImode
14741                       ? gen_lshrsi3
14742                       : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
14743         }
14744     }
14745   else
14746     {
            /* Variable count: work in place on the destination.  */
14747       if (!rtx_equal_p (operands[0], operands[1]))
14748         emit_move_insn (operands[0], operands[1]);
14749
14750       (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
14751
14752       emit_insn ((mode == DImode
14753                   ? gen_x86_shrd_1
14754                   : gen_x86_64_shrd) (low[0], high[0], operands[2]));
14755       emit_insn ((mode == DImode
14756                   ? gen_lshrsi3
14757                   : gen_lshrdi3) (high[0], high[0], operands[2]));
14758
            /* NOTE(review): the adjust patterns below presumably fix up the
               result when the run-time count is SINGLE_WIDTH or more; their
               definitions are not visible here -- confirm in i386.md.  */
14759       /* Heh.  By reversing the arguments, we can reuse this pattern.  */
14760       if (TARGET_CMOVE && scratch)
14761         {
14762           ix86_expand_clear (scratch);
14763           emit_insn ((mode == DImode
14764                       ? gen_x86_shift_adj_1
14765                       : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
14766                                                scratch));
14767         }
14768       else
14769         emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
14770     }
14771 }
14772
14773 /* Mark the most recently emitted insn, which must be a jump, as taken with
         probability PROB by prepending a REG_BR_PROB note to its REG_NOTES.  */
14774 static void
14775 predict_jump (int prob)
14776 {
14777   rtx jump = get_last_insn ();
14778   rtx prob_note;
14779
14780   gcc_assert (JUMP_P (jump));
14781   prob_note = gen_rtx_EXPR_LIST (REG_BR_PROB, GEN_INT (prob), REG_NOTES (jump));
14782   REG_NOTES (jump) = prob_note;
14783 }
14784
14785 /* Helper for the string operations below.  Emit code that ANDs VARIABLE
14786    with VALUE and jumps to a newly created label when the result is zero.
         The jump is predicted as taken 50% of the time when EPILOGUE is true
         and 90% otherwise.  The label is returned but not emitted; placing
         it is left to the caller.  */
14787 static rtx
14788 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
14789 {
14790   rtx skip_label = gen_label_rtx ();
14791   const enum machine_mode var_mode = GET_MODE (variable);
14792   rtx masked = gen_reg_rtx (var_mode);
14793   rtx mask = GEN_INT (value);
14794   const int taken_percent = epilogue ? 50 : 90;
14795
14796   emit_insn (var_mode == DImode
14797              ? gen_anddi3 (masked, variable, mask)
14798              : gen_andsi3 (masked, variable, mask));
14799   emit_cmp_and_jump_insns (masked, const0_rtx, EQ, 0, var_mode, 1,
14800                            skip_label);
14801   predict_jump (REG_BR_PROB_BASE * taken_percent / 100);
14802   return skip_label;
14803 }
14804
14805 /* Emit an insn subtracting VALUE from COUNTREG in place, using the DImode
         add when COUNTREG is DImode and the SImode add otherwise.  */
14806 static void
14807 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
14808 {
14809   rtx delta = GEN_INT (-value);
14810   emit_insn (GET_MODE (countreg) == DImode
14811              ? gen_adddi3 (countreg, countreg, delta)
14812              : gen_addsi3 (countreg, countreg, delta));
14813 }
14814
14815 /* Return a Pmode register holding EXP.  A VOIDmode EXP goes through
         force_reg, a Pmode EXP is copied with copy_to_mode_reg, and anything
         else is zero extended into a new pseudo with zero_extendsidi2.  */
14816 rtx
14817 ix86_zero_extend_to_Pmode (rtx exp)
14818 {
14819   const enum machine_mode exp_mode = GET_MODE (exp);
14820   rtx widened;
14821   if (exp_mode == VOIDmode || exp_mode == Pmode)
14822     return (exp_mode == VOIDmode
14823             ? force_reg (Pmode, exp) : copy_to_mode_reg (Pmode, exp));
14824   widened = gen_reg_rtx (Pmode);
14825   emit_insn (gen_zero_extendsidi2 (widened, exp));
14826   return widened;
14827 }
14828
14829 /* Divide COUNTREG by SCALE.  A SCALE of 1 returns COUNTREG unchanged and
         a constant COUNTREG is divided at compile time.  Otherwise COUNTREG
         must be a register and a logical right shift by
         exact_log2 (SCALE) is emitted, so SCALE is expected to be a power
         of two.  Returns the rtx holding the scaled count.  */
14830 static rtx
14831 scale_counter (rtx countreg, int scale)
14832 {
14833   rtx sc;
14835
14836   if (scale == 1)
14837     return countreg;
14838   if (CONST_INT_P (countreg))
14839     return GEN_INT (INTVAL (countreg) / scale);
14840   gcc_assert (REG_P (countreg));
14841
14843   sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
14844                             GEN_INT (exact_log2 (scale)),
14845                             NULL, 1, OPTAB_DIRECT);
14846   return sc;
14847 }
14848
14849 /* Return mode for the memcpy/memset loop counter.  Prefer SImode over
14850    DImode for constant loop counts.  COUNT_EXP's own mode is used when it
         has one; a VOIDmode non-constant gets Pmode; a constant gets DImode
         only on 64-bit targets when it has bits set above the low 32.  */
14851
14852 static enum machine_mode
14853 counter_mode (rtx count_exp)
14854 {
14855   if (GET_MODE (count_exp) != VOIDmode)
14856     return GET_MODE (count_exp);
14857   if (!CONST_INT_P (count_exp))
14858     return Pmode;
        /* The mask must be widened to HOST_WIDE_INT before it is complemented:
           a bare 0xffffffff has type unsigned int where int is 32 bits, so
           ~0xffffffff would be 0 and this test could never succeed.  */
14859   if (TARGET_64BIT && (INTVAL (count_exp) & ~(HOST_WIDE_INT) 0xffffffff))
14860     return DImode;
14861   return SImode;
14862 }
14863
14864 /* When SRCPTR is non-NULL, output simple loop to move memory
14865    pointer to SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times,
14866    overall size is COUNT specified in bytes.  When SRCPTR is NULL, output the
14867    equivalent loop to set memory by VALUE (supposed to be in MODE).
14868
14869    The size is rounded down to whole number of chunk size moved at once.
14870    SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info.

         EXPECTED_SIZE, when not -1, is the expected byte count and is used
         only to choose the predicted probability of the loop back edge.
         After the loop, DESTPTR (and SRCPTR, if given) are advanced by the
         number of bytes processed.  */
14871
14872
14873 static void
14874 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
14875                                rtx destptr, rtx srcptr, rtx value,
14876                                rtx count, enum machine_mode mode, int unroll,
14877                                int expected_size)
14878 {
14879   rtx out_label, top_label, iter, tmp;
14880   enum machine_mode iter_mode = counter_mode (count);
        /* Bytes handled per loop iteration, and the mask that rounds a byte
           count down to a multiple of it.  */
14881   rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
14882   rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
14883   rtx size;
14884   rtx x_addr;
14885   rtx y_addr;
14886   int i;
14887
14888   top_label = gen_label_rtx ();
14889   out_label = gen_label_rtx ();
14890   iter = gen_reg_rtx (iter_mode);
14891
14892   size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
14893                               NULL, 1, OPTAB_DIRECT);
14894   /* Those two should combine.  */
        /* The zero-size early exit is only emitted when a piece is a single
           byte; for larger pieces the loop body is entered unconditionally.  */
14895   if (piece_size == const1_rtx)
14896     {
14897       emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
14898                                true, out_label);
14899       predict_jump (REG_BR_PROB_BASE * 10 / 100);
14900     }
14901   emit_move_insn (iter, const0_rtx);
14902
14903   emit_label (top_label);
14904
        /* Address the current piece as DESTPTR + ITER (and SRCPTR + ITER).  */
14905   tmp = convert_modes (Pmode, iter_mode, iter, true);
14906   x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
14907   destmem = change_address (destmem, mode, x_addr);
14908
14909   if (srcmem)
14910     {
14911       y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
14912       srcmem = change_address (srcmem, mode, y_addr);
14913
14914       /* When unrolling for chips that reorder memory reads and writes,
14915          we can save registers by using single temporary.
14916          Also using 4 temporaries is overkill in 32bit mode.  */
            /* The "&& 0" disables this variant, so the else branch below is
               always the one used.  */
14917       if (!TARGET_64BIT && 0)
14918         {
14919           for (i = 0; i < unroll; i++)
14920             {
14921               if (i)
14922                 {
14923                   destmem =
14924                     adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
14925                   srcmem =
14926                     adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
14927                 }
14928               emit_move_insn (destmem, srcmem);
14929             }
14930         }
14931       else
14932         {
                /* Issue all UNROLL loads into temporaries first, then all
                   UNROLL stores.  TMPREG bounds UNROLL to at most 4.  */
14933           rtx tmpreg[4];
14934           gcc_assert (unroll <= 4);
14935           for (i = 0; i < unroll; i++)
14936             {
14937               tmpreg[i] = gen_reg_rtx (mode);
14938               if (i)
14939                 {
14940                   srcmem =
14941                     adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
14942                 }
14943               emit_move_insn (tmpreg[i], srcmem);
14944             }
14945           for (i = 0; i < unroll; i++)
14946             {
14947               if (i)
14948                 {
14949                   destmem =
14950                     adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
14951                 }
14952               emit_move_insn (destmem, tmpreg[i]);
14953             }
14954         }
14955     }
14956   else
          /* Memset variant: store VALUE into UNROLL consecutive pieces.  */
14957     for (i = 0; i < unroll; i++)
14958       {
14959         if (i)
14960           destmem =
14961             adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
14962         emit_move_insn (destmem, value);
14963       }
14964
        /* Advance ITER by one piece and loop while ITER < SIZE (unsigned).  */
14965   tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
14966                              true, OPTAB_LIB_WIDEN);
14967   if (tmp != iter)
14968     emit_move_insn (iter, tmp);
14969
14970   emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
14971                            true, top_label);
14972   if (expected_size != -1)
14973     {
            /* Convert the expected byte count into an expected iteration
               count N and predict the back edge with probability of
               roughly 1 - 1/N, clamped for N == 0 and for very large N.  */
14974       expected_size /= GET_MODE_SIZE (mode) * unroll;
14975       if (expected_size == 0)
14976         predict_jump (0);
14977       else if (expected_size > REG_BR_PROB_BASE)
14978         predict_jump (REG_BR_PROB_BASE - 1);
14979       else
14980         predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
14981     }
14982   else
14983     predict_jump (REG_BR_PROB_BASE * 80 / 100);
        /* Bump the pointers past the bytes just processed.  */
14984   iter = ix86_zero_extend_to_Pmode (iter);
14985   tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
14986                              true, OPTAB_LIB_WIDEN);
14987   if (tmp != destptr)
14988     emit_move_insn (destptr, tmp);
14989   if (srcptr)
14990     {
14991       tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
14992                                  true, OPTAB_LIB_WIDEN);
14993       if (tmp != srcptr)
14994         emit_move_insn (srcptr, tmp);
14995     }
14996   emit_label (out_label);
14997 }
14998
14999 /* Output "rep; mov" instruction.
15000    Arguments have same meaning as for previous function.  MODE is the
         size of each element moved; COUNT is in bytes and is divided by the
         element size before being placed in the count register.  */
15001 static void
15002 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
15003                            rtx destptr, rtx srcptr,
15004                            rtx count,
15005                            enum machine_mode mode)
15006 {
15007   rtx destexp;
15008   rtx srcexp;
15009   rtx countreg;
15010
15011   /* If the size is known, it is shorter to use rep movs.  */
        /* That is: a byte-wise request whose constant size is a multiple of
           4 is upgraded to SImode elements.  */
15012   if (mode == QImode && CONST_INT_P (count)
15013       && !(INTVAL (count) & 3))
15014     mode = SImode;
15015
15016   if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
15017     destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
15018   if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
15019     srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
15020   countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
        /* DESTEXP and SRCEXP are expressions for the pointer values after
           the operation: pointer + element count * element size.  */
15021   if (mode != QImode)
15022     {
15023       destexp = gen_rtx_ASHIFT (Pmode, countreg,
15024                                 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
15025       destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
15026       srcexp = gen_rtx_ASHIFT (Pmode, countreg,
15027                                GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
15028       srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
15029     }
15030   else
15031     {
15032       destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
15033       srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
15034     }
15035   emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
15036                           destexp, srcexp));
15037 }
15038
15039 /* Output "rep; stos" instruction.
15040    Arguments have same meaning as for previous function.  VALUE is
         narrowed to MODE with gen_lowpart and forced into a register; COUNT
         is in bytes and is divided by the size of MODE.  */
15041 static void
15042 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
15043                             rtx count,
15044                             enum machine_mode mode)
15045 {
15046   rtx destexp;
15047   rtx countreg;
15048
15049   if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
15050     destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
15051   value = force_reg (mode, gen_lowpart (mode, value));
15052   countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
        /* DESTEXP is the expression for the destination pointer after the
           operation: DESTPTR + element count * element size.  */
15053   if (mode != QImode)
15054     {
15055       destexp = gen_rtx_ASHIFT (Pmode, countreg,
15056                                 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
15057       destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
15058     }
15059   else
15060     destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
15061   emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
15062 }
15063
      /* Emit one strmov insn of MODE using the memory references SRCMEM and
         DESTMEM re-addressed through SRCPTR and DESTPTR, with OFFSET passed
         to adjust_automodify_address_nv for both.  */
15064 static void
15065 emit_strmov (rtx destmem, rtx srcmem,
15066              rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
15067 {
15068   rtx from = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
15069   rtx to = adjust_automodify_address_nv (destmem, mode, destptr, offset);
15070   emit_insn (gen_strmov (destptr, to, srcptr, from));
15071 }
15072
15073 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST.
         A constant COUNT is expanded into straight-line moves selected by its
         low bits.  Otherwise, MAX_SIZE above 8 uses a byte loop and smaller
         MAX_SIZE uses a chain of bit tests on COUNT, each guarding one move
         of 4, 2 or 1 bytes.  */
15074 static void
15075 expand_movmem_epilogue (rtx destmem, rtx srcmem,
15076                         rtx destptr, rtx srcptr, rtx count, int max_size)
15077 {
15078   rtx src, dest;
15079   if (CONST_INT_P (count))
15080     {
15081       HOST_WIDE_INT countval = INTVAL (count);
15082       int offset = 0;
15083
            /* Each set bit of COUNTVAL below MAX_SIZE gets one move (or a
               pair of narrower moves) at the running OFFSET.  */
15084       if ((countval & 0x10) && max_size > 16)
15085         {
15086           if (TARGET_64BIT)
15087             {
15088               emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
15089               emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
15090             }
15091           else
15092             gcc_unreachable ();
15093           offset += 16;
15094         }
15095       if ((countval & 0x08) && max_size > 8)
15096         {
15097           if (TARGET_64BIT)
15098             emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
15099           else
15100             {
15101               emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
15102               emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
15103             }
15104           offset += 8;
15105         }
15106       if ((countval & 0x04) && max_size > 4)
15107         {
15108           emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
15109           offset += 4;
15110         }
15111       if ((countval & 0x02) && max_size > 2)
15112         {
15113           emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
15114           offset += 2;
15115         }
15116       if ((countval & 0x01) && max_size > 1)
15117         {
15118           emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
15119           offset += 1;
15120         }
15121       return;
15122     }
15123   if (max_size > 8)
15124     {
            /* Mask COUNT down to the remainder and copy it one byte at a
               time; 4 is passed as the expected size for branch prediction.  */
15125       count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
15126                                     count, 1, OPTAB_DIRECT);
15127       expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
15128                                      count, QImode, 1, 4);
15129       return;
15130     }
15131
15132   /* When there are stringops, we can cheaply increase dest and src pointers.
15133      Otherwise we save code size by maintaining offset (zero is readily
15134      available from preceding rep operation) and using x86 addressing modes.
15135    */
15136   if (TARGET_SINGLE_STRINGOP)
15137     {
            /* Each block tests one bit of COUNT and skips its strmov when
               the bit is clear.  */
15138       if (max_size > 4)
15139         {
15140           rtx label = ix86_expand_aligntest (count, 4, true);
15141           src = change_address (srcmem, SImode, srcptr);
15142           dest = change_address (destmem, SImode, destptr);
15143           emit_insn (gen_strmov (destptr, dest, srcptr, src));
15144           emit_label (label);
15145           LABEL_NUSES (label) = 1;
15146         }
15147       if (max_size > 2)
15148         {
15149           rtx label = ix86_expand_aligntest (count, 2, true);
15150           src = change_address (srcmem, HImode, srcptr);
15151           dest = change_address (destmem, HImode, destptr);
15152           emit_insn (gen_strmov (destptr, dest, srcptr, src));
15153           emit_label (label);
15154           LABEL_NUSES (label) = 1;
15155         }
15156       if (max_size > 1)
15157         {
15158           rtx label = ix86_expand_aligntest (count, 1, true);
15159           src = change_address (srcmem, QImode, srcptr);
15160           dest = change_address (destmem, QImode, destptr);
15161           emit_insn (gen_strmov (destptr, dest, srcptr, src));
15162           emit_label (label);
15163           LABEL_NUSES (label) = 1;
15164         }
15165     }
15166   else
15167     {
            /* OFFSET starts at zero and accumulates the bytes already
               copied; the pointers themselves are left unchanged.  */
15168       rtx offset = force_reg (Pmode, const0_rtx);
15169       rtx tmp;
15170
15171       if (max_size > 4)
15172         {
                /* OFFSET is still zero here, so the pointers are used
                   directly.  */
15173           rtx label = ix86_expand_aligntest (count, 4, true);
15174           src = change_address (srcmem, SImode, srcptr);
15175           dest = change_address (destmem, SImode, destptr);
15176           emit_move_insn (dest, src);
15177           tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
15178                                      true, OPTAB_LIB_WIDEN);
15179           if (tmp != offset)
15180             emit_move_insn (offset, tmp);
15181           emit_label (label);
15182           LABEL_NUSES (label) = 1;
15183         }
15184       if (max_size > 2)
15185         {
15186           rtx label = ix86_expand_aligntest (count, 2, true);
15187           tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
15188           src = change_address (srcmem, HImode, tmp);
15189           tmp = gen_rtx_PLUS (Pmode, destptr, offset);
15190           dest = change_address (destmem, HImode, tmp);
15191           emit_move_insn (dest, src);
                /* NOTE(review): TMP still holds the (plus destptr offset)
                   address built above and is passed as the target operand
                   here, unlike the NULL used in the 4-byte case -- confirm
                   whether OFFSET or NULL was intended.  */
15192           tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
15193                                      true, OPTAB_LIB_WIDEN);
15194           if (tmp != offset)
15195             emit_move_insn (offset, tmp);
15196           emit_label (label);
15197           LABEL_NUSES (label) = 1;
15198         }
15199       if (max_size > 1)
15200         {
                /* Last piece: OFFSET does not need updating afterwards.  */
15201           rtx label = ix86_expand_aligntest (count, 1, true);
15202           tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
15203           src = change_address (srcmem, QImode, tmp);
15204           tmp = gen_rtx_PLUS (Pmode, destptr, offset);
15205           dest = change_address (destmem, QImode, tmp);
15206           emit_move_insn (dest, src);
15207           emit_label (label);
15208           LABEL_NUSES (label) = 1;
15209         }
15210     }
15211 }
15212
15213 /* Output a byte-at-a-time loop that stores the low byte of VALUE into the
         count & (max_size - 1) bytes starting at DEST.  Half of MAX_SIZE is
         passed to the loop expander as the expected size.  */
15214 static void
15215 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
15216                                  rtx count, int max_size)
15217 {
15218   rtx tail_mask = GEN_INT (max_size - 1);
15219   rtx tail = expand_simple_binop (counter_mode (count), AND, count,
15220                                   tail_mask, count, 1, OPTAB_DIRECT);
15221   expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
15222                                  gen_lowpart (QImode, value), tail, QImode,
15223                                  1, max_size / 2);
15224 }
15225
15226 /* Output code to set at most count & (max_size - 1) bytes starting by DEST.
         A constant COUNT is expanded into straight-line stores selected by
         its low bits.  Otherwise, MAX_SIZE above 32 uses a byte loop and
         smaller MAX_SIZE uses a chain of bit tests on COUNT, each guarding
         the stores for 16, 8, 4, 2 or 1 bytes.  */
15227 static void
15228 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
15229 {
15230   rtx dest;
15231
15232   if (CONST_INT_P (count))
15233     {
15234       HOST_WIDE_INT countval = INTVAL (count);
15235       int offset = 0;
15236
            /* NOTE(review): in the 16- and 8-byte cases VALUE is stored
               without gen_lowpart, so it presumably already has the mode of
               the store -- confirm against the caller.  */
15237       if ((countval & 0x10) && max_size > 16)
15238         {
15239           if (TARGET_64BIT)
15240             {
15241               dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
15242               emit_insn (gen_strset (destptr, dest, value));
15243               dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
15244               emit_insn (gen_strset (destptr, dest, value));
15245             }
15246           else
15247             gcc_unreachable ();
15248           offset += 16;
15249         }
15250       if ((countval & 0x08) && max_size > 8)
15251         {
15252           if (TARGET_64BIT)
15253             {
15254               dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
15255               emit_insn (gen_strset (destptr, dest, value));
15256             }
15257           else
15258             {
15259               dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
15260               emit_insn (gen_strset (destptr, dest, value));
15261               dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
15262               emit_insn (gen_strset (destptr, dest, value));
15263             }
15264           offset += 8;
15265         }
15266       if ((countval & 0x04) && max_size > 4)
15267         {
15268           dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
15269           emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
15270           offset += 4;
15271         }
15272       if ((countval & 0x02) && max_size > 2)
15273         {
15274           dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
15275           emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
15276           offset += 2;
15277         }
15278       if ((countval & 0x01) && max_size > 1)
15279         {
15280           dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
15281           emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
15282           offset += 1;
15283         }
15284       return;
15285     }
15286   if (max_size > 32)
15287     {
15288       expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
15289       return;
15290     }
        /* Variable COUNT: each block below tests one bit of COUNT and skips
           its stores when the bit is clear.  */
15291   if (max_size > 16)
15292     {
15293       rtx label = ix86_expand_aligntest (count, 16, true);
15294       if (TARGET_64BIT)
15295         {
15296           dest = change_address (destmem, DImode, destptr);
15297           emit_insn (gen_strset (destptr, dest, value));
15298           emit_insn (gen_strset (destptr, dest, value));
15299         }
15300       else
15301         {
15302           dest = change_address (destmem, SImode, destptr);
15303           emit_insn (gen_strset (destptr, dest, value));
15304           emit_insn (gen_strset (destptr, dest, value));
15305           emit_insn (gen_strset (destptr, dest, value));
15306           emit_insn (gen_strset (destptr, dest, value));
15307         }
15308       emit_label (label);
15309       LABEL_NUSES (label) = 1;
15310     }
15311   if (max_size > 8)
15312     {
15313       rtx label = ix86_expand_aligntest (count, 8, true);
15314       if (TARGET_64BIT)
15315         {
15316           dest = change_address (destmem, DImode, destptr);
15317           emit_insn (gen_strset (destptr, dest, value));
15318         }
15319       else
15320         {
15321           dest = change_address (destmem, SImode, destptr);
15322           emit_insn (gen_strset (destptr, dest, value));
15323           emit_insn (gen_strset (destptr, dest, value));
15324         }
15325       emit_label (label);
15326       LABEL_NUSES (label) = 1;
15327     }
15328   if (max_size > 4)
15329     {
15330       rtx label = ix86_expand_aligntest (count, 4, true);
15331       dest = change_address (destmem, SImode, destptr);
15332       emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
15333       emit_label (label);
15334       LABEL_NUSES (label) = 1;
15335     }
15336   if (max_size > 2)
15337     {
15338       rtx label = ix86_expand_aligntest (count, 2, true);
15339       dest = change_address (destmem, HImode, destptr);
15340       emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
15341       emit_label (label);
15342       LABEL_NUSES (label) = 1;
15343     }
15344   if (max_size > 1)
15345     {
15346       rtx label = ix86_expand_aligntest (count, 1, true);
15347       dest = change_address (destmem, QImode, destptr);
15348       emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
15349       emit_label (label);
15350       LABEL_NUSES (label) = 1;
15351     }
15352 }
15353
15354 /* Copy enough from SRC to DEST to align DEST, known to be aligned by ALIGN,
15355    to DESIRED_ALIGNMENT.  Each step tests one low bit of DESTPTR, skips
         its move when the bit is clear, and otherwise moves 1, 2 or 4 bytes
         and subtracts that many from COUNT.  DESIRED_ALIGNMENT must not
         exceed 8.  */
15356 static void
15357 expand_movmem_prologue (rtx destmem, rtx srcmem,
15358                         rtx destptr, rtx srcptr, rtx count,
15359                         int align, int desired_alignment)
15360 {
15361   if (align <= 1 && desired_alignment > 1)
15362     {
15363       rtx label = ix86_expand_aligntest (destptr, 1, false);
15364       srcmem = change_address (srcmem, QImode, srcptr);
15365       destmem = change_address (destmem, QImode, destptr);
15366       emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
15367       ix86_adjust_counter (count, 1);
15368       emit_label (label);
15369       LABEL_NUSES (label) = 1;
15370     }
15371   if (align <= 2 && desired_alignment > 2)
15372     {
15373       rtx label = ix86_expand_aligntest (destptr, 2, false);
15374       srcmem = change_address (srcmem, HImode, srcptr);
15375       destmem = change_address (destmem, HImode, destptr);
15376       emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
15377       ix86_adjust_counter (count, 2);
15378       emit_label (label);
15379       LABEL_NUSES (label) = 1;
15380     }
15381   if (align <= 4 && desired_alignment > 4)
15382     {
15383       rtx label = ix86_expand_aligntest (destptr, 4, false);
15384       srcmem = change_address (srcmem, SImode, srcptr);
15385       destmem = change_address (destmem, SImode, destptr);
15386       emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
15387       ix86_adjust_counter (count, 4);
15388       emit_label (label);
15389       LABEL_NUSES (label) = 1;
15390     }
        /* There is no 8-byte step, so alignments above 8 are not supported.  */
15391   gcc_assert (desired_alignment <= 8);
15392 }
15393
15394 /* Set enough of DEST to align DEST, known to be aligned by ALIGN, to
15395    DESIRED_ALIGNMENT.  Each step tests one low bit of DESTPTR, skips its
         store when the bit is clear, and otherwise stores the low 1, 2 or 4
         bytes of VALUE and subtracts that many from COUNT.
         DESIRED_ALIGNMENT must not exceed 8.  */
15396 static void
15397 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
15398                         int align, int desired_alignment)
15399 {
15400   if (align <= 1 && desired_alignment > 1)
15401     {
15402       rtx label = ix86_expand_aligntest (destptr, 1, false);
15403       destmem = change_address (destmem, QImode, destptr);
15404       emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
15405       ix86_adjust_counter (count, 1);
15406       emit_label (label);
15407       LABEL_NUSES (label) = 1;
15408     }
15409   if (align <= 2 && desired_alignment > 2)
15410     {
15411       rtx label = ix86_expand_aligntest (destptr, 2, false);
15412       destmem = change_address (destmem, HImode, destptr);
15413       emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
15414       ix86_adjust_counter (count, 2);
15415       emit_label (label);
15416       LABEL_NUSES (label) = 1;
15417     }
15418   if (align <= 4 && desired_alignment > 4)
15419     {
15420       rtx label = ix86_expand_aligntest (destptr, 4, false);
15421       destmem = change_address (destmem, SImode, destptr);
15422       emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
15423       ix86_adjust_counter (count, 4);
15424       emit_label (label);
15425       LABEL_NUSES (label) = 1;
15426     }
        /* There is no 8-byte step, so alignments above 8 are not supported.  */
15427   gcc_assert (desired_alignment <= 8);
15428 }
15429
15430 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation.
         MEMSET selects the memset cost table instead of the memcpy one.
         EXPECTED_SIZE is -1 when no estimate is available.  *DYNAMIC_CHECK
         is set to -1, or, when TARGET_INLINE_STRINGOPS_DYNAMICALLY applies,
         to a size threshold chosen by the heuristic at the end of the
         function.  */
15431 static enum stringop_alg
15432 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
15433             int *dynamic_check)
15434 {
15435   const struct stringop_algs * algs;
15436   /* Algorithms using the rep prefix want at least edi and ecx;
15437      additionally, memset wants eax and memcpy wants esi.  Don't
15438      consider such algorithms if the user has appropriated those
15439      registers for their own purposes.  */
15440   bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
15441                              || (memset
15442                                  ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
15443
      /* True if ALG may be used: everything is allowed when rep prefixes are
         usable, otherwise only the algorithms that are not rep-prefix based.  */
15444 #define ALG_USABLE_P(alg) (rep_prefix_usable                    \
15445                            || (alg != rep_prefix_1_byte         \
15446                                && alg != rep_prefix_4_byte      \
15447                                && alg != rep_prefix_8_byte))
15448
15449   *dynamic_check = -1;
15450   if (memset)
15451     algs = &ix86_cost->memset[TARGET_64BIT != 0];
15452   else
15453     algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
        /* An explicitly requested algorithm wins when it is usable.  */
15454   if (stringop_alg != no_stringop && ALG_USABLE_P (stringop_alg))
15455     return stringop_alg;
15456   /* rep; movq or rep; movl is the smallest variant.  */
15457   else if (optimize_size)
15458     {
            /* A zero COUNT or one that is not a multiple of 4 gets the
               byte-wise variant.  */
15459       if (!count || (count & 3))
15460         return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
15461       else
15462         return rep_prefix_usable ? rep_prefix_4_byte : loop;
15463     }
15464   /* Very tiny blocks are best handled via the loop, REP is expensive to setup.
15465    */
15466   else if (expected_size != -1 && expected_size < 4)
15467     return loop_1_byte;
15468   else if (expected_size != -1)
15469     {
15470       unsigned int i;
15471       enum stringop_alg alg = libcall;
            /* Scan the size table for the first entry covering
               EXPECTED_SIZE (a max of -1 covers every size).  */
15472       for (i = 0; i < NAX_STRINGOP_ALGS; i++)
15473         {
15474           /* We get here if the algorithms that were not libcall-based
15475              were rep-prefix based and we are unable to use rep prefixes
15476              based on global register usage.  Break out of the loop and
15477              use the heuristic below.  */
15478           if (algs->size[i].max == 0)
15479             break;
15480           if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
15481             {
15482               enum stringop_alg candidate = algs->size[i].alg;
15483
15484               if (candidate != libcall && ALG_USABLE_P (candidate))
15485                 alg = candidate;
15486               /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
15487                  last non-libcall inline algorithm.  */
15488               if (TARGET_INLINE_ALL_STRINGOPS)
15489                 {
15490                   /* When the current size is best to be copied by a libcall,
15491                      but we are still forced to inline, run the heuristic below
15492                      that will pick code for medium sized blocks.  */
15493                   if (alg != libcall)
15494                     return alg;
15495                   break;
15496                 }
15497               else if (ALG_USABLE_P (candidate))
15498                 return candidate;
15499             }
15500         }
15501       gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
15502     }
15503   /* When asked to inline the call anyway, try to pick meaningful choice.
15504      We look for maximal size of block that is faster to copy by hand and
15505      take blocks of at most of that size guessing that average size will
15506      be roughly half of the block.
15507
15508      If this turns out to be bad, we might simply specify the preferred
15509      choice in ix86_costs.  */
15510   if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
15511       && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
15512     {
15513       int max = -1;
15514       enum stringop_alg alg;
15515       int i;
15516       bool any_alg_usable_p = true;
15517
            /* MAX ends up as the size limit of the last usable non-libcall
               table entry; ANY_ALG_USABLE_P becomes false as soon as one
               table entry is not usable.  */
15518       for (i = 0; i < NAX_STRINGOP_ALGS; i++)
15519         {
15520           enum stringop_alg candidate = algs->size[i].alg;
15521           any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
15522
15523           if (candidate != libcall && candidate
15524               && ALG_USABLE_P (candidate))
15525               max = algs->size[i].max;
15526         }
15527       /* If there aren't any usable algorithms, then recursing on
15528          smaller sizes isn't going to find anything.  Just return the
15529          simple byte-at-a-time copy loop.  */
15530       if (!any_alg_usable_p)
15531         {
15532           /* Pick something reasonable.  */
15533           if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
15534             *dynamic_check = 128;
15535           return loop_1_byte;
15536         }
15537       if (max == -1)
15538         max = 4096;
            /* Re-run the decision with half of MAX as the expected size; the
               recursive call must settle on an inline algorithm without
               setting a dynamic check of its own.  */
15539       alg = decide_alg (count, max / 2, memset, dynamic_check);
15540       gcc_assert (*dynamic_check == -1);
15541       gcc_assert (alg != libcall);
15542       if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
15543         *dynamic_check = max;
15544       return alg;
15545     }
15546   return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
15547 #undef ALG_USABLE_P
15548 }
15549
15550 /* Decide on alignment.  We know that the operand is already aligned to ALIGN
15551    (ALIGN can be based on profile feedback and thus it is not 100% guaranteed).  */
15552 static int
15553 decide_alignment (int align,
15554                   enum stringop_alg alg,
15555                   int expected_size)
15556 {
15557   int desired_align = 0;
15558   switch (alg)
15559     {
15560       case no_stringop:
15561         gcc_unreachable ();
15562       case loop:
15563       case unrolled_loop:
15564         desired_align = GET_MODE_SIZE (Pmode);
15565         break;
15566       case rep_prefix_8_byte:
15567         desired_align = 8;
15568         break;
15569       case rep_prefix_4_byte:
15570         /* PentiumPro has special logic triggering for 8 byte aligned blocks.
15571            copying whole cacheline at once.  */
15572         if (TARGET_PENTIUMPRO)
15573           desired_align = 8;
15574         else
15575           desired_align = 4;
15576         break;
15577       case rep_prefix_1_byte:
15578         /* PentiumPro has special logic triggering for 8 byte aligned blocks.
15579            copying whole cacheline at once.  */
15580         if (TARGET_PENTIUMPRO)
15581           desired_align = 8;
15582         else
15583           desired_align = 1;
15584         break;
15585       case loop_1_byte:
15586         desired_align = 1;
15587         break;
15588       case libcall:
15589         return 0;
15590     }
15591
15592   if (optimize_size)
15593     desired_align = 1;
15594   if (desired_align < align)
15595     desired_align = align;
15596   if (expected_size != -1 && expected_size < 4)
15597     desired_align = align;
15598   return desired_align;
15599 }
15600
/* Return the smallest power of 2 that is strictly greater than VAL.
   Any VAL below 1 yields 1.  */
static int
smallest_pow2_greater_than (int val)
{
  int pow2;

  /* Keep doubling until the candidate passes VAL.  */
  for (pow2 = 1; pow2 <= val; pow2 <<= 1)
    continue;

  return pow2;
}
15610
15611 /* Expand string move (memcpy) operation.  Use i386 string operations when
15612    profitable.  expand_setmem contains similar code.  The code depends upon
15613    architecture, block size and alignment, but always has the same
15614    overall structure:
15615
15616    1) Prologue guard: Conditional that jumps up to epilogues for small
15617       blocks that can be handled by epilogue alone.  This is faster but
15618       also needed for correctness, since prologue assume the block is larger
15619       than the desired alignment.
15620
15621       Optional dynamic check for size and libcall for large
15622       blocks is emitted here too, with -minline-stringops-dynamically.
15623
15624    2) Prologue: copy first few bytes in order to get destination aligned
15625       to DESIRED_ALIGN.  It is emitted only when ALIGN is less than
15626       DESIRED_ALIGN and and up to DESIRED_ALIGN - ALIGN bytes can be copied.
15627       We emit either a jump tree on power of two sized blocks, or a byte loop.
15628
15629    3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
15630       with specified algorithm.
15631
15632    4) Epilogue: code copying tail of the block that is too small to be
15633       handled by main body (or up to size guarded by prologue guard).  */
15634
15635 int
15636 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
15637                     rtx expected_align_exp, rtx expected_size_exp)
15638 {
15639   rtx destreg;
15640   rtx srcreg;
15641   rtx label = NULL;
15642   rtx tmp;
15643   rtx jump_around_label = NULL;
15644   HOST_WIDE_INT align = 1;
15645   unsigned HOST_WIDE_INT count = 0;
15646   HOST_WIDE_INT expected_size = -1;
15647   int size_needed = 0, epilogue_size_needed;
15648   int desired_align = 0;
15649   enum stringop_alg alg;
15650   int dynamic_check;
15651
15652   if (CONST_INT_P (align_exp))
15653     align = INTVAL (align_exp);
15654   /* i386 can do misaligned access on reasonably increased cost.  */
15655   if (CONST_INT_P (expected_align_exp)
15656       && INTVAL (expected_align_exp) > align)
15657     align = INTVAL (expected_align_exp);
15658   if (CONST_INT_P (count_exp))
15659     count = expected_size = INTVAL (count_exp);
15660   if (CONST_INT_P (expected_size_exp) && count == 0)
15661     expected_size = INTVAL (expected_size_exp);
15662
15663   /* Make sure we don't need to care about overflow later on.  */
15664   if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
15665     return 0;
15666
15667   /* Step 0: Decide on preferred algorithm, desired alignment and
15668      size of chunks to be copied by main loop.  */
15669
15670   alg = decide_alg (count, expected_size, false, &dynamic_check);
15671   desired_align = decide_alignment (align, alg, expected_size);
15672
15673   if (!TARGET_ALIGN_STRINGOPS)
15674     align = desired_align;
15675
15676   if (alg == libcall)
15677     return 0;
15678   gcc_assert (alg != no_stringop);
15679   if (!count)
15680     count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
15681   destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
15682   srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
15683   switch (alg)
15684     {
15685     case libcall:
15686     case no_stringop:
15687       gcc_unreachable ();
15688     case loop:
15689       size_needed = GET_MODE_SIZE (Pmode);
15690       break;
15691     case unrolled_loop:
15692       size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
15693       break;
15694     case rep_prefix_8_byte:
15695       size_needed = 8;
15696       break;
15697     case rep_prefix_4_byte:
15698       size_needed = 4;
15699       break;
15700     case rep_prefix_1_byte:
15701     case loop_1_byte:
15702       size_needed = 1;
15703       break;
15704     }
15705
15706   epilogue_size_needed = size_needed;
15707
15708   /* Step 1: Prologue guard.  */
15709
15710   /* Alignment code needs count to be in register.  */
15711   if (CONST_INT_P (count_exp) && desired_align > align)
15712     count_exp = force_reg (counter_mode (count_exp), count_exp);
15713   gcc_assert (desired_align >= 1 && align >= 1);
15714
15715   /* Ensure that alignment prologue won't copy past end of block.  */
15716   if (size_needed > 1 || (desired_align > 1 && desired_align > align))
15717     {
15718       epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
15719       /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
15720          Make sure it is power of 2.  */
15721       epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
15722
15723       if (CONST_INT_P (count_exp))
15724         {
15725           if (UINTVAL (count_exp) < (unsigned HOST_WIDE_INT)epilogue_size_needed)
15726             goto epilogue;
15727         }
15728       else
15729         {
15730           label = gen_label_rtx ();
15731           emit_cmp_and_jump_insns (count_exp,
15732                                    GEN_INT (epilogue_size_needed),
15733                                    LTU, 0, counter_mode (count_exp), 1, label);
15734           if (expected_size == -1 || expected_size < epilogue_size_needed)
15735             predict_jump (REG_BR_PROB_BASE * 60 / 100);
15736           else
15737             predict_jump (REG_BR_PROB_BASE * 20 / 100);
15738         }
15739     }
15740
15741   /* Emit code to decide on runtime whether library call or inline should be
15742      used.  */
15743   if (dynamic_check != -1)
15744     {
15745       if (CONST_INT_P (count_exp))
15746         {
15747           if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
15748             {
15749               emit_block_move_via_libcall (dst, src, count_exp, false);
15750               count_exp = const0_rtx;
15751               goto epilogue;
15752             }
15753         }
15754       else
15755         {
15756           rtx hot_label = gen_label_rtx ();
15757           jump_around_label = gen_label_rtx ();
15758           emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
15759                                    LEU, 0, GET_MODE (count_exp), 1, hot_label);
15760           predict_jump (REG_BR_PROB_BASE * 90 / 100);
15761           emit_block_move_via_libcall (dst, src, count_exp, false);
15762           emit_jump (jump_around_label);
15763           emit_label (hot_label);
15764         }
15765     }
15766
15767   /* Step 2: Alignment prologue.  */
15768
15769   if (desired_align > align)
15770     {
15771       /* Except for the first move in epilogue, we no longer know
15772          constant offset in aliasing info.  It don't seems to worth
15773          the pain to maintain it for the first move, so throw away
15774          the info early.  */
15775       src = change_address (src, BLKmode, srcreg);
15776       dst = change_address (dst, BLKmode, destreg);
15777       expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
15778                               desired_align);
15779     }
15780   if (label && size_needed == 1)
15781     {
15782       emit_label (label);
15783       LABEL_NUSES (label) = 1;
15784       label = NULL;
15785     }
15786
15787   /* Step 3: Main loop.  */
15788
15789   switch (alg)
15790     {
15791     case libcall:
15792     case no_stringop:
15793       gcc_unreachable ();
15794     case loop_1_byte:
15795       expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
15796                                      count_exp, QImode, 1, expected_size);
15797       break;
15798     case loop:
15799       expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
15800                                      count_exp, Pmode, 1, expected_size);
15801       break;
15802     case unrolled_loop:
15803       /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
15804          registers for 4 temporaries anyway.  */
15805       expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
15806                                      count_exp, Pmode, TARGET_64BIT ? 4 : 2,
15807                                      expected_size);
15808       break;
15809     case rep_prefix_8_byte:
15810       expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
15811                                  DImode);
15812       break;
15813     case rep_prefix_4_byte:
15814       expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
15815                                  SImode);
15816       break;
15817     case rep_prefix_1_byte:
15818       expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
15819                                  QImode);
15820       break;
15821     }
15822   /* Adjust properly the offset of src and dest memory for aliasing.  */
15823   if (CONST_INT_P (count_exp))
15824     {
15825       src = adjust_automodify_address_nv (src, BLKmode, srcreg,
15826                                           (count / size_needed) * size_needed);
15827       dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
15828                                           (count / size_needed) * size_needed);
15829     }
15830   else
15831     {
15832       src = change_address (src, BLKmode, srcreg);
15833       dst = change_address (dst, BLKmode, destreg);
15834     }
15835
15836   /* Step 4: Epilogue to copy the remaining bytes.  */
15837  epilogue:
15838   if (label)
15839     {
15840       /* When the main loop is done, COUNT_EXP might hold original count,
15841          while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
15842          Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
15843          bytes. Compensate if needed.  */
15844
15845       if (size_needed < epilogue_size_needed)
15846         {
15847           tmp =
15848             expand_simple_binop (counter_mode (count_exp), AND, count_exp,
15849                                  GEN_INT (size_needed - 1), count_exp, 1,
15850                                  OPTAB_DIRECT);
15851           if (tmp != count_exp)
15852             emit_move_insn (count_exp, tmp);
15853         }
15854       emit_label (label);
15855       LABEL_NUSES (label) = 1;
15856     }
15857
15858   if (count_exp != const0_rtx && epilogue_size_needed > 1)
15859     expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
15860                             epilogue_size_needed);
15861   if (jump_around_label)
15862     emit_label (jump_around_label);
15863   return 1;
15864 }
15865
15866 /* Helper function for memcpy.  For QImode value 0xXY produce
15867    0xXYXYXYXY of wide specified by MODE.  This is essentially
15868    a * 0x10101010, but we can do slightly better than
15869    synth_mult by unwinding the sequence by hand on CPUs with
15870    slow multiply.  */
15871 static rtx
15872 promote_duplicated_reg (enum machine_mode mode, rtx val)
15873 {
15874   enum machine_mode valmode = GET_MODE (val);
15875   rtx tmp;
15876   int nops = mode == DImode ? 3 : 2;
15877
15878   gcc_assert (mode == SImode || mode == DImode);
15879   if (val == const0_rtx)
15880     return copy_to_mode_reg (mode, const0_rtx);
15881   if (CONST_INT_P (val))
15882     {
15883       HOST_WIDE_INT v = INTVAL (val) & 255;
15884
15885       v |= v << 8;
15886       v |= v << 16;
15887       if (mode == DImode)
15888         v |= (v << 16) << 16;
15889       return copy_to_mode_reg (mode, gen_int_mode (v, mode));
15890     }
15891
15892   if (valmode == VOIDmode)
15893     valmode = QImode;
15894   if (valmode != QImode)
15895     val = gen_lowpart (QImode, val);
15896   if (mode == QImode)
15897     return val;
15898   if (!TARGET_PARTIAL_REG_STALL)
15899     nops--;
15900   if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
15901       + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
15902       <= (ix86_cost->shift_const + ix86_cost->add) * nops
15903           + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
15904     {
15905       rtx reg = convert_modes (mode, QImode, val, true);
15906       tmp = promote_duplicated_reg (mode, const1_rtx);
15907       return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
15908                                   OPTAB_DIRECT);
15909     }
15910   else
15911     {
15912       rtx reg = convert_modes (mode, QImode, val, true);
15913
15914       if (!TARGET_PARTIAL_REG_STALL)
15915         if (mode == SImode)
15916           emit_insn (gen_movsi_insv_1 (reg, reg));
15917         else
15918           emit_insn (gen_movdi_insv_1_rex64 (reg, reg));
15919       else
15920         {
15921           tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
15922                                      NULL, 1, OPTAB_DIRECT);
15923           reg =
15924             expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
15925         }
15926       tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
15927                                  NULL, 1, OPTAB_DIRECT);
15928       reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
15929       if (mode == SImode)
15930         return reg;
15931       tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
15932                                  NULL, 1, OPTAB_DIRECT);
15933       reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
15934       return reg;
15935     }
15936 }
15937
15938 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
15939    be needed by main loop copying SIZE_NEEDED chunks and prologue getting
15940    alignment from ALIGN to DESIRED_ALIGN.  */
15941 static rtx
15942 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
15943 {
15944   rtx promoted_val;
15945
15946   if (TARGET_64BIT
15947       && (size_needed > 4 || (desired_align > align && desired_align > 4)))
15948     promoted_val = promote_duplicated_reg (DImode, val);
15949   else if (size_needed > 2 || (desired_align > align && desired_align > 2))
15950     promoted_val = promote_duplicated_reg (SImode, val);
15951   else if (size_needed > 1 || (desired_align > align && desired_align > 1))
15952     promoted_val = promote_duplicated_reg (HImode, val);
15953   else
15954     promoted_val = val;
15955
15956   return promoted_val;
15957 }
15958
15959 /* Expand string clear operation (bzero).  Use i386 string operations when
15960    profitable.  See expand_movmem comment for explanation of individual
15961    steps performed.  */
15962 int
15963 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
15964                     rtx expected_align_exp, rtx expected_size_exp)
15965 {
15966   rtx destreg;
15967   rtx label = NULL;
15968   rtx tmp;
15969   rtx jump_around_label = NULL;
15970   HOST_WIDE_INT align = 1;
15971   unsigned HOST_WIDE_INT count = 0;
15972   HOST_WIDE_INT expected_size = -1;
15973   int size_needed = 0, epilogue_size_needed;
15974   int desired_align = 0;
15975   enum stringop_alg alg;
15976   rtx promoted_val = NULL;
15977   bool force_loopy_epilogue = false;
15978   int dynamic_check;
15979
15980   if (CONST_INT_P (align_exp))
15981     align = INTVAL (align_exp);
15982   /* i386 can do misaligned access on reasonably increased cost.  */
15983   if (CONST_INT_P (expected_align_exp)
15984       && INTVAL (expected_align_exp) > align)
15985     align = INTVAL (expected_align_exp);
15986   if (CONST_INT_P (count_exp))
15987     count = expected_size = INTVAL (count_exp);
15988   if (CONST_INT_P (expected_size_exp) && count == 0)
15989     expected_size = INTVAL (expected_size_exp);
15990
15991   /* Make sure we don't need to care about overflow later on.  */
15992   if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
15993     return 0;
15994
15995   /* Step 0: Decide on preferred algorithm, desired alignment and
15996      size of chunks to be copied by main loop.  */
15997
15998   alg = decide_alg (count, expected_size, true, &dynamic_check);
15999   desired_align = decide_alignment (align, alg, expected_size);
16000
16001   if (!TARGET_ALIGN_STRINGOPS)
16002     align = desired_align;
16003
16004   if (alg == libcall)
16005     return 0;
16006   gcc_assert (alg != no_stringop);
16007   if (!count)
16008     count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
16009   destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
16010   switch (alg)
16011     {
16012     case libcall:
16013     case no_stringop:
16014       gcc_unreachable ();
16015     case loop:
16016       size_needed = GET_MODE_SIZE (Pmode);
16017       break;
16018     case unrolled_loop:
16019       size_needed = GET_MODE_SIZE (Pmode) * 4;
16020       break;
16021     case rep_prefix_8_byte:
16022       size_needed = 8;
16023       break;
16024     case rep_prefix_4_byte:
16025       size_needed = 4;
16026       break;
16027     case rep_prefix_1_byte:
16028     case loop_1_byte:
16029       size_needed = 1;
16030       break;
16031     }
16032   epilogue_size_needed = size_needed;
16033
16034   /* Step 1: Prologue guard.  */
16035
16036   /* Alignment code needs count to be in register.  */
16037   if (CONST_INT_P (count_exp) && desired_align > align)
16038     {
16039       enum machine_mode mode = SImode;
16040       if (TARGET_64BIT && (count & ~0xffffffff))
16041         mode = DImode;
16042       count_exp = force_reg (mode, count_exp);
16043     }
16044   /* Do the cheap promotion to allow better CSE across the
16045      main loop and epilogue (ie one load of the big constant in the
16046      front of all code.  */
16047   if (CONST_INT_P (val_exp))
16048     promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
16049                                                    desired_align, align);
16050   /* Ensure that alignment prologue won't copy past end of block.  */
16051   if (size_needed > 1 || (desired_align > 1 && desired_align > align))
16052     {
16053       epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
16054       /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
16055          Make sure it is power of 2.  */
16056       epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
16057
16058       /* To improve performance of small blocks, we jump around the VAL
16059          promoting mode.  This mean that if the promoted VAL is not constant,
16060          we might not use it in the epilogue and have to use byte
16061          loop variant.  */
16062       if (epilogue_size_needed > 2 && !promoted_val)
16063         force_loopy_epilogue = true;
16064       label = gen_label_rtx ();
16065       emit_cmp_and_jump_insns (count_exp,
16066                                GEN_INT (epilogue_size_needed),
16067                                LTU, 0, counter_mode (count_exp), 1, label);
16068       if (GET_CODE (count_exp) == CONST_INT)
16069         ;
16070       else if (expected_size == -1 || expected_size <= epilogue_size_needed)
16071         predict_jump (REG_BR_PROB_BASE * 60 / 100);
16072       else
16073         predict_jump (REG_BR_PROB_BASE * 20 / 100);
16074     }
16075   if (dynamic_check != -1)
16076     {
16077       rtx hot_label = gen_label_rtx ();
16078       jump_around_label = gen_label_rtx ();
16079       emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
16080                                LEU, 0, counter_mode (count_exp), 1, hot_label);
16081       predict_jump (REG_BR_PROB_BASE * 90 / 100);
16082       set_storage_via_libcall (dst, count_exp, val_exp, false);
16083       emit_jump (jump_around_label);
16084       emit_label (hot_label);
16085     }
16086
16087   /* Step 2: Alignment prologue.  */
16088
16089   /* Do the expensive promotion once we branched off the small blocks.  */
16090   if (!promoted_val)
16091     promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
16092                                                    desired_align, align);
16093   gcc_assert (desired_align >= 1 && align >= 1);
16094
16095   if (desired_align > align)
16096     {
16097       /* Except for the first move in epilogue, we no longer know
16098          constant offset in aliasing info.  It don't seems to worth
16099          the pain to maintain it for the first move, so throw away
16100          the info early.  */
16101       dst = change_address (dst, BLKmode, destreg);
16102       expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
16103                               desired_align);
16104     }
16105   if (label && size_needed == 1)
16106     {
16107       emit_label (label);
16108       LABEL_NUSES (label) = 1;
16109       label = NULL;
16110     }
16111
16112   /* Step 3: Main loop.  */
16113
16114   switch (alg)
16115     {
16116     case libcall:
16117     case no_stringop:
16118       gcc_unreachable ();
16119     case loop_1_byte:
16120       expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
16121                                      count_exp, QImode, 1, expected_size);
16122       break;
16123     case loop:
16124       expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
16125                                      count_exp, Pmode, 1, expected_size);
16126       break;
16127     case unrolled_loop:
16128       expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
16129                                      count_exp, Pmode, 4, expected_size);
16130       break;
16131     case rep_prefix_8_byte:
16132       expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
16133                                   DImode);
16134       break;
16135     case rep_prefix_4_byte:
16136       expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
16137                                   SImode);
16138       break;
16139     case rep_prefix_1_byte:
16140       expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
16141                                   QImode);
16142       break;
16143     }
16144   /* Adjust properly the offset of src and dest memory for aliasing.  */
16145   if (CONST_INT_P (count_exp))
16146     dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
16147                                         (count / size_needed) * size_needed);
16148   else
16149     dst = change_address (dst, BLKmode, destreg);
16150
16151   /* Step 4: Epilogue to copy the remaining bytes.  */
16152
16153   if (label)
16154     {
16155       /* When the main loop is done, COUNT_EXP might hold original count,
16156          while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
16157          Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
16158          bytes. Compensate if needed.  */
16159
16160       if (size_needed < desired_align - align)
16161         {
16162           tmp =
16163             expand_simple_binop (counter_mode (count_exp), AND, count_exp,
16164                                  GEN_INT (size_needed - 1), count_exp, 1,
16165                                  OPTAB_DIRECT);
16166           size_needed = desired_align - align + 1;
16167           if (tmp != count_exp)
16168             emit_move_insn (count_exp, tmp);
16169         }
16170       emit_label (label);
16171       LABEL_NUSES (label) = 1;
16172     }
16173   if (count_exp != const0_rtx && epilogue_size_needed > 1)
16174     {
16175       if (force_loopy_epilogue)
16176         expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
16177                                          size_needed);
16178       else
16179         expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
16180                                 size_needed);
16181     }
16182   if (jump_around_label)
16183     emit_label (jump_around_label);
16184   return 1;
16185 }
16186
16187 /* Expand the appropriate insns for doing strlen if not just doing
16188    repnz; scasb
16189
16190    out = result, initialized with the start address
16191    align_rtx = alignment of the address.
16192    scratch = scratch register, initialized with the startaddress when
16193         not aligned, otherwise undefined
16194
16195    This is just the body. It needs the initializations mentioned above and
16196    some address computing at the end.  These things are done in i386.md.  */
16197
16198 static void
16199 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
16200 {
16201   int align;
16202   rtx tmp;
16203   rtx align_2_label = NULL_RTX;
16204   rtx align_3_label = NULL_RTX;
16205   rtx align_4_label = gen_label_rtx ();
16206   rtx end_0_label = gen_label_rtx ();
16207   rtx mem;
16208   rtx tmpreg = gen_reg_rtx (SImode);
16209   rtx scratch = gen_reg_rtx (SImode);
16210   rtx cmp;
16211
16212   align = 0;
16213   if (CONST_INT_P (align_rtx))
16214     align = INTVAL (align_rtx);
16215
16216   /* Loop to check 1..3 bytes for null to get an aligned pointer.  */
16217
16218   /* Is there a known alignment and is it less than 4?  */
16219   if (align < 4)
16220     {
16221       rtx scratch1 = gen_reg_rtx (Pmode);
16222       emit_move_insn (scratch1, out);
16223       /* Is there a known alignment and is it not 2? */
16224       if (align != 2)
16225         {
16226           align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
16227           align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
16228
16229           /* Leave just the 3 lower bits.  */
16230           align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
16231                                     NULL_RTX, 0, OPTAB_WIDEN);
16232
16233           emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
16234                                    Pmode, 1, align_4_label);
16235           emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
16236                                    Pmode, 1, align_2_label);
16237           emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
16238                                    Pmode, 1, align_3_label);
16239         }
16240       else
16241         {
16242           /* Since the alignment is 2, we have to check 2 or 0 bytes;
16243              check if is aligned to 4 - byte.  */
16244
16245           align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
16246                                     NULL_RTX, 0, OPTAB_WIDEN);
16247
16248           emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
16249                                    Pmode, 1, align_4_label);
16250         }
16251
16252       mem = change_address (src, QImode, out);
16253
16254       /* Now compare the bytes.  */
16255
16256       /* Compare the first n unaligned byte on a byte per byte basis.  */
16257       emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
16258                                QImode, 1, end_0_label);
16259
16260       /* Increment the address.  */
16261       if (TARGET_64BIT)
16262         emit_insn (gen_adddi3 (out, out, const1_rtx));
16263       else
16264         emit_insn (gen_addsi3 (out, out, const1_rtx));
16265
16266       /* Not needed with an alignment of 2 */
16267       if (align != 2)
16268         {
16269           emit_label (align_2_label);
16270
16271           emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
16272                                    end_0_label);
16273
16274           if (TARGET_64BIT)
16275             emit_insn (gen_adddi3 (out, out, const1_rtx));
16276           else
16277             emit_insn (gen_addsi3 (out, out, const1_rtx));
16278
16279           emit_label (align_3_label);
16280         }
16281
16282       emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
16283                                end_0_label);
16284
16285       if (TARGET_64BIT)
16286         emit_insn (gen_adddi3 (out, out, const1_rtx));
16287       else
16288         emit_insn (gen_addsi3 (out, out, const1_rtx));
16289     }
16290
16291   /* Generate loop to check 4 bytes at a time.  It is not a good idea to
16292      align this loop.  It gives only huge programs, but does not help to
16293      speed up.  */
16294   emit_label (align_4_label);
16295
16296   mem = change_address (src, SImode, out);
16297   emit_move_insn (scratch, mem);
16298   if (TARGET_64BIT)
16299     emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
16300   else
16301     emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
16302
16303   /* This formula yields a nonzero result iff one of the bytes is zero.
16304      This saves three branches inside loop and many cycles.  */
16305
16306   emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
16307   emit_insn (gen_one_cmplsi2 (scratch, scratch));
16308   emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
16309   emit_insn (gen_andsi3 (tmpreg, tmpreg,
16310                          gen_int_mode (0x80808080, SImode)));
16311   emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
16312                            align_4_label);
16313
16314   if (TARGET_CMOVE)
16315     {
16316        rtx reg = gen_reg_rtx (SImode);
16317        rtx reg2 = gen_reg_rtx (Pmode);
16318        emit_move_insn (reg, tmpreg);
16319        emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
16320
16321        /* If zero is not in the first two bytes, move two bytes forward.  */
16322        emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
16323        tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
16324        tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
16325        emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
16326                                gen_rtx_IF_THEN_ELSE (SImode, tmp,
16327                                                      reg,
16328                                                      tmpreg)));
16329        /* Emit lea manually to avoid clobbering of flags.  */
16330        emit_insn (gen_rtx_SET (SImode, reg2,
16331                                gen_rtx_PLUS (Pmode, out, const2_rtx)));
16332
16333        tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
16334        tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
16335        emit_insn (gen_rtx_SET (VOIDmode, out,
16336                                gen_rtx_IF_THEN_ELSE (Pmode, tmp,
16337                                                      reg2,
16338                                                      out)));
16339
16340     }
16341   else
16342     {
16343        rtx end_2_label = gen_label_rtx ();
16344        /* Is zero in the first two bytes? */
16345
16346        emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
16347        tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
16348        tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
16349        tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
16350                             gen_rtx_LABEL_REF (VOIDmode, end_2_label),
16351                             pc_rtx);
16352        tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
16353        JUMP_LABEL (tmp) = end_2_label;
16354
16355        /* Not in the first two.  Move two bytes forward.  */
16356        emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
16357        if (TARGET_64BIT)
16358          emit_insn (gen_adddi3 (out, out, const2_rtx));
16359        else
16360          emit_insn (gen_addsi3 (out, out, const2_rtx));
16361
16362        emit_label (end_2_label);
16363
16364     }
16365
16366   /* Avoid branch in fixing the byte.  */
16367   tmpreg = gen_lowpart (QImode, tmpreg);
16368   emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
16369   cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
16370   if (TARGET_64BIT)
16371     emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
16372   else
16373     emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
16374
16375   emit_label (end_0_label);
16376 }
16377
16378 /* Expand strlen.  */
16379
16380 int
16381 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
16382 {
16383   rtx addr, scratch1, scratch2, scratch3, scratch4;
16384
16385   /* The generic case of strlen expander is long.  Avoid it's
16386      expanding unless TARGET_INLINE_ALL_STRINGOPS.  */
16387
16388   if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
16389       && !TARGET_INLINE_ALL_STRINGOPS
16390       && !optimize_size
16391       && (!CONST_INT_P (align) || INTVAL (align) < 4))
16392     return 0;
16393
16394   addr = force_reg (Pmode, XEXP (src, 0));
16395   scratch1 = gen_reg_rtx (Pmode);
16396
16397   if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
16398       && !optimize_size)
16399     {
16400       /* Well it seems that some optimizer does not combine a call like
16401          foo(strlen(bar), strlen(bar));
16402          when the move and the subtraction is done here.  It does calculate
16403          the length just once when these instructions are done inside of
16404          output_strlen_unroll().  But I think since &bar[strlen(bar)] is
16405          often used and I use one fewer register for the lifetime of
16406          output_strlen_unroll() this is better.  */
16407
16408       emit_move_insn (out, addr);
16409
16410       ix86_expand_strlensi_unroll_1 (out, src, align);
16411
16412       /* strlensi_unroll_1 returns the address of the zero at the end of
16413          the string, like memchr(), so compute the length by subtracting
16414          the start address.  */
16415       if (TARGET_64BIT)
16416         emit_insn (gen_subdi3 (out, out, addr));
16417       else
16418         emit_insn (gen_subsi3 (out, out, addr));
16419     }
16420   else
16421     {
16422       rtx unspec;
16423
16424       /* Can't use this if the user has appropriated eax, ecx, or edi.  */
16425       if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
16426         return false;
16427
16428       scratch2 = gen_reg_rtx (Pmode);
16429       scratch3 = gen_reg_rtx (Pmode);
16430       scratch4 = force_reg (Pmode, constm1_rtx);
16431
16432       emit_move_insn (scratch3, addr);
16433       eoschar = force_reg (QImode, eoschar);
16434
16435       src = replace_equiv_address_nv (src, scratch3);
16436
16437       /* If .md starts supporting :P, this can be done in .md.  */
16438       unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
16439                                                  scratch4), UNSPEC_SCAS);
16440       emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
16441       if (TARGET_64BIT)
16442         {
16443           emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
16444           emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
16445         }
16446       else
16447         {
16448           emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
16449           emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
16450         }
16451     }
16452   return 1;
16453 }
16454
16455 /* For given symbol (function) construct code to compute address of it's PLT
16456    entry in large x86-64 PIC model.  */
16457 rtx
16458 construct_plt_address (rtx symbol)
16459 {
16460   rtx tmp = gen_reg_rtx (Pmode);
16461   rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
16462
16463   gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
16464   gcc_assert (ix86_cmodel == CM_LARGE_PIC);
16465
16466   emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
16467   emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
16468   return tmp;
16469 }
16470
16471 void
16472 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
16473                   rtx callarg2 ATTRIBUTE_UNUSED,
16474                   rtx pop, int sibcall)
16475 {
16476   rtx use = NULL, call;
16477
16478   if (pop == const0_rtx)
16479     pop = NULL;
16480   gcc_assert (!TARGET_64BIT || !pop);
16481
16482   if (TARGET_MACHO && !TARGET_64BIT)
16483     {
16484 #if TARGET_MACHO
16485       if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
16486         fnaddr = machopic_indirect_call_target (fnaddr);
16487 #endif
16488     }
16489   else
16490     {
16491       /* Static functions and indirect calls don't need the pic register.  */
16492       if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
16493           && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
16494           && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
16495         use_reg (&use, pic_offset_table_rtx);
16496     }
16497
16498   if (TARGET_64BIT && INTVAL (callarg2) >= 0)
16499     {
16500       rtx al = gen_rtx_REG (QImode, AX_REG);
16501       emit_move_insn (al, callarg2);
16502       use_reg (&use, al);
16503     }
16504
16505   if (ix86_cmodel == CM_LARGE_PIC
16506       && GET_CODE (fnaddr) == MEM
16507       && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
16508       && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
16509     fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
16510   else if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
16511     {
16512       fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
16513       fnaddr = gen_rtx_MEM (QImode, fnaddr);
16514     }
16515   if (sibcall && TARGET_64BIT
16516       && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
16517     {
16518       rtx addr;
16519       addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
16520       fnaddr = gen_rtx_REG (Pmode, R11_REG);
16521       emit_move_insn (fnaddr, addr);
16522       fnaddr = gen_rtx_MEM (QImode, fnaddr);
16523     }
16524
16525   call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
16526   if (retval)
16527     call = gen_rtx_SET (VOIDmode, retval, call);
16528   if (pop)
16529     {
16530       pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
16531       pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
16532       call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
16533     }
16534
16535   call = emit_call_insn (call);
16536   if (use)
16537     CALL_INSN_FUNCTION_USAGE (call) = use;
16538 }
16539
16540 \f
16541 /* Clear stack slot assignments remembered from previous functions.
16542    This is called from INIT_EXPANDERS once before RTL is emitted for each
16543    function.  */
16544
16545 static struct machine_function *
16546 ix86_init_machine_status (void)
16547 {
16548   struct machine_function *f;
16549
16550   f = GGC_CNEW (struct machine_function);
16551   f->use_fast_prologue_epilogue_nregs = -1;
16552   f->tls_descriptor_call_expanded_p = 0;
16553   f->call_abi = DEFAULT_ABI;
16554
16555   return f;
16556 }
16557
16558 /* Return a MEM corresponding to a stack slot with mode MODE.
16559    Allocate a new slot if necessary.
16560
16561    The RTL for a function can have several slots available: N is
16562    which slot to use.  */
16563
16564 rtx
16565 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
16566 {
16567   struct stack_local_entry *s;
16568
16569   gcc_assert (n < MAX_386_STACK_LOCALS);
16570
16571   /* Virtual slot is valid only before vregs are instantiated.  */
16572   gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
16573
16574   for (s = ix86_stack_locals; s; s = s->next)
16575     if (s->mode == mode && s->n == n)
16576       return copy_rtx (s->rtl);
16577
16578   s = (struct stack_local_entry *)
16579     ggc_alloc (sizeof (struct stack_local_entry));
16580   s->n = n;
16581   s->mode = mode;
16582   s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
16583
16584   s->next = ix86_stack_locals;
16585   ix86_stack_locals = s;
16586   return s->rtl;
16587 }
16588
16589 /* Construct the SYMBOL_REF for the tls_get_addr function.  */
16590
16591 static GTY(()) rtx ix86_tls_symbol;
16592 rtx
16593 ix86_tls_get_addr (void)
16594 {
16595
16596   if (!ix86_tls_symbol)
16597     {
16598       ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
16599                                             (TARGET_ANY_GNU_TLS
16600                                              && !TARGET_64BIT)
16601                                             ? "___tls_get_addr"
16602                                             : "__tls_get_addr");
16603     }
16604
16605   return ix86_tls_symbol;
16606 }
16607
16608 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol.  */
16609
16610 static GTY(()) rtx ix86_tls_module_base_symbol;
16611 rtx
16612 ix86_tls_module_base (void)
16613 {
16614
16615   if (!ix86_tls_module_base_symbol)
16616     {
16617       ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
16618                                                         "_TLS_MODULE_BASE_");
16619       SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
16620         |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
16621     }
16622
16623   return ix86_tls_module_base_symbol;
16624 }
16625 \f
16626 /* Calculate the length of the memory address in the instruction
16627    encoding.  Does not include the one-byte modrm, opcode, or prefix.  */
16628
16629 int
16630 memory_address_length (rtx addr)
16631 {
16632   struct ix86_address parts;
16633   rtx base, index, disp;
16634   int len;
16635   int ok;
16636
16637   if (GET_CODE (addr) == PRE_DEC
16638       || GET_CODE (addr) == POST_INC
16639       || GET_CODE (addr) == PRE_MODIFY
16640       || GET_CODE (addr) == POST_MODIFY)
16641     return 0;
16642
16643   ok = ix86_decompose_address (addr, &parts);
16644   gcc_assert (ok);
16645
16646   if (parts.base && GET_CODE (parts.base) == SUBREG)
16647     parts.base = SUBREG_REG (parts.base);
16648   if (parts.index && GET_CODE (parts.index) == SUBREG)
16649     parts.index = SUBREG_REG (parts.index);
16650
16651   base = parts.base;
16652   index = parts.index;
16653   disp = parts.disp;
16654   len = 0;
16655
16656   /* Rule of thumb:
16657        - esp as the base always wants an index,
16658        - ebp as the base always wants a displacement.  */
16659
16660   /* Register Indirect.  */
16661   if (base && !index && !disp)
16662     {
16663       /* esp (for its index) and ebp (for its displacement) need
16664          the two-byte modrm form.  */
16665       if (addr == stack_pointer_rtx
16666           || addr == arg_pointer_rtx
16667           || addr == frame_pointer_rtx
16668           || addr == hard_frame_pointer_rtx)
16669         len = 1;
16670     }
16671
16672   /* Direct Addressing.  */
16673   else if (disp && !base && !index)
16674     len = 4;
16675
16676   else
16677     {
16678       /* Find the length of the displacement constant.  */
16679       if (disp)
16680         {
16681           if (base && satisfies_constraint_K (disp))
16682             len = 1;
16683           else
16684             len = 4;
16685         }
16686       /* ebp always wants a displacement.  */
16687       else if (base == hard_frame_pointer_rtx)
16688         len = 1;
16689
16690       /* An index requires the two-byte modrm form....  */
16691       if (index
16692           /* ...like esp, which always wants an index.  */
16693           || base == stack_pointer_rtx
16694           || base == arg_pointer_rtx
16695           || base == frame_pointer_rtx)
16696         len += 1;
16697     }
16698
16699   return len;
16700 }
16701
16702 /* Compute default value for "length_immediate" attribute.  When SHORTFORM
16703    is set, expect that insn have 8bit immediate alternative.  */
16704 int
16705 ix86_attr_length_immediate_default (rtx insn, int shortform)
16706 {
16707   int len = 0;
16708   int i;
16709   extract_insn_cached (insn);
16710   for (i = recog_data.n_operands - 1; i >= 0; --i)
16711     if (CONSTANT_P (recog_data.operand[i]))
16712       {
16713         gcc_assert (!len);
16714         if (shortform && satisfies_constraint_K (recog_data.operand[i]))
16715           len = 1;
16716         else
16717           {
16718             switch (get_attr_mode (insn))
16719               {
16720                 case MODE_QI:
16721                   len+=1;
16722                   break;
16723                 case MODE_HI:
16724                   len+=2;
16725                   break;
16726                 case MODE_SI:
16727                   len+=4;
16728                   break;
16729                 /* Immediates for DImode instructions are encoded as 32bit sign extended values.  */
16730                 case MODE_DI:
16731                   len+=4;
16732                   break;
16733                 default:
16734                   fatal_insn ("unknown insn mode", insn);
16735               }
16736           }
16737       }
16738   return len;
16739 }
16740 /* Compute default value for "length_address" attribute.  */
16741 int
16742 ix86_attr_length_address_default (rtx insn)
16743 {
16744   int i;
16745
16746   if (get_attr_type (insn) == TYPE_LEA)
16747     {
16748       rtx set = PATTERN (insn);
16749
16750       if (GET_CODE (set) == PARALLEL)
16751         set = XVECEXP (set, 0, 0);
16752
16753       gcc_assert (GET_CODE (set) == SET);
16754
16755       return memory_address_length (SET_SRC (set));
16756     }
16757
16758   extract_insn_cached (insn);
16759   for (i = recog_data.n_operands - 1; i >= 0; --i)
16760     if (MEM_P (recog_data.operand[i]))
16761       {
16762         return memory_address_length (XEXP (recog_data.operand[i], 0));
16763         break;
16764       }
16765   return 0;
16766 }
16767 \f
16768 /* Return the maximum number of instructions a cpu can issue.  */
16769
16770 static int
16771 ix86_issue_rate (void)
16772 {
16773   switch (ix86_tune)
16774     {
16775     case PROCESSOR_PENTIUM:
16776     case PROCESSOR_K6:
16777       return 2;
16778
16779     case PROCESSOR_PENTIUMPRO:
16780     case PROCESSOR_PENTIUM4:
16781     case PROCESSOR_ATHLON:
16782     case PROCESSOR_K8:
16783     case PROCESSOR_AMDFAM10:
16784     case PROCESSOR_NOCONA:
16785     case PROCESSOR_GENERIC32:
16786     case PROCESSOR_GENERIC64:
16787       return 3;
16788
16789     case PROCESSOR_CORE2:
16790       return 4;
16791
16792     default:
16793       return 1;
16794     }
16795 }
16796
16797 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
16798    by DEP_INSN and nothing set by DEP_INSN.  */
16799
16800 static int
16801 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
16802 {
16803   rtx set, set2;
16804
16805   /* Simplify the test for uninteresting insns.  */
16806   if (insn_type != TYPE_SETCC
16807       && insn_type != TYPE_ICMOV
16808       && insn_type != TYPE_FCMOV
16809       && insn_type != TYPE_IBR)
16810     return 0;
16811
16812   if ((set = single_set (dep_insn)) != 0)
16813     {
16814       set = SET_DEST (set);
16815       set2 = NULL_RTX;
16816     }
16817   else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
16818            && XVECLEN (PATTERN (dep_insn), 0) == 2
16819            && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
16820            && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
16821     {
16822       set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
16823       set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
16824     }
16825   else
16826     return 0;
16827
16828   if (!REG_P (set) || REGNO (set) != FLAGS_REG)
16829     return 0;
16830
16831   /* This test is true if the dependent insn reads the flags but
16832      not any other potentially set register.  */
16833   if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
16834     return 0;
16835
16836   if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
16837     return 0;
16838
16839   return 1;
16840 }
16841
16842 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
16843    address with operands set by DEP_INSN.  */
16844
16845 static int
16846 ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
16847 {
16848   rtx addr;
16849
16850   if (insn_type == TYPE_LEA
16851       && TARGET_PENTIUM)
16852     {
16853       addr = PATTERN (insn);
16854
16855       if (GET_CODE (addr) == PARALLEL)
16856         addr = XVECEXP (addr, 0, 0);
16857
16858       gcc_assert (GET_CODE (addr) == SET);
16859
16860       addr = SET_SRC (addr);
16861     }
16862   else
16863     {
16864       int i;
16865       extract_insn_cached (insn);
16866       for (i = recog_data.n_operands - 1; i >= 0; --i)
16867         if (MEM_P (recog_data.operand[i]))
16868           {
16869             addr = XEXP (recog_data.operand[i], 0);
16870             goto found;
16871           }
16872       return 0;
16873     found:;
16874     }
16875
16876   return modified_in_p (addr, dep_insn);
16877 }
16878
16879 static int
16880 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
16881 {
16882   enum attr_type insn_type, dep_insn_type;
16883   enum attr_memory memory;
16884   rtx set, set2;
16885   int dep_insn_code_number;
16886
16887   /* Anti and output dependencies have zero cost on all CPUs.  */
16888   if (REG_NOTE_KIND (link) != 0)
16889     return 0;
16890
16891   dep_insn_code_number = recog_memoized (dep_insn);
16892
16893   /* If we can't recognize the insns, we can't really do anything.  */
16894   if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
16895     return cost;
16896
16897   insn_type = get_attr_type (insn);
16898   dep_insn_type = get_attr_type (dep_insn);
16899
16900   switch (ix86_tune)
16901     {
16902     case PROCESSOR_PENTIUM:
16903       /* Address Generation Interlock adds a cycle of latency.  */
16904       if (ix86_agi_dependent (insn, dep_insn, insn_type))
16905         cost += 1;
16906
16907       /* ??? Compares pair with jump/setcc.  */
16908       if (ix86_flags_dependent (insn, dep_insn, insn_type))
16909         cost = 0;
16910
16911       /* Floating point stores require value to be ready one cycle earlier.  */
16912       if (insn_type == TYPE_FMOV
16913           && get_attr_memory (insn) == MEMORY_STORE
16914           && !ix86_agi_dependent (insn, dep_insn, insn_type))
16915         cost += 1;
16916       break;
16917
16918     case PROCESSOR_PENTIUMPRO:
16919       memory = get_attr_memory (insn);
16920
16921       /* INT->FP conversion is expensive.  */
16922       if (get_attr_fp_int_src (dep_insn))
16923         cost += 5;
16924
16925       /* There is one cycle extra latency between an FP op and a store.  */
16926       if (insn_type == TYPE_FMOV
16927           && (set = single_set (dep_insn)) != NULL_RTX
16928           && (set2 = single_set (insn)) != NULL_RTX
16929           && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
16930           && MEM_P (SET_DEST (set2)))
16931         cost += 1;
16932
16933       /* Show ability of reorder buffer to hide latency of load by executing
16934          in parallel with previous instruction in case
16935          previous instruction is not needed to compute the address.  */
16936       if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
16937           && !ix86_agi_dependent (insn, dep_insn, insn_type))
16938         {
16939           /* Claim moves to take one cycle, as core can issue one load
16940              at time and the next load can start cycle later.  */
16941           if (dep_insn_type == TYPE_IMOV
16942               || dep_insn_type == TYPE_FMOV)
16943             cost = 1;
16944           else if (cost > 1)
16945             cost--;
16946         }
16947       break;
16948
16949     case PROCESSOR_K6:
16950       memory = get_attr_memory (insn);
16951
16952       /* The esp dependency is resolved before the instruction is really
16953          finished.  */
16954       if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
16955           && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
16956         return 1;
16957
16958       /* INT->FP conversion is expensive.  */
16959       if (get_attr_fp_int_src (dep_insn))
16960         cost += 5;
16961
16962       /* Show ability of reorder buffer to hide latency of load by executing
16963          in parallel with previous instruction in case
16964          previous instruction is not needed to compute the address.  */
16965       if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
16966           && !ix86_agi_dependent (insn, dep_insn, insn_type))
16967         {
16968           /* Claim moves to take one cycle, as core can issue one load
16969              at time and the next load can start cycle later.  */
16970           if (dep_insn_type == TYPE_IMOV
16971               || dep_insn_type == TYPE_FMOV)
16972             cost = 1;
16973           else if (cost > 2)
16974             cost -= 2;
16975           else
16976             cost = 1;
16977         }
16978       break;
16979
16980     case PROCESSOR_ATHLON:
16981     case PROCESSOR_K8:
16982     case PROCESSOR_AMDFAM10:
16983     case PROCESSOR_GENERIC32:
16984     case PROCESSOR_GENERIC64:
16985       memory = get_attr_memory (insn);
16986
16987       /* Show ability of reorder buffer to hide latency of load by executing
16988          in parallel with previous instruction in case
16989          previous instruction is not needed to compute the address.  */
16990       if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
16991           && !ix86_agi_dependent (insn, dep_insn, insn_type))
16992         {
16993           enum attr_unit unit = get_attr_unit (insn);
16994           int loadcost = 3;
16995
16996           /* Because of the difference between the length of integer and
16997              floating unit pipeline preparation stages, the memory operands
16998              for floating point are cheaper.
16999
17000              ??? For Athlon it the difference is most probably 2.  */
17001           if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
17002             loadcost = 3;
17003           else
17004             loadcost = TARGET_ATHLON ? 2 : 0;
17005
17006           if (cost >= loadcost)
17007             cost -= loadcost;
17008           else
17009             cost = 0;
17010         }
17011
17012     default:
17013       break;
17014     }
17015
17016   return cost;
17017 }
17018
17019 /* How many alternative schedules to try.  This should be as wide as the
17020    scheduling freedom in the DFA, but no wider.  Making this value too
17021    large results extra work for the scheduler.  */
17022
17023 static int
17024 ia32_multipass_dfa_lookahead (void)
17025 {
17026   switch (ix86_tune)
17027     {
17028     case PROCESSOR_PENTIUM:
17029       return 2;
17030
17031     case PROCESSOR_PENTIUMPRO:
17032     case PROCESSOR_K6:
17033       return 1;
17034
17035     default:
17036       return 0;
17037     }
17038 }
17039
17040 \f
17041 /* Compute the alignment given to a constant that is being placed in memory.
17042    EXP is the constant and ALIGN is the alignment that the object would
17043    ordinarily have.
17044    The value of this function is used instead of that alignment to align
17045    the object.  */
17046
17047 int
17048 ix86_constant_alignment (tree exp, int align)
17049 {
17050   if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
17051       || TREE_CODE (exp) == INTEGER_CST)
17052     {
17053       if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
17054         return 64;
17055       else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
17056         return 128;
17057     }
17058   else if (!optimize_size && TREE_CODE (exp) == STRING_CST
17059            && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
17060     return BITS_PER_WORD;
17061
17062   return align;
17063 }
17064
17065 /* Compute the alignment for a static variable.
17066    TYPE is the data type, and ALIGN is the alignment that
17067    the object would ordinarily have.  The value of this function is used
17068    instead of that alignment to align the object.  */
17069
17070 int
17071 ix86_data_alignment (tree type, int align)
17072 {
17073   int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
17074
17075   if (AGGREGATE_TYPE_P (type)
17076       && TYPE_SIZE (type)
17077       && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
17078       && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
17079           || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
17080       && align < max_align)
17081     align = max_align;
17082
17083   /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
17084      to 16byte boundary.  */
17085   if (TARGET_64BIT)
17086     {
17087       if (AGGREGATE_TYPE_P (type)
17088            && TYPE_SIZE (type)
17089            && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
17090            && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
17091                || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
17092         return 128;
17093     }
17094
17095   if (TREE_CODE (type) == ARRAY_TYPE)
17096     {
17097       if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
17098         return 64;
17099       if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
17100         return 128;
17101     }
17102   else if (TREE_CODE (type) == COMPLEX_TYPE)
17103     {
17104
17105       if (TYPE_MODE (type) == DCmode && align < 64)
17106         return 64;
17107       if (TYPE_MODE (type) == XCmode && align < 128)
17108         return 128;
17109     }
17110   else if ((TREE_CODE (type) == RECORD_TYPE
17111             || TREE_CODE (type) == UNION_TYPE
17112             || TREE_CODE (type) == QUAL_UNION_TYPE)
17113            && TYPE_FIELDS (type))
17114     {
17115       if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
17116         return 64;
17117       if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
17118         return 128;
17119     }
17120   else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
17121            || TREE_CODE (type) == INTEGER_TYPE)
17122     {
17123       if (TYPE_MODE (type) == DFmode && align < 64)
17124         return 64;
17125       if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
17126         return 128;
17127     }
17128
17129   return align;
17130 }
17131
17132 /* Compute the alignment for a local variable or a stack slot.  TYPE is
17133    the data type, MODE is the widest mode available and ALIGN is the
17134    alignment that the object would ordinarily have.  The value of this
17135    macro is used instead of that alignment to align the object.  */
17136
17137 unsigned int
17138 ix86_local_alignment (tree type, enum machine_mode mode,
17139                       unsigned int align)
17140 {
17141   /* If TYPE is NULL, we are allocating a stack slot for caller-save
17142      register in MODE.  We will return the largest alignment of XF
17143      and DF.  */
17144   if (!type)
17145     {
17146       if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
17147         align = GET_MODE_ALIGNMENT (DFmode);
17148       return align;
17149     }
17150
17151   /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
17152      to 16byte boundary.  */
17153   if (TARGET_64BIT)
17154     {
17155       if (AGGREGATE_TYPE_P (type)
17156            && TYPE_SIZE (type)
17157            && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
17158            && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
17159                || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
17160         return 128;
17161     }
17162   if (TREE_CODE (type) == ARRAY_TYPE)
17163     {
17164       if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
17165         return 64;
17166       if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
17167         return 128;
17168     }
17169   else if (TREE_CODE (type) == COMPLEX_TYPE)
17170     {
17171       if (TYPE_MODE (type) == DCmode && align < 64)
17172         return 64;
17173       if (TYPE_MODE (type) == XCmode && align < 128)
17174         return 128;
17175     }
17176   else if ((TREE_CODE (type) == RECORD_TYPE
17177             || TREE_CODE (type) == UNION_TYPE
17178             || TREE_CODE (type) == QUAL_UNION_TYPE)
17179            && TYPE_FIELDS (type))
17180     {
17181       if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
17182         return 64;
17183       if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
17184         return 128;
17185     }
17186   else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
17187            || TREE_CODE (type) == INTEGER_TYPE)
17188     {
17189
17190       if (TYPE_MODE (type) == DFmode && align < 64)
17191         return 64;
17192       if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
17193         return 128;
17194     }
17195   return align;
17196 }
17197 \f
17198 /* Emit RTL insns to initialize the variable parts of a trampoline.
17199    FNADDR is an RTX for the address of the function's pure code.
17200    CXT is an RTX for the static chain value for the function.
         TRAMP is an RTX for the address at which the trampoline bytes are
         stored.

         For !TARGET_64BIT the stores lay out 10 bytes: byte 0xb9 at offset 0,
         CXT at offset 1, byte 0xe9 at offset 5 and the jump displacement
         FNADDR - (TRAMP + 10) at offset 6.  For TARGET_64BIT a load of
         FNADDR (6 or 10 bytes), a 10-byte movabs of CXT to r10 and a 3-byte
         jump through r11 are stored, and the total is asserted not to exceed
         TRAMPOLINE_SIZE.  If ENABLE_EXECUTE_STACK is defined, a library call
         to __enable_execute_stack on TRAMP is emitted afterwards.  */
17201 void
17202 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
17203 {
17204   if (!TARGET_64BIT)
17205     {
17206       /* Compute offset from the end of the jmp to the target function.
               The jmp stored below occupies offsets 5..9 of the trampoline,
               so it ends at TRAMP + 10.  */
17207       rtx disp = expand_binop (SImode, sub_optab, fnaddr,
17208                                plus_constant (tramp, 10),
17209                                NULL_RTX, 1, OPTAB_DIRECT);
            /* Offset 0: opcode byte 0xb9, followed at offset 1 by the 32-bit
               CXT.  NOTE(review): 0xb9 is presumably the opcode of
               `movl $imm32, %ecx' -- confirm against the x86 opcode map.  */
17210       emit_move_insn (gen_rtx_MEM (QImode, tramp),
17211                       gen_int_mode (0xb9, QImode));
17212       emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
            /* Offset 5: opcode byte 0xe9 (the jmp mentioned above), followed
               at offset 6 by its 32-bit displacement DISP.  */
17213       emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
17214                       gen_int_mode (0xe9, QImode));
17215       emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
17216     }
17217   else
17218     {
17219       int offset = 0;
17220       /* Try to load address using shorter movl instead of movabs.
17221          We may want to support movq for kernel mode, but kernel does not use
17222          trampolines at the moment.  */
17223       if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
17224         {
                /* Short form: a 2-byte opcode (HImode constant 0xbb41) and the
                   low SImode part of FNADDR as immediate, 6 bytes in total.
                   FNADDR is first copied into a DImode register; its low part
                   is then taken with gen_lowpart.  */
17225           fnaddr = copy_to_mode_reg (DImode, fnaddr);
17226           emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
17227                           gen_int_mode (0xbb41, HImode));
17228           emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
17229                           gen_lowpart (SImode, fnaddr));
17230           offset += 6;
17231         }
17232       else
17233         {
                /* Long form: a 2-byte opcode (HImode constant 0xbb49) and the
                   full DImode FNADDR as immediate, 10 bytes in total.  */
17234           emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
17235                           gen_int_mode (0xbb49, HImode));
17236           emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
17237                           fnaddr);
17238           offset += 10;
17239         }
17240       /* Load static chain using movabs to r10.  Same 2 + 8 byte layout as
               the long form above, with opcode constant 0xba49 and CXT as the
               immediate.  */
17241       emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
17242                       gen_int_mode (0xba49, HImode));
17243       emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
17244                       cxt);
17245       offset += 10;
17246       /* Jump to the address in r11.  Three bytes: an HImode store of
               0xff49 followed by the single byte 0xe3.  */
17247       emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
17248                       gen_int_mode (0xff49, HImode));
17249       emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
17250                       gen_int_mode (0xe3, QImode));
17251       offset += 3;
            /* The bytes written above must fit in the trampoline.  */
17252       gcc_assert (offset <= TRAMPOLINE_SIZE);
17253     }
17254
        /* When ENABLE_EXECUTE_STACK is defined, also emit a library call to
           __enable_execute_stack with TRAMP as its single Pmode argument.  */
17255 #ifdef ENABLE_EXECUTE_STACK
17256   emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
17257                      LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
17258 #endif
17259 }
17260 \f
17261 /* Codes for all the ix86 target builtins.  Besides the original MMX/SSE
         set, the list below also has sections for 3DNow!, SSE2, SSE3, SSSE3,
         SSE4A, SSE4.1, SSE4.2, AES, PCLMUL, TFmode support and SSE5.  Each
         code is used as an index into the ix86_builtins decl table, and
         IX86_BUILTIN_MAX sizes that table, so it must remain the last
         enumerator.  */
17262 enum ix86_builtins
17263 {
17264   IX86_BUILTIN_ADDPS,
17265   IX86_BUILTIN_ADDSS,
17266   IX86_BUILTIN_DIVPS,
17267   IX86_BUILTIN_DIVSS,
17268   IX86_BUILTIN_MULPS,
17269   IX86_BUILTIN_MULSS,
17270   IX86_BUILTIN_SUBPS,
17271   IX86_BUILTIN_SUBSS,
17272
17273   IX86_BUILTIN_CMPEQPS,
17274   IX86_BUILTIN_CMPLTPS,
17275   IX86_BUILTIN_CMPLEPS,
17276   IX86_BUILTIN_CMPGTPS,
17277   IX86_BUILTIN_CMPGEPS,
17278   IX86_BUILTIN_CMPNEQPS,
17279   IX86_BUILTIN_CMPNLTPS,
17280   IX86_BUILTIN_CMPNLEPS,
17281   IX86_BUILTIN_CMPNGTPS,
17282   IX86_BUILTIN_CMPNGEPS,
17283   IX86_BUILTIN_CMPORDPS,
17284   IX86_BUILTIN_CMPUNORDPS,
17285   IX86_BUILTIN_CMPEQSS,
17286   IX86_BUILTIN_CMPLTSS,
17287   IX86_BUILTIN_CMPLESS,
17288   IX86_BUILTIN_CMPNEQSS,
17289   IX86_BUILTIN_CMPNLTSS,
17290   IX86_BUILTIN_CMPNLESS,
17291   IX86_BUILTIN_CMPNGTSS,
17292   IX86_BUILTIN_CMPNGESS,
17293   IX86_BUILTIN_CMPORDSS,
17294   IX86_BUILTIN_CMPUNORDSS,
17295
17296   IX86_BUILTIN_COMIEQSS,
17297   IX86_BUILTIN_COMILTSS,
17298   IX86_BUILTIN_COMILESS,
17299   IX86_BUILTIN_COMIGTSS,
17300   IX86_BUILTIN_COMIGESS,
17301   IX86_BUILTIN_COMINEQSS,
17302   IX86_BUILTIN_UCOMIEQSS,
17303   IX86_BUILTIN_UCOMILTSS,
17304   IX86_BUILTIN_UCOMILESS,
17305   IX86_BUILTIN_UCOMIGTSS,
17306   IX86_BUILTIN_UCOMIGESS,
17307   IX86_BUILTIN_UCOMINEQSS,
17308
17309   IX86_BUILTIN_CVTPI2PS,
17310   IX86_BUILTIN_CVTPS2PI,
17311   IX86_BUILTIN_CVTSI2SS,
17312   IX86_BUILTIN_CVTSI642SS,
17313   IX86_BUILTIN_CVTSS2SI,
17314   IX86_BUILTIN_CVTSS2SI64,
17315   IX86_BUILTIN_CVTTPS2PI,
17316   IX86_BUILTIN_CVTTSS2SI,
17317   IX86_BUILTIN_CVTTSS2SI64,
17318
17319   IX86_BUILTIN_MAXPS,
17320   IX86_BUILTIN_MAXSS,
17321   IX86_BUILTIN_MINPS,
17322   IX86_BUILTIN_MINSS,
17323
17324   IX86_BUILTIN_LOADUPS,
17325   IX86_BUILTIN_STOREUPS,
17326   IX86_BUILTIN_MOVSS,
17327
17328   IX86_BUILTIN_MOVHLPS,
17329   IX86_BUILTIN_MOVLHPS,
17330   IX86_BUILTIN_LOADHPS,
17331   IX86_BUILTIN_LOADLPS,
17332   IX86_BUILTIN_STOREHPS,
17333   IX86_BUILTIN_STORELPS,
17334
17335   IX86_BUILTIN_MASKMOVQ,
17336   IX86_BUILTIN_MOVMSKPS,
17337   IX86_BUILTIN_PMOVMSKB,
17338
17339   IX86_BUILTIN_MOVNTPS,
17340   IX86_BUILTIN_MOVNTQ,
17341
17342   IX86_BUILTIN_LOADDQU,
17343   IX86_BUILTIN_STOREDQU,
17344
17345   IX86_BUILTIN_PACKSSWB,
17346   IX86_BUILTIN_PACKSSDW,
17347   IX86_BUILTIN_PACKUSWB,
17348
17349   IX86_BUILTIN_PADDB,
17350   IX86_BUILTIN_PADDW,
17351   IX86_BUILTIN_PADDD,
17352   IX86_BUILTIN_PADDQ,
17353   IX86_BUILTIN_PADDSB,
17354   IX86_BUILTIN_PADDSW,
17355   IX86_BUILTIN_PADDUSB,
17356   IX86_BUILTIN_PADDUSW,
17357   IX86_BUILTIN_PSUBB,
17358   IX86_BUILTIN_PSUBW,
17359   IX86_BUILTIN_PSUBD,
17360   IX86_BUILTIN_PSUBQ,
17361   IX86_BUILTIN_PSUBSB,
17362   IX86_BUILTIN_PSUBSW,
17363   IX86_BUILTIN_PSUBUSB,
17364   IX86_BUILTIN_PSUBUSW,
17365
17366   IX86_BUILTIN_PAND,
17367   IX86_BUILTIN_PANDN,
17368   IX86_BUILTIN_POR,
17369   IX86_BUILTIN_PXOR,
17370
17371   IX86_BUILTIN_PAVGB,
17372   IX86_BUILTIN_PAVGW,
17373
17374   IX86_BUILTIN_PCMPEQB,
17375   IX86_BUILTIN_PCMPEQW,
17376   IX86_BUILTIN_PCMPEQD,
17377   IX86_BUILTIN_PCMPGTB,
17378   IX86_BUILTIN_PCMPGTW,
17379   IX86_BUILTIN_PCMPGTD,
17380
17381   IX86_BUILTIN_PMADDWD,
17382
17383   IX86_BUILTIN_PMAXSW,
17384   IX86_BUILTIN_PMAXUB,
17385   IX86_BUILTIN_PMINSW,
17386   IX86_BUILTIN_PMINUB,
17387
17388   IX86_BUILTIN_PMULHUW,
17389   IX86_BUILTIN_PMULHW,
17390   IX86_BUILTIN_PMULLW,
17391
17392   IX86_BUILTIN_PSADBW,
17393   IX86_BUILTIN_PSHUFW,
17394
17395   IX86_BUILTIN_PSLLW,
17396   IX86_BUILTIN_PSLLD,
17397   IX86_BUILTIN_PSLLQ,
17398   IX86_BUILTIN_PSRAW,
17399   IX86_BUILTIN_PSRAD,
17400   IX86_BUILTIN_PSRLW,
17401   IX86_BUILTIN_PSRLD,
17402   IX86_BUILTIN_PSRLQ,
17403   IX86_BUILTIN_PSLLWI,
17404   IX86_BUILTIN_PSLLDI,
17405   IX86_BUILTIN_PSLLQI,
17406   IX86_BUILTIN_PSRAWI,
17407   IX86_BUILTIN_PSRADI,
17408   IX86_BUILTIN_PSRLWI,
17409   IX86_BUILTIN_PSRLDI,
17410   IX86_BUILTIN_PSRLQI,
17411
17412   IX86_BUILTIN_PUNPCKHBW,
17413   IX86_BUILTIN_PUNPCKHWD,
17414   IX86_BUILTIN_PUNPCKHDQ,
17415   IX86_BUILTIN_PUNPCKLBW,
17416   IX86_BUILTIN_PUNPCKLWD,
17417   IX86_BUILTIN_PUNPCKLDQ,
17418
17419   IX86_BUILTIN_SHUFPS,
17420
17421   IX86_BUILTIN_RCPPS,
17422   IX86_BUILTIN_RCPSS,
17423   IX86_BUILTIN_RSQRTPS,
17424   IX86_BUILTIN_RSQRTPS_NR,
17425   IX86_BUILTIN_RSQRTSS,
17426   IX86_BUILTIN_RSQRTF,
17427   IX86_BUILTIN_SQRTPS,
17428   IX86_BUILTIN_SQRTPS_NR,
17429   IX86_BUILTIN_SQRTSS,
17430
17431   IX86_BUILTIN_UNPCKHPS,
17432   IX86_BUILTIN_UNPCKLPS,
17433
17434   IX86_BUILTIN_ANDPS,
17435   IX86_BUILTIN_ANDNPS,
17436   IX86_BUILTIN_ORPS,
17437   IX86_BUILTIN_XORPS,
17438
17439   IX86_BUILTIN_EMMS,
17440   IX86_BUILTIN_LDMXCSR,
17441   IX86_BUILTIN_STMXCSR,
17442   IX86_BUILTIN_SFENCE,
17443
17444   /* 3DNow! Original */
17445   IX86_BUILTIN_FEMMS,
17446   IX86_BUILTIN_PAVGUSB,
17447   IX86_BUILTIN_PF2ID,
17448   IX86_BUILTIN_PFACC,
17449   IX86_BUILTIN_PFADD,
17450   IX86_BUILTIN_PFCMPEQ,
17451   IX86_BUILTIN_PFCMPGE,
17452   IX86_BUILTIN_PFCMPGT,
17453   IX86_BUILTIN_PFMAX,
17454   IX86_BUILTIN_PFMIN,
17455   IX86_BUILTIN_PFMUL,
17456   IX86_BUILTIN_PFRCP,
17457   IX86_BUILTIN_PFRCPIT1,
17458   IX86_BUILTIN_PFRCPIT2,
17459   IX86_BUILTIN_PFRSQIT1,
17460   IX86_BUILTIN_PFRSQRT,
17461   IX86_BUILTIN_PFSUB,
17462   IX86_BUILTIN_PFSUBR,
17463   IX86_BUILTIN_PI2FD,
17464   IX86_BUILTIN_PMULHRW,
17465
17466   /* 3DNow! Athlon Extensions */
17467   IX86_BUILTIN_PF2IW,
17468   IX86_BUILTIN_PFNACC,
17469   IX86_BUILTIN_PFPNACC,
17470   IX86_BUILTIN_PI2FW,
17471   IX86_BUILTIN_PSWAPDSI,
17472   IX86_BUILTIN_PSWAPDSF,
17473
17474   /* SSE2 */
17475   IX86_BUILTIN_ADDPD,
17476   IX86_BUILTIN_ADDSD,
17477   IX86_BUILTIN_DIVPD,
17478   IX86_BUILTIN_DIVSD,
17479   IX86_BUILTIN_MULPD,
17480   IX86_BUILTIN_MULSD,
17481   IX86_BUILTIN_SUBPD,
17482   IX86_BUILTIN_SUBSD,
17483
17484   IX86_BUILTIN_CMPEQPD,
17485   IX86_BUILTIN_CMPLTPD,
17486   IX86_BUILTIN_CMPLEPD,
17487   IX86_BUILTIN_CMPGTPD,
17488   IX86_BUILTIN_CMPGEPD,
17489   IX86_BUILTIN_CMPNEQPD,
17490   IX86_BUILTIN_CMPNLTPD,
17491   IX86_BUILTIN_CMPNLEPD,
17492   IX86_BUILTIN_CMPNGTPD,
17493   IX86_BUILTIN_CMPNGEPD,
17494   IX86_BUILTIN_CMPORDPD,
17495   IX86_BUILTIN_CMPUNORDPD,
17496   IX86_BUILTIN_CMPEQSD,
17497   IX86_BUILTIN_CMPLTSD,
17498   IX86_BUILTIN_CMPLESD,
17499   IX86_BUILTIN_CMPNEQSD,
17500   IX86_BUILTIN_CMPNLTSD,
17501   IX86_BUILTIN_CMPNLESD,
17502   IX86_BUILTIN_CMPORDSD,
17503   IX86_BUILTIN_CMPUNORDSD,
17504
17505   IX86_BUILTIN_COMIEQSD,
17506   IX86_BUILTIN_COMILTSD,
17507   IX86_BUILTIN_COMILESD,
17508   IX86_BUILTIN_COMIGTSD,
17509   IX86_BUILTIN_COMIGESD,
17510   IX86_BUILTIN_COMINEQSD,
17511   IX86_BUILTIN_UCOMIEQSD,
17512   IX86_BUILTIN_UCOMILTSD,
17513   IX86_BUILTIN_UCOMILESD,
17514   IX86_BUILTIN_UCOMIGTSD,
17515   IX86_BUILTIN_UCOMIGESD,
17516   IX86_BUILTIN_UCOMINEQSD,
17517
17518   IX86_BUILTIN_MAXPD,
17519   IX86_BUILTIN_MAXSD,
17520   IX86_BUILTIN_MINPD,
17521   IX86_BUILTIN_MINSD,
17522
17523   IX86_BUILTIN_ANDPD,
17524   IX86_BUILTIN_ANDNPD,
17525   IX86_BUILTIN_ORPD,
17526   IX86_BUILTIN_XORPD,
17527
17528   IX86_BUILTIN_SQRTPD,
17529   IX86_BUILTIN_SQRTSD,
17530
17531   IX86_BUILTIN_UNPCKHPD,
17532   IX86_BUILTIN_UNPCKLPD,
17533
17534   IX86_BUILTIN_SHUFPD,
17535
17536   IX86_BUILTIN_LOADUPD,
17537   IX86_BUILTIN_STOREUPD,
17538   IX86_BUILTIN_MOVSD,
17539
17540   IX86_BUILTIN_LOADHPD,
17541   IX86_BUILTIN_LOADLPD,
17542
17543   IX86_BUILTIN_CVTDQ2PD,
17544   IX86_BUILTIN_CVTDQ2PS,
17545
17546   IX86_BUILTIN_CVTPD2DQ,
17547   IX86_BUILTIN_CVTPD2PI,
17548   IX86_BUILTIN_CVTPD2PS,
17549   IX86_BUILTIN_CVTTPD2DQ,
17550   IX86_BUILTIN_CVTTPD2PI,
17551
17552   IX86_BUILTIN_CVTPI2PD,
17553   IX86_BUILTIN_CVTSI2SD,
17554   IX86_BUILTIN_CVTSI642SD,
17555
17556   IX86_BUILTIN_CVTSD2SI,
17557   IX86_BUILTIN_CVTSD2SI64,
17558   IX86_BUILTIN_CVTSD2SS,
17559   IX86_BUILTIN_CVTSS2SD,
17560   IX86_BUILTIN_CVTTSD2SI,
17561   IX86_BUILTIN_CVTTSD2SI64,
17562
17563   IX86_BUILTIN_CVTPS2DQ,
17564   IX86_BUILTIN_CVTPS2PD,
17565   IX86_BUILTIN_CVTTPS2DQ,
17566
17567   IX86_BUILTIN_MOVNTI,
17568   IX86_BUILTIN_MOVNTPD,
17569   IX86_BUILTIN_MOVNTDQ,
17570
17571   /* SSE2 MMX */
17572   IX86_BUILTIN_MASKMOVDQU,
17573   IX86_BUILTIN_MOVMSKPD,
17574   IX86_BUILTIN_PMOVMSKB128,
17575
17576   IX86_BUILTIN_PACKSSWB128,
17577   IX86_BUILTIN_PACKSSDW128,
17578   IX86_BUILTIN_PACKUSWB128,
17579
17580   IX86_BUILTIN_PADDB128,
17581   IX86_BUILTIN_PADDW128,
17582   IX86_BUILTIN_PADDD128,
17583   IX86_BUILTIN_PADDQ128,
17584   IX86_BUILTIN_PADDSB128,
17585   IX86_BUILTIN_PADDSW128,
17586   IX86_BUILTIN_PADDUSB128,
17587   IX86_BUILTIN_PADDUSW128,
17588   IX86_BUILTIN_PSUBB128,
17589   IX86_BUILTIN_PSUBW128,
17590   IX86_BUILTIN_PSUBD128,
17591   IX86_BUILTIN_PSUBQ128,
17592   IX86_BUILTIN_PSUBSB128,
17593   IX86_BUILTIN_PSUBSW128,
17594   IX86_BUILTIN_PSUBUSB128,
17595   IX86_BUILTIN_PSUBUSW128,
17596
17597   IX86_BUILTIN_PAND128,
17598   IX86_BUILTIN_PANDN128,
17599   IX86_BUILTIN_POR128,
17600   IX86_BUILTIN_PXOR128,
17601
17602   IX86_BUILTIN_PAVGB128,
17603   IX86_BUILTIN_PAVGW128,
17604
17605   IX86_BUILTIN_PCMPEQB128,
17606   IX86_BUILTIN_PCMPEQW128,
17607   IX86_BUILTIN_PCMPEQD128,
17608   IX86_BUILTIN_PCMPGTB128,
17609   IX86_BUILTIN_PCMPGTW128,
17610   IX86_BUILTIN_PCMPGTD128,
17611
17612   IX86_BUILTIN_PMADDWD128,
17613
17614   IX86_BUILTIN_PMAXSW128,
17615   IX86_BUILTIN_PMAXUB128,
17616   IX86_BUILTIN_PMINSW128,
17617   IX86_BUILTIN_PMINUB128,
17618
17619   IX86_BUILTIN_PMULUDQ,
17620   IX86_BUILTIN_PMULUDQ128,
17621   IX86_BUILTIN_PMULHUW128,
17622   IX86_BUILTIN_PMULHW128,
17623   IX86_BUILTIN_PMULLW128,
17624
17625   IX86_BUILTIN_PSADBW128,
17626   IX86_BUILTIN_PSHUFHW,
17627   IX86_BUILTIN_PSHUFLW,
17628   IX86_BUILTIN_PSHUFD,
17629
17630   IX86_BUILTIN_PSLLDQI128,
17631   IX86_BUILTIN_PSLLWI128,
17632   IX86_BUILTIN_PSLLDI128,
17633   IX86_BUILTIN_PSLLQI128,
17634   IX86_BUILTIN_PSRAWI128,
17635   IX86_BUILTIN_PSRADI128,
17636   IX86_BUILTIN_PSRLDQI128,
17637   IX86_BUILTIN_PSRLWI128,
17638   IX86_BUILTIN_PSRLDI128,
17639   IX86_BUILTIN_PSRLQI128,
17640
17641   IX86_BUILTIN_PSLLDQ128,
17642   IX86_BUILTIN_PSLLW128,
17643   IX86_BUILTIN_PSLLD128,
17644   IX86_BUILTIN_PSLLQ128,
17645   IX86_BUILTIN_PSRAW128,
17646   IX86_BUILTIN_PSRAD128,
17647   IX86_BUILTIN_PSRLW128,
17648   IX86_BUILTIN_PSRLD128,
17649   IX86_BUILTIN_PSRLQ128,
17650
17651   IX86_BUILTIN_PUNPCKHBW128,
17652   IX86_BUILTIN_PUNPCKHWD128,
17653   IX86_BUILTIN_PUNPCKHDQ128,
17654   IX86_BUILTIN_PUNPCKHQDQ128,
17655   IX86_BUILTIN_PUNPCKLBW128,
17656   IX86_BUILTIN_PUNPCKLWD128,
17657   IX86_BUILTIN_PUNPCKLDQ128,
17658   IX86_BUILTIN_PUNPCKLQDQ128,
17659
17660   IX86_BUILTIN_CLFLUSH,
17661   IX86_BUILTIN_MFENCE,
17662   IX86_BUILTIN_LFENCE,
17663
17664   /* SSE3.  */
17665   IX86_BUILTIN_ADDSUBPS,
17666   IX86_BUILTIN_HADDPS,
17667   IX86_BUILTIN_HSUBPS,
17668   IX86_BUILTIN_MOVSHDUP,
17669   IX86_BUILTIN_MOVSLDUP,
17670   IX86_BUILTIN_ADDSUBPD,
17671   IX86_BUILTIN_HADDPD,
17672   IX86_BUILTIN_HSUBPD,
17673   IX86_BUILTIN_LDDQU,
17674
17675   IX86_BUILTIN_MONITOR,
17676   IX86_BUILTIN_MWAIT,
17677
17678   /* SSSE3.  */
17679   IX86_BUILTIN_PHADDW,
17680   IX86_BUILTIN_PHADDD,
17681   IX86_BUILTIN_PHADDSW,
17682   IX86_BUILTIN_PHSUBW,
17683   IX86_BUILTIN_PHSUBD,
17684   IX86_BUILTIN_PHSUBSW,
17685   IX86_BUILTIN_PMADDUBSW,
17686   IX86_BUILTIN_PMULHRSW,
17687   IX86_BUILTIN_PSHUFB,
17688   IX86_BUILTIN_PSIGNB,
17689   IX86_BUILTIN_PSIGNW,
17690   IX86_BUILTIN_PSIGND,
17691   IX86_BUILTIN_PALIGNR,
17692   IX86_BUILTIN_PABSB,
17693   IX86_BUILTIN_PABSW,
17694   IX86_BUILTIN_PABSD,
17695
17696   IX86_BUILTIN_PHADDW128,
17697   IX86_BUILTIN_PHADDD128,
17698   IX86_BUILTIN_PHADDSW128,
17699   IX86_BUILTIN_PHSUBW128,
17700   IX86_BUILTIN_PHSUBD128,
17701   IX86_BUILTIN_PHSUBSW128,
17702   IX86_BUILTIN_PMADDUBSW128,
17703   IX86_BUILTIN_PMULHRSW128,
17704   IX86_BUILTIN_PSHUFB128,
17705   IX86_BUILTIN_PSIGNB128,
17706   IX86_BUILTIN_PSIGNW128,
17707   IX86_BUILTIN_PSIGND128,
17708   IX86_BUILTIN_PALIGNR128,
17709   IX86_BUILTIN_PABSB128,
17710   IX86_BUILTIN_PABSW128,
17711   IX86_BUILTIN_PABSD128,
17712
17713   /* AMDFAM10 - SSE4A New Instructions.  */
17714   IX86_BUILTIN_MOVNTSD,
17715   IX86_BUILTIN_MOVNTSS,
17716   IX86_BUILTIN_EXTRQI,
17717   IX86_BUILTIN_EXTRQ,
17718   IX86_BUILTIN_INSERTQI,
17719   IX86_BUILTIN_INSERTQ,
17720
17721   /* SSE4.1.  */
17722   IX86_BUILTIN_BLENDPD,
17723   IX86_BUILTIN_BLENDPS,
17724   IX86_BUILTIN_BLENDVPD,
17725   IX86_BUILTIN_BLENDVPS,
17726   IX86_BUILTIN_PBLENDVB128,
17727   IX86_BUILTIN_PBLENDW128,
17728
17729   IX86_BUILTIN_DPPD,
17730   IX86_BUILTIN_DPPS,
17731
17732   IX86_BUILTIN_INSERTPS128,
17733
17734   IX86_BUILTIN_MOVNTDQA,
17735   IX86_BUILTIN_MPSADBW128,
17736   IX86_BUILTIN_PACKUSDW128,
17737   IX86_BUILTIN_PCMPEQQ,
17738   IX86_BUILTIN_PHMINPOSUW128,
17739
17740   IX86_BUILTIN_PMAXSB128,
17741   IX86_BUILTIN_PMAXSD128,
17742   IX86_BUILTIN_PMAXUD128,
17743   IX86_BUILTIN_PMAXUW128,
17744
17745   IX86_BUILTIN_PMINSB128,
17746   IX86_BUILTIN_PMINSD128,
17747   IX86_BUILTIN_PMINUD128,
17748   IX86_BUILTIN_PMINUW128,
17749
17750   IX86_BUILTIN_PMOVSXBW128,
17751   IX86_BUILTIN_PMOVSXBD128,
17752   IX86_BUILTIN_PMOVSXBQ128,
17753   IX86_BUILTIN_PMOVSXWD128,
17754   IX86_BUILTIN_PMOVSXWQ128,
17755   IX86_BUILTIN_PMOVSXDQ128,
17756
17757   IX86_BUILTIN_PMOVZXBW128,
17758   IX86_BUILTIN_PMOVZXBD128,
17759   IX86_BUILTIN_PMOVZXBQ128,
17760   IX86_BUILTIN_PMOVZXWD128,
17761   IX86_BUILTIN_PMOVZXWQ128,
17762   IX86_BUILTIN_PMOVZXDQ128,
17763
17764   IX86_BUILTIN_PMULDQ128,
17765   IX86_BUILTIN_PMULLD128,
17766
17767   IX86_BUILTIN_ROUNDPD,
17768   IX86_BUILTIN_ROUNDPS,
17769   IX86_BUILTIN_ROUNDSD,
17770   IX86_BUILTIN_ROUNDSS,
17771
17772   IX86_BUILTIN_PTESTZ,
17773   IX86_BUILTIN_PTESTC,
17774   IX86_BUILTIN_PTESTNZC,
17775
17776   IX86_BUILTIN_VEC_INIT_V2SI,
17777   IX86_BUILTIN_VEC_INIT_V4HI,
17778   IX86_BUILTIN_VEC_INIT_V8QI,
17779   IX86_BUILTIN_VEC_EXT_V2DF,
17780   IX86_BUILTIN_VEC_EXT_V2DI,
17781   IX86_BUILTIN_VEC_EXT_V4SF,
17782   IX86_BUILTIN_VEC_EXT_V4SI,
17783   IX86_BUILTIN_VEC_EXT_V8HI,
17784   IX86_BUILTIN_VEC_EXT_V2SI,
17785   IX86_BUILTIN_VEC_EXT_V4HI,
17786   IX86_BUILTIN_VEC_EXT_V16QI,
17787   IX86_BUILTIN_VEC_SET_V2DI,
17788   IX86_BUILTIN_VEC_SET_V4SF,
17789   IX86_BUILTIN_VEC_SET_V4SI,
17790   IX86_BUILTIN_VEC_SET_V8HI,
17791   IX86_BUILTIN_VEC_SET_V4HI,
17792   IX86_BUILTIN_VEC_SET_V16QI,
17793
17794   IX86_BUILTIN_VEC_PACK_SFIX,
17795
17796   /* SSE4.2.  */
17797   IX86_BUILTIN_CRC32QI,
17798   IX86_BUILTIN_CRC32HI,
17799   IX86_BUILTIN_CRC32SI,
17800   IX86_BUILTIN_CRC32DI,
17801
17802   IX86_BUILTIN_PCMPESTRI128,
17803   IX86_BUILTIN_PCMPESTRM128,
17804   IX86_BUILTIN_PCMPESTRA128,
17805   IX86_BUILTIN_PCMPESTRC128,
17806   IX86_BUILTIN_PCMPESTRO128,
17807   IX86_BUILTIN_PCMPESTRS128,
17808   IX86_BUILTIN_PCMPESTRZ128,
17809   IX86_BUILTIN_PCMPISTRI128,
17810   IX86_BUILTIN_PCMPISTRM128,
17811   IX86_BUILTIN_PCMPISTRA128,
17812   IX86_BUILTIN_PCMPISTRC128,
17813   IX86_BUILTIN_PCMPISTRO128,
17814   IX86_BUILTIN_PCMPISTRS128,
17815   IX86_BUILTIN_PCMPISTRZ128,
17816
17817   IX86_BUILTIN_PCMPGTQ,
17818
17819   /* AES instructions */
17820   IX86_BUILTIN_AESENC128,
17821   IX86_BUILTIN_AESENCLAST128,
17822   IX86_BUILTIN_AESDEC128,
17823   IX86_BUILTIN_AESDECLAST128,
17824   IX86_BUILTIN_AESIMC128,
17825   IX86_BUILTIN_AESKEYGENASSIST128,
17826
17827   /* PCLMUL instruction */
17828   IX86_BUILTIN_PCLMULQDQ128,
17829
17830   /* TFmode support builtins.  */
17831   IX86_BUILTIN_INFQ,
17832   IX86_BUILTIN_FABSQ,
17833   IX86_BUILTIN_COPYSIGNQ,
17834
17835   /* SSE5 instructions */
17836   IX86_BUILTIN_FMADDSS,
17837   IX86_BUILTIN_FMADDSD,
17838   IX86_BUILTIN_FMADDPS,
17839   IX86_BUILTIN_FMADDPD,
17840   IX86_BUILTIN_FMSUBSS,
17841   IX86_BUILTIN_FMSUBSD,
17842   IX86_BUILTIN_FMSUBPS,
17843   IX86_BUILTIN_FMSUBPD,
17844   IX86_BUILTIN_FNMADDSS,
17845   IX86_BUILTIN_FNMADDSD,
17846   IX86_BUILTIN_FNMADDPS,
17847   IX86_BUILTIN_FNMADDPD,
17848   IX86_BUILTIN_FNMSUBSS,
17849   IX86_BUILTIN_FNMSUBSD,
17850   IX86_BUILTIN_FNMSUBPS,
17851   IX86_BUILTIN_FNMSUBPD,
17852   IX86_BUILTIN_PCMOV_V2DI,
17853   IX86_BUILTIN_PCMOV_V4SI,
17854   IX86_BUILTIN_PCMOV_V8HI,
17855   IX86_BUILTIN_PCMOV_V16QI,
17856   IX86_BUILTIN_PCMOV_V4SF,
17857   IX86_BUILTIN_PCMOV_V2DF,
17858   IX86_BUILTIN_PPERM,
17859   IX86_BUILTIN_PERMPS,
17860   IX86_BUILTIN_PERMPD,
17861   IX86_BUILTIN_PMACSSWW,
17862   IX86_BUILTIN_PMACSWW,
17863   IX86_BUILTIN_PMACSSWD,
17864   IX86_BUILTIN_PMACSWD,
17865   IX86_BUILTIN_PMACSSDD,
17866   IX86_BUILTIN_PMACSDD,
17867   IX86_BUILTIN_PMACSSDQL,
17868   IX86_BUILTIN_PMACSSDQH,
17869   IX86_BUILTIN_PMACSDQL,
17870   IX86_BUILTIN_PMACSDQH,
17871   IX86_BUILTIN_PMADCSSWD,
17872   IX86_BUILTIN_PMADCSWD,
17873   IX86_BUILTIN_PHADDBW,
17874   IX86_BUILTIN_PHADDBD,
17875   IX86_BUILTIN_PHADDBQ,
17876   IX86_BUILTIN_PHADDWD,
17877   IX86_BUILTIN_PHADDWQ,
17878   IX86_BUILTIN_PHADDDQ,
17879   IX86_BUILTIN_PHADDUBW,
17880   IX86_BUILTIN_PHADDUBD,
17881   IX86_BUILTIN_PHADDUBQ,
17882   IX86_BUILTIN_PHADDUWD,
17883   IX86_BUILTIN_PHADDUWQ,
17884   IX86_BUILTIN_PHADDUDQ,
17885   IX86_BUILTIN_PHSUBBW,
17886   IX86_BUILTIN_PHSUBWD,
17887   IX86_BUILTIN_PHSUBDQ,
17888   IX86_BUILTIN_PROTB,
17889   IX86_BUILTIN_PROTW,
17890   IX86_BUILTIN_PROTD,
17891   IX86_BUILTIN_PROTQ,
17892   IX86_BUILTIN_PROTB_IMM,
17893   IX86_BUILTIN_PROTW_IMM,
17894   IX86_BUILTIN_PROTD_IMM,
17895   IX86_BUILTIN_PROTQ_IMM,
17896   IX86_BUILTIN_PSHLB,
17897   IX86_BUILTIN_PSHLW,
17898   IX86_BUILTIN_PSHLD,
17899   IX86_BUILTIN_PSHLQ,
17900   IX86_BUILTIN_PSHAB,
17901   IX86_BUILTIN_PSHAW,
17902   IX86_BUILTIN_PSHAD,
17903   IX86_BUILTIN_PSHAQ,
17904   IX86_BUILTIN_FRCZSS,
17905   IX86_BUILTIN_FRCZSD,
17906   IX86_BUILTIN_FRCZPS,
17907   IX86_BUILTIN_FRCZPD,
17908   IX86_BUILTIN_CVTPH2PS,
17909   IX86_BUILTIN_CVTPS2PH,
17910
17911   IX86_BUILTIN_COMEQSS,
17912   IX86_BUILTIN_COMNESS,
17913   IX86_BUILTIN_COMLTSS,
17914   IX86_BUILTIN_COMLESS,
17915   IX86_BUILTIN_COMGTSS,
17916   IX86_BUILTIN_COMGESS,
17917   IX86_BUILTIN_COMUEQSS,
17918   IX86_BUILTIN_COMUNESS,
17919   IX86_BUILTIN_COMULTSS,
17920   IX86_BUILTIN_COMULESS,
17921   IX86_BUILTIN_COMUGTSS,
17922   IX86_BUILTIN_COMUGESS,
17923   IX86_BUILTIN_COMORDSS,
17924   IX86_BUILTIN_COMUNORDSS,
17925   IX86_BUILTIN_COMFALSESS,
17926   IX86_BUILTIN_COMTRUESS,
17927
17928   IX86_BUILTIN_COMEQSD,
17929   IX86_BUILTIN_COMNESD,
17930   IX86_BUILTIN_COMLTSD,
17931   IX86_BUILTIN_COMLESD,
17932   IX86_BUILTIN_COMGTSD,
17933   IX86_BUILTIN_COMGESD,
17934   IX86_BUILTIN_COMUEQSD,
17935   IX86_BUILTIN_COMUNESD,
17936   IX86_BUILTIN_COMULTSD,
17937   IX86_BUILTIN_COMULESD,
17938   IX86_BUILTIN_COMUGTSD,
17939   IX86_BUILTIN_COMUGESD,
17940   IX86_BUILTIN_COMORDSD,
17941   IX86_BUILTIN_COMUNORDSD,
17942   IX86_BUILTIN_COMFALSESD,
17943   IX86_BUILTIN_COMTRUESD,
17944
17945   IX86_BUILTIN_COMEQPS,
17946   IX86_BUILTIN_COMNEPS,
17947   IX86_BUILTIN_COMLTPS,
17948   IX86_BUILTIN_COMLEPS,
17949   IX86_BUILTIN_COMGTPS,
17950   IX86_BUILTIN_COMGEPS,
17951   IX86_BUILTIN_COMUEQPS,
17952   IX86_BUILTIN_COMUNEPS,
17953   IX86_BUILTIN_COMULTPS,
17954   IX86_BUILTIN_COMULEPS,
17955   IX86_BUILTIN_COMUGTPS,
17956   IX86_BUILTIN_COMUGEPS,
17957   IX86_BUILTIN_COMORDPS,
17958   IX86_BUILTIN_COMUNORDPS,
17959   IX86_BUILTIN_COMFALSEPS,
17960   IX86_BUILTIN_COMTRUEPS,
17961
17962   IX86_BUILTIN_COMEQPD,
17963   IX86_BUILTIN_COMNEPD,
17964   IX86_BUILTIN_COMLTPD,
17965   IX86_BUILTIN_COMLEPD,
17966   IX86_BUILTIN_COMGTPD,
17967   IX86_BUILTIN_COMGEPD,
17968   IX86_BUILTIN_COMUEQPD,
17969   IX86_BUILTIN_COMUNEPD,
17970   IX86_BUILTIN_COMULTPD,
17971   IX86_BUILTIN_COMULEPD,
17972   IX86_BUILTIN_COMUGTPD,
17973   IX86_BUILTIN_COMUGEPD,
17974   IX86_BUILTIN_COMORDPD,
17975   IX86_BUILTIN_COMUNORDPD,
17976   IX86_BUILTIN_COMFALSEPD,
17977   IX86_BUILTIN_COMTRUEPD,
17978
17979   IX86_BUILTIN_PCOMEQUB,
17980   IX86_BUILTIN_PCOMNEUB,
17981   IX86_BUILTIN_PCOMLTUB,
17982   IX86_BUILTIN_PCOMLEUB,
17983   IX86_BUILTIN_PCOMGTUB,
17984   IX86_BUILTIN_PCOMGEUB,
17985   IX86_BUILTIN_PCOMFALSEUB,
17986   IX86_BUILTIN_PCOMTRUEUB,
17987   IX86_BUILTIN_PCOMEQUW,
17988   IX86_BUILTIN_PCOMNEUW,
17989   IX86_BUILTIN_PCOMLTUW,
17990   IX86_BUILTIN_PCOMLEUW,
17991   IX86_BUILTIN_PCOMGTUW,
17992   IX86_BUILTIN_PCOMGEUW,
17993   IX86_BUILTIN_PCOMFALSEUW,
17994   IX86_BUILTIN_PCOMTRUEUW,
17995   IX86_BUILTIN_PCOMEQUD,
17996   IX86_BUILTIN_PCOMNEUD,
17997   IX86_BUILTIN_PCOMLTUD,
17998   IX86_BUILTIN_PCOMLEUD,
17999   IX86_BUILTIN_PCOMGTUD,
18000   IX86_BUILTIN_PCOMGEUD,
18001   IX86_BUILTIN_PCOMFALSEUD,
18002   IX86_BUILTIN_PCOMTRUEUD,
18003   IX86_BUILTIN_PCOMEQUQ,
18004   IX86_BUILTIN_PCOMNEUQ,
18005   IX86_BUILTIN_PCOMLTUQ,
18006   IX86_BUILTIN_PCOMLEUQ,
18007   IX86_BUILTIN_PCOMGTUQ,
18008   IX86_BUILTIN_PCOMGEUQ,
18009   IX86_BUILTIN_PCOMFALSEUQ,
18010   IX86_BUILTIN_PCOMTRUEUQ,
18011
18012   IX86_BUILTIN_PCOMEQB,
18013   IX86_BUILTIN_PCOMNEB,
18014   IX86_BUILTIN_PCOMLTB,
18015   IX86_BUILTIN_PCOMLEB,
18016   IX86_BUILTIN_PCOMGTB,
18017   IX86_BUILTIN_PCOMGEB,
18018   IX86_BUILTIN_PCOMFALSEB,
18019   IX86_BUILTIN_PCOMTRUEB,
18020   IX86_BUILTIN_PCOMEQW,
18021   IX86_BUILTIN_PCOMNEW,
18022   IX86_BUILTIN_PCOMLTW,
18023   IX86_BUILTIN_PCOMLEW,
18024   IX86_BUILTIN_PCOMGTW,
18025   IX86_BUILTIN_PCOMGEW,
18026   IX86_BUILTIN_PCOMFALSEW,
18027   IX86_BUILTIN_PCOMTRUEW,
18028   IX86_BUILTIN_PCOMEQD,
18029   IX86_BUILTIN_PCOMNED,
18030   IX86_BUILTIN_PCOMLTD,
18031   IX86_BUILTIN_PCOMLED,
18032   IX86_BUILTIN_PCOMGTD,
18033   IX86_BUILTIN_PCOMGED,
18034   IX86_BUILTIN_PCOMFALSED,
18035   IX86_BUILTIN_PCOMTRUED,
18036   IX86_BUILTIN_PCOMEQQ,
18037   IX86_BUILTIN_PCOMNEQ,
18038   IX86_BUILTIN_PCOMLTQ,
18039   IX86_BUILTIN_PCOMLEQ,
18040   IX86_BUILTIN_PCOMGTQ,
18041   IX86_BUILTIN_PCOMGEQ,
18042   IX86_BUILTIN_PCOMFALSEQ,
18043   IX86_BUILTIN_PCOMTRUEQ,
18044
        /* Number of builtin codes; used as the size of the ix86_builtins
           decl table.  Keep this enumerator last.  */
18045   IX86_BUILTIN_MAX
18046 };
18047
18048 /* Table for the ix86 builtin decls.  It has IX86_BUILTIN_MAX entries and
         is indexed by enum ix86_builtins code; def_builtin stores the decl of
         every builtin it registers in the slot for that builtin's code.  */
18049 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
18050
18051 /* Add an ix86 target builtin function with CODE, NAME and TYPE.  Do so
18052  * only if ix86_isa_flags shares at least one bit with MASK and, when MASK
18053  * contains OPTION_MASK_ISA_64BIT, TARGET_64BIT holds as well.  Stores the
18054  * function decl in the ix86_builtins array at index CODE.
       * Returns the function decl or NULL_TREE, if the builtin was not added.  */
18055
18056 static inline tree
18057 def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
18058 {
18059   tree decl = NULL_TREE;
18060
        /* `mask & ix86_isa_flags' is evaluated first (& binds tighter than
           &&); the second operand rejects 64-bit-only builtins when not
           TARGET_64BIT.  */
18061   if (mask & ix86_isa_flags
18062       && (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT))
18063     {
            /* Register the builtin with class BUILT_IN_MD and remember its
               decl in the table slot for CODE.  */
18064       decl = add_builtin_function (name, type, code, BUILT_IN_MD,
18065                                    NULL, NULL_TREE);
18066       ix86_builtins[(int) code] = decl;
18067     }
18068
18069   return decl;
18070 }
18071
18072 /* Like def_builtin, but also marks the function decl "const" by setting
         TREE_READONLY on it.  MASK, NAME, TYPE and CODE are passed through to
         def_builtin unchanged.  Returns the decl, or NULL_TREE if def_builtin
         did not add the builtin (in which case nothing is marked).  */
18073
18074 static inline tree
18075 def_builtin_const (int mask, const char *name, tree type,
18076                    enum ix86_builtins code)
18077 {
18078   tree decl = def_builtin (mask, name, type, code);
18079   if (decl)
18080     TREE_READONLY (decl) = 1;
18081   return decl;
18082 }
18083
18084 /* Bits for builtin_description.flag.  */
18085
18086 /* Set when we don't support the comparison natively and have to swap the
18087    comparison (see swap_comparison) in order to support it.  */
18088 #define BUILTIN_DESC_SWAP_OPERANDS      1
18089
      /* One row of a builtin description table.  The tables bdesc_comi,
         bdesc_pcmpestr and bdesc_pcmpistr are arrays of this structure, with
         the initializers given in the field order below.  */
18090 struct builtin_description
18091 {
18092   const unsigned int mask;              /* OPTION_MASK_ISA_* bits for the builtin.  */
18093   const enum insn_code icode;           /* CODE_FOR_* insn code.  */
18094   const char *const name;               /* Builtin name, e.g. "__builtin_ia32_comieq".  */
18095   const enum ix86_builtins code;        /* IX86_BUILTIN_* code of the builtin.  */
18096   const enum rtx_code comparison;       /* Comparison code; UNKNOWN in the pcmp?str tables.  */
18097   const int flag;                       /* 0, BUILTIN_DESC_* bits, or an (int) CC mode
                                                   in the pcmp?str tables.  */
18098 };
18099
18100 static const struct builtin_description bdesc_comi[] =
18101 {
        /* Rows for the __builtin_ia32_comi* and __builtin_ia32_ucomi*
           builtins.  The first twelve rows are the OPTION_MASK_ISA_SSE
           variants (CODE_FOR_sse_comi, CODE_FOR_sse_ucomi), the last twelve
           the OPTION_MASK_ISA_SSE2 variants (CODE_FOR_sse2_comi,
           CODE_FOR_sse2_ucomi).  Each row carries the rtx comparison code for
           its builtin; the flag field is 0 throughout.  */
18102   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
18103   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
18104   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
18105   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
18106   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
18107   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
18108   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
18109   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
18110   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
18111   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
18112   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
18113   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
18114   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
18115   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
18116   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
18117   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
18118   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
18119   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
18120   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
18121   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
18122   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
18123   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
18124   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
18125   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
18126 };
18127
18128 static const struct builtin_description bdesc_pcmpestr[] =
18129 {
18130   /* SSE4.2.  All rows share the insn code CODE_FOR_sse4_2_pcmpestr and
           have comparison UNKNOWN.  The ...pcmpestri128 and ...pcmpestrm128
           rows have flag 0; the other five store a CC mode (CCAmode, CCCmode,
           CCOmode, CCSmode, CCZmode) cast to int in the flag field.
           NOTE(review): presumably that mode selects which condition flag the
           builtin reports -- confirm against the code that expands this
           table.  */
18131   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
18132   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
18133   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
18134   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
18135   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
18136   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
18137   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
18138 };
18139
18140 static const struct builtin_description bdesc_pcmpistr[] =
18141 {
18142   /* SSE4.2.  All rows share the insn code CODE_FOR_sse4_2_pcmpistr and
           have comparison UNKNOWN.  The ...pcmpistri128 and ...pcmpistrm128
           rows have flag 0; the other five store a CC mode (CCAmode, CCCmode,
           CCOmode, CCSmode, CCZmode) cast to int in the flag field.
           NOTE(review): presumably that mode selects which condition flag the
           builtin reports -- confirm against the code that expands this
           table.  */
18143   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
18144   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
18145   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
18146   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
18147   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
18148   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
18149   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
18150 };
18151
18152 /* Special builtin types.  SPECIAL_FTYPE_UNKNOWN is the first enumerator
         and therefore has the value 0.  NOTE(review): the remaining names
         appear to follow a RETURN_FTYPE_ARGUMENTS pattern, with a P prefix for
         pointer and PC for pointer-to-const arguments -- confirm against the
         code that consumes these values.  */
18153 enum ix86_special_builtin_type
18154 {
18155   SPECIAL_FTYPE_UNKNOWN,
18156   VOID_FTYPE_VOID,
18157   V16QI_FTYPE_PCCHAR,
18158   V4SF_FTYPE_PCFLOAT,
18159   V2DF_FTYPE_PCDOUBLE,
18160   V4SF_FTYPE_V4SF_PCV2SF,
18161   V2DF_FTYPE_V2DF_PCDOUBLE,
18162   V2DI_FTYPE_PV2DI,
18163   VOID_FTYPE_PV2SF_V4SF,
18164   VOID_FTYPE_PV2DI_V2DI,
18165   VOID_FTYPE_PCHAR_V16QI,
18166   VOID_FTYPE_PFLOAT_V4SF,
18167   VOID_FTYPE_PDOUBLE_V2DF,
18168   VOID_FTYPE_PDI_DI,
18169   VOID_FTYPE_PINT_INT
18170 };
18171
/* Function-type tags for the ordinary builtins.  One of these, cast to
   int, is stored in the last field of each bdesc_args entry.  The names
   read <result>_FTYPE_<arguments>; some carry a trailing qualifier
   (_PTEST, _VEC_MERGE, _COUNT, _SWAP).
   NOTE(review): the meaning of those qualifiers is decided where the
   tags are consumed, which is not visible from this enum.
   Values are assigned by position, so keep the order when adding
   tags.  The groups below only describe the shape of the names.  */
enum ix86_builtin_type
{
  FTYPE_UNKNOWN = 0,

  /* Scalar floating-point result.  */
  FLOAT128_FTYPE_FLOAT128, FLOAT_FTYPE_FLOAT,
  FLOAT128_FTYPE_FLOAT128_FLOAT128,

  /* INT / INT64 result from vector arguments.  */
  INT_FTYPE_V2DI_V2DI_PTEST,
  INT64_FTYPE_V4SF, INT64_FTYPE_V2DF,
  INT_FTYPE_V16QI, INT_FTYPE_V8QI, INT_FTYPE_V4SF, INT_FTYPE_V2DF,

  /* Vector result, one vector argument.  */
  V16QI_FTYPE_V16QI,
  V8HI_FTYPE_V8HI, V8HI_FTYPE_V16QI,
  V8QI_FTYPE_V8QI,
  V4SI_FTYPE_V4SI, V4SI_FTYPE_V16QI, V4SI_FTYPE_V8HI,
  V4SI_FTYPE_V4SF, V4SI_FTYPE_V2DF,
  V4HI_FTYPE_V4HI,
  V4SF_FTYPE_V4SF, V4SF_FTYPE_V4SF_VEC_MERGE,
  V4SF_FTYPE_V4SI, V4SF_FTYPE_V2DF,
  V2DI_FTYPE_V2DI, V2DI_FTYPE_V16QI, V2DI_FTYPE_V8HI, V2DI_FTYPE_V4SI,
  V2DF_FTYPE_V2DF, V2DF_FTYPE_V2DF_VEC_MERGE,
  V2DF_FTYPE_V4SI, V2DF_FTYPE_V4SF, V2DF_FTYPE_V2SI,
  V2SI_FTYPE_V2SI, V2SI_FTYPE_V4SF, V2SI_FTYPE_V2SF, V2SI_FTYPE_V2DF,
  V2SF_FTYPE_V2SF, V2SF_FTYPE_V2SI,

  /* Vector result, two arguments (including the _COUNT and _SWAP
     variants and the vector-plus-SI/DI forms).  */
  V16QI_FTYPE_V16QI_V16QI, V16QI_FTYPE_V8HI_V8HI,
  V8QI_FTYPE_V8QI_V8QI, V8QI_FTYPE_V4HI_V4HI,
  V8HI_FTYPE_V8HI_V8HI, V8HI_FTYPE_V8HI_V8HI_COUNT,
  V8HI_FTYPE_V16QI_V16QI, V8HI_FTYPE_V4SI_V4SI,
  V8HI_FTYPE_V8HI_SI_COUNT,
  V4SI_FTYPE_V4SI_V4SI, V4SI_FTYPE_V4SI_V4SI_COUNT,
  V4SI_FTYPE_V8HI_V8HI, V4SI_FTYPE_V4SF_V4SF, V4SI_FTYPE_V2DF_V2DF,
  V4SI_FTYPE_V4SI_SI_COUNT,
  V4HI_FTYPE_V4HI_V4HI, V4HI_FTYPE_V4HI_V4HI_COUNT,
  V4HI_FTYPE_V8QI_V8QI, V4HI_FTYPE_V2SI_V2SI,
  V4HI_FTYPE_V4HI_SI_COUNT,
  V4SF_FTYPE_V4SF_V4SF, V4SF_FTYPE_V4SF_V4SF_SWAP,
  V4SF_FTYPE_V4SF_V2SI, V4SF_FTYPE_V4SF_V2DF,
  V4SF_FTYPE_V4SF_DI, V4SF_FTYPE_V4SF_SI,
  V2DI_FTYPE_V2DI_V2DI, V2DI_FTYPE_V2DI_V2DI_COUNT,
  V2DI_FTYPE_V16QI_V16QI, V2DI_FTYPE_V4SI_V4SI,
  V2DI_FTYPE_V2DI_V16QI, V2DI_FTYPE_V2DF_V2DF,
  V2DI_FTYPE_V2DI_SI_COUNT,
  V2SI_FTYPE_V2SI_V2SI, V2SI_FTYPE_V2SI_V2SI_COUNT,
  V2SI_FTYPE_V4HI_V4HI, V2SI_FTYPE_V2SF_V2SF,
  V2SI_FTYPE_V2SI_SI_COUNT,
  V2DF_FTYPE_V2DF_V2DF, V2DF_FTYPE_V2DF_V2DF_SWAP,
  V2DF_FTYPE_V2DF_V4SF, V2DF_FTYPE_V2DF_DI, V2DF_FTYPE_V2DF_SI,
  V2SF_FTYPE_V2SF_V2SF,
  V1DI_FTYPE_V1DI_V1DI, V1DI_FTYPE_V1DI_V1DI_COUNT,
  V1DI_FTYPE_V8QI_V8QI, V1DI_FTYPE_V2SI_V2SI,
  V1DI_FTYPE_V1DI_SI_COUNT,

  /* Unsigned scalar result, two unsigned scalar arguments.  */
  UINT64_FTYPE_UINT64_UINT64,
  UINT_FTYPE_UINT_UINT, UINT_FTYPE_UINT_USHORT, UINT_FTYPE_UINT_UCHAR,

  /* Vector result, one vector argument plus INT.  */
  V8HI_FTYPE_V8HI_INT, V4SI_FTYPE_V4SI_INT, V4HI_FTYPE_V4HI_INT,
  V4SF_FTYPE_V4SF_INT, V2DI_FTYPE_V2DI_INT, V2DI2TI_FTYPE_V2DI_INT,
  V2DF_FTYPE_V2DF_INT,

  /* Vector result, three vector arguments.  */
  V16QI_FTYPE_V16QI_V16QI_V16QI, V4SF_FTYPE_V4SF_V4SF_V4SF,
  V2DF_FTYPE_V2DF_V2DF_V2DF,

  /* Vector result, two vector arguments plus INT.  */
  V16QI_FTYPE_V16QI_V16QI_INT, V8HI_FTYPE_V8HI_V8HI_INT,
  V4SI_FTYPE_V4SI_V4SI_INT, V4SF_FTYPE_V4SF_V4SF_INT,
  V2DI_FTYPE_V2DI_V2DI_INT, V2DI2TI_FTYPE_V2DI_V2DI_INT,
  V1DI2DI_FTYPE_V1DI_V1DI_INT, V2DF_FTYPE_V2DF_V2DF_INT,

  /* V2DI result, vector argument(s) plus two UINTs.  */
  V2DI_FTYPE_V2DI_UINT_UINT, V2DI_FTYPE_V2DI_V2DI_UINT_UINT
};
18289
18290 /* Special builtins with variable number of arguments.  */
18291 static const struct builtin_description bdesc_special_args[] =
18292 {
18293   /* MMX */
18294   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
18295
18296   /* 3DNow! */
18297   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
18298
18299   /* SSE */
18300   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
18301   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
18302   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
18303
18304   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
18305   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
18306   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
18307   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
18308
18309   /* SSE or 3DNow!A  */
18310   { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
18311   { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PDI_DI },
18312
18313   /* SSE2 */
18314   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
18315   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
18316   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
18317   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
18318   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
18319   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
18320   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
18321   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
18322   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
18323
18324   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
18325   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
18326
18327   /* SSE3 */
18328   { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
18329
18330   /* SSE4.1 */
18331   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
18332
18333   /* SSE4A */
18334   { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
18335   { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
18336 };
18337
18338 /* Builtins with variable number of arguments.  */
18339 static const struct builtin_description bdesc_args[] =
18340 {
18341   /* MMX */
18342   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18343   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18344   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18345   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18346   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18347   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18348
18349   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18350   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18351   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18352   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18353   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18354   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18355   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18356   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18357
18358   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18359   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18360
18361   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18362   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18363   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18364   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18365
18366   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18367   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18368   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18369   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18370   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18371   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18372
18373   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18374   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18375   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18376   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18377   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI},
18378   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI},
18379
18380   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
18381   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
18382   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
18383
18384   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
18385
18386   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
18387   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
18388   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
18389   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
18390   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
18391   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
18392
18393   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
18394   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
18395   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
18396   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
18397   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
18398   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
18399
18400   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
18401   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
18402   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
18403   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
18404
18405   /* 3DNow! */
18406   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
18407   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
18408   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
18409   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
18410
18411   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18412   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18413   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18414   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
18415   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
18416   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
18417   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18418   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18419   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18420   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18421   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18422   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18423   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18424   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18425   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18426
18427   /* 3DNow!A */
18428   { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
18429   { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
18430   { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
18431   { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
18432   { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18433   { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18434
18435   /* SSE */
18436   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
18437   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
18438   { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
18439   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
18440   { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
18441   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
18442   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
18443   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
18444   { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
18445   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
18446   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
18447   { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
18448
18449   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
18450
18451   { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18452   { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18453   { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18454   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18455   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3,  "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18456   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3,  "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18457   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3,  "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18458   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3,  "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18459
18460   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
18461   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
18462   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
18463   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
18464   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
18465   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
18466   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
18467   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
18468   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
18469   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
18470   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP},
18471   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
18472   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
18473   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
18474   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
18475   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
18476   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
18477   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
18478   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
18479   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
18480   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
18481   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
18482
18483   { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18484   { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18485   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18486   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18487
18488   { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18489   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_nandv4sf3,  "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18490   { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18491   { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3,  "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18492
18493   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss,  "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18494   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp,  "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18495   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp,  "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18496   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18497   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18498
18499   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
18500   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
18501   { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, V4SF_FTYPE_V4SF_DI },
18502
18503   { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
18504
18505   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
18506   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
18507   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
18508
18509   /* SSE MMX or 3Dnow!A */
18510   { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18511   { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18512   { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18513
18514   { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18515   { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18516   { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18517   { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18518
18519   { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
18520   { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
18521
18522   { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
18523
18524   /* SSE2 */
18525   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
18526
18527   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF  },
18528   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
18529   { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
18530   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
18531   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
18532
18533   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
18534   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
18535   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
18536   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
18537   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
18538
18539   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
18540
18541   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
18542   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
18543   { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
18544   { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
18545
18546   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
18547   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
18548   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
18549
18550   { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18551   { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18552   { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18553   { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18554   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3,  "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18555   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3,  "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18556   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3,  "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18557   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3,  "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18558
18559   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
18560   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
18561   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
18562   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
18563   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP},
18564   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
18565   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
18566   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
18567   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
18568   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
18569   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
18570   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
18571   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
18572   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
18573   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
18574   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
18575   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
18576   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
18577   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
18578   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
18579
18580   { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18581   { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18582   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18583   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18584
18585   { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18586   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_nandv2df3,  "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18587   { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18588   { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3,  "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18589
18590   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd,  "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18591   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpckhpd_exp, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18592   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpcklpd_exp, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18593
18594   { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
18595
18596   { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18597   { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18598   { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18599   { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18600   { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18601   { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18602   { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18603   { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18604
18605   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18606   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18607   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18608   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18609   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18610   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18611   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18612   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18613
18614   { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18615   { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN,(int) V8HI_FTYPE_V8HI_V8HI },
18616
18617   { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18618   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18619   { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18620   { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18621
18622   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18623   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18624
18625   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18626   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18627   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI  },
18628   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18629   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18630   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI  },
18631
18632   { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18633   { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18634   { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18635   { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18636
18637   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18638   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI  },
18639   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN,  (int) V4SI_FTYPE_V4SI_V4SI },
18640   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18641   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18642   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18643   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18644   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18645
18646   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
18647   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
18648   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
18649
18650   { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18651   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
18652
18653   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
18654   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
18655
18656   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
18657
18658   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
18659   { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
18660   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
18661   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
18662
18663   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_INT },
18664   { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
18665   { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
18666   { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
18667   { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
18668   { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
18669   { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
18670
18671   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_INT },
18672   { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
18673   { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
18674   { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
18675   { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
18676   { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
18677   { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
18678
18679   { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
18680   { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
18681   { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
18682   { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
18683
18684   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
18685   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
18686   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
18687
18688   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
18689
18690   /* SSE2 MMX */
18691   { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
18692   { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
18693
18694   /* SSE3 */
18695   { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF},
18696   { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
18697
18698   { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18699   { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18700   { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18701   { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18702   { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18703   { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18704
18705   /* SSSE3 */
18706   { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
18707   { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
18708   { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
18709   { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
18710   { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
18711   { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
18712
18713   { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18714   { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18715   { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18716   { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18717   { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18718   { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18719   { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18720   { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18721   { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18722   { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18723   { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18724   { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18725   { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
18726   { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
18727   { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18728   { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18729   { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18730   { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18731   { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18732   { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18733   { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18734   { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18735   { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18736   { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18737
18738   /* SSSE3 (palignr) */
18739   { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_V2DI_INT },
18740   { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI2DI_FTYPE_V1DI_V1DI_INT },
18741
18742   /* SSE4.1 */
18743   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
18744   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
18745   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
18746   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
18747   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
18748   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
18749   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
18750   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
18751   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
18752   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
18753
18754   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
18755   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
18756   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
18757   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
18758   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
18759   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
18760   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
18761   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
18762   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
18763   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
18764   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
18765   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
18766   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
18767
18768   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
18769   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18770   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18771   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18772   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18773   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18774   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18775   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18776   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18777   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18778   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
18779   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18780
18781   /* SSE4.1 and SSE5 */
18782   { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
18783   { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
18784   { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
18785   { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
18786
18787   { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
18788   { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
18789   { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
18790
18791   /* SSE4.2 */
18792   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18793   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
18794   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
18795   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
18796   { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
18797
18798   /* SSE4A */
18799   { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
18800   { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
18801   { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
18802   { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18803
18804   /* AES */
18805   { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
18806   { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
18807
18808   { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18809   { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18810   { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18811   { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18812
18813   /* PCLMUL */
18814   { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
18815
18816    /* 64bit */
18817   { OPTION_MASK_ISA_64BIT, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
18818   { OPTION_MASK_ISA_64BIT, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
18819 };
18820
18821 /* SSE5: operand-signature tags and the descriptor table for the SSE5 multi-argument builtins.  */
18822 enum multi_arg_type {
        /* Operand-signature tags for the SSE5 multi-argument builtins.  Each
           entry of bdesc_multi_arg stores one of these, cast to int, in its
           last field.  The enumerators take their implicit values from the
           order below, so MULTI_ARG_UNKNOWN is 0.
           NOTE(review): the names look like
           MULTI_ARG_<operand count>_<element mode>[_<variant>]; the code
           that decodes these tags is outside this chunk, so confirm there
           before relying on that reading.  */
18823   MULTI_ARG_UNKNOWN,

        /* MULTI_ARG_3_<mode>: in bdesc_multi_arg these tag the fmadd, fmsub,
           fnmadd and fnmsub builtins, the pcmov variants, pperm, and the
           pmacs and pmadcs families.  SF, DF, DI, SI, HI and QI are paired
           there with the v4sf, v2df, v2di, v4si, v8hi and v16qi insn
           patterns respectively.  */
18824   MULTI_ARG_3_SF,
18825   MULTI_ARG_3_DF,
18826   MULTI_ARG_3_DI,
18827   MULTI_ARG_3_SI,
18828   MULTI_ARG_3_SI_DI,	/* pmacssdql, pmacssdqh, pmacsdql, pmacsdqh.  */
18829   MULTI_ARG_3_HI,
18830   MULTI_ARG_3_HI_SI,	/* pmacsswd, pmacswd, pmadcsswd, pmadcswd.  */
18831   MULTI_ARG_3_QI,
18832   MULTI_ARG_3_PERMPS,	/* __builtin_ia32_permps.  */
18833   MULTI_ARG_3_PERMPD,	/* __builtin_ia32_permpd.  */

        /* MULTI_ARG_2_<mode>: used by protq/protd/protw/protb, the psha and
           pshl families, and the scalar frczss/frczsd builtins.  */
18834   MULTI_ARG_2_SF,
18835   MULTI_ARG_2_DF,
18836   MULTI_ARG_2_DI,
18837   MULTI_ARG_2_SI,
18838   MULTI_ARG_2_HI,
18839   MULTI_ARG_2_QI,

        /* MULTI_ARG_2_<mode>_IMM: used by protqi/protdi/protwi/protbi, which
           map to the sse5_rotl patterns rather than the sse5_vrotl ones.
           NOTE(review): presumably the second operand is an immediate
           count -- confirm against the expander.  */
18840   MULTI_ARG_2_DI_IMM,
18841   MULTI_ARG_2_SI_IMM,
18842   MULTI_ARG_2_HI_IMM,
18843   MULTI_ARG_2_QI_IMM,

        /* MULTI_ARG_2_<mode>_CMP: used by the com and pcom comparison
           builtins.  Those table entries carry an rtx comparison code
           (EQ, NE, LT, ..., UNORDERED, LTU, ...) in the field before the
           tag.  */
18844   MULTI_ARG_2_SF_CMP,
18845   MULTI_ARG_2_DF_CMP,
18846   MULTI_ARG_2_DI_CMP,
18847   MULTI_ARG_2_SI_CMP,
18848   MULTI_ARG_2_HI_CMP,
18849   MULTI_ARG_2_QI_CMP,

        /* MULTI_ARG_2_<mode>_TF: used by the comfalse and comtrue builtins,
           whose entries carry COM_FALSE_S, COM_TRUE_S, COM_FALSE_P or
           COM_TRUE_P in the field before the tag.  Only the SF and DF
           users are visible in this chunk of the table.  */
18850   MULTI_ARG_2_DI_TF,
18851   MULTI_ARG_2_SI_TF,
18852   MULTI_ARG_2_HI_TF,
18853   MULTI_ARG_2_QI_TF,
18854   MULTI_ARG_2_SF_TF,
18855   MULTI_ARG_2_DF_TF,

        /* MULTI_ARG_1_<mode>: SF and DF are used by frczps and frczpd.  */
18856   MULTI_ARG_1_SF,
18857   MULTI_ARG_1_DF,
18858   MULTI_ARG_1_DI,
18859   MULTI_ARG_1_SI,
18860   MULTI_ARG_1_HI,
18861   MULTI_ARG_1_QI,

        /* MULTI_ARG_1_<mode>_<mode>: used by the phadd and phsub builtins,
           e.g. phaddbw uses QI_HI and phaddbq uses QI_DI.
           NOTE(review): presumably source element mode followed by result
           element mode -- confirm against the expander.  */
18862   MULTI_ARG_1_SI_DI,
18863   MULTI_ARG_1_HI_DI,
18864   MULTI_ARG_1_HI_SI,
18865   MULTI_ARG_1_QI_DI,
18866   MULTI_ARG_1_QI_SI,
18867   MULTI_ARG_1_QI_HI,
18868   MULTI_ARG_1_PH2PS,	/* __builtin_ia32_cvtph2ps.  */
18869   MULTI_ARG_1_PS2PH	/* __builtin_ia32_cvtps2ph.  */
18870 };
18871
18872 static const struct builtin_description bdesc_multi_arg[] =
18873 {
18874   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv4sf4,     "__builtin_ia32_fmaddss",    IX86_BUILTIN_FMADDSS,    0,            (int)MULTI_ARG_3_SF },
18875   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv2df4,     "__builtin_ia32_fmaddsd",    IX86_BUILTIN_FMADDSD,    0,            (int)MULTI_ARG_3_DF },
18876   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv4sf4,       "__builtin_ia32_fmaddps",    IX86_BUILTIN_FMADDPS,    0,            (int)MULTI_ARG_3_SF },
18877   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv2df4,       "__builtin_ia32_fmaddpd",    IX86_BUILTIN_FMADDPD,    0,            (int)MULTI_ARG_3_DF },
18878   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv4sf4,     "__builtin_ia32_fmsubss",    IX86_BUILTIN_FMSUBSS,    0,            (int)MULTI_ARG_3_SF },
18879   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv2df4,     "__builtin_ia32_fmsubsd",    IX86_BUILTIN_FMSUBSD,    0,            (int)MULTI_ARG_3_DF },
18880   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv4sf4,       "__builtin_ia32_fmsubps",    IX86_BUILTIN_FMSUBPS,    0,            (int)MULTI_ARG_3_SF },
18881   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv2df4,       "__builtin_ia32_fmsubpd",    IX86_BUILTIN_FMSUBPD,    0,            (int)MULTI_ARG_3_DF },
18882   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv4sf4,    "__builtin_ia32_fnmaddss",   IX86_BUILTIN_FNMADDSS,   0,            (int)MULTI_ARG_3_SF },
18883   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv2df4,    "__builtin_ia32_fnmaddsd",   IX86_BUILTIN_FNMADDSD,   0,            (int)MULTI_ARG_3_DF },
18884   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv4sf4,      "__builtin_ia32_fnmaddps",   IX86_BUILTIN_FNMADDPS,   0,            (int)MULTI_ARG_3_SF },
18885   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv2df4,      "__builtin_ia32_fnmaddpd",   IX86_BUILTIN_FNMADDPD,   0,            (int)MULTI_ARG_3_DF },
18886   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv4sf4,    "__builtin_ia32_fnmsubss",   IX86_BUILTIN_FNMSUBSS,   0,            (int)MULTI_ARG_3_SF },
18887   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv2df4,    "__builtin_ia32_fnmsubsd",   IX86_BUILTIN_FNMSUBSD,   0,            (int)MULTI_ARG_3_DF },
18888   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv4sf4,      "__builtin_ia32_fnmsubps",   IX86_BUILTIN_FNMSUBPS,   0,            (int)MULTI_ARG_3_SF },
18889   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv2df4,      "__builtin_ia32_fnmsubpd",   IX86_BUILTIN_FNMSUBPD,   0,            (int)MULTI_ARG_3_DF },
18890   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di,        "__builtin_ia32_pcmov",      IX86_BUILTIN_PCMOV_V2DI, 0,            (int)MULTI_ARG_3_DI },
18891   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di,        "__builtin_ia32_pcmov_v2di", IX86_BUILTIN_PCMOV_V2DI, 0,            (int)MULTI_ARG_3_DI },
18892   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4si,        "__builtin_ia32_pcmov_v4si", IX86_BUILTIN_PCMOV_V4SI, 0,            (int)MULTI_ARG_3_SI },
18893   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v8hi,        "__builtin_ia32_pcmov_v8hi", IX86_BUILTIN_PCMOV_V8HI, 0,            (int)MULTI_ARG_3_HI },
18894   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v16qi,       "__builtin_ia32_pcmov_v16qi",IX86_BUILTIN_PCMOV_V16QI,0,            (int)MULTI_ARG_3_QI },
18895   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2df,        "__builtin_ia32_pcmov_v2df", IX86_BUILTIN_PCMOV_V2DF, 0,            (int)MULTI_ARG_3_DF },
18896   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4sf,        "__builtin_ia32_pcmov_v4sf", IX86_BUILTIN_PCMOV_V4SF, 0,            (int)MULTI_ARG_3_SF },
18897   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pperm,             "__builtin_ia32_pperm",      IX86_BUILTIN_PPERM,      0,            (int)MULTI_ARG_3_QI },
18898   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv4sf,          "__builtin_ia32_permps",     IX86_BUILTIN_PERMPS,     0,            (int)MULTI_ARG_3_PERMPS },
18899   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv2df,          "__builtin_ia32_permpd",     IX86_BUILTIN_PERMPD,     0,            (int)MULTI_ARG_3_PERMPD },
18900   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssww,          "__builtin_ia32_pmacssww",   IX86_BUILTIN_PMACSSWW,   0,            (int)MULTI_ARG_3_HI },
18901   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsww,           "__builtin_ia32_pmacsww",    IX86_BUILTIN_PMACSWW,    0,            (int)MULTI_ARG_3_HI },
18902   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsswd,          "__builtin_ia32_pmacsswd",   IX86_BUILTIN_PMACSSWD,   0,            (int)MULTI_ARG_3_HI_SI },
18903   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacswd,           "__builtin_ia32_pmacswd",    IX86_BUILTIN_PMACSWD,    0,            (int)MULTI_ARG_3_HI_SI },
18904   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdd,          "__builtin_ia32_pmacssdd",   IX86_BUILTIN_PMACSSDD,   0,            (int)MULTI_ARG_3_SI },
18905   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdd,           "__builtin_ia32_pmacsdd",    IX86_BUILTIN_PMACSDD,    0,            (int)MULTI_ARG_3_SI },
18906   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdql,         "__builtin_ia32_pmacssdql",  IX86_BUILTIN_PMACSSDQL,  0,            (int)MULTI_ARG_3_SI_DI },
18907   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdqh,         "__builtin_ia32_pmacssdqh",  IX86_BUILTIN_PMACSSDQH,  0,            (int)MULTI_ARG_3_SI_DI },
18908   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdql,          "__builtin_ia32_pmacsdql",   IX86_BUILTIN_PMACSDQL,   0,            (int)MULTI_ARG_3_SI_DI },
18909   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdqh,          "__builtin_ia32_pmacsdqh",   IX86_BUILTIN_PMACSDQH,   0,            (int)MULTI_ARG_3_SI_DI },
18910   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcsswd,         "__builtin_ia32_pmadcsswd",  IX86_BUILTIN_PMADCSSWD,  0,            (int)MULTI_ARG_3_HI_SI },
18911   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcswd,          "__builtin_ia32_pmadcswd",   IX86_BUILTIN_PMADCSWD,   0,            (int)MULTI_ARG_3_HI_SI },
18912   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv2di3,        "__builtin_ia32_protq",      IX86_BUILTIN_PROTQ,      0,            (int)MULTI_ARG_2_DI },
18913   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv4si3,        "__builtin_ia32_protd",      IX86_BUILTIN_PROTD,      0,            (int)MULTI_ARG_2_SI },
18914   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv8hi3,        "__builtin_ia32_protw",      IX86_BUILTIN_PROTW,      0,            (int)MULTI_ARG_2_HI },
18915   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv16qi3,       "__builtin_ia32_protb",      IX86_BUILTIN_PROTB,      0,            (int)MULTI_ARG_2_QI },
18916   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv2di3,         "__builtin_ia32_protqi",     IX86_BUILTIN_PROTQ_IMM,  0,            (int)MULTI_ARG_2_DI_IMM },
18917   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv4si3,         "__builtin_ia32_protdi",     IX86_BUILTIN_PROTD_IMM,  0,            (int)MULTI_ARG_2_SI_IMM },
18918   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv8hi3,         "__builtin_ia32_protwi",     IX86_BUILTIN_PROTW_IMM,  0,            (int)MULTI_ARG_2_HI_IMM },
18919   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv16qi3,        "__builtin_ia32_protbi",     IX86_BUILTIN_PROTB_IMM,  0,            (int)MULTI_ARG_2_QI_IMM },
18920   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv2di3,         "__builtin_ia32_pshaq",      IX86_BUILTIN_PSHAQ,      0,            (int)MULTI_ARG_2_DI },
18921   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv4si3,         "__builtin_ia32_pshad",      IX86_BUILTIN_PSHAD,      0,            (int)MULTI_ARG_2_SI },
18922   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv8hi3,         "__builtin_ia32_pshaw",      IX86_BUILTIN_PSHAW,      0,            (int)MULTI_ARG_2_HI },
18923   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv16qi3,        "__builtin_ia32_pshab",      IX86_BUILTIN_PSHAB,      0,            (int)MULTI_ARG_2_QI },
18924   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv2di3,         "__builtin_ia32_pshlq",      IX86_BUILTIN_PSHLQ,      0,            (int)MULTI_ARG_2_DI },
18925   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv4si3,         "__builtin_ia32_pshld",      IX86_BUILTIN_PSHLD,      0,            (int)MULTI_ARG_2_SI },
18926   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv8hi3,         "__builtin_ia32_pshlw",      IX86_BUILTIN_PSHLW,      0,            (int)MULTI_ARG_2_HI },
18927   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv16qi3,        "__builtin_ia32_pshlb",      IX86_BUILTIN_PSHLB,      0,            (int)MULTI_ARG_2_QI },
18928   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv4sf2,       "__builtin_ia32_frczss",     IX86_BUILTIN_FRCZSS,     0,            (int)MULTI_ARG_2_SF },
18929   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv2df2,       "__builtin_ia32_frczsd",     IX86_BUILTIN_FRCZSD,     0,            (int)MULTI_ARG_2_DF },
18930   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv4sf2,         "__builtin_ia32_frczps",     IX86_BUILTIN_FRCZPS,     0,            (int)MULTI_ARG_1_SF },
18931   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv2df2,         "__builtin_ia32_frczpd",     IX86_BUILTIN_FRCZPD,     0,            (int)MULTI_ARG_1_DF },
18932   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtph2ps,          "__builtin_ia32_cvtph2ps",   IX86_BUILTIN_CVTPH2PS,   0,            (int)MULTI_ARG_1_PH2PS },
18933   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtps2ph,          "__builtin_ia32_cvtps2ph",   IX86_BUILTIN_CVTPS2PH,   0,            (int)MULTI_ARG_1_PS2PH },
18934   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbw,           "__builtin_ia32_phaddbw",    IX86_BUILTIN_PHADDBW,    0,            (int)MULTI_ARG_1_QI_HI },
18935   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbd,           "__builtin_ia32_phaddbd",    IX86_BUILTIN_PHADDBD,    0,            (int)MULTI_ARG_1_QI_SI },
18936   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbq,           "__builtin_ia32_phaddbq",    IX86_BUILTIN_PHADDBQ,    0,            (int)MULTI_ARG_1_QI_DI },
18937   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwd,           "__builtin_ia32_phaddwd",    IX86_BUILTIN_PHADDWD,    0,            (int)MULTI_ARG_1_HI_SI },
18938   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwq,           "__builtin_ia32_phaddwq",    IX86_BUILTIN_PHADDWQ,    0,            (int)MULTI_ARG_1_HI_DI },
18939   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadddq,           "__builtin_ia32_phadddq",    IX86_BUILTIN_PHADDDQ,    0,            (int)MULTI_ARG_1_SI_DI },
18940   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubw,          "__builtin_ia32_phaddubw",   IX86_BUILTIN_PHADDUBW,   0,            (int)MULTI_ARG_1_QI_HI },
18941   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubd,          "__builtin_ia32_phaddubd",   IX86_BUILTIN_PHADDUBD,   0,            (int)MULTI_ARG_1_QI_SI },
18942   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubq,          "__builtin_ia32_phaddubq",   IX86_BUILTIN_PHADDUBQ,   0,            (int)MULTI_ARG_1_QI_DI },
18943   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwd,          "__builtin_ia32_phadduwd",   IX86_BUILTIN_PHADDUWD,   0,            (int)MULTI_ARG_1_HI_SI },
18944   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwq,          "__builtin_ia32_phadduwq",   IX86_BUILTIN_PHADDUWQ,   0,            (int)MULTI_ARG_1_HI_DI },
18945   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddudq,          "__builtin_ia32_phaddudq",   IX86_BUILTIN_PHADDUDQ,   0,            (int)MULTI_ARG_1_SI_DI },
18946   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubbw,           "__builtin_ia32_phsubbw",    IX86_BUILTIN_PHSUBBW,    0,            (int)MULTI_ARG_1_QI_HI },
18947   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubwd,           "__builtin_ia32_phsubwd",    IX86_BUILTIN_PHSUBWD,    0,            (int)MULTI_ARG_1_HI_SI },
18948   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubdq,           "__builtin_ia32_phsubdq",    IX86_BUILTIN_PHSUBDQ,    0,            (int)MULTI_ARG_1_SI_DI },
18949
18950   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3,    "__builtin_ia32_comeqss",    IX86_BUILTIN_COMEQSS,    EQ,           (int)MULTI_ARG_2_SF_CMP },
18951   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3,    "__builtin_ia32_comness",    IX86_BUILTIN_COMNESS,    NE,           (int)MULTI_ARG_2_SF_CMP },
18952   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3,    "__builtin_ia32_comneqss",   IX86_BUILTIN_COMNESS,    NE,           (int)MULTI_ARG_2_SF_CMP },
18953   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3,    "__builtin_ia32_comltss",    IX86_BUILTIN_COMLTSS,    LT,           (int)MULTI_ARG_2_SF_CMP },
18954   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3,    "__builtin_ia32_comless",    IX86_BUILTIN_COMLESS,    LE,           (int)MULTI_ARG_2_SF_CMP },
18955   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3,    "__builtin_ia32_comgtss",    IX86_BUILTIN_COMGTSS,    GT,           (int)MULTI_ARG_2_SF_CMP },
18956   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3,    "__builtin_ia32_comgess",    IX86_BUILTIN_COMGESS,    GE,           (int)MULTI_ARG_2_SF_CMP },
18957   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3,    "__builtin_ia32_comueqss",   IX86_BUILTIN_COMUEQSS,   UNEQ,         (int)MULTI_ARG_2_SF_CMP },
18958   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3,    "__builtin_ia32_comuness",   IX86_BUILTIN_COMUNESS,   LTGT,         (int)MULTI_ARG_2_SF_CMP },
18959   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3,    "__builtin_ia32_comuneqss",  IX86_BUILTIN_COMUNESS,   LTGT,         (int)MULTI_ARG_2_SF_CMP },
18960   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3,    "__builtin_ia32_comunltss",  IX86_BUILTIN_COMULTSS,   UNLT,         (int)MULTI_ARG_2_SF_CMP },
18961   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3,    "__builtin_ia32_comunless",  IX86_BUILTIN_COMULESS,   UNLE,         (int)MULTI_ARG_2_SF_CMP },
18962   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3,    "__builtin_ia32_comungtss",  IX86_BUILTIN_COMUGTSS,   UNGT,         (int)MULTI_ARG_2_SF_CMP },
18963   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3,    "__builtin_ia32_comungess",  IX86_BUILTIN_COMUGESS,   UNGE,         (int)MULTI_ARG_2_SF_CMP },
18964   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3,    "__builtin_ia32_comordss",   IX86_BUILTIN_COMORDSS,   ORDERED,      (int)MULTI_ARG_2_SF_CMP },
18965   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3,    "__builtin_ia32_comunordss", IX86_BUILTIN_COMUNORDSS, UNORDERED,    (int)MULTI_ARG_2_SF_CMP },
18966
18967   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3,    "__builtin_ia32_comeqsd",    IX86_BUILTIN_COMEQSD,    EQ,           (int)MULTI_ARG_2_DF_CMP },
18968   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3,    "__builtin_ia32_comnesd",    IX86_BUILTIN_COMNESD,    NE,           (int)MULTI_ARG_2_DF_CMP },
18969   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3,    "__builtin_ia32_comneqsd",   IX86_BUILTIN_COMNESD,    NE,           (int)MULTI_ARG_2_DF_CMP },
18970   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3,    "__builtin_ia32_comltsd",    IX86_BUILTIN_COMLTSD,    LT,           (int)MULTI_ARG_2_DF_CMP },
18971   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3,    "__builtin_ia32_comlesd",    IX86_BUILTIN_COMLESD,    LE,           (int)MULTI_ARG_2_DF_CMP },
18972   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3,    "__builtin_ia32_comgtsd",    IX86_BUILTIN_COMGTSD,    GT,           (int)MULTI_ARG_2_DF_CMP },
18973   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3,    "__builtin_ia32_comgesd",    IX86_BUILTIN_COMGESD,    GE,           (int)MULTI_ARG_2_DF_CMP },
18974   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3,    "__builtin_ia32_comueqsd",   IX86_BUILTIN_COMUEQSD,   UNEQ,         (int)MULTI_ARG_2_DF_CMP },
18975   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3,    "__builtin_ia32_comunesd",   IX86_BUILTIN_COMUNESD,   LTGT,         (int)MULTI_ARG_2_DF_CMP },
18976   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3,    "__builtin_ia32_comuneqsd",  IX86_BUILTIN_COMUNESD,   LTGT,         (int)MULTI_ARG_2_DF_CMP },
18977   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3,    "__builtin_ia32_comunltsd",  IX86_BUILTIN_COMULTSD,   UNLT,         (int)MULTI_ARG_2_DF_CMP },
18978   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3,    "__builtin_ia32_comunlesd",  IX86_BUILTIN_COMULESD,   UNLE,         (int)MULTI_ARG_2_DF_CMP },
18979   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3,    "__builtin_ia32_comungtsd",  IX86_BUILTIN_COMUGTSD,   UNGT,         (int)MULTI_ARG_2_DF_CMP },
18980   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3,    "__builtin_ia32_comungesd",  IX86_BUILTIN_COMUGESD,   UNGE,         (int)MULTI_ARG_2_DF_CMP },
18981   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3,    "__builtin_ia32_comordsd",   IX86_BUILTIN_COMORDSD,   ORDERED,      (int)MULTI_ARG_2_DF_CMP },
18982   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3,    "__builtin_ia32_comunordsd", IX86_BUILTIN_COMUNORDSD, UNORDERED,    (int)MULTI_ARG_2_DF_CMP },
18983
18984   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3,      "__builtin_ia32_comeqps",    IX86_BUILTIN_COMEQPS,    EQ,           (int)MULTI_ARG_2_SF_CMP },
18985   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3,      "__builtin_ia32_comneps",    IX86_BUILTIN_COMNEPS,    NE,           (int)MULTI_ARG_2_SF_CMP },
18986   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3,      "__builtin_ia32_comneqps",   IX86_BUILTIN_COMNEPS,    NE,           (int)MULTI_ARG_2_SF_CMP },
18987   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3,      "__builtin_ia32_comltps",    IX86_BUILTIN_COMLTPS,    LT,           (int)MULTI_ARG_2_SF_CMP },
18988   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3,      "__builtin_ia32_comleps",    IX86_BUILTIN_COMLEPS,    LE,           (int)MULTI_ARG_2_SF_CMP },
18989   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3,      "__builtin_ia32_comgtps",    IX86_BUILTIN_COMGTPS,    GT,           (int)MULTI_ARG_2_SF_CMP },
18990   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3,      "__builtin_ia32_comgeps",    IX86_BUILTIN_COMGEPS,    GE,           (int)MULTI_ARG_2_SF_CMP },
18991   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3,      "__builtin_ia32_comueqps",   IX86_BUILTIN_COMUEQPS,   UNEQ,         (int)MULTI_ARG_2_SF_CMP },
18992   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3,      "__builtin_ia32_comuneps",   IX86_BUILTIN_COMUNEPS,   LTGT,         (int)MULTI_ARG_2_SF_CMP },
18993   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3,      "__builtin_ia32_comuneqps",  IX86_BUILTIN_COMUNEPS,   LTGT,         (int)MULTI_ARG_2_SF_CMP },
18994   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3,      "__builtin_ia32_comunltps",  IX86_BUILTIN_COMULTPS,   UNLT,         (int)MULTI_ARG_2_SF_CMP },
18995   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3,      "__builtin_ia32_comunleps",  IX86_BUILTIN_COMULEPS,   UNLE,         (int)MULTI_ARG_2_SF_CMP },
18996   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3,      "__builtin_ia32_comungtps",  IX86_BUILTIN_COMUGTPS,   UNGT,         (int)MULTI_ARG_2_SF_CMP },
18997   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3,      "__builtin_ia32_comungeps",  IX86_BUILTIN_COMUGEPS,   UNGE,         (int)MULTI_ARG_2_SF_CMP },
18998   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3,      "__builtin_ia32_comordps",   IX86_BUILTIN_COMORDPS,   ORDERED,      (int)MULTI_ARG_2_SF_CMP },
18999   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3,      "__builtin_ia32_comunordps", IX86_BUILTIN_COMUNORDPS, UNORDERED,    (int)MULTI_ARG_2_SF_CMP },
19000
19001   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3,      "__builtin_ia32_comeqpd",    IX86_BUILTIN_COMEQPD,    EQ,           (int)MULTI_ARG_2_DF_CMP },
19002   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3,      "__builtin_ia32_comnepd",    IX86_BUILTIN_COMNEPD,    NE,           (int)MULTI_ARG_2_DF_CMP },
19003   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3,      "__builtin_ia32_comneqpd",   IX86_BUILTIN_COMNEPD,    NE,           (int)MULTI_ARG_2_DF_CMP },
19004   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3,      "__builtin_ia32_comltpd",    IX86_BUILTIN_COMLTPD,    LT,           (int)MULTI_ARG_2_DF_CMP },
19005   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3,      "__builtin_ia32_comlepd",    IX86_BUILTIN_COMLEPD,    LE,           (int)MULTI_ARG_2_DF_CMP },
19006   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3,      "__builtin_ia32_comgtpd",    IX86_BUILTIN_COMGTPD,    GT,           (int)MULTI_ARG_2_DF_CMP },
19007   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3,      "__builtin_ia32_comgepd",    IX86_BUILTIN_COMGEPD,    GE,           (int)MULTI_ARG_2_DF_CMP },
19008   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3,      "__builtin_ia32_comueqpd",   IX86_BUILTIN_COMUEQPD,   UNEQ,         (int)MULTI_ARG_2_DF_CMP },
19009   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3,      "__builtin_ia32_comunepd",   IX86_BUILTIN_COMUNEPD,   LTGT,         (int)MULTI_ARG_2_DF_CMP },
19010   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3,      "__builtin_ia32_comuneqpd",  IX86_BUILTIN_COMUNEPD,   LTGT,         (int)MULTI_ARG_2_DF_CMP },
19011   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3,      "__builtin_ia32_comunltpd",  IX86_BUILTIN_COMULTPD,   UNLT,         (int)MULTI_ARG_2_DF_CMP },
19012   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3,      "__builtin_ia32_comunlepd",  IX86_BUILTIN_COMULEPD,   UNLE,         (int)MULTI_ARG_2_DF_CMP },
19013   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3,      "__builtin_ia32_comungtpd",  IX86_BUILTIN_COMUGTPD,   UNGT,         (int)MULTI_ARG_2_DF_CMP },
19014   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3,      "__builtin_ia32_comungepd",  IX86_BUILTIN_COMUGEPD,   UNGE,         (int)MULTI_ARG_2_DF_CMP },
19015   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3,      "__builtin_ia32_comordpd",   IX86_BUILTIN_COMORDPD,   ORDERED,      (int)MULTI_ARG_2_DF_CMP },
19016   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3,      "__builtin_ia32_comunordpd", IX86_BUILTIN_COMUNORDPD, UNORDERED,    (int)MULTI_ARG_2_DF_CMP },
19017
19018   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3,     "__builtin_ia32_pcomeqb",    IX86_BUILTIN_PCOMEQB,    EQ,           (int)MULTI_ARG_2_QI_CMP },
19019   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3,     "__builtin_ia32_pcomneb",    IX86_BUILTIN_PCOMNEB,    NE,           (int)MULTI_ARG_2_QI_CMP },
19020   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3,     "__builtin_ia32_pcomneqb",   IX86_BUILTIN_PCOMNEB,    NE,           (int)MULTI_ARG_2_QI_CMP },
19021   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3,     "__builtin_ia32_pcomltb",    IX86_BUILTIN_PCOMLTB,    LT,           (int)MULTI_ARG_2_QI_CMP },
19022   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3,     "__builtin_ia32_pcomleb",    IX86_BUILTIN_PCOMLEB,    LE,           (int)MULTI_ARG_2_QI_CMP },
19023   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3,     "__builtin_ia32_pcomgtb",    IX86_BUILTIN_PCOMGTB,    GT,           (int)MULTI_ARG_2_QI_CMP },
19024   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3,     "__builtin_ia32_pcomgeb",    IX86_BUILTIN_PCOMGEB,    GE,           (int)MULTI_ARG_2_QI_CMP },
19025
19026   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3,      "__builtin_ia32_pcomeqw",    IX86_BUILTIN_PCOMEQW,    EQ,           (int)MULTI_ARG_2_HI_CMP },
19027   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3,      "__builtin_ia32_pcomnew",    IX86_BUILTIN_PCOMNEW,    NE,           (int)MULTI_ARG_2_HI_CMP },
19028   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3,      "__builtin_ia32_pcomneqw",   IX86_BUILTIN_PCOMNEW,    NE,           (int)MULTI_ARG_2_HI_CMP },
19029   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3,      "__builtin_ia32_pcomltw",    IX86_BUILTIN_PCOMLTW,    LT,           (int)MULTI_ARG_2_HI_CMP },
19030   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3,      "__builtin_ia32_pcomlew",    IX86_BUILTIN_PCOMLEW,    LE,           (int)MULTI_ARG_2_HI_CMP },
19031   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3,      "__builtin_ia32_pcomgtw",    IX86_BUILTIN_PCOMGTW,    GT,           (int)MULTI_ARG_2_HI_CMP },
19032   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3,      "__builtin_ia32_pcomgew",    IX86_BUILTIN_PCOMGEW,    GE,           (int)MULTI_ARG_2_HI_CMP },
19033
19034   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3,      "__builtin_ia32_pcomeqd",    IX86_BUILTIN_PCOMEQD,    EQ,           (int)MULTI_ARG_2_SI_CMP },
19035   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3,      "__builtin_ia32_pcomned",    IX86_BUILTIN_PCOMNED,    NE,           (int)MULTI_ARG_2_SI_CMP },
19036   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3,      "__builtin_ia32_pcomneqd",   IX86_BUILTIN_PCOMNED,    NE,           (int)MULTI_ARG_2_SI_CMP },
19037   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3,      "__builtin_ia32_pcomltd",    IX86_BUILTIN_PCOMLTD,    LT,           (int)MULTI_ARG_2_SI_CMP },
19038   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3,      "__builtin_ia32_pcomled",    IX86_BUILTIN_PCOMLED,    LE,           (int)MULTI_ARG_2_SI_CMP },
19039   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3,      "__builtin_ia32_pcomgtd",    IX86_BUILTIN_PCOMGTD,    GT,           (int)MULTI_ARG_2_SI_CMP },
19040   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3,      "__builtin_ia32_pcomged",    IX86_BUILTIN_PCOMGED,    GE,           (int)MULTI_ARG_2_SI_CMP },
19041
19042   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3,      "__builtin_ia32_pcomeqq",    IX86_BUILTIN_PCOMEQQ,    EQ,           (int)MULTI_ARG_2_DI_CMP },
19043   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3,      "__builtin_ia32_pcomneq",    IX86_BUILTIN_PCOMNEQ,    NE,           (int)MULTI_ARG_2_DI_CMP },
19044   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3,      "__builtin_ia32_pcomneqq",   IX86_BUILTIN_PCOMNEQ,    NE,           (int)MULTI_ARG_2_DI_CMP },
19045   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3,      "__builtin_ia32_pcomltq",    IX86_BUILTIN_PCOMLTQ,    LT,           (int)MULTI_ARG_2_DI_CMP },
19046   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3,      "__builtin_ia32_pcomleq",    IX86_BUILTIN_PCOMLEQ,    LE,           (int)MULTI_ARG_2_DI_CMP },
19047   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3,      "__builtin_ia32_pcomgtq",    IX86_BUILTIN_PCOMGTQ,    GT,           (int)MULTI_ARG_2_DI_CMP },
19048   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3,      "__builtin_ia32_pcomgeq",    IX86_BUILTIN_PCOMGEQ,    GE,           (int)MULTI_ARG_2_DI_CMP },
19049
19050   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomequb",   IX86_BUILTIN_PCOMEQUB,   EQ,           (int)MULTI_ARG_2_QI_CMP },
19051   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomneub",   IX86_BUILTIN_PCOMNEUB,   NE,           (int)MULTI_ARG_2_QI_CMP },
19052   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomnequb",  IX86_BUILTIN_PCOMNEUB,   NE,           (int)MULTI_ARG_2_QI_CMP },
19053   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomltub",   IX86_BUILTIN_PCOMLTUB,   LTU,          (int)MULTI_ARG_2_QI_CMP },
19054   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomleub",   IX86_BUILTIN_PCOMLEUB,   LEU,          (int)MULTI_ARG_2_QI_CMP },
19055   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgtub",   IX86_BUILTIN_PCOMGTUB,   GTU,          (int)MULTI_ARG_2_QI_CMP },
19056   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgeub",   IX86_BUILTIN_PCOMGEUB,   GEU,          (int)MULTI_ARG_2_QI_CMP },
19057
19058   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomequw",   IX86_BUILTIN_PCOMEQUW,   EQ,           (int)MULTI_ARG_2_HI_CMP },
19059   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomneuw",   IX86_BUILTIN_PCOMNEUW,   NE,           (int)MULTI_ARG_2_HI_CMP },
19060   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomnequw",  IX86_BUILTIN_PCOMNEUW,   NE,           (int)MULTI_ARG_2_HI_CMP },
19061   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3,  "__builtin_ia32_pcomltuw",   IX86_BUILTIN_PCOMLTUW,   LTU,          (int)MULTI_ARG_2_HI_CMP },
19062   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3,  "__builtin_ia32_pcomleuw",   IX86_BUILTIN_PCOMLEUW,   LEU,          (int)MULTI_ARG_2_HI_CMP },
19063   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3,  "__builtin_ia32_pcomgtuw",   IX86_BUILTIN_PCOMGTUW,   GTU,          (int)MULTI_ARG_2_HI_CMP },
19064   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3,  "__builtin_ia32_pcomgeuw",   IX86_BUILTIN_PCOMGEUW,   GEU,          (int)MULTI_ARG_2_HI_CMP },
19065
19066   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomequd",   IX86_BUILTIN_PCOMEQUD,   EQ,           (int)MULTI_ARG_2_SI_CMP },
19067   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomneud",   IX86_BUILTIN_PCOMNEUD,   NE,           (int)MULTI_ARG_2_SI_CMP },
19068   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomnequd",  IX86_BUILTIN_PCOMNEUD,   NE,           (int)MULTI_ARG_2_SI_CMP },
19069   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3,  "__builtin_ia32_pcomltud",   IX86_BUILTIN_PCOMLTUD,   LTU,          (int)MULTI_ARG_2_SI_CMP },
19070   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3,  "__builtin_ia32_pcomleud",   IX86_BUILTIN_PCOMLEUD,   LEU,          (int)MULTI_ARG_2_SI_CMP },
19071   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3,  "__builtin_ia32_pcomgtud",   IX86_BUILTIN_PCOMGTUD,   GTU,          (int)MULTI_ARG_2_SI_CMP },
19072   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3,  "__builtin_ia32_pcomgeud",   IX86_BUILTIN_PCOMGEUD,   GEU,          (int)MULTI_ARG_2_SI_CMP },
19073
19074   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomequq",   IX86_BUILTIN_PCOMEQUQ,   EQ,           (int)MULTI_ARG_2_DI_CMP },
19075   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomneuq",   IX86_BUILTIN_PCOMNEUQ,   NE,           (int)MULTI_ARG_2_DI_CMP },
19076   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomnequq",  IX86_BUILTIN_PCOMNEUQ,   NE,           (int)MULTI_ARG_2_DI_CMP },
19077   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3,  "__builtin_ia32_pcomltuq",   IX86_BUILTIN_PCOMLTUQ,   LTU,          (int)MULTI_ARG_2_DI_CMP },
19078   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3,  "__builtin_ia32_pcomleuq",   IX86_BUILTIN_PCOMLEUQ,   LEU,          (int)MULTI_ARG_2_DI_CMP },
19079   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3,  "__builtin_ia32_pcomgtuq",   IX86_BUILTIN_PCOMGTUQ,   GTU,          (int)MULTI_ARG_2_DI_CMP },
19080   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3,  "__builtin_ia32_pcomgeuq",   IX86_BUILTIN_PCOMGEUQ,   GEU,          (int)MULTI_ARG_2_DI_CMP },
19081
19082   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3,       "__builtin_ia32_comfalsess", IX86_BUILTIN_COMFALSESS, COM_FALSE_S,  (int)MULTI_ARG_2_SF_TF },
19083   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3,       "__builtin_ia32_comtruess",  IX86_BUILTIN_COMTRUESS,  COM_TRUE_S,   (int)MULTI_ARG_2_SF_TF },
19084   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3,       "__builtin_ia32_comfalseps", IX86_BUILTIN_COMFALSEPS, COM_FALSE_P,  (int)MULTI_ARG_2_SF_TF },
19085   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3,       "__builtin_ia32_comtrueps",  IX86_BUILTIN_COMTRUEPS,  COM_TRUE_P,   (int)MULTI_ARG_2_SF_TF },
19086   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3,       "__builtin_ia32_comfalsesd", IX86_BUILTIN_COMFALSESD, COM_FALSE_S,  (int)MULTI_ARG_2_DF_TF },
19087   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3,       "__builtin_ia32_comtruesd",  IX86_BUILTIN_COMTRUESD,  COM_TRUE_S,   (int)MULTI_ARG_2_DF_TF },
19088   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3,       "__builtin_ia32_comfalsepd", IX86_BUILTIN_COMFALSEPD, COM_FALSE_P,  (int)MULTI_ARG_2_DF_TF },
19089   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3,       "__builtin_ia32_comtruepd",  IX86_BUILTIN_COMTRUEPD,  COM_TRUE_P,   (int)MULTI_ARG_2_DF_TF },
19090
19091   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3,     "__builtin_ia32_pcomfalseb", IX86_BUILTIN_PCOMFALSEB, PCOM_FALSE,   (int)MULTI_ARG_2_QI_TF },
19092   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3,      "__builtin_ia32_pcomfalsew", IX86_BUILTIN_PCOMFALSEW, PCOM_FALSE,   (int)MULTI_ARG_2_HI_TF },
19093   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3,      "__builtin_ia32_pcomfalsed", IX86_BUILTIN_PCOMFALSED, PCOM_FALSE,   (int)MULTI_ARG_2_SI_TF },
19094   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3,      "__builtin_ia32_pcomfalseq", IX86_BUILTIN_PCOMFALSEQ, PCOM_FALSE,   (int)MULTI_ARG_2_DI_TF },
19095   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3,     "__builtin_ia32_pcomfalseub",IX86_BUILTIN_PCOMFALSEUB,PCOM_FALSE,   (int)MULTI_ARG_2_QI_TF },
19096   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3,      "__builtin_ia32_pcomfalseuw",IX86_BUILTIN_PCOMFALSEUW,PCOM_FALSE,   (int)MULTI_ARG_2_HI_TF },
19097   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3,      "__builtin_ia32_pcomfalseud",IX86_BUILTIN_PCOMFALSEUD,PCOM_FALSE,   (int)MULTI_ARG_2_SI_TF },
19098   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3,      "__builtin_ia32_pcomfalseuq",IX86_BUILTIN_PCOMFALSEUQ,PCOM_FALSE,   (int)MULTI_ARG_2_DI_TF },
19099
19100   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3,     "__builtin_ia32_pcomtrueb",  IX86_BUILTIN_PCOMTRUEB,  PCOM_TRUE,    (int)MULTI_ARG_2_QI_TF },
19101   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3,      "__builtin_ia32_pcomtruew",  IX86_BUILTIN_PCOMTRUEW,  PCOM_TRUE,    (int)MULTI_ARG_2_HI_TF },
19102   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3,      "__builtin_ia32_pcomtrued",  IX86_BUILTIN_PCOMTRUED,  PCOM_TRUE,    (int)MULTI_ARG_2_SI_TF },
19103   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3,      "__builtin_ia32_pcomtrueq",  IX86_BUILTIN_PCOMTRUEQ,  PCOM_TRUE,    (int)MULTI_ARG_2_DI_TF },
19104   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3,     "__builtin_ia32_pcomtrueub", IX86_BUILTIN_PCOMTRUEUB, PCOM_TRUE,    (int)MULTI_ARG_2_QI_TF },
19105   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3,      "__builtin_ia32_pcomtrueuw", IX86_BUILTIN_PCOMTRUEUW, PCOM_TRUE,    (int)MULTI_ARG_2_HI_TF },
19106   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3,      "__builtin_ia32_pcomtrueud", IX86_BUILTIN_PCOMTRUEUD, PCOM_TRUE,    (int)MULTI_ARG_2_SI_TF },
19107   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3,      "__builtin_ia32_pcomtrueuq", IX86_BUILTIN_PCOMTRUEUQ, PCOM_TRUE,    (int)MULTI_ARG_2_DI_TF },
19108 };
19109
19110 /* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
19111    is zero.  Otherwise, if TARGET_SSE is not set, only expand the MMX
19112    builtins.  */
19113 static void
19114 ix86_init_mmx_sse_builtins (void)
19115 {
19116   const struct builtin_description * d;
19117   size_t i;
19118
19119   tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
19120   tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
19121   tree V1DI_type_node
19122     = build_vector_type_for_mode (long_long_integer_type_node, V1DImode);
19123   tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
19124   tree V2DI_type_node
19125     = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
19126   tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
19127   tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
19128   tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
19129   tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
19130   tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode);
19131   tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
19132
19133   tree pchar_type_node = build_pointer_type (char_type_node);
19134   tree pcchar_type_node
19135     = build_pointer_type (build_type_variant (char_type_node, 1, 0));
19136   tree pfloat_type_node = build_pointer_type (float_type_node);
19137   tree pcfloat_type_node
19138     = build_pointer_type (build_type_variant (float_type_node, 1, 0));
19139   tree pv2sf_type_node = build_pointer_type (V2SF_type_node);
19140   tree pcv2sf_type_node
19141     = build_pointer_type (build_type_variant (V2SF_type_node, 1, 0));
19142   tree pv2di_type_node = build_pointer_type (V2DI_type_node);
19143   tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
19144
19145   /* Comparisons.  */
19146   tree int_ftype_v4sf_v4sf
19147     = build_function_type_list (integer_type_node,
19148                                 V4SF_type_node, V4SF_type_node, NULL_TREE);
19149   tree v4si_ftype_v4sf_v4sf
19150     = build_function_type_list (V4SI_type_node,
19151                                 V4SF_type_node, V4SF_type_node, NULL_TREE);
19152   /* MMX/SSE/integer conversions.  */
19153   tree int_ftype_v4sf
19154     = build_function_type_list (integer_type_node,
19155                                 V4SF_type_node, NULL_TREE);
19156   tree int64_ftype_v4sf
19157     = build_function_type_list (long_long_integer_type_node,
19158                                 V4SF_type_node, NULL_TREE);
19159   tree int_ftype_v8qi
19160     = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
19161   tree v4sf_ftype_v4sf_int
19162     = build_function_type_list (V4SF_type_node,
19163                                 V4SF_type_node, integer_type_node, NULL_TREE);
19164   tree v4sf_ftype_v4sf_int64
19165     = build_function_type_list (V4SF_type_node,
19166                                 V4SF_type_node, long_long_integer_type_node,
19167                                 NULL_TREE);
19168   tree v4sf_ftype_v4sf_v2si
19169     = build_function_type_list (V4SF_type_node,
19170                                 V4SF_type_node, V2SI_type_node, NULL_TREE);
19171
19172   /* Miscellaneous.  */
19173   tree v8qi_ftype_v4hi_v4hi
19174     = build_function_type_list (V8QI_type_node,
19175                                 V4HI_type_node, V4HI_type_node, NULL_TREE);
19176   tree v4hi_ftype_v2si_v2si
19177     = build_function_type_list (V4HI_type_node,
19178                                 V2SI_type_node, V2SI_type_node, NULL_TREE);
19179   tree v4sf_ftype_v4sf_v4sf_int
19180     = build_function_type_list (V4SF_type_node,
19181                                 V4SF_type_node, V4SF_type_node,
19182                                 integer_type_node, NULL_TREE);
19183   tree v2si_ftype_v4hi_v4hi
19184     = build_function_type_list (V2SI_type_node,
19185                                 V4HI_type_node, V4HI_type_node, NULL_TREE);
19186   tree v4hi_ftype_v4hi_int
19187     = build_function_type_list (V4HI_type_node,
19188                                 V4HI_type_node, integer_type_node, NULL_TREE);
19189   tree v2si_ftype_v2si_int
19190     = build_function_type_list (V2SI_type_node,
19191                                 V2SI_type_node, integer_type_node, NULL_TREE);
19192   tree v1di_ftype_v1di_int
19193     = build_function_type_list (V1DI_type_node,
19194                                 V1DI_type_node, integer_type_node, NULL_TREE);
19195
19196   tree void_ftype_void
19197     = build_function_type (void_type_node, void_list_node);
19198   tree void_ftype_unsigned
19199     = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
19200   tree void_ftype_unsigned_unsigned
19201     = build_function_type_list (void_type_node, unsigned_type_node,
19202                                 unsigned_type_node, NULL_TREE);
19203   tree void_ftype_pcvoid_unsigned_unsigned
19204     = build_function_type_list (void_type_node, const_ptr_type_node,
19205                                 unsigned_type_node, unsigned_type_node,
19206                                 NULL_TREE);
19207   tree unsigned_ftype_void
19208     = build_function_type (unsigned_type_node, void_list_node);
19209   tree v2si_ftype_v4sf
19210     = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
19211   /* Loads/stores.  */
19212   tree void_ftype_v8qi_v8qi_pchar
19213     = build_function_type_list (void_type_node,
19214                                 V8QI_type_node, V8QI_type_node,
19215                                 pchar_type_node, NULL_TREE);
19216   tree v4sf_ftype_pcfloat
19217     = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
19218   tree v4sf_ftype_v4sf_pcv2sf
19219     = build_function_type_list (V4SF_type_node,
19220                                 V4SF_type_node, pcv2sf_type_node, NULL_TREE);
19221   tree void_ftype_pv2sf_v4sf
19222     = build_function_type_list (void_type_node,
19223                                 pv2sf_type_node, V4SF_type_node, NULL_TREE);
19224   tree void_ftype_pfloat_v4sf
19225     = build_function_type_list (void_type_node,
19226                                 pfloat_type_node, V4SF_type_node, NULL_TREE);
19227   tree void_ftype_pdi_di
19228     = build_function_type_list (void_type_node,
19229                                 pdi_type_node, long_long_unsigned_type_node,
19230                                 NULL_TREE);
19231   tree void_ftype_pv2di_v2di
19232     = build_function_type_list (void_type_node,
19233                                 pv2di_type_node, V2DI_type_node, NULL_TREE);
19234   /* Normal vector unops.  */
19235   tree v4sf_ftype_v4sf
19236     = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
19237   tree v16qi_ftype_v16qi
19238     = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
19239   tree v8hi_ftype_v8hi
19240     = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
19241   tree v4si_ftype_v4si
19242     = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
19243   tree v8qi_ftype_v8qi
19244     = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
19245   tree v4hi_ftype_v4hi
19246     = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
19247
19248   /* Normal vector binops.  */
19249   tree v4sf_ftype_v4sf_v4sf
19250     = build_function_type_list (V4SF_type_node,
19251                                 V4SF_type_node, V4SF_type_node, NULL_TREE);
19252   tree v8qi_ftype_v8qi_v8qi
19253     = build_function_type_list (V8QI_type_node,
19254                                 V8QI_type_node, V8QI_type_node, NULL_TREE);
19255   tree v4hi_ftype_v4hi_v4hi
19256     = build_function_type_list (V4HI_type_node,
19257                                 V4HI_type_node, V4HI_type_node, NULL_TREE);
19258   tree v2si_ftype_v2si_v2si
19259     = build_function_type_list (V2SI_type_node,
19260                                 V2SI_type_node, V2SI_type_node, NULL_TREE);
19261   tree v1di_ftype_v1di_v1di
19262     = build_function_type_list (V1DI_type_node,
19263                                 V1DI_type_node, V1DI_type_node, NULL_TREE);
19264   tree v1di_ftype_v1di_v1di_int
19265     = build_function_type_list (V1DI_type_node,
19266                                 V1DI_type_node, V1DI_type_node,
19267                                 integer_type_node, NULL_TREE);
19268   tree v2si_ftype_v2sf
19269     = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
19270   tree v2sf_ftype_v2si
19271     = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
19272   tree v2si_ftype_v2si
19273     = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
19274   tree v2sf_ftype_v2sf
19275     = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
19276   tree v2sf_ftype_v2sf_v2sf
19277     = build_function_type_list (V2SF_type_node,
19278                                 V2SF_type_node, V2SF_type_node, NULL_TREE);
19279   tree v2si_ftype_v2sf_v2sf
19280     = build_function_type_list (V2SI_type_node,
19281                                 V2SF_type_node, V2SF_type_node, NULL_TREE);
19282   tree pint_type_node    = build_pointer_type (integer_type_node);
19283   tree pdouble_type_node = build_pointer_type (double_type_node);
19284   tree pcdouble_type_node = build_pointer_type (
19285                                 build_type_variant (double_type_node, 1, 0));
19286   tree int_ftype_v2df_v2df
19287     = build_function_type_list (integer_type_node,
19288                                 V2DF_type_node, V2DF_type_node, NULL_TREE);
19289
19290   tree void_ftype_pcvoid
19291     = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
19292   tree v4sf_ftype_v4si
19293     = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
19294   tree v4si_ftype_v4sf
19295     = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
19296   tree v2df_ftype_v4si
19297     = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
19298   tree v4si_ftype_v2df
19299     = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
19300   tree v4si_ftype_v2df_v2df
19301     = build_function_type_list (V4SI_type_node,
19302                                 V2DF_type_node, V2DF_type_node, NULL_TREE);
19303   tree v2si_ftype_v2df
19304     = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
19305   tree v4sf_ftype_v2df
19306     = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
19307   tree v2df_ftype_v2si
19308     = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
19309   tree v2df_ftype_v4sf
19310     = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
19311   tree int_ftype_v2df
19312     = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
19313   tree int64_ftype_v2df
19314     = build_function_type_list (long_long_integer_type_node,
19315                                 V2DF_type_node, NULL_TREE);
19316   tree v2df_ftype_v2df_int
19317     = build_function_type_list (V2DF_type_node,
19318                                 V2DF_type_node, integer_type_node, NULL_TREE);
19319   tree v2df_ftype_v2df_int64
19320     = build_function_type_list (V2DF_type_node,
19321                                 V2DF_type_node, long_long_integer_type_node,
19322                                 NULL_TREE);
19323   tree v4sf_ftype_v4sf_v2df
19324     = build_function_type_list (V4SF_type_node,
19325                                 V4SF_type_node, V2DF_type_node, NULL_TREE);
19326   tree v2df_ftype_v2df_v4sf
19327     = build_function_type_list (V2DF_type_node,
19328                                 V2DF_type_node, V4SF_type_node, NULL_TREE);
19329   tree v2df_ftype_v2df_v2df_int
19330     = build_function_type_list (V2DF_type_node,
19331                                 V2DF_type_node, V2DF_type_node,
19332                                 integer_type_node,
19333                                 NULL_TREE);
19334   tree v2df_ftype_v2df_pcdouble
19335     = build_function_type_list (V2DF_type_node,
19336                                 V2DF_type_node, pcdouble_type_node, NULL_TREE);
19337   tree void_ftype_pdouble_v2df
19338     = build_function_type_list (void_type_node,
19339                                 pdouble_type_node, V2DF_type_node, NULL_TREE);
19340   tree void_ftype_pint_int
19341     = build_function_type_list (void_type_node,
19342                                 pint_type_node, integer_type_node, NULL_TREE);
19343   tree void_ftype_v16qi_v16qi_pchar
19344     = build_function_type_list (void_type_node,
19345                                 V16QI_type_node, V16QI_type_node,
19346                                 pchar_type_node, NULL_TREE);
19347   tree v2df_ftype_pcdouble
19348     = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
19349   tree v2df_ftype_v2df_v2df
19350     = build_function_type_list (V2DF_type_node,
19351                                 V2DF_type_node, V2DF_type_node, NULL_TREE);
19352   tree v16qi_ftype_v16qi_v16qi
19353     = build_function_type_list (V16QI_type_node,
19354                                 V16QI_type_node, V16QI_type_node, NULL_TREE);
19355   tree v8hi_ftype_v8hi_v8hi
19356     = build_function_type_list (V8HI_type_node,
19357                                 V8HI_type_node, V8HI_type_node, NULL_TREE);
19358   tree v4si_ftype_v4si_v4si
19359     = build_function_type_list (V4SI_type_node,
19360                                 V4SI_type_node, V4SI_type_node, NULL_TREE);
19361   tree v2di_ftype_v2di_v2di
19362     = build_function_type_list (V2DI_type_node,
19363                                 V2DI_type_node, V2DI_type_node, NULL_TREE);
19364   tree v2di_ftype_v2df_v2df
19365     = build_function_type_list (V2DI_type_node,
19366                                 V2DF_type_node, V2DF_type_node, NULL_TREE);
19367   tree v2df_ftype_v2df
19368     = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
19369   tree v2di_ftype_v2di_int
19370     = build_function_type_list (V2DI_type_node,
19371                                 V2DI_type_node, integer_type_node, NULL_TREE);
19372   tree v2di_ftype_v2di_v2di_int
19373     = build_function_type_list (V2DI_type_node, V2DI_type_node,
19374                                 V2DI_type_node, integer_type_node, NULL_TREE);
19375   tree v4si_ftype_v4si_int
19376     = build_function_type_list (V4SI_type_node,
19377                                 V4SI_type_node, integer_type_node, NULL_TREE);
19378   tree v8hi_ftype_v8hi_int
19379     = build_function_type_list (V8HI_type_node,
19380                                 V8HI_type_node, integer_type_node, NULL_TREE);
19381   tree v4si_ftype_v8hi_v8hi
19382     = build_function_type_list (V4SI_type_node,
19383                                 V8HI_type_node, V8HI_type_node, NULL_TREE);
19384   tree v1di_ftype_v8qi_v8qi
19385     = build_function_type_list (V1DI_type_node,
19386                                 V8QI_type_node, V8QI_type_node, NULL_TREE);
19387   tree v1di_ftype_v2si_v2si
19388     = build_function_type_list (V1DI_type_node,
19389                                 V2SI_type_node, V2SI_type_node, NULL_TREE);
19390   tree v2di_ftype_v16qi_v16qi
19391     = build_function_type_list (V2DI_type_node,
19392                                 V16QI_type_node, V16QI_type_node, NULL_TREE);
19393   tree v2di_ftype_v4si_v4si
19394     = build_function_type_list (V2DI_type_node,
19395                                 V4SI_type_node, V4SI_type_node, NULL_TREE);
19396   tree int_ftype_v16qi
19397     = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
19398   tree v16qi_ftype_pcchar
19399     = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
19400   tree void_ftype_pchar_v16qi
19401     = build_function_type_list (void_type_node,
19402                                 pchar_type_node, V16QI_type_node, NULL_TREE);
19403
19404   tree v2di_ftype_v2di_unsigned_unsigned
19405     = build_function_type_list (V2DI_type_node, V2DI_type_node,
19406                                 unsigned_type_node, unsigned_type_node,
19407                                 NULL_TREE);
19408   tree v2di_ftype_v2di_v2di_unsigned_unsigned
19409     = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
19410                                 unsigned_type_node, unsigned_type_node,
19411                                 NULL_TREE);
19412   tree v2di_ftype_v2di_v16qi
19413     = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
19414                                 NULL_TREE);
19415   tree v2df_ftype_v2df_v2df_v2df
19416     = build_function_type_list (V2DF_type_node,
19417                                 V2DF_type_node, V2DF_type_node,
19418                                 V2DF_type_node, NULL_TREE);
19419   tree v4sf_ftype_v4sf_v4sf_v4sf
19420     = build_function_type_list (V4SF_type_node,
19421                                 V4SF_type_node, V4SF_type_node,
19422                                 V4SF_type_node, NULL_TREE);
19423   tree v8hi_ftype_v16qi
19424     = build_function_type_list (V8HI_type_node, V16QI_type_node,
19425                                 NULL_TREE);
19426   tree v4si_ftype_v16qi
19427     = build_function_type_list (V4SI_type_node, V16QI_type_node,
19428                                 NULL_TREE);
19429   tree v2di_ftype_v16qi
19430     = build_function_type_list (V2DI_type_node, V16QI_type_node,
19431                                 NULL_TREE);
19432   tree v4si_ftype_v8hi
19433     = build_function_type_list (V4SI_type_node, V8HI_type_node,
19434                                 NULL_TREE);
19435   tree v2di_ftype_v8hi
19436     = build_function_type_list (V2DI_type_node, V8HI_type_node,
19437                                 NULL_TREE);
19438   tree v2di_ftype_v4si
19439     = build_function_type_list (V2DI_type_node, V4SI_type_node,
19440                                 NULL_TREE);
19441   tree v2di_ftype_pv2di
19442     = build_function_type_list (V2DI_type_node, pv2di_type_node,
19443                                 NULL_TREE);
19444   tree v16qi_ftype_v16qi_v16qi_int
19445     = build_function_type_list (V16QI_type_node, V16QI_type_node,
19446                                 V16QI_type_node, integer_type_node,
19447                                 NULL_TREE);
19448   tree v16qi_ftype_v16qi_v16qi_v16qi
19449     = build_function_type_list (V16QI_type_node, V16QI_type_node,
19450                                 V16QI_type_node, V16QI_type_node,
19451                                 NULL_TREE);
19452   tree v8hi_ftype_v8hi_v8hi_int
19453     = build_function_type_list (V8HI_type_node, V8HI_type_node,
19454                                 V8HI_type_node, integer_type_node,
19455                                 NULL_TREE);
19456   tree v4si_ftype_v4si_v4si_int
19457     = build_function_type_list (V4SI_type_node, V4SI_type_node,
19458                                 V4SI_type_node, integer_type_node,
19459                                 NULL_TREE);
19460   tree int_ftype_v2di_v2di
19461     = build_function_type_list (integer_type_node,
19462                                 V2DI_type_node, V2DI_type_node,
19463                                 NULL_TREE);
19464   tree int_ftype_v16qi_int_v16qi_int_int
19465     = build_function_type_list (integer_type_node,
19466                                 V16QI_type_node,
19467                                 integer_type_node,
19468                                 V16QI_type_node,
19469                                 integer_type_node,
19470                                 integer_type_node,
19471                                 NULL_TREE);
19472   tree v16qi_ftype_v16qi_int_v16qi_int_int
19473     = build_function_type_list (V16QI_type_node,
19474                                 V16QI_type_node,
19475                                 integer_type_node,
19476                                 V16QI_type_node,
19477                                 integer_type_node,
19478                                 integer_type_node,
19479                                 NULL_TREE);
19480   tree int_ftype_v16qi_v16qi_int
19481     = build_function_type_list (integer_type_node,
19482                                 V16QI_type_node,
19483                                 V16QI_type_node,
19484                                 integer_type_node,
19485                                 NULL_TREE);
19486
19487   /* SSE5 instructions.  */
19488   tree v2di_ftype_v2di_v2di_v2di
19489     = build_function_type_list (V2DI_type_node,
19490                                 V2DI_type_node,
19491                                 V2DI_type_node,
19492                                 V2DI_type_node,
19493                                 NULL_TREE);
19494
19495   tree v4si_ftype_v4si_v4si_v4si
19496     = build_function_type_list (V4SI_type_node,
19497                                 V4SI_type_node,
19498                                 V4SI_type_node,
19499                                 V4SI_type_node,
19500                                 NULL_TREE);
19501
19502   tree v4si_ftype_v4si_v4si_v2di
19503     = build_function_type_list (V4SI_type_node,
19504                                 V4SI_type_node,
19505                                 V4SI_type_node,
19506                                 V2DI_type_node,
19507                                 NULL_TREE);
19508
19509   tree v8hi_ftype_v8hi_v8hi_v8hi
19510     = build_function_type_list (V8HI_type_node,
19511                                 V8HI_type_node,
19512                                 V8HI_type_node,
19513                                 V8HI_type_node,
19514                                 NULL_TREE);
19515
19516   tree v8hi_ftype_v8hi_v8hi_v4si
19517     = build_function_type_list (V8HI_type_node,
19518                                 V8HI_type_node,
19519                                 V8HI_type_node,
19520                                 V4SI_type_node,
19521                                 NULL_TREE);
19522
19523   tree v2df_ftype_v2df_v2df_v16qi
19524     = build_function_type_list (V2DF_type_node,
19525                                 V2DF_type_node,
19526                                 V2DF_type_node,
19527                                 V16QI_type_node,
19528                                 NULL_TREE);
19529
19530   tree v4sf_ftype_v4sf_v4sf_v16qi
19531     = build_function_type_list (V4SF_type_node,
19532                                 V4SF_type_node,
19533                                 V4SF_type_node,
19534                                 V16QI_type_node,
19535                                 NULL_TREE);
19536
19537   tree v2di_ftype_v2di_si
19538     = build_function_type_list (V2DI_type_node,
19539                                 V2DI_type_node,
19540                                 integer_type_node,
19541                                 NULL_TREE);
19542
19543   tree v4si_ftype_v4si_si
19544     = build_function_type_list (V4SI_type_node,
19545                                 V4SI_type_node,
19546                                 integer_type_node,
19547                                 NULL_TREE);
19548
19549   tree v8hi_ftype_v8hi_si
19550     = build_function_type_list (V8HI_type_node,
19551                                 V8HI_type_node,
19552                                 integer_type_node,
19553                                 NULL_TREE);
19554
19555   tree v16qi_ftype_v16qi_si
19556     = build_function_type_list (V16QI_type_node,
19557                                 V16QI_type_node,
19558                                 integer_type_node,
19559                                 NULL_TREE);
19560   tree v4sf_ftype_v4hi
19561     = build_function_type_list (V4SF_type_node,
19562                                 V4HI_type_node,
19563                                 NULL_TREE);
19564
19565   tree v4hi_ftype_v4sf
19566     = build_function_type_list (V4HI_type_node,
19567                                 V4SF_type_node,
19568                                 NULL_TREE);
19569
19570   tree v2di_ftype_v2di
19571     = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
19572
19573   tree v16qi_ftype_v8hi_v8hi
19574     = build_function_type_list (V16QI_type_node,
19575                                 V8HI_type_node, V8HI_type_node,
19576                                 NULL_TREE);
19577   tree v8hi_ftype_v4si_v4si
19578     = build_function_type_list (V8HI_type_node,
19579                                 V4SI_type_node, V4SI_type_node,
19580                                 NULL_TREE);
19581   tree v8hi_ftype_v16qi_v16qi
19582     = build_function_type_list (V8HI_type_node,
19583                                 V16QI_type_node, V16QI_type_node,
19584                                 NULL_TREE);
19585   tree v4hi_ftype_v8qi_v8qi
19586     = build_function_type_list (V4HI_type_node,
19587                                 V8QI_type_node, V8QI_type_node,
19588                                 NULL_TREE);
19589   tree unsigned_ftype_unsigned_uchar
19590     = build_function_type_list (unsigned_type_node,
19591                                 unsigned_type_node,
19592                                 unsigned_char_type_node,
19593                                 NULL_TREE);
19594   tree unsigned_ftype_unsigned_ushort
19595     = build_function_type_list (unsigned_type_node,
19596                                 unsigned_type_node,
19597                                 short_unsigned_type_node,
19598                                 NULL_TREE);
19599   tree unsigned_ftype_unsigned_unsigned
19600     = build_function_type_list (unsigned_type_node,
19601                                 unsigned_type_node,
19602                                 unsigned_type_node,
19603                                 NULL_TREE);
19604   tree uint64_ftype_uint64_uint64
19605     = build_function_type_list (long_long_unsigned_type_node,
19606                                 long_long_unsigned_type_node,
19607                                 long_long_unsigned_type_node,
19608                                 NULL_TREE);
19609   tree float_ftype_float
19610     = build_function_type_list (float_type_node,
19611                                 float_type_node,
19612                                 NULL_TREE);
19613
19614   tree ftype;
19615
19616   /* The __float80 type.  */
19617   if (TYPE_MODE (long_double_type_node) == XFmode)
19618     (*lang_hooks.types.register_builtin_type) (long_double_type_node,
19619                                                "__float80");
19620   else
19621     {
19622       /* The __float80 type.  */
19623       tree float80_type_node = make_node (REAL_TYPE);
19624
19625       TYPE_PRECISION (float80_type_node) = 80;
19626       layout_type (float80_type_node);
19627       (*lang_hooks.types.register_builtin_type) (float80_type_node,
19628                                                  "__float80");
19629     }
19630
19631   if (TARGET_64BIT)
19632     {
19633       tree float128_type_node = make_node (REAL_TYPE);
19634
19635       TYPE_PRECISION (float128_type_node) = 128;
19636       layout_type (float128_type_node);
19637       (*lang_hooks.types.register_builtin_type) (float128_type_node,
19638                                                  "__float128");
19639
19640       /* TFmode support builtins.  */
19641       ftype = build_function_type (float128_type_node,
19642                                    void_list_node);
19643       def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_infq", ftype, IX86_BUILTIN_INFQ);
19644
19645       ftype = build_function_type_list (float128_type_node,
19646                                         float128_type_node,
19647                                         NULL_TREE);
19648       def_builtin_const (OPTION_MASK_ISA_64BIT, "__builtin_fabsq", ftype, IX86_BUILTIN_FABSQ);
19649
19650       ftype = build_function_type_list (float128_type_node,
19651                                         float128_type_node,
19652                                         float128_type_node,
19653                                         NULL_TREE);
19654       def_builtin_const (OPTION_MASK_ISA_64BIT, "__builtin_copysignq", ftype, IX86_BUILTIN_COPYSIGNQ);
19655     }
19656
19657   /* Add all special builtins with variable number of operands.  */
19658   for (i = 0, d = bdesc_special_args;
19659        i < ARRAY_SIZE (bdesc_special_args);
19660        i++, d++)
19661     {
19662       tree type;
19663
19664       if (d->name == 0)
19665         continue;
19666
19667       switch ((enum ix86_special_builtin_type) d->flag)
19668         {
19669         case VOID_FTYPE_VOID:
19670           type = void_ftype_void;
19671           break;
19672         case V16QI_FTYPE_PCCHAR:
19673           type = v16qi_ftype_pcchar;
19674           break;
19675         case V4SF_FTYPE_PCFLOAT:
19676           type = v4sf_ftype_pcfloat;
19677           break;
19678         case V2DI_FTYPE_PV2DI:
19679           type = v2di_ftype_pv2di;
19680           break;
19681         case V2DF_FTYPE_PCDOUBLE:
19682           type = v2df_ftype_pcdouble;
19683           break;
19684         case V4SF_FTYPE_V4SF_PCV2SF:
19685           type = v4sf_ftype_v4sf_pcv2sf;
19686           break;
19687         case V2DF_FTYPE_V2DF_PCDOUBLE:
19688           type = v2df_ftype_v2df_pcdouble;
19689           break;
19690         case VOID_FTYPE_PV2SF_V4SF:
19691           type = void_ftype_pv2sf_v4sf;
19692           break;
19693         case VOID_FTYPE_PV2DI_V2DI:
19694           type = void_ftype_pv2di_v2di;
19695           break;
19696         case VOID_FTYPE_PCHAR_V16QI:
19697           type = void_ftype_pchar_v16qi;
19698           break;
19699         case VOID_FTYPE_PFLOAT_V4SF:
19700           type = void_ftype_pfloat_v4sf;
19701           break;
19702         case VOID_FTYPE_PDOUBLE_V2DF:
19703           type = void_ftype_pdouble_v2df;
19704           break;
19705         case VOID_FTYPE_PDI_DI:
19706           type = void_ftype_pdi_di;
19707           break;
19708         case VOID_FTYPE_PINT_INT:
19709           type = void_ftype_pint_int;
19710           break;
19711         default:
19712           gcc_unreachable ();
19713         }
19714
19715       def_builtin (d->mask, d->name, type, d->code);
19716     }
19717
19718   /* Add all builtins with variable number of operands.  */
19719   for (i = 0, d = bdesc_args;
19720        i < ARRAY_SIZE (bdesc_args);
19721        i++, d++)
19722     {
19723       tree type;
19724
19725       if (d->name == 0)
19726         continue;
19727
19728       switch ((enum ix86_builtin_type) d->flag)
19729         {
19730         case FLOAT_FTYPE_FLOAT:
19731           type = float_ftype_float;
19732           break;
19733         case INT_FTYPE_V2DI_V2DI_PTEST:
19734           type = int_ftype_v2di_v2di;
19735           break;
19736         case INT64_FTYPE_V4SF:
19737           type = int64_ftype_v4sf;
19738           break;
19739         case INT64_FTYPE_V2DF:
19740           type = int64_ftype_v2df;
19741           break;
19742         case INT_FTYPE_V16QI:
19743           type = int_ftype_v16qi;
19744           break;
19745         case INT_FTYPE_V8QI:
19746           type = int_ftype_v8qi;
19747           break;
19748         case INT_FTYPE_V4SF:
19749           type = int_ftype_v4sf;
19750           break;
19751         case INT_FTYPE_V2DF:
19752           type = int_ftype_v2df;
19753           break;
19754         case V16QI_FTYPE_V16QI:
19755           type = v16qi_ftype_v16qi;
19756           break;
19757         case V8HI_FTYPE_V8HI:
19758           type = v8hi_ftype_v8hi;
19759           break;
19760         case V8HI_FTYPE_V16QI:
19761           type = v8hi_ftype_v16qi;
19762           break;
19763         case V8QI_FTYPE_V8QI:
19764           type = v8qi_ftype_v8qi;
19765           break;
19766         case V4SI_FTYPE_V4SI:
19767           type = v4si_ftype_v4si;
19768           break;
19769         case V4SI_FTYPE_V16QI:
19770           type = v4si_ftype_v16qi;
19771           break;
19772         case V4SI_FTYPE_V8HI:
19773           type = v4si_ftype_v8hi;
19774           break;
19775         case V4SI_FTYPE_V4SF:
19776           type = v4si_ftype_v4sf;
19777           break;
19778         case V4SI_FTYPE_V2DF:
19779           type = v4si_ftype_v2df;
19780           break;
19781         case V4HI_FTYPE_V4HI:
19782           type = v4hi_ftype_v4hi;
19783           break;
19784         case V4SF_FTYPE_V4SF:
19785         case V4SF_FTYPE_V4SF_VEC_MERGE:
19786           type = v4sf_ftype_v4sf;
19787           break;
19788         case V4SF_FTYPE_V4SI:
19789           type = v4sf_ftype_v4si;
19790           break;
19791         case V4SF_FTYPE_V2DF:
19792           type = v4sf_ftype_v2df;
19793           break;
19794         case V2DI_FTYPE_V2DI:
19795           type = v2di_ftype_v2di;
19796           break;
19797         case V2DI_FTYPE_V16QI:
19798           type = v2di_ftype_v16qi;
19799           break;
19800         case V2DI_FTYPE_V8HI:
19801           type = v2di_ftype_v8hi;
19802           break;
19803         case V2DI_FTYPE_V4SI:
19804           type = v2di_ftype_v4si;
19805           break;
19806         case V2SI_FTYPE_V2SI:
19807           type = v2si_ftype_v2si;
19808           break;
19809         case V2SI_FTYPE_V4SF:
19810           type = v2si_ftype_v4sf;
19811           break;
19812         case V2SI_FTYPE_V2DF:
19813           type = v2si_ftype_v2df;
19814           break;
19815         case V2SI_FTYPE_V2SF:
19816           type = v2si_ftype_v2sf;
19817           break;
19818         case V2DF_FTYPE_V4SF:
19819           type = v2df_ftype_v4sf;
19820           break;
19821         case V2DF_FTYPE_V2DF:
19822         case V2DF_FTYPE_V2DF_VEC_MERGE:
19823           type = v2df_ftype_v2df;
19824           break;
19825         case V2DF_FTYPE_V2SI:
19826           type = v2df_ftype_v2si;
19827           break;
19828         case V2DF_FTYPE_V4SI:
19829           type = v2df_ftype_v4si;
19830           break;
19831         case V2SF_FTYPE_V2SF:
19832           type = v2sf_ftype_v2sf;
19833           break;
19834         case V2SF_FTYPE_V2SI:
19835           type = v2sf_ftype_v2si;
19836           break;
19837         case V16QI_FTYPE_V16QI_V16QI:
19838           type = v16qi_ftype_v16qi_v16qi;
19839           break;
19840         case V16QI_FTYPE_V8HI_V8HI:
19841           type = v16qi_ftype_v8hi_v8hi;
19842           break;
19843         case V8QI_FTYPE_V8QI_V8QI:
19844           type = v8qi_ftype_v8qi_v8qi;
19845           break;
19846         case V8QI_FTYPE_V4HI_V4HI:
19847           type = v8qi_ftype_v4hi_v4hi;
19848           break;
19849         case V8HI_FTYPE_V8HI_V8HI:
19850         case V8HI_FTYPE_V8HI_V8HI_COUNT:
19851           type = v8hi_ftype_v8hi_v8hi;
19852           break;
19853         case V8HI_FTYPE_V16QI_V16QI:
19854           type = v8hi_ftype_v16qi_v16qi;
19855           break;
19856         case V8HI_FTYPE_V4SI_V4SI:
19857           type = v8hi_ftype_v4si_v4si;
19858           break;
19859         case V8HI_FTYPE_V8HI_SI_COUNT:
19860           type = v8hi_ftype_v8hi_int;
19861           break;
19862         case V4SI_FTYPE_V4SI_V4SI:
19863         case V4SI_FTYPE_V4SI_V4SI_COUNT:
19864           type = v4si_ftype_v4si_v4si;
19865           break;
19866         case V4SI_FTYPE_V8HI_V8HI:
19867           type = v4si_ftype_v8hi_v8hi;
19868           break;
19869         case V4SI_FTYPE_V4SF_V4SF:
19870           type = v4si_ftype_v4sf_v4sf;
19871           break;
19872         case V4SI_FTYPE_V2DF_V2DF:
19873           type = v4si_ftype_v2df_v2df;
19874           break;
19875         case V4SI_FTYPE_V4SI_SI_COUNT:
19876           type = v4si_ftype_v4si_int;
19877           break;
19878         case V4HI_FTYPE_V4HI_V4HI:
19879         case V4HI_FTYPE_V4HI_V4HI_COUNT:
19880           type = v4hi_ftype_v4hi_v4hi;
19881           break;
19882         case V4HI_FTYPE_V8QI_V8QI:
19883           type = v4hi_ftype_v8qi_v8qi;
19884           break;
19885         case V4HI_FTYPE_V2SI_V2SI:
19886           type = v4hi_ftype_v2si_v2si;
19887           break;
19888         case V4HI_FTYPE_V4HI_SI_COUNT:
19889           type = v4hi_ftype_v4hi_int;
19890           break;
19891         case V4SF_FTYPE_V4SF_V4SF:
19892         case V4SF_FTYPE_V4SF_V4SF_SWAP:
19893           type = v4sf_ftype_v4sf_v4sf;
19894           break;
19895         case V4SF_FTYPE_V4SF_V2SI:
19896           type = v4sf_ftype_v4sf_v2si;
19897           break;
19898         case V4SF_FTYPE_V4SF_V2DF:
19899           type = v4sf_ftype_v4sf_v2df;
19900           break;
19901         case V4SF_FTYPE_V4SF_DI:
19902           type = v4sf_ftype_v4sf_int64;
19903           break;
19904         case V4SF_FTYPE_V4SF_SI:
19905           type = v4sf_ftype_v4sf_int;
19906           break;
19907         case V2DI_FTYPE_V2DI_V2DI:
19908         case V2DI_FTYPE_V2DI_V2DI_COUNT:
19909           type = v2di_ftype_v2di_v2di;
19910           break;
19911         case V2DI_FTYPE_V16QI_V16QI:
19912           type = v2di_ftype_v16qi_v16qi;
19913           break;
19914         case V2DI_FTYPE_V4SI_V4SI:
19915           type = v2di_ftype_v4si_v4si;
19916           break;
19917         case V2DI_FTYPE_V2DI_V16QI:
19918           type = v2di_ftype_v2di_v16qi;
19919           break;
19920         case V2DI_FTYPE_V2DF_V2DF:
19921           type = v2di_ftype_v2df_v2df;
19922           break;
19923         case V2DI_FTYPE_V2DI_SI_COUNT:
19924           type = v2di_ftype_v2di_int;
19925           break;
19926         case V2SI_FTYPE_V2SI_V2SI:
19927         case V2SI_FTYPE_V2SI_V2SI_COUNT:
19928           type = v2si_ftype_v2si_v2si;
19929           break;
19930         case V2SI_FTYPE_V4HI_V4HI:
19931           type = v2si_ftype_v4hi_v4hi;
19932           break;
19933         case V2SI_FTYPE_V2SF_V2SF:
19934           type = v2si_ftype_v2sf_v2sf;
19935           break;
19936         case V2SI_FTYPE_V2SI_SI_COUNT:
19937           type = v2si_ftype_v2si_int;
19938           break;
19939         case V2DF_FTYPE_V2DF_V2DF:
19940         case V2DF_FTYPE_V2DF_V2DF_SWAP:
19941           type = v2df_ftype_v2df_v2df;
19942           break;
19943         case V2DF_FTYPE_V2DF_V4SF:
19944           type = v2df_ftype_v2df_v4sf;
19945           break;
19946         case V2DF_FTYPE_V2DF_DI:
19947           type = v2df_ftype_v2df_int64;
19948           break;
19949         case V2DF_FTYPE_V2DF_SI:
19950           type = v2df_ftype_v2df_int;
19951           break;
19952         case V2SF_FTYPE_V2SF_V2SF:
19953           type = v2sf_ftype_v2sf_v2sf;
19954           break;
19955         case V1DI_FTYPE_V1DI_V1DI:
19956         case V1DI_FTYPE_V1DI_V1DI_COUNT:
19957           type = v1di_ftype_v1di_v1di;
19958           break;
19959         case V1DI_FTYPE_V8QI_V8QI:
19960           type = v1di_ftype_v8qi_v8qi;
19961           break;
19962         case V1DI_FTYPE_V2SI_V2SI:
19963           type = v1di_ftype_v2si_v2si;
19964           break;
19965         case V1DI_FTYPE_V1DI_SI_COUNT:
19966           type = v1di_ftype_v1di_int;
19967           break;
19968         case UINT64_FTYPE_UINT64_UINT64:
19969           type = uint64_ftype_uint64_uint64;
19970           break;
19971         case UINT_FTYPE_UINT_UINT:
19972           type = unsigned_ftype_unsigned_unsigned;
19973           break;
19974         case UINT_FTYPE_UINT_USHORT:
19975           type = unsigned_ftype_unsigned_ushort;
19976           break;
19977         case UINT_FTYPE_UINT_UCHAR:
19978           type = unsigned_ftype_unsigned_uchar;
19979           break;
19980         case V8HI_FTYPE_V8HI_INT:
19981           type = v8hi_ftype_v8hi_int;
19982           break;
19983         case V4SI_FTYPE_V4SI_INT:
19984           type = v4si_ftype_v4si_int;
19985           break;
19986         case V4HI_FTYPE_V4HI_INT:
19987           type = v4hi_ftype_v4hi_int;
19988           break;
19989         case V4SF_FTYPE_V4SF_INT:
19990           type = v4sf_ftype_v4sf_int;
19991           break;
19992         case V2DI_FTYPE_V2DI_INT:
19993         case V2DI2TI_FTYPE_V2DI_INT:
19994           type = v2di_ftype_v2di_int;
19995           break;
19996         case V2DF_FTYPE_V2DF_INT:
19997           type = v2df_ftype_v2df_int;
19998           break;
19999         case V16QI_FTYPE_V16QI_V16QI_V16QI:
20000           type = v16qi_ftype_v16qi_v16qi_v16qi;
20001           break;
20002         case V4SF_FTYPE_V4SF_V4SF_V4SF:
20003           type = v4sf_ftype_v4sf_v4sf_v4sf;
20004           break;
20005         case V2DF_FTYPE_V2DF_V2DF_V2DF:
20006           type = v2df_ftype_v2df_v2df_v2df;
20007           break;
20008         case V16QI_FTYPE_V16QI_V16QI_INT:
20009           type = v16qi_ftype_v16qi_v16qi_int;
20010           break;
20011         case V8HI_FTYPE_V8HI_V8HI_INT:
20012           type = v8hi_ftype_v8hi_v8hi_int;
20013           break;
20014         case V4SI_FTYPE_V4SI_V4SI_INT:
20015           type = v4si_ftype_v4si_v4si_int;
20016           break;
20017         case V4SF_FTYPE_V4SF_V4SF_INT:
20018           type = v4sf_ftype_v4sf_v4sf_int;
20019           break;
20020         case V2DI_FTYPE_V2DI_V2DI_INT:
20021         case V2DI2TI_FTYPE_V2DI_V2DI_INT:
20022           type = v2di_ftype_v2di_v2di_int;
20023           break;
20024         case V2DF_FTYPE_V2DF_V2DF_INT:
20025           type = v2df_ftype_v2df_v2df_int;
20026           break;
20027         case V2DI_FTYPE_V2DI_UINT_UINT:
20028           type = v2di_ftype_v2di_unsigned_unsigned;
20029           break;
20030         case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
20031           type = v2di_ftype_v2di_v2di_unsigned_unsigned;
20032           break;
20033         case V1DI2DI_FTYPE_V1DI_V1DI_INT:
20034           type = v1di_ftype_v1di_v1di_int;
20035           break;
20036         default:
20037           gcc_unreachable ();
20038         }
20039
20040       def_builtin_const (d->mask, d->name, type, d->code);
20041     }
20042
20043   /* pcmpestr[im] insns.  */
20044   for (i = 0, d = bdesc_pcmpestr;
20045        i < ARRAY_SIZE (bdesc_pcmpestr);
20046        i++, d++)
20047     {
20048       if (d->code == IX86_BUILTIN_PCMPESTRM128)
20049         ftype = v16qi_ftype_v16qi_int_v16qi_int_int;
20050       else
20051         ftype = int_ftype_v16qi_int_v16qi_int_int;
20052       def_builtin_const (d->mask, d->name, ftype, d->code);
20053     }
20054
20055   /* pcmpistr[im] insns.  */
20056   for (i = 0, d = bdesc_pcmpistr;
20057        i < ARRAY_SIZE (bdesc_pcmpistr);
20058        i++, d++)
20059     {
20060       if (d->code == IX86_BUILTIN_PCMPISTRM128)
20061         ftype = v16qi_ftype_v16qi_v16qi_int;
20062       else
20063         ftype = int_ftype_v16qi_v16qi_int;
20064       def_builtin_const (d->mask, d->name, ftype, d->code);
20065     }
20066
20067   /* comi/ucomi insns.  */
20068   for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
20069     if (d->mask == OPTION_MASK_ISA_SSE2)
20070       def_builtin_const (d->mask, d->name, int_ftype_v2df_v2df, d->code);
20071     else
20072       def_builtin_const (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
20073
20074   /* SSE */
20075   def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
20076   def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
20077
20078   /* SSE or 3DNow!A */
20079   def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
20080
20081   /* SSE2 */
20082   def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
20083
20084   def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
20085   x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
20086
20087   /* SSE3.  */
20088   def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor", void_ftype_pcvoid_unsigned_unsigned, IX86_BUILTIN_MONITOR);
20089   def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait", void_ftype_unsigned_unsigned, IX86_BUILTIN_MWAIT);
20090
20091   /* AES */
20092   if (TARGET_AES)
20093     {
20094       /* Define AES built-in functions only if AES is enabled.  */
20095       def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aesenc128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENC128);
20096       def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aesenclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENCLAST128);
20097       def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aesdec128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDEC128);
20098       def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aesdeclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDECLAST128);
20099       def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aesimc128", v2di_ftype_v2di, IX86_BUILTIN_AESIMC128);
20100       def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aeskeygenassist128", v2di_ftype_v2di_int, IX86_BUILTIN_AESKEYGENASSIST128);
20101     }
20102
20103   /* PCLMUL */
20104   if (TARGET_PCLMUL)
20105     {
20106       /* Define PCLMUL built-in function only if PCLMUL is enabled.  */
20107       def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pclmulqdq128", v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PCLMULQDQ128);
20108     }
20109
20110   /* Access to the vec_init patterns.  */
20111   ftype = build_function_type_list (V2SI_type_node, integer_type_node,
20112                                     integer_type_node, NULL_TREE);
20113   def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si", ftype, IX86_BUILTIN_VEC_INIT_V2SI);
20114
20115   ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
20116                                     short_integer_type_node,
20117                                     short_integer_type_node,
20118                                     short_integer_type_node, NULL_TREE);
20119   def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi", ftype, IX86_BUILTIN_VEC_INIT_V4HI);
20120
20121   ftype = build_function_type_list (V8QI_type_node, char_type_node,
20122                                     char_type_node, char_type_node,
20123                                     char_type_node, char_type_node,
20124                                     char_type_node, char_type_node,
20125                                     char_type_node, NULL_TREE);
20126   def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi", ftype, IX86_BUILTIN_VEC_INIT_V8QI);
20127
20128   /* Access to the vec_extract patterns.  */
20129   ftype = build_function_type_list (double_type_node, V2DF_type_node,
20130                                     integer_type_node, NULL_TREE);
20131   def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df", ftype, IX86_BUILTIN_VEC_EXT_V2DF);
20132
20133   ftype = build_function_type_list (long_long_integer_type_node,
20134                                     V2DI_type_node, integer_type_node,
20135                                     NULL_TREE);
20136   def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di", ftype, IX86_BUILTIN_VEC_EXT_V2DI);
20137
20138   ftype = build_function_type_list (float_type_node, V4SF_type_node,
20139                                     integer_type_node, NULL_TREE);
20140   def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf", ftype, IX86_BUILTIN_VEC_EXT_V4SF);
20141
20142   ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
20143                                     integer_type_node, NULL_TREE);
20144   def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si", ftype, IX86_BUILTIN_VEC_EXT_V4SI);
20145
20146   ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
20147                                     integer_type_node, NULL_TREE);
20148   def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi", ftype, IX86_BUILTIN_VEC_EXT_V8HI);
20149
20150   ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
20151                                     integer_type_node, NULL_TREE);
20152   def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_ext_v4hi", ftype, IX86_BUILTIN_VEC_EXT_V4HI);
20153
20154   ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
20155                                     integer_type_node, NULL_TREE);
20156   def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si", ftype, IX86_BUILTIN_VEC_EXT_V2SI);
20157
20158   ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
20159                                     integer_type_node, NULL_TREE);
20160   def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype, IX86_BUILTIN_VEC_EXT_V16QI);
20161
20162   /* Access to the vec_set patterns.  */
20163   ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
20164                                     intDI_type_node,
20165                                     integer_type_node, NULL_TREE);
20166   def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_vec_set_v2di", ftype, IX86_BUILTIN_VEC_SET_V2DI);
20167
20168   ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
20169                                     float_type_node,
20170                                     integer_type_node, NULL_TREE);
20171   def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf", ftype, IX86_BUILTIN_VEC_SET_V4SF);
20172
20173   ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
20174                                     intSI_type_node,
20175                                     integer_type_node, NULL_TREE);
20176   def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si", ftype, IX86_BUILTIN_VEC_SET_V4SI);
20177
20178   ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
20179                                     intHI_type_node,
20180                                     integer_type_node, NULL_TREE);
20181   def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi", ftype, IX86_BUILTIN_VEC_SET_V8HI);
20182
20183   ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
20184                                     intHI_type_node,
20185                                     integer_type_node, NULL_TREE);
20186   def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_set_v4hi", ftype, IX86_BUILTIN_VEC_SET_V4HI);
20187
20188   ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
20189                                     intQI_type_node,
20190                                     integer_type_node, NULL_TREE);
20191   def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi", ftype, IX86_BUILTIN_VEC_SET_V16QI);
20192
20193   /* Add SSE5 multi-arg argument instructions */
20194   for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
20195     {
20196       tree mtype = NULL_TREE;
20197
20198       if (d->name == 0)
20199         continue;
20200
20201       switch ((enum multi_arg_type)d->flag)
20202         {
20203         case MULTI_ARG_3_SF:     mtype = v4sf_ftype_v4sf_v4sf_v4sf;     break;
20204         case MULTI_ARG_3_DF:     mtype = v2df_ftype_v2df_v2df_v2df;     break;
20205         case MULTI_ARG_3_DI:     mtype = v2di_ftype_v2di_v2di_v2di;     break;
20206         case MULTI_ARG_3_SI:     mtype = v4si_ftype_v4si_v4si_v4si;     break;
20207         case MULTI_ARG_3_SI_DI:  mtype = v4si_ftype_v4si_v4si_v2di;     break;
20208         case MULTI_ARG_3_HI:     mtype = v8hi_ftype_v8hi_v8hi_v8hi;     break;
20209         case MULTI_ARG_3_HI_SI:  mtype = v8hi_ftype_v8hi_v8hi_v4si;     break;
20210         case MULTI_ARG_3_QI:     mtype = v16qi_ftype_v16qi_v16qi_v16qi; break;
20211         case MULTI_ARG_3_PERMPS: mtype = v4sf_ftype_v4sf_v4sf_v16qi;    break;
20212         case MULTI_ARG_3_PERMPD: mtype = v2df_ftype_v2df_v2df_v16qi;    break;
20213         case MULTI_ARG_2_SF:     mtype = v4sf_ftype_v4sf_v4sf;          break;
20214         case MULTI_ARG_2_DF:     mtype = v2df_ftype_v2df_v2df;          break;
20215         case MULTI_ARG_2_DI:     mtype = v2di_ftype_v2di_v2di;          break;
20216         case MULTI_ARG_2_SI:     mtype = v4si_ftype_v4si_v4si;          break;
20217         case MULTI_ARG_2_HI:     mtype = v8hi_ftype_v8hi_v8hi;          break;
20218         case MULTI_ARG_2_QI:     mtype = v16qi_ftype_v16qi_v16qi;       break;
20219         case MULTI_ARG_2_DI_IMM: mtype = v2di_ftype_v2di_si;            break;
20220         case MULTI_ARG_2_SI_IMM: mtype = v4si_ftype_v4si_si;            break;
20221         case MULTI_ARG_2_HI_IMM: mtype = v8hi_ftype_v8hi_si;            break;
20222         case MULTI_ARG_2_QI_IMM: mtype = v16qi_ftype_v16qi_si;          break;
20223         case MULTI_ARG_2_SF_CMP: mtype = v4sf_ftype_v4sf_v4sf;          break;
20224         case MULTI_ARG_2_DF_CMP: mtype = v2df_ftype_v2df_v2df;          break;
20225         case MULTI_ARG_2_DI_CMP: mtype = v2di_ftype_v2di_v2di;          break;
20226         case MULTI_ARG_2_SI_CMP: mtype = v4si_ftype_v4si_v4si;          break;
20227         case MULTI_ARG_2_HI_CMP: mtype = v8hi_ftype_v8hi_v8hi;          break;
20228         case MULTI_ARG_2_QI_CMP: mtype = v16qi_ftype_v16qi_v16qi;       break;
20229         case MULTI_ARG_2_SF_TF:  mtype = v4sf_ftype_v4sf_v4sf;          break;
20230         case MULTI_ARG_2_DF_TF:  mtype = v2df_ftype_v2df_v2df;          break;
20231         case MULTI_ARG_2_DI_TF:  mtype = v2di_ftype_v2di_v2di;          break;
20232         case MULTI_ARG_2_SI_TF:  mtype = v4si_ftype_v4si_v4si;          break;
20233         case MULTI_ARG_2_HI_TF:  mtype = v8hi_ftype_v8hi_v8hi;          break;
20234         case MULTI_ARG_2_QI_TF:  mtype = v16qi_ftype_v16qi_v16qi;       break;
20235         case MULTI_ARG_1_SF:     mtype = v4sf_ftype_v4sf;               break;
20236         case MULTI_ARG_1_DF:     mtype = v2df_ftype_v2df;               break;
20237         case MULTI_ARG_1_DI:     mtype = v2di_ftype_v2di;               break;
20238         case MULTI_ARG_1_SI:     mtype = v4si_ftype_v4si;               break;
20239         case MULTI_ARG_1_HI:     mtype = v8hi_ftype_v8hi;               break;
20240         case MULTI_ARG_1_QI:     mtype = v16qi_ftype_v16qi;             break;
20241         case MULTI_ARG_1_SI_DI:  mtype = v2di_ftype_v4si;               break;
20242         case MULTI_ARG_1_HI_DI:  mtype = v2di_ftype_v8hi;               break;
20243         case MULTI_ARG_1_HI_SI:  mtype = v4si_ftype_v8hi;               break;
20244         case MULTI_ARG_1_QI_DI:  mtype = v2di_ftype_v16qi;              break;
20245         case MULTI_ARG_1_QI_SI:  mtype = v4si_ftype_v16qi;              break;
20246         case MULTI_ARG_1_QI_HI:  mtype = v8hi_ftype_v16qi;              break;
20247         case MULTI_ARG_1_PH2PS:  mtype = v4sf_ftype_v4hi;               break;
20248         case MULTI_ARG_1_PS2PH:  mtype = v4hi_ftype_v4sf;               break;
20249         case MULTI_ARG_UNKNOWN:
20250         default:
20251           gcc_unreachable ();
20252         }
20253
20254       if (mtype)
20255         def_builtin_const (d->mask, d->name, mtype, d->code);
20256     }
20257 }
20258
20259 static void
20260 ix86_init_builtins (void)
20261 {
20262   if (TARGET_MMX)
20263     ix86_init_mmx_sse_builtins ();
20264 }
20265
20266 /* Errors in the source file can cause expand_expr to return const0_rtx
20267    where we expect a vector.  To avoid crashing, use one of the vector
20268    clear instructions.  */
20269 static rtx
20270 safe_vector_operand (rtx x, enum machine_mode mode)
20271 {
20272   if (x == const0_rtx)
20273     x = CONST0_RTX (mode);
20274   return x;
20275 }
20276
20277 /* Subroutine of ix86_expand_builtin to take care of binop insns.  */
20278
20279 static rtx
20280 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
20281 {
20282   rtx pat;
20283   tree arg0 = CALL_EXPR_ARG (exp, 0);
20284   tree arg1 = CALL_EXPR_ARG (exp, 1);
20285   rtx op0 = expand_normal (arg0);
20286   rtx op1 = expand_normal (arg1);
20287   enum machine_mode tmode = insn_data[icode].operand[0].mode;
20288   enum machine_mode mode0 = insn_data[icode].operand[1].mode;
20289   enum machine_mode mode1 = insn_data[icode].operand[2].mode;
20290
20291   if (VECTOR_MODE_P (mode0))
20292     op0 = safe_vector_operand (op0, mode0);
20293   if (VECTOR_MODE_P (mode1))
20294     op1 = safe_vector_operand (op1, mode1);
20295
20296   if (optimize || !target
20297       || GET_MODE (target) != tmode
20298       || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20299     target = gen_reg_rtx (tmode);
20300
20301   if (GET_MODE (op1) == SImode && mode1 == TImode)
20302     {
20303       rtx x = gen_reg_rtx (V4SImode);
20304       emit_insn (gen_sse2_loadd (x, op1));
20305       op1 = gen_lowpart (TImode, x);
20306     }
20307
20308   if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
20309     op0 = copy_to_mode_reg (mode0, op0);
20310   if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
20311     op1 = copy_to_mode_reg (mode1, op1);
20312
20313   pat = GEN_FCN (icode) (target, op0, op1);
20314   if (! pat)
20315     return 0;
20316
20317   emit_insn (pat);
20318
20319   return target;
20320 }
20321
20322 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns.  */
20323
20324 static rtx
20325 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
20326                                enum multi_arg_type m_type,
20327                                enum insn_code sub_code)
20328 {
20329   rtx pat;
20330   int i;
20331   int nargs;
20332   bool comparison_p = false;
20333   bool tf_p = false;
20334   bool last_arg_constant = false;
20335   int num_memory = 0;
20336   struct {
20337     rtx op;
20338     enum machine_mode mode;
20339   } args[4];
20340
20341   enum machine_mode tmode = insn_data[icode].operand[0].mode;
20342
20343   switch (m_type)
20344     {
20345     case MULTI_ARG_3_SF:
20346     case MULTI_ARG_3_DF:
20347     case MULTI_ARG_3_DI:
20348     case MULTI_ARG_3_SI:
20349     case MULTI_ARG_3_SI_DI:
20350     case MULTI_ARG_3_HI:
20351     case MULTI_ARG_3_HI_SI:
20352     case MULTI_ARG_3_QI:
20353     case MULTI_ARG_3_PERMPS:
20354     case MULTI_ARG_3_PERMPD:
20355       nargs = 3;
20356       break;
20357
20358     case MULTI_ARG_2_SF:
20359     case MULTI_ARG_2_DF:
20360     case MULTI_ARG_2_DI:
20361     case MULTI_ARG_2_SI:
20362     case MULTI_ARG_2_HI:
20363     case MULTI_ARG_2_QI:
20364       nargs = 2;
20365       break;
20366
20367     case MULTI_ARG_2_DI_IMM:
20368     case MULTI_ARG_2_SI_IMM:
20369     case MULTI_ARG_2_HI_IMM:
20370     case MULTI_ARG_2_QI_IMM:
20371       nargs = 2;
20372       last_arg_constant = true;
20373       break;
20374
20375     case MULTI_ARG_1_SF:
20376     case MULTI_ARG_1_DF:
20377     case MULTI_ARG_1_DI:
20378     case MULTI_ARG_1_SI:
20379     case MULTI_ARG_1_HI:
20380     case MULTI_ARG_1_QI:
20381     case MULTI_ARG_1_SI_DI:
20382     case MULTI_ARG_1_HI_DI:
20383     case MULTI_ARG_1_HI_SI:
20384     case MULTI_ARG_1_QI_DI:
20385     case MULTI_ARG_1_QI_SI:
20386     case MULTI_ARG_1_QI_HI:
20387     case MULTI_ARG_1_PH2PS:
20388     case MULTI_ARG_1_PS2PH:
20389       nargs = 1;
20390       break;
20391
20392     case MULTI_ARG_2_SF_CMP:
20393     case MULTI_ARG_2_DF_CMP:
20394     case MULTI_ARG_2_DI_CMP:
20395     case MULTI_ARG_2_SI_CMP:
20396     case MULTI_ARG_2_HI_CMP:
20397     case MULTI_ARG_2_QI_CMP:
20398       nargs = 2;
20399       comparison_p = true;
20400       break;
20401
20402     case MULTI_ARG_2_SF_TF:
20403     case MULTI_ARG_2_DF_TF:
20404     case MULTI_ARG_2_DI_TF:
20405     case MULTI_ARG_2_SI_TF:
20406     case MULTI_ARG_2_HI_TF:
20407     case MULTI_ARG_2_QI_TF:
20408       nargs = 2;
20409       tf_p = true;
20410       break;
20411
20412     case MULTI_ARG_UNKNOWN:
20413     default:
20414       gcc_unreachable ();
20415     }
20416
20417   if (optimize || !target
20418       || GET_MODE (target) != tmode
20419       || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20420     target = gen_reg_rtx (tmode);
20421
20422   gcc_assert (nargs <= 4);
20423
20424   for (i = 0; i < nargs; i++)
20425     {
20426       tree arg = CALL_EXPR_ARG (exp, i);
20427       rtx op = expand_normal (arg);
20428       int adjust = (comparison_p) ? 1 : 0;
20429       enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
20430
20431       if (last_arg_constant && i == nargs-1)
20432         {
20433           if (GET_CODE (op) != CONST_INT)
20434             {
20435               error ("last argument must be an immediate");
20436               return gen_reg_rtx (tmode);
20437             }
20438         }
20439       else
20440         {
20441           if (VECTOR_MODE_P (mode))
20442             op = safe_vector_operand (op, mode);
20443
20444           /* If we aren't optimizing, only allow one memory operand to be
20445              generated.  */
20446           if (memory_operand (op, mode))
20447             num_memory++;
20448
20449           gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
20450
20451           if (optimize
20452               || ! (*insn_data[icode].operand[i+adjust+1].predicate) (op, mode)
20453               || num_memory > 1)
20454             op = force_reg (mode, op);
20455         }
20456
20457       args[i].op = op;
20458       args[i].mode = mode;
20459     }
20460
20461   switch (nargs)
20462     {
20463     case 1:
20464       pat = GEN_FCN (icode) (target, args[0].op);
20465       break;
20466
20467     case 2:
20468       if (tf_p)
20469         pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
20470                                GEN_INT ((int)sub_code));
20471       else if (! comparison_p)
20472         pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
20473       else
20474         {
20475           rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
20476                                        args[0].op,
20477                                        args[1].op);
20478
20479           pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
20480         }
20481       break;
20482
20483     case 3:
20484       pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
20485       break;
20486
20487     default:
20488       gcc_unreachable ();
20489     }
20490
20491   if (! pat)
20492     return 0;
20493
20494   emit_insn (pat);
20495   return target;
20496 }
20497
20498 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
20499    insns with vec_merge.  */
20500
20501 static rtx
20502 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
20503                                     rtx target)
20504 {
20505   rtx pat;
20506   tree arg0 = CALL_EXPR_ARG (exp, 0);
20507   rtx op1, op0 = expand_normal (arg0);
20508   enum machine_mode tmode = insn_data[icode].operand[0].mode;
20509   enum machine_mode mode0 = insn_data[icode].operand[1].mode;
20510
20511   if (optimize || !target
20512       || GET_MODE (target) != tmode
20513       || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20514     target = gen_reg_rtx (tmode);
20515
20516   if (VECTOR_MODE_P (mode0))
20517     op0 = safe_vector_operand (op0, mode0);
20518
20519   if ((optimize && !register_operand (op0, mode0))
20520       || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
20521     op0 = copy_to_mode_reg (mode0, op0);
20522
20523   op1 = op0;
20524   if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
20525     op1 = copy_to_mode_reg (mode0, op1);
20526
20527   pat = GEN_FCN (icode) (target, op0, op1);
20528   if (! pat)
20529     return 0;
20530   emit_insn (pat);
20531   return target;
20532 }
20533
20534 /* Subroutine of ix86_expand_builtin to take care of comparison insns.  */
20535
20536 static rtx
20537 ix86_expand_sse_compare (const struct builtin_description *d,
20538                          tree exp, rtx target, bool swap)
20539 {
20540   rtx pat;
20541   tree arg0 = CALL_EXPR_ARG (exp, 0);
20542   tree arg1 = CALL_EXPR_ARG (exp, 1);
20543   rtx op0 = expand_normal (arg0);
20544   rtx op1 = expand_normal (arg1);
20545   rtx op2;
20546   enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
20547   enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
20548   enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
20549   enum rtx_code comparison = d->comparison;
20550
20551   if (VECTOR_MODE_P (mode0))
20552     op0 = safe_vector_operand (op0, mode0);
20553   if (VECTOR_MODE_P (mode1))
20554     op1 = safe_vector_operand (op1, mode1);
20555
20556   /* Swap operands if we have a comparison that isn't available in
20557      hardware.  */
20558   if (swap)
20559     {
20560       rtx tmp = gen_reg_rtx (mode1);
20561       emit_move_insn (tmp, op1);
20562       op1 = op0;
20563       op0 = tmp;
20564     }
20565
20566   if (optimize || !target
20567       || GET_MODE (target) != tmode
20568       || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
20569     target = gen_reg_rtx (tmode);
20570
20571   if ((optimize && !register_operand (op0, mode0))
20572       || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
20573     op0 = copy_to_mode_reg (mode0, op0);
20574   if ((optimize && !register_operand (op1, mode1))
20575       || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
20576     op1 = copy_to_mode_reg (mode1, op1);
20577
20578   op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
20579   pat = GEN_FCN (d->icode) (target, op0, op1, op2);
20580   if (! pat)
20581     return 0;
20582   emit_insn (pat);
20583   return target;
20584 }
20585
20586 /* Subroutine of ix86_expand_builtin to take care of comi insns.  */
20587
20588 static rtx
20589 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
20590                       rtx target)
20591 {
20592   rtx pat;
20593   tree arg0 = CALL_EXPR_ARG (exp, 0);
20594   tree arg1 = CALL_EXPR_ARG (exp, 1);
20595   rtx op0 = expand_normal (arg0);
20596   rtx op1 = expand_normal (arg1);
20597   enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
20598   enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
20599   enum rtx_code comparison = d->comparison;
20600
20601   if (VECTOR_MODE_P (mode0))
20602     op0 = safe_vector_operand (op0, mode0);
20603   if (VECTOR_MODE_P (mode1))
20604     op1 = safe_vector_operand (op1, mode1);
20605
20606   /* Swap operands if we have a comparison that isn't available in
20607      hardware.  */
20608   if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
20609     {
20610       rtx tmp = op1;
20611       op1 = op0;
20612       op0 = tmp;
20613     }
20614
20615   target = gen_reg_rtx (SImode);
20616   emit_move_insn (target, const0_rtx);
20617   target = gen_rtx_SUBREG (QImode, target, 0);
20618
20619   if ((optimize && !register_operand (op0, mode0))
20620       || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
20621     op0 = copy_to_mode_reg (mode0, op0);
20622   if ((optimize && !register_operand (op1, mode1))
20623       || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
20624     op1 = copy_to_mode_reg (mode1, op1);
20625
20626   pat = GEN_FCN (d->icode) (op0, op1);
20627   if (! pat)
20628     return 0;
20629   emit_insn (pat);
20630   emit_insn (gen_rtx_SET (VOIDmode,
20631                           gen_rtx_STRICT_LOW_PART (VOIDmode, target),
20632                           gen_rtx_fmt_ee (comparison, QImode,
20633                                           SET_DEST (pat),
20634                                           const0_rtx)));
20635
20636   return SUBREG_REG (target);
20637 }
20638
20639 /* Subroutine of ix86_expand_builtin to take care of ptest insns.  */
20640
20641 static rtx
20642 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
20643                        rtx target)
20644 {
20645   rtx pat;
20646   tree arg0 = CALL_EXPR_ARG (exp, 0);
20647   tree arg1 = CALL_EXPR_ARG (exp, 1);
20648   rtx op0 = expand_normal (arg0);
20649   rtx op1 = expand_normal (arg1);
20650   enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
20651   enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
20652   enum rtx_code comparison = d->comparison;
20653
20654   if (VECTOR_MODE_P (mode0))
20655     op0 = safe_vector_operand (op0, mode0);
20656   if (VECTOR_MODE_P (mode1))
20657     op1 = safe_vector_operand (op1, mode1);
20658
20659   target = gen_reg_rtx (SImode);
20660   emit_move_insn (target, const0_rtx);
20661   target = gen_rtx_SUBREG (QImode, target, 0);
20662
20663   if ((optimize && !register_operand (op0, mode0))
20664       || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
20665     op0 = copy_to_mode_reg (mode0, op0);
20666   if ((optimize && !register_operand (op1, mode1))
20667       || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
20668     op1 = copy_to_mode_reg (mode1, op1);
20669
20670   pat = GEN_FCN (d->icode) (op0, op1);
20671   if (! pat)
20672     return 0;
20673   emit_insn (pat);
20674   emit_insn (gen_rtx_SET (VOIDmode,
20675                           gen_rtx_STRICT_LOW_PART (VOIDmode, target),
20676                           gen_rtx_fmt_ee (comparison, QImode,
20677                                           SET_DEST (pat),
20678                                           const0_rtx)));
20679
20680   return SUBREG_REG (target);
20681 }
20682
20683 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns.  */
20684
20685 static rtx
20686 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
20687                           tree exp, rtx target)
20688 {
20689   rtx pat;
20690   tree arg0 = CALL_EXPR_ARG (exp, 0);
20691   tree arg1 = CALL_EXPR_ARG (exp, 1);
20692   tree arg2 = CALL_EXPR_ARG (exp, 2);
20693   tree arg3 = CALL_EXPR_ARG (exp, 3);
20694   tree arg4 = CALL_EXPR_ARG (exp, 4);
20695   rtx scratch0, scratch1;
20696   rtx op0 = expand_normal (arg0);
20697   rtx op1 = expand_normal (arg1);
20698   rtx op2 = expand_normal (arg2);
20699   rtx op3 = expand_normal (arg3);
20700   rtx op4 = expand_normal (arg4);
20701   enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
20702
20703   tmode0 = insn_data[d->icode].operand[0].mode;
20704   tmode1 = insn_data[d->icode].operand[1].mode;
20705   modev2 = insn_data[d->icode].operand[2].mode;
20706   modei3 = insn_data[d->icode].operand[3].mode;
20707   modev4 = insn_data[d->icode].operand[4].mode;
20708   modei5 = insn_data[d->icode].operand[5].mode;
20709   modeimm = insn_data[d->icode].operand[6].mode;
20710
20711   if (VECTOR_MODE_P (modev2))
20712     op0 = safe_vector_operand (op0, modev2);
20713   if (VECTOR_MODE_P (modev4))
20714     op2 = safe_vector_operand (op2, modev4);
20715
20716   if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
20717     op0 = copy_to_mode_reg (modev2, op0);
20718   if (! (*insn_data[d->icode].operand[3].predicate) (op1, modei3))
20719     op1 = copy_to_mode_reg (modei3, op1);
20720   if ((optimize && !register_operand (op2, modev4))
20721       || !(*insn_data[d->icode].operand[4].predicate) (op2, modev4))
20722     op2 = copy_to_mode_reg (modev4, op2);
20723   if (! (*insn_data[d->icode].operand[5].predicate) (op3, modei5))
20724     op3 = copy_to_mode_reg (modei5, op3);
20725
20726   if (! (*insn_data[d->icode].operand[6].predicate) (op4, modeimm))
20727     {
20728       error ("the fifth argument must be a 8-bit immediate");
20729       return const0_rtx;
20730     }
20731
20732   if (d->code == IX86_BUILTIN_PCMPESTRI128)
20733     {
20734       if (optimize || !target
20735           || GET_MODE (target) != tmode0
20736           || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
20737         target = gen_reg_rtx (tmode0);
20738
20739       scratch1 = gen_reg_rtx (tmode1);
20740
20741       pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
20742     }
20743   else if (d->code == IX86_BUILTIN_PCMPESTRM128)
20744     {
20745       if (optimize || !target
20746           || GET_MODE (target) != tmode1
20747           || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
20748         target = gen_reg_rtx (tmode1);
20749
20750       scratch0 = gen_reg_rtx (tmode0);
20751
20752       pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
20753     }
20754   else
20755     {
20756       gcc_assert (d->flag);
20757
20758       scratch0 = gen_reg_rtx (tmode0);
20759       scratch1 = gen_reg_rtx (tmode1);
20760
20761       pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
20762     }
20763
20764   if (! pat)
20765     return 0;
20766
20767   emit_insn (pat);
20768
20769   if (d->flag)
20770     {
20771       target = gen_reg_rtx (SImode);
20772       emit_move_insn (target, const0_rtx);
20773       target = gen_rtx_SUBREG (QImode, target, 0);
20774
20775       emit_insn
20776         (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
20777                       gen_rtx_fmt_ee (EQ, QImode,
20778                                       gen_rtx_REG ((enum machine_mode) d->flag,
20779                                                    FLAGS_REG),
20780                                       const0_rtx)));
20781       return SUBREG_REG (target);
20782     }
20783   else
20784     return target;
20785 }
20786
20787
20788 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns.  */
20789
20790 static rtx
20791 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
20792                           tree exp, rtx target)
20793 {
20794   rtx pat;
20795   tree arg0 = CALL_EXPR_ARG (exp, 0);
20796   tree arg1 = CALL_EXPR_ARG (exp, 1);
20797   tree arg2 = CALL_EXPR_ARG (exp, 2);
20798   rtx scratch0, scratch1;
20799   rtx op0 = expand_normal (arg0);
20800   rtx op1 = expand_normal (arg1);
20801   rtx op2 = expand_normal (arg2);
20802   enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
20803
20804   tmode0 = insn_data[d->icode].operand[0].mode;
20805   tmode1 = insn_data[d->icode].operand[1].mode;
20806   modev2 = insn_data[d->icode].operand[2].mode;
20807   modev3 = insn_data[d->icode].operand[3].mode;
20808   modeimm = insn_data[d->icode].operand[4].mode;
20809
20810   if (VECTOR_MODE_P (modev2))
20811     op0 = safe_vector_operand (op0, modev2);
20812   if (VECTOR_MODE_P (modev3))
20813     op1 = safe_vector_operand (op1, modev3);
20814
20815   if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
20816     op0 = copy_to_mode_reg (modev2, op0);
20817   if ((optimize && !register_operand (op1, modev3))
20818       || !(*insn_data[d->icode].operand[3].predicate) (op1, modev3))
20819     op1 = copy_to_mode_reg (modev3, op1);
20820
20821   if (! (*insn_data[d->icode].operand[4].predicate) (op2, modeimm))
20822     {
20823       error ("the third argument must be a 8-bit immediate");
20824       return const0_rtx;
20825     }
20826
20827   if (d->code == IX86_BUILTIN_PCMPISTRI128)
20828     {
20829       if (optimize || !target
20830           || GET_MODE (target) != tmode0
20831           || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
20832         target = gen_reg_rtx (tmode0);
20833
20834       scratch1 = gen_reg_rtx (tmode1);
20835
20836       pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
20837     }
20838   else if (d->code == IX86_BUILTIN_PCMPISTRM128)
20839     {
20840       if (optimize || !target
20841           || GET_MODE (target) != tmode1
20842           || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
20843         target = gen_reg_rtx (tmode1);
20844
20845       scratch0 = gen_reg_rtx (tmode0);
20846
20847       pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
20848     }
20849   else
20850     {
20851       gcc_assert (d->flag);
20852
20853       scratch0 = gen_reg_rtx (tmode0);
20854       scratch1 = gen_reg_rtx (tmode1);
20855
20856       pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
20857     }
20858
20859   if (! pat)
20860     return 0;
20861
20862   emit_insn (pat);
20863
20864   if (d->flag)
20865     {
20866       target = gen_reg_rtx (SImode);
20867       emit_move_insn (target, const0_rtx);
20868       target = gen_rtx_SUBREG (QImode, target, 0);
20869
20870       emit_insn
20871         (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
20872                       gen_rtx_fmt_ee (EQ, QImode,
20873                                       gen_rtx_REG ((enum machine_mode) d->flag,
20874                                                    FLAGS_REG),
20875                                       const0_rtx)));
20876       return SUBREG_REG (target);
20877     }
20878   else
20879     return target;
20880 }
20881
20882 /* Subroutine of ix86_expand_builtin to take care of insns with
20883    variable number of operands.  */
20884
20885 static rtx
20886 ix86_expand_args_builtin (const struct builtin_description *d,
20887                           tree exp, rtx target)
20888 {
20889   rtx pat, real_target;
20890   unsigned int i, nargs;
20891   unsigned int nargs_constant = 0;
20892   int num_memory = 0;
20893   struct
20894     {
20895       rtx op;
20896       enum machine_mode mode;
20897     } args[4];
20898   bool last_arg_count = false;
20899   enum insn_code icode = d->icode;
20900   const struct insn_data *insn_p = &insn_data[icode];
20901   enum machine_mode tmode = insn_p->operand[0].mode;
20902   enum machine_mode rmode = VOIDmode;
20903   bool swap = false;
20904   enum rtx_code comparison = d->comparison;
20905
20906   switch ((enum ix86_builtin_type) d->flag)
20907     {
20908     case INT_FTYPE_V2DI_V2DI_PTEST:
20909       return ix86_expand_sse_ptest (d, exp, target);
20910     case FLOAT128_FTYPE_FLOAT128:
20911     case FLOAT_FTYPE_FLOAT:
20912     case INT64_FTYPE_V4SF:
20913     case INT64_FTYPE_V2DF:
20914     case INT_FTYPE_V16QI:
20915     case INT_FTYPE_V8QI:
20916     case INT_FTYPE_V4SF:
20917     case INT_FTYPE_V2DF:
20918     case V16QI_FTYPE_V16QI:
20919     case V8HI_FTYPE_V8HI:
20920     case V8HI_FTYPE_V16QI:
20921     case V8QI_FTYPE_V8QI:
20922     case V4SI_FTYPE_V4SI:
20923     case V4SI_FTYPE_V16QI:
20924     case V4SI_FTYPE_V4SF:
20925     case V4SI_FTYPE_V8HI:
20926     case V4SI_FTYPE_V2DF:
20927     case V4HI_FTYPE_V4HI:
20928     case V4SF_FTYPE_V4SF:
20929     case V4SF_FTYPE_V4SI:
20930     case V4SF_FTYPE_V2DF:
20931     case V2DI_FTYPE_V2DI:
20932     case V2DI_FTYPE_V16QI:
20933     case V2DI_FTYPE_V8HI:
20934     case V2DI_FTYPE_V4SI:
20935     case V2DF_FTYPE_V2DF:
20936     case V2DF_FTYPE_V4SI:
20937     case V2DF_FTYPE_V4SF:
20938     case V2DF_FTYPE_V2SI:
20939     case V2SI_FTYPE_V2SI:
20940     case V2SI_FTYPE_V4SF:
20941     case V2SI_FTYPE_V2SF:
20942     case V2SI_FTYPE_V2DF:
20943     case V2SF_FTYPE_V2SF:
20944     case V2SF_FTYPE_V2SI:
20945       nargs = 1;
20946       break;
20947     case V4SF_FTYPE_V4SF_VEC_MERGE:
20948     case V2DF_FTYPE_V2DF_VEC_MERGE:
20949       return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
20950     case FLOAT128_FTYPE_FLOAT128_FLOAT128:
20951     case V16QI_FTYPE_V16QI_V16QI:
20952     case V16QI_FTYPE_V8HI_V8HI:
20953     case V8QI_FTYPE_V8QI_V8QI:
20954     case V8QI_FTYPE_V4HI_V4HI:
20955     case V8HI_FTYPE_V8HI_V8HI:
20956     case V8HI_FTYPE_V16QI_V16QI:
20957     case V8HI_FTYPE_V4SI_V4SI:
20958     case V4SI_FTYPE_V4SI_V4SI:
20959     case V4SI_FTYPE_V8HI_V8HI:
20960     case V4SI_FTYPE_V4SF_V4SF:
20961     case V4SI_FTYPE_V2DF_V2DF:
20962     case V4HI_FTYPE_V4HI_V4HI:
20963     case V4HI_FTYPE_V8QI_V8QI:
20964     case V4HI_FTYPE_V2SI_V2SI:
20965     case V4SF_FTYPE_V4SF_V4SF:
20966     case V4SF_FTYPE_V4SF_V2SI:
20967     case V4SF_FTYPE_V4SF_V2DF:
20968     case V4SF_FTYPE_V4SF_DI:
20969     case V4SF_FTYPE_V4SF_SI:
20970     case V2DI_FTYPE_V2DI_V2DI:
20971     case V2DI_FTYPE_V16QI_V16QI:
20972     case V2DI_FTYPE_V4SI_V4SI:
20973     case V2DI_FTYPE_V2DI_V16QI:
20974     case V2DI_FTYPE_V2DF_V2DF:
20975     case V2SI_FTYPE_V2SI_V2SI:
20976     case V2SI_FTYPE_V4HI_V4HI:
20977     case V2SI_FTYPE_V2SF_V2SF:
20978     case V2DF_FTYPE_V2DF_V2DF:
20979     case V2DF_FTYPE_V2DF_V4SF:
20980     case V2DF_FTYPE_V2DF_DI:
20981     case V2DF_FTYPE_V2DF_SI:
20982     case V2SF_FTYPE_V2SF_V2SF:
20983     case V1DI_FTYPE_V1DI_V1DI:
20984     case V1DI_FTYPE_V8QI_V8QI:
20985     case V1DI_FTYPE_V2SI_V2SI:
20986       if (comparison == UNKNOWN)
20987         return ix86_expand_binop_builtin (icode, exp, target);
20988       nargs = 2;
20989       break;
20990     case V4SF_FTYPE_V4SF_V4SF_SWAP:
20991     case V2DF_FTYPE_V2DF_V2DF_SWAP:
20992       gcc_assert (comparison != UNKNOWN);
20993       nargs = 2;
20994       swap = true;
20995       break;
20996     case V8HI_FTYPE_V8HI_V8HI_COUNT:
20997     case V8HI_FTYPE_V8HI_SI_COUNT:
20998     case V4SI_FTYPE_V4SI_V4SI_COUNT:
20999     case V4SI_FTYPE_V4SI_SI_COUNT:
21000     case V4HI_FTYPE_V4HI_V4HI_COUNT:
21001     case V4HI_FTYPE_V4HI_SI_COUNT:
21002     case V2DI_FTYPE_V2DI_V2DI_COUNT:
21003     case V2DI_FTYPE_V2DI_SI_COUNT:
21004     case V2SI_FTYPE_V2SI_V2SI_COUNT:
21005     case V2SI_FTYPE_V2SI_SI_COUNT:
21006     case V1DI_FTYPE_V1DI_V1DI_COUNT:
21007     case V1DI_FTYPE_V1DI_SI_COUNT:
21008       nargs = 2;
21009       last_arg_count = true;
21010       break;
21011     case UINT64_FTYPE_UINT64_UINT64:
21012     case UINT_FTYPE_UINT_UINT:
21013     case UINT_FTYPE_UINT_USHORT:
21014     case UINT_FTYPE_UINT_UCHAR:
21015       nargs = 2;
21016       break;
21017     case V2DI2TI_FTYPE_V2DI_INT:
21018       nargs = 2;
21019       rmode = V2DImode;
21020       nargs_constant = 1;
21021       break;
21022     case V8HI_FTYPE_V8HI_INT:
21023     case V4SI_FTYPE_V4SI_INT:
21024     case V4HI_FTYPE_V4HI_INT:
21025     case V4SF_FTYPE_V4SF_INT:
21026     case V2DI_FTYPE_V2DI_INT:
21027     case V2DF_FTYPE_V2DF_INT:
21028       nargs = 2;
21029       nargs_constant = 1;
21030       break;
21031     case V16QI_FTYPE_V16QI_V16QI_V16QI:
21032     case V4SF_FTYPE_V4SF_V4SF_V4SF:
21033     case V2DF_FTYPE_V2DF_V2DF_V2DF:
21034       nargs = 3;
21035       break;
21036     case V16QI_FTYPE_V16QI_V16QI_INT:
21037     case V8HI_FTYPE_V8HI_V8HI_INT:
21038     case V4SI_FTYPE_V4SI_V4SI_INT:
21039     case V4SF_FTYPE_V4SF_V4SF_INT:
21040     case V2DI_FTYPE_V2DI_V2DI_INT:
21041     case V2DF_FTYPE_V2DF_V2DF_INT:
21042       nargs = 3;
21043       nargs_constant = 1;
21044       break;
21045     case V2DI2TI_FTYPE_V2DI_V2DI_INT:
21046       nargs = 3;
21047       rmode = V2DImode;
21048       nargs_constant = 1;
21049       break;
21050     case V1DI2DI_FTYPE_V1DI_V1DI_INT:
21051       nargs = 3;
21052       rmode = DImode;
21053       nargs_constant = 1;
21054       break;
21055     case V2DI_FTYPE_V2DI_UINT_UINT:
21056       nargs = 3;
21057       nargs_constant = 2;
21058       break;
21059     case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
21060       nargs = 4;
21061       nargs_constant = 2;
21062       break;
21063     default:
21064       gcc_unreachable ();
21065     }
21066
21067   gcc_assert (nargs <= ARRAY_SIZE (args));
21068
21069   if (comparison != UNKNOWN)
21070     {
21071       gcc_assert (nargs == 2);
21072       return ix86_expand_sse_compare (d, exp, target, swap);
21073     }
21074
21075   if (rmode == VOIDmode || rmode == tmode)
21076     {
21077       if (optimize
21078           || target == 0
21079           || GET_MODE (target) != tmode
21080           || ! (*insn_p->operand[0].predicate) (target, tmode))
21081         target = gen_reg_rtx (tmode);
21082       real_target = target;
21083     }
21084   else
21085     {
21086       target = gen_reg_rtx (rmode);
21087       real_target = simplify_gen_subreg (tmode, target, rmode, 0);
21088     }
21089
21090   for (i = 0; i < nargs; i++)
21091     {
21092       tree arg = CALL_EXPR_ARG (exp, i);
21093       rtx op = expand_normal (arg);
21094       enum machine_mode mode = insn_p->operand[i + 1].mode;
21095       bool match = (*insn_p->operand[i + 1].predicate) (op, mode);
21096
21097       if (last_arg_count && (i + 1) == nargs)
21098         {
21099           /* SIMD shift insns take either an 8-bit immediate or
21100              register as count.  But builtin functions take int as
21101              count.  If count doesn't match, we put it in register.  */
21102           if (!match)
21103             {
21104               op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
21105               if (!(*insn_p->operand[i + 1].predicate) (op, mode))
21106                 op = copy_to_reg (op);
21107             }
21108         }
21109       else if ((nargs - i) <= nargs_constant)
21110         {
21111           if (!match)
21112             switch (icode)
21113               {
21114               case CODE_FOR_sse4_1_roundpd:
21115               case CODE_FOR_sse4_1_roundps:
21116               case CODE_FOR_sse4_1_roundsd:
21117               case CODE_FOR_sse4_1_roundss:
21118               case CODE_FOR_sse4_1_blendps:
21119                 error ("the last argument must be a 4-bit immediate");
21120                 return const0_rtx;
21121
21122               case CODE_FOR_sse4_1_blendpd:
21123                 error ("the last argument must be a 2-bit immediate");
21124                 return const0_rtx;
21125
21126              default:
21127                 switch (nargs_constant)
21128                   {
21129                   case 2:
21130                     if ((nargs - i) == nargs_constant)
21131                       {
21132                         error ("the next to last argument must be an 8-bit immediate");
21133                         break;
21134                       }
21135                   case 1:
21136                     error ("the last argument must be an 8-bit immediate");
21137                     break;
21138                   default:
21139                     gcc_unreachable ();
21140                   }
21141                 return const0_rtx;
21142               }
21143         }
21144       else
21145         {
21146           if (VECTOR_MODE_P (mode))
21147             op = safe_vector_operand (op, mode);
21148
21149           /* If we aren't optimizing, only allow one memory operand to
21150              be generated.  */
21151           if (memory_operand (op, mode))
21152             num_memory++;
21153
21154           if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
21155             {
21156               if (optimize || !match || num_memory > 1)
21157                 op = copy_to_mode_reg (mode, op);
21158             }
21159           else
21160             {
21161               op = copy_to_reg (op);
21162               op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
21163             }
21164         }
21165
21166       args[i].op = op;
21167       args[i].mode = mode;
21168     }
21169
21170   switch (nargs)
21171     {
21172     case 1:
21173       pat = GEN_FCN (icode) (real_target, args[0].op);
21174       break;
21175     case 2:
21176       pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
21177       break;
21178     case 3:
21179       pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
21180                              args[2].op);
21181       break;
21182     case 4:
21183       pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
21184                              args[2].op, args[3].op);
21185       break;
21186     default:
21187       gcc_unreachable ();
21188     }
21189
21190   if (! pat)
21191     return 0;
21192
21193   emit_insn (pat);
21194   return target;
21195 }
21196
21197 /* Subroutine of ix86_expand_builtin to take care of special insns
21198    with variable number of operands.  */
21199
21200 static rtx
21201 ix86_expand_special_args_builtin (const struct builtin_description *d,
21202                                     tree exp, rtx target)
21203 {
21204   tree arg;
21205   rtx pat, op;
21206   unsigned int i, nargs, arg_adjust, memory;
21207   struct
21208     {
21209       rtx op;
21210       enum machine_mode mode;
21211     } args[2];
21212   enum insn_code icode = d->icode;
21213   bool last_arg_constant = false;
21214   const struct insn_data *insn_p = &insn_data[icode];
21215   enum machine_mode tmode = insn_p->operand[0].mode;
21216   enum { load, store } class;
21217
21218   switch ((enum ix86_special_builtin_type) d->flag)
21219     {
21220     case VOID_FTYPE_VOID:
21221       emit_insn (GEN_FCN (icode) (target));
21222       return 0;
21223     case V2DI_FTYPE_PV2DI:
21224     case V16QI_FTYPE_PCCHAR:
21225     case V4SF_FTYPE_PCFLOAT:
21226     case V2DF_FTYPE_PCDOUBLE:
21227       nargs = 1;
21228       class = load;
21229       memory = 0;
21230       break;
21231     case VOID_FTYPE_PV2SF_V4SF:
21232     case VOID_FTYPE_PV2DI_V2DI:
21233     case VOID_FTYPE_PCHAR_V16QI:
21234     case VOID_FTYPE_PFLOAT_V4SF:
21235     case VOID_FTYPE_PDOUBLE_V2DF:
21236     case VOID_FTYPE_PDI_DI:
21237     case VOID_FTYPE_PINT_INT:
21238       nargs = 1;
21239       class = store;
21240       /* Reserve memory operand for target.  */
21241       memory = ARRAY_SIZE (args);
21242       break;
21243     case V4SF_FTYPE_V4SF_PCV2SF:
21244     case V2DF_FTYPE_V2DF_PCDOUBLE:
21245       nargs = 2;
21246       class = load;
21247       memory = 1;
21248       break;
21249     default:
21250       gcc_unreachable ();
21251     }
21252
21253   gcc_assert (nargs <= ARRAY_SIZE (args));
21254
21255   if (class == store)
21256     {
21257       arg = CALL_EXPR_ARG (exp, 0);
21258       op = expand_normal (arg);
21259       gcc_assert (target == 0);
21260       target = gen_rtx_MEM (tmode, copy_to_mode_reg (Pmode, op));
21261       arg_adjust = 1;
21262     }
21263   else
21264     {
21265       arg_adjust = 0;
21266       if (optimize
21267           || target == 0
21268           || GET_MODE (target) != tmode
21269           || ! (*insn_p->operand[0].predicate) (target, tmode))
21270         target = gen_reg_rtx (tmode);
21271     }
21272
21273   for (i = 0; i < nargs; i++)
21274     {
21275       enum machine_mode mode = insn_p->operand[i + 1].mode;
21276       bool match;
21277
21278       arg = CALL_EXPR_ARG (exp, i + arg_adjust);
21279       op = expand_normal (arg);
21280       match = (*insn_p->operand[i + 1].predicate) (op, mode);
21281
21282       if (last_arg_constant && (i + 1) == nargs)
21283         {
21284           if (!match)
21285             switch (icode)
21286               {
21287              default:
21288                 error ("the last argument must be an 8-bit immediate");
21289                 return const0_rtx;
21290               }
21291         }
21292       else
21293         {
21294           if (i == memory)
21295             {
21296               /* This must be the memory operand.  */
21297               op = gen_rtx_MEM (mode, copy_to_mode_reg (Pmode, op));
21298               gcc_assert (GET_MODE (op) == mode
21299                           || GET_MODE (op) == VOIDmode);
21300             }
21301           else
21302             {
21303               /* This must be register.  */
21304               if (VECTOR_MODE_P (mode))
21305                 op = safe_vector_operand (op, mode);
21306
21307               gcc_assert (GET_MODE (op) == mode
21308                           || GET_MODE (op) == VOIDmode);
21309               op = copy_to_mode_reg (mode, op);
21310             }
21311         }
21312
21313       args[i].op = op;
21314       args[i].mode = mode;
21315     }
21316
21317   switch (nargs)
21318     {
21319     case 1:
21320       pat = GEN_FCN (icode) (target, args[0].op);
21321       break;
21322     case 2:
21323       pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
21324       break;
21325     default:
21326       gcc_unreachable ();
21327     }
21328
21329   if (! pat)
21330     return 0;
21331   emit_insn (pat);
21332   return class == store ? 0 : target;
21333 }
21334
21335 /* Return the integer constant in ARG.  Constrain it to be in the range
21336    of the subparts of VEC_TYPE; issue an error if not.  */
21337
21338 static int
21339 get_element_number (tree vec_type, tree arg)
21340 {
21341   unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
21342
21343   if (!host_integerp (arg, 1)
21344       || (elt = tree_low_cst (arg, 1), elt > max))
21345     {
21346       error ("selector must be an integer constant in the range 0..%wi", max);
21347       return 0;
21348     }
21349
21350   return elt;
21351 }
21352
21353 /* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
21354    ix86_expand_vector_init.  We DO have language-level syntax for this, in
21355    the form of  (type){ init-list }.  Except that since we can't place emms
21356    instructions from inside the compiler, we can't allow the use of MMX
21357    registers unless the user explicitly asks for it.  So we do *not* define
21358    vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md.  Instead
21359    we have builtins invoked by mmintrin.h that gives us license to emit
21360    these sorts of instructions.  */
21361
21362 static rtx
21363 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
21364 {
21365   enum machine_mode tmode = TYPE_MODE (type);
21366   enum machine_mode inner_mode = GET_MODE_INNER (tmode);
21367   int i, n_elt = GET_MODE_NUNITS (tmode);
21368   rtvec v = rtvec_alloc (n_elt);
21369
21370   gcc_assert (VECTOR_MODE_P (tmode));
21371   gcc_assert (call_expr_nargs (exp) == n_elt);
21372
21373   for (i = 0; i < n_elt; ++i)
21374     {
21375       rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
21376       RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
21377     }
21378
21379   if (!target || !register_operand (target, tmode))
21380     target = gen_reg_rtx (tmode);
21381
21382   ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
21383   return target;
21384 }
21385
21386 /* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
21387    ix86_expand_vector_extract.  They would be redundant (for non-MMX) if we
21388    had a language-level syntax for referencing vector elements.  */
21389
21390 static rtx
21391 ix86_expand_vec_ext_builtin (tree exp, rtx target)
21392 {
21393   enum machine_mode tmode, mode0;
21394   tree arg0, arg1;
21395   int elt;
21396   rtx op0;
21397
21398   arg0 = CALL_EXPR_ARG (exp, 0);
21399   arg1 = CALL_EXPR_ARG (exp, 1);
21400
21401   op0 = expand_normal (arg0);
21402   elt = get_element_number (TREE_TYPE (arg0), arg1);
21403
21404   tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
21405   mode0 = TYPE_MODE (TREE_TYPE (arg0));
21406   gcc_assert (VECTOR_MODE_P (mode0));
21407
21408   op0 = force_reg (mode0, op0);
21409
21410   if (optimize || !target || !register_operand (target, tmode))
21411     target = gen_reg_rtx (tmode);
21412
21413   ix86_expand_vector_extract (true, target, op0, elt);
21414
21415   return target;
21416 }
21417
21418 /* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
21419    ix86_expand_vector_set.  They would be redundant (for non-MMX) if we had
21420    a language-level syntax for referencing vector elements.  */
21421
21422 static rtx
21423 ix86_expand_vec_set_builtin (tree exp)
21424 {
21425   enum machine_mode tmode, mode1;
21426   tree arg0, arg1, arg2;
21427   int elt;
21428   rtx op0, op1, target;
21429
21430   arg0 = CALL_EXPR_ARG (exp, 0);
21431   arg1 = CALL_EXPR_ARG (exp, 1);
21432   arg2 = CALL_EXPR_ARG (exp, 2);
21433
21434   tmode = TYPE_MODE (TREE_TYPE (arg0));
21435   mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
21436   gcc_assert (VECTOR_MODE_P (tmode));
21437
21438   op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
21439   op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
21440   elt = get_element_number (TREE_TYPE (arg0), arg2);
21441
21442   if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
21443     op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
21444
21445   op0 = force_reg (tmode, op0);
21446   op1 = force_reg (mode1, op1);
21447
21448   /* OP0 is the source of these builtin functions and shouldn't be
21449      modified.  Create a copy, use it and return it as target.  */
21450   target = gen_reg_rtx (tmode);
21451   emit_move_insn (target, op0);
21452   ix86_expand_vector_set (true, target, op1, elt);
21453
21454   return target;
21455 }
21456
21457 /* Expand an expression EXP that calls a built-in function,
21458    with result going to TARGET if that's convenient
21459    (and in mode MODE if that's convenient).
21460    SUBTARGET may be used as the target for computing one of EXP's operands.
21461    IGNORE is nonzero if the value is to be ignored.  */
21462
21463 static rtx
21464 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
21465                      enum machine_mode mode ATTRIBUTE_UNUSED,
21466                      int ignore ATTRIBUTE_UNUSED)
21467 {
21468   const struct builtin_description *d;
21469   size_t i;
21470   enum insn_code icode;
21471   tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
21472   tree arg0, arg1, arg2;
21473   rtx op0, op1, op2, pat;
21474   enum machine_mode mode0, mode1, mode2;
21475   unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
21476
21477   switch (fcode)
21478     {
21479     case IX86_BUILTIN_MASKMOVQ:
21480     case IX86_BUILTIN_MASKMOVDQU:
21481       icode = (fcode == IX86_BUILTIN_MASKMOVQ
21482                ? CODE_FOR_mmx_maskmovq
21483                : CODE_FOR_sse2_maskmovdqu);
21484       /* Note the arg order is different from the operand order.  */
21485       arg1 = CALL_EXPR_ARG (exp, 0);
21486       arg2 = CALL_EXPR_ARG (exp, 1);
21487       arg0 = CALL_EXPR_ARG (exp, 2);
21488       op0 = expand_normal (arg0);
21489       op1 = expand_normal (arg1);
21490       op2 = expand_normal (arg2);
21491       mode0 = insn_data[icode].operand[0].mode;
21492       mode1 = insn_data[icode].operand[1].mode;
21493       mode2 = insn_data[icode].operand[2].mode;
21494
21495       op0 = force_reg (Pmode, op0);
21496       op0 = gen_rtx_MEM (mode1, op0);
21497
21498       if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
21499         op0 = copy_to_mode_reg (mode0, op0);
21500       if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
21501         op1 = copy_to_mode_reg (mode1, op1);
21502       if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
21503         op2 = copy_to_mode_reg (mode2, op2);
21504       pat = GEN_FCN (icode) (op0, op1, op2);
21505       if (! pat)
21506         return 0;
21507       emit_insn (pat);
21508       return 0;
21509
21510     case IX86_BUILTIN_LDMXCSR:
21511       op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
21512       target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
21513       emit_move_insn (target, op0);
21514       emit_insn (gen_sse_ldmxcsr (target));
21515       return 0;
21516
21517     case IX86_BUILTIN_STMXCSR:
21518       target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
21519       emit_insn (gen_sse_stmxcsr (target));
21520       return copy_to_mode_reg (SImode, target);
21521
21522     case IX86_BUILTIN_CLFLUSH:
21523         arg0 = CALL_EXPR_ARG (exp, 0);
21524         op0 = expand_normal (arg0);
21525         icode = CODE_FOR_sse2_clflush;
21526         if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
21527             op0 = copy_to_mode_reg (Pmode, op0);
21528
21529         emit_insn (gen_sse2_clflush (op0));
21530         return 0;
21531
21532     case IX86_BUILTIN_MONITOR:
21533       arg0 = CALL_EXPR_ARG (exp, 0);
21534       arg1 = CALL_EXPR_ARG (exp, 1);
21535       arg2 = CALL_EXPR_ARG (exp, 2);
21536       op0 = expand_normal (arg0);
21537       op1 = expand_normal (arg1);
21538       op2 = expand_normal (arg2);
21539       if (!REG_P (op0))
21540         op0 = copy_to_mode_reg (Pmode, op0);
21541       if (!REG_P (op1))
21542         op1 = copy_to_mode_reg (SImode, op1);
21543       if (!REG_P (op2))
21544         op2 = copy_to_mode_reg (SImode, op2);
21545       if (!TARGET_64BIT)
21546         emit_insn (gen_sse3_monitor (op0, op1, op2));
21547       else
21548         emit_insn (gen_sse3_monitor64 (op0, op1, op2));
21549       return 0;
21550
21551     case IX86_BUILTIN_MWAIT:
21552       arg0 = CALL_EXPR_ARG (exp, 0);
21553       arg1 = CALL_EXPR_ARG (exp, 1);
21554       op0 = expand_normal (arg0);
21555       op1 = expand_normal (arg1);
21556       if (!REG_P (op0))
21557         op0 = copy_to_mode_reg (SImode, op0);
21558       if (!REG_P (op1))
21559         op1 = copy_to_mode_reg (SImode, op1);
21560       emit_insn (gen_sse3_mwait (op0, op1));
21561       return 0;
21562
21563     case IX86_BUILTIN_VEC_INIT_V2SI:
21564     case IX86_BUILTIN_VEC_INIT_V4HI:
21565     case IX86_BUILTIN_VEC_INIT_V8QI:
21566       return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
21567
21568     case IX86_BUILTIN_VEC_EXT_V2DF:
21569     case IX86_BUILTIN_VEC_EXT_V2DI:
21570     case IX86_BUILTIN_VEC_EXT_V4SF:
21571     case IX86_BUILTIN_VEC_EXT_V4SI:
21572     case IX86_BUILTIN_VEC_EXT_V8HI:
21573     case IX86_BUILTIN_VEC_EXT_V2SI:
21574     case IX86_BUILTIN_VEC_EXT_V4HI:
21575     case IX86_BUILTIN_VEC_EXT_V16QI:
21576       return ix86_expand_vec_ext_builtin (exp, target);
21577
21578     case IX86_BUILTIN_VEC_SET_V2DI:
21579     case IX86_BUILTIN_VEC_SET_V4SF:
21580     case IX86_BUILTIN_VEC_SET_V4SI:
21581     case IX86_BUILTIN_VEC_SET_V8HI:
21582     case IX86_BUILTIN_VEC_SET_V4HI:
21583     case IX86_BUILTIN_VEC_SET_V16QI:
21584       return ix86_expand_vec_set_builtin (exp);
21585
21586     case IX86_BUILTIN_INFQ:
21587       {
21588         REAL_VALUE_TYPE inf;
21589         rtx tmp;
21590
21591         real_inf (&inf);
21592         tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
21593
21594         tmp = validize_mem (force_const_mem (mode, tmp));
21595
21596         if (target == 0)
21597           target = gen_reg_rtx (mode);
21598
21599         emit_move_insn (target, tmp);
21600         return target;
21601       }
21602
21603     default:
21604       break;
21605     }
21606
21607   for (i = 0, d = bdesc_special_args;
21608        i < ARRAY_SIZE (bdesc_special_args);
21609        i++, d++)
21610     if (d->code == fcode)
21611       return ix86_expand_special_args_builtin (d, exp, target);
21612
21613   for (i = 0, d = bdesc_args;
21614        i < ARRAY_SIZE (bdesc_args);
21615        i++, d++)
21616     if (d->code == fcode)
21617       return ix86_expand_args_builtin (d, exp, target);
21618
21619   for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
21620     if (d->code == fcode)
21621       return ix86_expand_sse_comi (d, exp, target);
21622
21623   for (i = 0, d = bdesc_pcmpestr;
21624        i < ARRAY_SIZE (bdesc_pcmpestr);
21625        i++, d++)
21626     if (d->code == fcode)
21627       return ix86_expand_sse_pcmpestr (d, exp, target);
21628
21629   for (i = 0, d = bdesc_pcmpistr;
21630        i < ARRAY_SIZE (bdesc_pcmpistr);
21631        i++, d++)
21632     if (d->code == fcode)
21633       return ix86_expand_sse_pcmpistr (d, exp, target);
21634
21635   for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
21636     if (d->code == fcode)
21637       return ix86_expand_multi_arg_builtin (d->icode, exp, target,
21638                                             (enum multi_arg_type)d->flag,
21639                                             d->comparison);
21640
21641   gcc_unreachable ();
21642 }
21643
21644 /* Returns a function decl for a vectorized version of the builtin function
21645    with builtin function code FN and the result vector type TYPE, or NULL_TREE
21646    if it is not available.  */
21647
21648 static tree
21649 ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
21650                                   tree type_in)
21651 {
21652   enum machine_mode in_mode, out_mode;
21653   int in_n, out_n;
21654
21655   if (TREE_CODE (type_out) != VECTOR_TYPE
21656       || TREE_CODE (type_in) != VECTOR_TYPE)
21657     return NULL_TREE;
21658
21659   out_mode = TYPE_MODE (TREE_TYPE (type_out));
21660   out_n = TYPE_VECTOR_SUBPARTS (type_out);
21661   in_mode = TYPE_MODE (TREE_TYPE (type_in));
21662   in_n = TYPE_VECTOR_SUBPARTS (type_in);
21663
21664   switch (fn)
21665     {
21666     case BUILT_IN_SQRT:
21667       if (out_mode == DFmode && out_n == 2
21668           && in_mode == DFmode && in_n == 2)
21669         return ix86_builtins[IX86_BUILTIN_SQRTPD];
21670       break;
21671
21672     case BUILT_IN_SQRTF:
21673       if (out_mode == SFmode && out_n == 4
21674           && in_mode == SFmode && in_n == 4)
21675         return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
21676       break;
21677
21678     case BUILT_IN_LRINT:
21679       if (out_mode == SImode && out_n == 4
21680           && in_mode == DFmode && in_n == 2)
21681         return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
21682       break;
21683
21684     case BUILT_IN_LRINTF:
21685       if (out_mode == SImode && out_n == 4
21686           && in_mode == SFmode && in_n == 4)
21687         return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
21688       break;
21689
21690     default:
21691       ;
21692     }
21693
21694   /* Dispatch to a handler for a vectorization library.  */
21695   if (ix86_veclib_handler)
21696     return (*ix86_veclib_handler)(fn, type_out, type_in);
21697
21698   return NULL_TREE;
21699 }
21700
21701 /* Handler for an SVML-style interface to
21702    a library with vectorized intrinsics.  */
21703
21704 static tree
21705 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
21706 {
21707   char name[20];
21708   tree fntype, new_fndecl, args;
21709   unsigned arity;
21710   const char *bname;
21711   enum machine_mode el_mode, in_mode;
21712   int n, in_n;
21713
21714   /* The SVML is suitable for unsafe math only.  */
21715   if (!flag_unsafe_math_optimizations)
21716     return NULL_TREE;
21717
21718   el_mode = TYPE_MODE (TREE_TYPE (type_out));
21719   n = TYPE_VECTOR_SUBPARTS (type_out);
21720   in_mode = TYPE_MODE (TREE_TYPE (type_in));
21721   in_n = TYPE_VECTOR_SUBPARTS (type_in);
21722   if (el_mode != in_mode
21723       || n != in_n)
21724     return NULL_TREE;
21725
21726   switch (fn)
21727     {
21728     case BUILT_IN_EXP:
21729     case BUILT_IN_LOG:
21730     case BUILT_IN_LOG10:
21731     case BUILT_IN_POW:
21732     case BUILT_IN_TANH:
21733     case BUILT_IN_TAN:
21734     case BUILT_IN_ATAN:
21735     case BUILT_IN_ATAN2:
21736     case BUILT_IN_ATANH:
21737     case BUILT_IN_CBRT:
21738     case BUILT_IN_SINH:
21739     case BUILT_IN_SIN:
21740     case BUILT_IN_ASINH:
21741     case BUILT_IN_ASIN:
21742     case BUILT_IN_COSH:
21743     case BUILT_IN_COS:
21744     case BUILT_IN_ACOSH:
21745     case BUILT_IN_ACOS:
21746       if (el_mode != DFmode || n != 2)
21747         return NULL_TREE;
21748       break;
21749
21750     case BUILT_IN_EXPF:
21751     case BUILT_IN_LOGF:
21752     case BUILT_IN_LOG10F:
21753     case BUILT_IN_POWF:
21754     case BUILT_IN_TANHF:
21755     case BUILT_IN_TANF:
21756     case BUILT_IN_ATANF:
21757     case BUILT_IN_ATAN2F:
21758     case BUILT_IN_ATANHF:
21759     case BUILT_IN_CBRTF:
21760     case BUILT_IN_SINHF:
21761     case BUILT_IN_SINF:
21762     case BUILT_IN_ASINHF:
21763     case BUILT_IN_ASINF:
21764     case BUILT_IN_COSHF:
21765     case BUILT_IN_COSF:
21766     case BUILT_IN_ACOSHF:
21767     case BUILT_IN_ACOSF:
21768       if (el_mode != SFmode || n != 4)
21769         return NULL_TREE;
21770       break;
21771
21772     default:
21773       return NULL_TREE;
21774     }
21775
21776   bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
21777
21778   if (fn == BUILT_IN_LOGF)
21779     strcpy (name, "vmlsLn4");
21780   else if (fn == BUILT_IN_LOG)
21781     strcpy (name, "vmldLn2");
21782   else if (n == 4)
21783     {
21784       sprintf (name, "vmls%s", bname+10);
21785       name[strlen (name)-1] = '4';
21786     }
21787   else
21788     sprintf (name, "vmld%s2", bname+10);
21789
21790   /* Convert to uppercase. */
21791   name[4] &= ~0x20;
21792
21793   arity = 0;
21794   for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
21795        args = TREE_CHAIN (args))
21796     arity++;
21797
21798   if (arity == 1)
21799     fntype = build_function_type_list (type_out, type_in, NULL);
21800   else
21801     fntype = build_function_type_list (type_out, type_in, type_in, NULL);
21802
21803   /* Build a function declaration for the vectorized function.  */
21804   new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
21805   TREE_PUBLIC (new_fndecl) = 1;
21806   DECL_EXTERNAL (new_fndecl) = 1;
21807   DECL_IS_NOVOPS (new_fndecl) = 1;
21808   TREE_READONLY (new_fndecl) = 1;
21809
21810   return new_fndecl;
21811 }
21812
21813 /* Handler for an ACML-style interface to
21814    a library with vectorized intrinsics.  */
21815
21816 static tree
21817 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
21818 {
21819   char name[20] = "__vr.._";
21820   tree fntype, new_fndecl, args;
21821   unsigned arity;
21822   const char *bname;
21823   enum machine_mode el_mode, in_mode;
21824   int n, in_n;
21825
21826   /* The ACML is 64bits only and suitable for unsafe math only as
21827      it does not correctly support parts of IEEE with the required
21828      precision such as denormals.  */
21829   if (!TARGET_64BIT
21830       || !flag_unsafe_math_optimizations)
21831     return NULL_TREE;
21832
21833   el_mode = TYPE_MODE (TREE_TYPE (type_out));
21834   n = TYPE_VECTOR_SUBPARTS (type_out);
21835   in_mode = TYPE_MODE (TREE_TYPE (type_in));
21836   in_n = TYPE_VECTOR_SUBPARTS (type_in);
21837   if (el_mode != in_mode
21838       || n != in_n)
21839     return NULL_TREE;
21840
21841   switch (fn)
21842     {
21843     case BUILT_IN_SIN:
21844     case BUILT_IN_COS:
21845     case BUILT_IN_EXP:
21846     case BUILT_IN_LOG:
21847     case BUILT_IN_LOG2:
21848     case BUILT_IN_LOG10:
21849       name[4] = 'd';
21850       name[5] = '2';
21851       if (el_mode != DFmode
21852           || n != 2)
21853         return NULL_TREE;
21854       break;
21855
21856     case BUILT_IN_SINF:
21857     case BUILT_IN_COSF:
21858     case BUILT_IN_EXPF:
21859     case BUILT_IN_POWF:
21860     case BUILT_IN_LOGF:
21861     case BUILT_IN_LOG2F:
21862     case BUILT_IN_LOG10F:
21863       name[4] = 's';
21864       name[5] = '4';
21865       if (el_mode != SFmode
21866           || n != 4)
21867         return NULL_TREE;
21868       break;
21869
21870     default:
21871       return NULL_TREE;
21872     }
21873
21874   bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
21875   sprintf (name + 7, "%s", bname+10);
21876
21877   arity = 0;
21878   for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
21879        args = TREE_CHAIN (args))
21880     arity++;
21881
21882   if (arity == 1)
21883     fntype = build_function_type_list (type_out, type_in, NULL);
21884   else
21885     fntype = build_function_type_list (type_out, type_in, type_in, NULL);
21886
21887   /* Build a function declaration for the vectorized function.  */
21888   new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
21889   TREE_PUBLIC (new_fndecl) = 1;
21890   DECL_EXTERNAL (new_fndecl) = 1;
21891   DECL_IS_NOVOPS (new_fndecl) = 1;
21892   TREE_READONLY (new_fndecl) = 1;
21893
21894   return new_fndecl;
21895 }
21896
21897
21898 /* Returns a decl of a function that implements conversion of the
21899    input vector of type TYPE, or NULL_TREE if it is not available.  */
21900
21901 static tree
21902 ix86_vectorize_builtin_conversion (unsigned int code, tree type)
21903 {
21904   if (TREE_CODE (type) != VECTOR_TYPE)
21905     return NULL_TREE;
21906
21907   switch (code)
21908     {
21909     case FLOAT_EXPR:
21910       switch (TYPE_MODE (type))
21911         {
21912         case V4SImode:
21913           return ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
21914         default:
21915           return NULL_TREE;
21916         }
21917
21918     case FIX_TRUNC_EXPR:
21919       switch (TYPE_MODE (type))
21920         {
21921         case V4SFmode:
21922           return ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
21923         default:
21924           return NULL_TREE;
21925         }
21926     default:
21927       return NULL_TREE;
21928
21929     }
21930 }
21931
21932 /* Returns a code for a target-specific builtin that implements
21933    reciprocal of the function, or NULL_TREE if not available.  */
21934
21935 static tree
21936 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
21937                          bool sqrt ATTRIBUTE_UNUSED)
21938 {
21939   if (! (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
21940          && flag_finite_math_only && !flag_trapping_math
21941          && flag_unsafe_math_optimizations))
21942     return NULL_TREE;
21943
21944   if (md_fn)
21945     /* Machine dependent builtins.  */
21946     switch (fn)
21947       {
21948         /* Vectorized version of sqrt to rsqrt conversion.  */
21949       case IX86_BUILTIN_SQRTPS_NR:
21950         return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
21951
21952       default:
21953         return NULL_TREE;
21954       }
21955   else
21956     /* Normal builtins.  */
21957     switch (fn)
21958       {
21959         /* Sqrt to rsqrt conversion.  */
21960       case BUILT_IN_SQRTF:
21961         return ix86_builtins[IX86_BUILTIN_RSQRTF];
21962
21963       default:
21964         return NULL_TREE;
21965       }
21966 }
21967
21968 /* Store OPERAND to the memory after reload is completed.  This means
21969    that we can't easily use assign_stack_local.  */
21970 rtx
21971 ix86_force_to_memory (enum machine_mode mode, rtx operand)
21972 {
21973   rtx result;
21974
21975   gcc_assert (reload_completed);
21976   if (TARGET_RED_ZONE)
21977     {
21978       result = gen_rtx_MEM (mode,
21979                             gen_rtx_PLUS (Pmode,
21980                                           stack_pointer_rtx,
21981                                           GEN_INT (-RED_ZONE_SIZE)));
21982       emit_move_insn (result, operand);
21983     }
21984   else if (!TARGET_RED_ZONE && TARGET_64BIT)
21985     {
21986       switch (mode)
21987         {
21988         case HImode:
21989         case SImode:
21990           operand = gen_lowpart (DImode, operand);
21991           /* FALLTHRU */
21992         case DImode:
21993           emit_insn (
21994                       gen_rtx_SET (VOIDmode,
21995                                    gen_rtx_MEM (DImode,
21996                                                 gen_rtx_PRE_DEC (DImode,
21997                                                         stack_pointer_rtx)),
21998                                    operand));
21999           break;
22000         default:
22001           gcc_unreachable ();
22002         }
22003       result = gen_rtx_MEM (mode, stack_pointer_rtx);
22004     }
22005   else
22006     {
22007       switch (mode)
22008         {
22009         case DImode:
22010           {
22011             rtx operands[2];
22012             split_di (&operand, 1, operands, operands + 1);
22013             emit_insn (
22014                         gen_rtx_SET (VOIDmode,
22015                                      gen_rtx_MEM (SImode,
22016                                                   gen_rtx_PRE_DEC (Pmode,
22017                                                         stack_pointer_rtx)),
22018                                      operands[1]));
22019             emit_insn (
22020                         gen_rtx_SET (VOIDmode,
22021                                      gen_rtx_MEM (SImode,
22022                                                   gen_rtx_PRE_DEC (Pmode,
22023                                                         stack_pointer_rtx)),
22024                                      operands[0]));
22025           }
22026           break;
22027         case HImode:
22028           /* Store HImodes as SImodes.  */
22029           operand = gen_lowpart (SImode, operand);
22030           /* FALLTHRU */
22031         case SImode:
22032           emit_insn (
22033                       gen_rtx_SET (VOIDmode,
22034                                    gen_rtx_MEM (GET_MODE (operand),
22035                                                 gen_rtx_PRE_DEC (SImode,
22036                                                         stack_pointer_rtx)),
22037                                    operand));
22038           break;
22039         default:
22040           gcc_unreachable ();
22041         }
22042       result = gen_rtx_MEM (mode, stack_pointer_rtx);
22043     }
22044   return result;
22045 }
22046
22047 /* Free operand from the memory.  */
22048 void
22049 ix86_free_from_memory (enum machine_mode mode)
22050 {
22051   if (!TARGET_RED_ZONE)
22052     {
22053       int size;
22054
22055       if (mode == DImode || TARGET_64BIT)
22056         size = 8;
22057       else
22058         size = 4;
22059       /* Use LEA to deallocate stack space.  In peephole2 it will be converted
22060          to pop or add instruction if registers are available.  */
22061       emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
22062                               gen_rtx_PLUS (Pmode, stack_pointer_rtx,
22063                                             GEN_INT (size))));
22064     }
22065 }
22066
22067 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
22068    QImode must go into class Q_REGS.
22069    Narrow ALL_REGS to GENERAL_REGS.  This supports allowing movsf and
22070    movdf to do mem-to-mem moves through integer regs.  */
22071 enum reg_class
22072 ix86_preferred_reload_class (rtx x, enum reg_class regclass)
22073 {
22074   enum machine_mode mode = GET_MODE (x);
22075
22076   /* We're only allowed to return a subclass of CLASS.  Many of the
22077      following checks fail for NO_REGS, so eliminate that early.  */
22078   if (regclass == NO_REGS)
22079     return NO_REGS;
22080
22081   /* All classes can load zeros.  */
22082   if (x == CONST0_RTX (mode))
22083     return regclass;
22084
22085   /* Force constants into memory if we are loading a (nonzero) constant into
22086      an MMX or SSE register.  This is because there are no MMX/SSE instructions
22087      to load from a constant.  */
22088   if (CONSTANT_P (x)
22089       && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
22090     return NO_REGS;
22091
22092   /* Prefer SSE regs only, if we can use them for math.  */
22093   if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
22094     return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
22095
22096   /* Floating-point constants need more complex checks.  */
22097   if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
22098     {
22099       /* General regs can load everything.  */
22100       if (reg_class_subset_p (regclass, GENERAL_REGS))
22101         return regclass;
22102
22103       /* Floats can load 0 and 1 plus some others.  Note that we eliminated
22104          zero above.  We only want to wind up preferring 80387 registers if
22105          we plan on doing computation with them.  */
22106       if (TARGET_80387
22107           && standard_80387_constant_p (x))
22108         {
22109           /* Limit class to non-sse.  */
22110           if (regclass == FLOAT_SSE_REGS)
22111             return FLOAT_REGS;
22112           if (regclass == FP_TOP_SSE_REGS)
22113             return FP_TOP_REG;
22114           if (regclass == FP_SECOND_SSE_REGS)
22115             return FP_SECOND_REG;
22116           if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
22117             return regclass;
22118         }
22119
22120       return NO_REGS;
22121     }
22122
22123   /* Generally when we see PLUS here, it's the function invariant
22124      (plus soft-fp const_int).  Which can only be computed into general
22125      regs.  */
22126   if (GET_CODE (x) == PLUS)
22127     return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
22128
22129   /* QImode constants are easy to load, but non-constant QImode data
22130      must go into Q_REGS.  */
22131   if (GET_MODE (x) == QImode && !CONSTANT_P (x))
22132     {
22133       if (reg_class_subset_p (regclass, Q_REGS))
22134         return regclass;
22135       if (reg_class_subset_p (Q_REGS, regclass))
22136         return Q_REGS;
22137       return NO_REGS;
22138     }
22139
22140   return regclass;
22141 }
22142
22143 /* Discourage putting floating-point values in SSE registers unless
22144    SSE math is being used, and likewise for the 387 registers.  */
22145 enum reg_class
22146 ix86_preferred_output_reload_class (rtx x, enum reg_class regclass)
22147 {
22148   enum machine_mode mode = GET_MODE (x);
22149
22150   /* Restrict the output reload class to the register bank that we are doing
22151      math on.  If we would like not to return a subset of CLASS, reject this
22152      alternative: if reload cannot do this, it will still use its choice.  */
22153   mode = GET_MODE (x);
22154   if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
22155     return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
22156
22157   if (X87_FLOAT_MODE_P (mode))
22158     {
22159       if (regclass == FP_TOP_SSE_REGS)
22160         return FP_TOP_REG;
22161       else if (regclass == FP_SECOND_SSE_REGS)
22162         return FP_SECOND_REG;
22163       else
22164         return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
22165     }
22166
22167   return regclass;
22168 }
22169
22170 static enum reg_class
22171 ix86_secondary_reload (bool in_p, rtx x, enum reg_class class,
22172                        enum machine_mode mode,
22173                        secondary_reload_info *sri ATTRIBUTE_UNUSED)
22174 {
22175   /* QImode spills from non-QI registers require
22176      intermediate register on 32bit targets.  */
22177   if (!in_p && mode == QImode && !TARGET_64BIT
22178       && (class == GENERAL_REGS
22179           || class == LEGACY_REGS
22180           || class == INDEX_REGS))
22181     {
22182       int regno;
22183
22184       if (REG_P (x))
22185         regno = REGNO (x);
22186       else
22187         regno = -1;
22188
22189       if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
22190         regno = true_regnum (x);
22191
22192       /* Return Q_REGS if the operand is in memory.  */
22193       if (regno == -1)
22194         return Q_REGS;
22195     }
22196
22197   return NO_REGS;
22198 }
22199
22200 /* If we are copying between general and FP registers, we need a memory
22201    location. The same is true for SSE and MMX registers.
22202
22203    To optimize register_move_cost performance, allow inline variant.
22204
22205    The macro can't work reliably when one of the CLASSES is class containing
22206    registers from multiple units (SSE, MMX, integer).  We avoid this by never
22207    combining those units in single alternative in the machine description.
22208    Ensure that this constraint holds to avoid unexpected surprises.
22209
22210    When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
22211    enforce these sanity checks.  */
22212
22213 static inline int
22214 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
22215                               enum machine_mode mode, int strict)
22216 {
22217   if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
22218       || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
22219       || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
22220       || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
22221       || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
22222       || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
22223     {
22224       gcc_assert (!strict);
22225       return true;
22226     }
22227
22228   if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
22229     return true;
22230
22231   /* ??? This is a lie.  We do have moves between mmx/general, and for
22232      mmx/sse2.  But by saying we need secondary memory we discourage the
22233      register allocator from using the mmx registers unless needed.  */
22234   if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
22235     return true;
22236
22237   if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
22238     {
22239       /* SSE1 doesn't have any direct moves from other classes.  */
22240       if (!TARGET_SSE2)
22241         return true;
22242
22243       /* If the target says that inter-unit moves are more expensive
22244          than moving through memory, then don't generate them.  */
22245       if (!TARGET_INTER_UNIT_MOVES)
22246         return true;
22247
22248       /* Between SSE and general, we have moves no larger than word size.  */
22249       if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
22250         return true;
22251     }
22252
22253   return false;
22254 }
22255
22256 int
22257 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
22258                               enum machine_mode mode, int strict)
22259 {
22260   return inline_secondary_memory_needed (class1, class2, mode, strict);
22261 }
22262
22263 /* Return true if the registers in CLASS cannot represent the change from
22264    modes FROM to TO.  */
22265
22266 bool
22267 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
22268                                enum reg_class regclass)
22269 {
22270   if (from == to)
22271     return false;
22272
22273   /* x87 registers can't do subreg at all, as all values are reformatted
22274      to extended precision.  */
22275   if (MAYBE_FLOAT_CLASS_P (regclass))
22276     return true;
22277
22278   if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
22279     {
22280       /* Vector registers do not support QI or HImode loads.  If we don't
22281          disallow a change to these modes, reload will assume it's ok to
22282          drop the subreg from (subreg:SI (reg:HI 100) 0).  This affects
22283          the vec_dupv4hi pattern.  */
22284       if (GET_MODE_SIZE (from) < 4)
22285         return true;
22286
22287       /* Vector registers do not support subreg with nonzero offsets, which
22288          are otherwise valid for integer registers.  Since we can't see
22289          whether we have a nonzero offset from here, prohibit all
22290          nonparadoxical subregs changing size.  */
22291       if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
22292         return true;
22293     }
22294
22295   return false;
22296 }
22297
22298 /* Return the cost of moving data of mode M between a
22299    register and memory.  A value of 2 is the default; this cost is
22300    relative to those in `REGISTER_MOVE_COST'.
22301
22302    This function is used extensively by register_move_cost that is used to
22303    build tables at startup.  Make it inline in this case.
22304    When IN is 2, return maximum of in and out move cost.
22305
22306    If moving between registers and memory is more expensive than
22307    between two registers, you should define this macro to express the
22308    relative cost.
22309
22310    Model also increased moving costs of QImode registers in non
22311    Q_REGS classes.
22312  */
22313 static inline int
22314 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
22315                          int in)
22316 {
22317   int cost;
22318   if (FLOAT_CLASS_P (regclass))
22319     {
22320       int index;
22321       switch (mode)
22322         {
22323           case SFmode:
22324             index = 0;
22325             break;
22326           case DFmode:
22327             index = 1;
22328             break;
22329           case XFmode:
22330             index = 2;
22331             break;
22332           default:
22333             return 100;
22334         }
22335       if (in == 2)
22336         return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
22337       return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
22338     }
22339   if (SSE_CLASS_P (regclass))
22340     {
22341       int index;
22342       switch (GET_MODE_SIZE (mode))
22343         {
22344           case 4:
22345             index = 0;
22346             break;
22347           case 8:
22348             index = 1;
22349             break;
22350           case 16:
22351             index = 2;
22352             break;
22353           default:
22354             return 100;
22355         }
22356       if (in == 2)
22357         return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
22358       return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
22359     }
22360   if (MMX_CLASS_P (regclass))
22361     {
22362       int index;
22363       switch (GET_MODE_SIZE (mode))
22364         {
22365           case 4:
22366             index = 0;
22367             break;
22368           case 8:
22369             index = 1;
22370             break;
22371           default:
22372             return 100;
22373         }
22374       if (in)
22375         return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
22376       return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
22377     }
22378   switch (GET_MODE_SIZE (mode))
22379     {
22380       case 1:
22381         if (Q_CLASS_P (regclass) || TARGET_64BIT)
22382           {
22383             if (!in)
22384               return ix86_cost->int_store[0];
22385             if (TARGET_PARTIAL_REG_DEPENDENCY && !optimize_size)
22386               cost = ix86_cost->movzbl_load;
22387             else
22388               cost = ix86_cost->int_load[0];
22389             if (in == 2)
22390               return MAX (cost, ix86_cost->int_store[0]);
22391             return cost;
22392           }
22393         else
22394           {
22395            if (in == 2)
22396              return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
22397            if (in)
22398              return ix86_cost->movzbl_load;
22399            else
22400              return ix86_cost->int_store[0] + 4;
22401           }
22402         break;
22403       case 2:
22404         if (in == 2)
22405           return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
22406         return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
22407       default:
22408         /* Compute number of 32bit moves needed.  TFmode is moved as XFmode.  */
22409         if (mode == TFmode)
22410           mode = XFmode;
22411         if (in == 2)
22412           cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
22413         else if (in)
22414           cost = ix86_cost->int_load[2];
22415         else
22416           cost = ix86_cost->int_store[2];
22417         return (cost * (((int) GET_MODE_SIZE (mode)
22418                         + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
22419     }
22420 }
22421
22422 int
22423 ix86_memory_move_cost (enum machine_mode mode, enum reg_class regclass, int in)
22424 {
22425   return inline_memory_move_cost (mode, regclass, in);
22426 }
22427
22428
22429 /* Return the cost of moving data from a register in class CLASS1 to
22430    one in class CLASS2.
22431
22432    It is not required that the cost always equal 2 when FROM is the same as TO;
22433    on some machines it is expensive to move between registers if they are not
22434    general registers.  */
22435
22436 int
22437 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
22438                          enum reg_class class2)
22439 {
22440   /* In case we require secondary memory, compute cost of the store followed
22441      by load.  In order to avoid bad register allocation choices, we need
22442      for this to be *at least* as high as the symmetric MEMORY_MOVE_COST.  */
22443
22444   if (inline_secondary_memory_needed (class1, class2, mode, 0))
22445     {
22446       int cost = 1;
22447
22448       cost += inline_memory_move_cost (mode, class1, 2);
22449       cost += inline_memory_move_cost (mode, class2, 2);
22450
22451       /* In case of copying from general_purpose_register we may emit multiple
22452          stores followed by single load causing memory size mismatch stall.
22453          Count this as arbitrarily high cost of 20.  */
22454       if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
22455         cost += 20;
22456
22457       /* In the case of FP/MMX moves, the registers actually overlap, and we
22458          have to switch modes in order to treat them differently.  */
22459       if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
22460           || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
22461         cost += 20;
22462
22463       return cost;
22464     }
22465
22466   /* Moves between SSE/MMX and integer unit are expensive.  */
22467   if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
22468       || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
22469
22470     /* ??? By keeping returned value relatively high, we limit the number
22471        of moves between integer and MMX/SSE registers for all targets.
22472        Additionally, high value prevents problem with x86_modes_tieable_p(),
22473        where integer modes in MMX/SSE registers are not tieable
22474        because of missing QImode and HImode moves to, from or between
22475        MMX/SSE registers.  */
22476     return MAX (8, ix86_cost->mmxsse_to_integer);
22477
22478   if (MAYBE_FLOAT_CLASS_P (class1))
22479     return ix86_cost->fp_move;
22480   if (MAYBE_SSE_CLASS_P (class1))
22481     return ix86_cost->sse_move;
22482   if (MAYBE_MMX_CLASS_P (class1))
22483     return ix86_cost->mmx_move;
22484   return 2;
22485 }
22486
22487 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE.  */
22488
22489 bool
22490 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
22491 {
22492   /* Flags and only flags can only hold CCmode values.  */
22493   if (CC_REGNO_P (regno))
22494     return GET_MODE_CLASS (mode) == MODE_CC;
22495   if (GET_MODE_CLASS (mode) == MODE_CC
22496       || GET_MODE_CLASS (mode) == MODE_RANDOM
22497       || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
22498     return 0;
22499   if (FP_REGNO_P (regno))
22500     return VALID_FP_MODE_P (mode);
22501   if (SSE_REGNO_P (regno))
22502     {
22503       /* We implement the move patterns for all vector modes into and
22504          out of SSE registers, even when no operation instructions
22505          are available.  */
22506       return (VALID_SSE_REG_MODE (mode)
22507               || VALID_SSE2_REG_MODE (mode)
22508               || VALID_MMX_REG_MODE (mode)
22509               || VALID_MMX_REG_MODE_3DNOW (mode));
22510     }
22511   if (MMX_REGNO_P (regno))
22512     {
22513       /* We implement the move patterns for 3DNOW modes even in MMX mode,
22514          so if the register is available at all, then we can move data of
22515          the given mode into or out of it.  */
22516       return (VALID_MMX_REG_MODE (mode)
22517               || VALID_MMX_REG_MODE_3DNOW (mode));
22518     }
22519
22520   if (mode == QImode)
22521     {
22522       /* Take care for QImode values - they can be in non-QI regs,
22523          but then they do cause partial register stalls.  */
22524       if (regno < 4 || TARGET_64BIT)
22525         return 1;
22526       if (!TARGET_PARTIAL_REG_STALL)
22527         return 1;
22528       return reload_in_progress || reload_completed;
22529     }
22530   /* We handle both integer and floats in the general purpose registers.  */
22531   else if (VALID_INT_MODE_P (mode))
22532     return 1;
22533   else if (VALID_FP_MODE_P (mode))
22534     return 1;
22535   else if (VALID_DFP_MODE_P (mode))
22536     return 1;
22537   /* Lots of MMX code casts 8 byte vector modes to DImode.  If we then go
22538      on to use that value in smaller contexts, this can easily force a
22539      pseudo to be allocated to GENERAL_REGS.  Since this is no worse than
22540      supporting DImode, allow it.  */
22541   else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
22542     return 1;
22543
22544   return 0;
22545 }
22546
22547 /* A subroutine of ix86_modes_tieable_p.  Return true if MODE is a
22548    tieable integer mode.  */
22549
22550 static bool
22551 ix86_tieable_integer_mode_p (enum machine_mode mode)
22552 {
22553   switch (mode)
22554     {
22555     case HImode:
22556     case SImode:
22557       return true;
22558
22559     case QImode:
22560       return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
22561
22562     case DImode:
22563       return TARGET_64BIT;
22564
22565     default:
22566       return false;
22567     }
22568 }
22569
22570 /* Return true if MODE1 is accessible in a register that can hold MODE2
22571    without copying.  That is, all register classes that can hold MODE2
22572    can also hold MODE1.  */
22573
22574 bool
22575 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
22576 {
22577   if (mode1 == mode2)
22578     return true;
22579
22580   if (ix86_tieable_integer_mode_p (mode1)
22581       && ix86_tieable_integer_mode_p (mode2))
22582     return true;
22583
22584   /* MODE2 being XFmode implies fp stack or general regs, which means we
22585      can tie any smaller floating point modes to it.  Note that we do not
22586      tie this with TFmode.  */
22587   if (mode2 == XFmode)
22588     return mode1 == SFmode || mode1 == DFmode;
22589
22590   /* MODE2 being DFmode implies fp stack, general or sse regs, which means
22591      that we can tie it with SFmode.  */
22592   if (mode2 == DFmode)
22593     return mode1 == SFmode;
22594
22595   /* If MODE2 is only appropriate for an SSE register, then tie with
22596      any other mode acceptable to SSE registers.  */
22597   if (GET_MODE_SIZE (mode2) == 16
22598       && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
22599     return (GET_MODE_SIZE (mode1) == 16
22600             && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
22601
22602   /* If MODE2 is appropriate for an MMX register, then tie
22603      with any other mode acceptable to MMX registers.  */
22604   if (GET_MODE_SIZE (mode2) == 8
22605       && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
22606     return (GET_MODE_SIZE (mode1) == 8
22607             && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
22608
22609   return false;
22610 }
22611
22612 /* Compute a (partial) cost for rtx X.  Return true if the complete
22613    cost has been computed, and false if subexpressions should be
22614    scanned.  In either case, *TOTAL contains the cost result.  */
22615
22616 static bool
22617 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total)
22618 {
22619   enum rtx_code outer_code = (enum rtx_code) outer_code_i;
22620   enum machine_mode mode = GET_MODE (x);
22621
22622   switch (code)
22623     {
22624     case CONST_INT:
22625     case CONST:
22626     case LABEL_REF:
22627     case SYMBOL_REF:
22628       if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
22629         *total = 3;
22630       else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
22631         *total = 2;
22632       else if (flag_pic && SYMBOLIC_CONST (x)
22633                && (!TARGET_64BIT
22634                    || (!GET_CODE (x) != LABEL_REF
22635                        && (GET_CODE (x) != SYMBOL_REF
22636                            || !SYMBOL_REF_LOCAL_P (x)))))
22637         *total = 1;
22638       else
22639         *total = 0;
22640       return true;
22641
22642     case CONST_DOUBLE:
22643       if (mode == VOIDmode)
22644         *total = 0;
22645       else
22646         switch (standard_80387_constant_p (x))
22647           {
22648           case 1: /* 0.0 */
22649             *total = 1;
22650             break;
22651           default: /* Other constants */
22652             *total = 2;
22653             break;
22654           case 0:
22655           case -1:
22656             /* Start with (MEM (SYMBOL_REF)), since that's where
22657                it'll probably end up.  Add a penalty for size.  */
22658             *total = (COSTS_N_INSNS (1)
22659                       + (flag_pic != 0 && !TARGET_64BIT)
22660                       + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
22661             break;
22662           }
22663       return true;
22664
22665     case ZERO_EXTEND:
22666       /* The zero extensions is often completely free on x86_64, so make
22667          it as cheap as possible.  */
22668       if (TARGET_64BIT && mode == DImode
22669           && GET_MODE (XEXP (x, 0)) == SImode)
22670         *total = 1;
22671       else if (TARGET_ZERO_EXTEND_WITH_AND)
22672         *total = ix86_cost->add;
22673       else
22674         *total = ix86_cost->movzx;
22675       return false;
22676
22677     case SIGN_EXTEND:
22678       *total = ix86_cost->movsx;
22679       return false;
22680
22681     case ASHIFT:
22682       if (CONST_INT_P (XEXP (x, 1))
22683           && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
22684         {
22685           HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
22686           if (value == 1)
22687             {
22688               *total = ix86_cost->add;
22689               return false;
22690             }
22691           if ((value == 2 || value == 3)
22692               && ix86_cost->lea <= ix86_cost->shift_const)
22693             {
22694               *total = ix86_cost->lea;
22695               return false;
22696             }
22697         }
22698       /* FALLTHRU */
22699
22700     case ROTATE:
22701     case ASHIFTRT:
22702     case LSHIFTRT:
22703     case ROTATERT:
22704       if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
22705         {
22706           if (CONST_INT_P (XEXP (x, 1)))
22707             {
22708               if (INTVAL (XEXP (x, 1)) > 32)
22709                 *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
22710               else
22711                 *total = ix86_cost->shift_const * 2;
22712             }
22713           else
22714             {
22715               if (GET_CODE (XEXP (x, 1)) == AND)
22716                 *total = ix86_cost->shift_var * 2;
22717               else
22718                 *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
22719             }
22720         }
22721       else
22722         {
22723           if (CONST_INT_P (XEXP (x, 1)))
22724             *total = ix86_cost->shift_const;
22725           else
22726             *total = ix86_cost->shift_var;
22727         }
22728       return false;
22729
22730     case MULT:
22731       if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22732         {
22733           /* ??? SSE scalar cost should be used here.  */
22734           *total = ix86_cost->fmul;
22735           return false;
22736         }
22737       else if (X87_FLOAT_MODE_P (mode))
22738         {
22739           *total = ix86_cost->fmul;
22740           return false;
22741         }
22742       else if (FLOAT_MODE_P (mode))
22743         {
22744           /* ??? SSE vector cost should be used here.  */
22745           *total = ix86_cost->fmul;
22746           return false;
22747         }
22748       else
22749         {
22750           rtx op0 = XEXP (x, 0);
22751           rtx op1 = XEXP (x, 1);
22752           int nbits;
22753           if (CONST_INT_P (XEXP (x, 1)))
22754             {
22755               unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
22756               for (nbits = 0; value != 0; value &= value - 1)
22757                 nbits++;
22758             }
22759           else
22760             /* This is arbitrary.  */
22761             nbits = 7;
22762
22763           /* Compute costs correctly for widening multiplication.  */
22764           if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
22765               && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
22766                  == GET_MODE_SIZE (mode))
22767             {
22768               int is_mulwiden = 0;
22769               enum machine_mode inner_mode = GET_MODE (op0);
22770
22771               if (GET_CODE (op0) == GET_CODE (op1))
22772                 is_mulwiden = 1, op1 = XEXP (op1, 0);
22773               else if (CONST_INT_P (op1))
22774                 {
22775                   if (GET_CODE (op0) == SIGN_EXTEND)
22776                     is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
22777                                   == INTVAL (op1);
22778                   else
22779                     is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
22780                 }
22781
22782               if (is_mulwiden)
22783                 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
22784             }
22785
22786           *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
22787                     + nbits * ix86_cost->mult_bit
22788                     + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
22789
22790           return true;
22791         }
22792
22793     case DIV:
22794     case UDIV:
22795     case MOD:
22796     case UMOD:
22797       if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22798         /* ??? SSE cost should be used here.  */
22799         *total = ix86_cost->fdiv;
22800       else if (X87_FLOAT_MODE_P (mode))
22801         *total = ix86_cost->fdiv;
22802       else if (FLOAT_MODE_P (mode))
22803         /* ??? SSE vector cost should be used here.  */
22804         *total = ix86_cost->fdiv;
22805       else
22806         *total = ix86_cost->divide[MODE_INDEX (mode)];
22807       return false;
22808
22809     case PLUS:
22810       if (GET_MODE_CLASS (mode) == MODE_INT
22811                && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
22812         {
22813           if (GET_CODE (XEXP (x, 0)) == PLUS
22814               && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
22815               && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
22816               && CONSTANT_P (XEXP (x, 1)))
22817             {
22818               HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
22819               if (val == 2 || val == 4 || val == 8)
22820                 {
22821                   *total = ix86_cost->lea;
22822                   *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
22823                   *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
22824                                       outer_code);
22825                   *total += rtx_cost (XEXP (x, 1), outer_code);
22826                   return true;
22827                 }
22828             }
22829           else if (GET_CODE (XEXP (x, 0)) == MULT
22830                    && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
22831             {
22832               HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
22833               if (val == 2 || val == 4 || val == 8)
22834                 {
22835                   *total = ix86_cost->lea;
22836                   *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
22837                   *total += rtx_cost (XEXP (x, 1), outer_code);
22838                   return true;
22839                 }
22840             }
22841           else if (GET_CODE (XEXP (x, 0)) == PLUS)
22842             {
22843               *total = ix86_cost->lea;
22844               *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
22845               *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
22846               *total += rtx_cost (XEXP (x, 1), outer_code);
22847               return true;
22848             }
22849         }
22850       /* FALLTHRU */
22851
22852     case MINUS:
22853       if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22854         {
22855           /* ??? SSE cost should be used here.  */
22856           *total = ix86_cost->fadd;
22857           return false;
22858         }
22859       else if (X87_FLOAT_MODE_P (mode))
22860         {
22861           *total = ix86_cost->fadd;
22862           return false;
22863         }
22864       else if (FLOAT_MODE_P (mode))
22865         {
22866           /* ??? SSE vector cost should be used here.  */
22867           *total = ix86_cost->fadd;
22868           return false;
22869         }
22870       /* FALLTHRU */
22871
22872     case AND:
22873     case IOR:
22874     case XOR:
22875       if (!TARGET_64BIT && mode == DImode)
22876         {
22877           *total = (ix86_cost->add * 2
22878                     + (rtx_cost (XEXP (x, 0), outer_code)
22879                        << (GET_MODE (XEXP (x, 0)) != DImode))
22880                     + (rtx_cost (XEXP (x, 1), outer_code)
22881                        << (GET_MODE (XEXP (x, 1)) != DImode)));
22882           return true;
22883         }
22884       /* FALLTHRU */
22885
22886     case NEG:
22887       if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22888         {
22889           /* ??? SSE cost should be used here.  */
22890           *total = ix86_cost->fchs;
22891           return false;
22892         }
22893       else if (X87_FLOAT_MODE_P (mode))
22894         {
22895           *total = ix86_cost->fchs;
22896           return false;
22897         }
22898       else if (FLOAT_MODE_P (mode))
22899         {
22900           /* ??? SSE vector cost should be used here.  */
22901           *total = ix86_cost->fchs;
22902           return false;
22903         }
22904       /* FALLTHRU */
22905
22906     case NOT:
22907       if (!TARGET_64BIT && mode == DImode)
22908         *total = ix86_cost->add * 2;
22909       else
22910         *total = ix86_cost->add;
22911       return false;
22912
22913     case COMPARE:
22914       if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
22915           && XEXP (XEXP (x, 0), 1) == const1_rtx
22916           && CONST_INT_P (XEXP (XEXP (x, 0), 2))
22917           && XEXP (x, 1) == const0_rtx)
22918         {
22919           /* This kind of construct is implemented using test[bwl].
22920              Treat it as if we had an AND.  */
22921           *total = (ix86_cost->add
22922                     + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
22923                     + rtx_cost (const1_rtx, outer_code));
22924           return true;
22925         }
22926       return false;
22927
22928     case FLOAT_EXTEND:
22929       if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
22930         *total = 0;
22931       return false;
22932
22933     case ABS:
22934       if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22935         /* ??? SSE cost should be used here.  */
22936         *total = ix86_cost->fabs;
22937       else if (X87_FLOAT_MODE_P (mode))
22938         *total = ix86_cost->fabs;
22939       else if (FLOAT_MODE_P (mode))
22940         /* ??? SSE vector cost should be used here.  */
22941         *total = ix86_cost->fabs;
22942       return false;
22943
22944     case SQRT:
22945       if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22946         /* ??? SSE cost should be used here.  */
22947         *total = ix86_cost->fsqrt;
22948       else if (X87_FLOAT_MODE_P (mode))
22949         *total = ix86_cost->fsqrt;
22950       else if (FLOAT_MODE_P (mode))
22951         /* ??? SSE vector cost should be used here.  */
22952         *total = ix86_cost->fsqrt;
22953       return false;
22954
22955     case UNSPEC:
22956       if (XINT (x, 1) == UNSPEC_TP)
22957         *total = 0;
22958       return false;
22959
22960     default:
22961       return false;
22962     }
22963 }
22964
22965 #if TARGET_MACHO
22966
22967 static int current_machopic_label_num;
22968
22969 /* Given a symbol name and its associated stub, write out the
22970    definition of the stub.  */
22971
22972 void
22973 machopic_output_stub (FILE *file, const char *symb, const char *stub)
22974 {
22975   unsigned int length;
22976   char *binder_name, *symbol_name, lazy_ptr_name[32];
22977   int label = ++current_machopic_label_num;
22978
22979   /* For 64-bit we shouldn't get here.  */
22980   gcc_assert (!TARGET_64BIT);
22981
22982   /* Lose our funky encoding stuff so it doesn't contaminate the stub.  */
22983   symb = (*targetm.strip_name_encoding) (symb);
22984
22985   length = strlen (stub);
22986   binder_name = alloca (length + 32);
22987   GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
22988
22989   length = strlen (symb);
22990   symbol_name = alloca (length + 32);
22991   GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
22992
22993   sprintf (lazy_ptr_name, "L%d$lz", label);
22994
22995   if (MACHOPIC_PURE)
22996     switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
22997   else
22998     switch_to_section (darwin_sections[machopic_symbol_stub_section]);
22999
23000   fprintf (file, "%s:\n", stub);
23001   fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
23002
23003   if (MACHOPIC_PURE)
23004     {
23005       fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
23006       fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
23007       fprintf (file, "\tjmp\t*%%edx\n");
23008     }
23009   else
23010     fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
23011
23012   fprintf (file, "%s:\n", binder_name);
23013
23014   if (MACHOPIC_PURE)
23015     {
23016       fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
23017       fprintf (file, "\tpushl\t%%eax\n");
23018     }
23019   else
23020     fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
23021
23022   fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
23023
23024   switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
23025   fprintf (file, "%s:\n", lazy_ptr_name);
23026   fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
23027   fprintf (file, "\t.long %s\n", binder_name);
23028 }
23029
/* Run the end-of-file processing for this target: first darwin_file_end,
   then ix86_file_end.  */
void
darwin_x86_file_end (void)
{
  darwin_file_end ();

  ix86_file_end ();
}
23036 #endif /* TARGET_MACHO */
23037
23038 /* Order the registers for register allocator.  */
23039
23040 void
23041 x86_order_regs_for_local_alloc (void)
23042 {
23043    int pos = 0;
23044    int i;
23045
23046    /* First allocate the local general purpose registers.  */
23047    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
23048      if (GENERAL_REGNO_P (i) && call_used_regs[i])
23049         reg_alloc_order [pos++] = i;
23050
23051    /* Global general purpose registers.  */
23052    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
23053      if (GENERAL_REGNO_P (i) && !call_used_regs[i])
23054         reg_alloc_order [pos++] = i;
23055
23056    /* x87 registers come first in case we are doing FP math
23057       using them.  */
23058    if (!TARGET_SSE_MATH)
23059      for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
23060        reg_alloc_order [pos++] = i;
23061
23062    /* SSE registers.  */
23063    for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
23064      reg_alloc_order [pos++] = i;
23065    for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
23066      reg_alloc_order [pos++] = i;
23067
23068    /* x87 registers.  */
23069    if (TARGET_SSE_MATH)
23070      for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
23071        reg_alloc_order [pos++] = i;
23072
23073    for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
23074      reg_alloc_order [pos++] = i;
23075
23076    /* Initialize the rest of array as we do not allocate some registers
23077       at all.  */
23078    while (pos < FIRST_PSEUDO_REGISTER)
23079      reg_alloc_order [pos++] = 0;
23080 }
23081
23082 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
23083    struct attribute_spec.handler.  */
23084 static tree
23085 ix86_handle_struct_attribute (tree *node, tree name,
23086                               tree args ATTRIBUTE_UNUSED,
23087                               int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
23088 {
23089   tree *type = NULL;
23090   if (DECL_P (*node))
23091     {
23092       if (TREE_CODE (*node) == TYPE_DECL)
23093         type = &TREE_TYPE (*node);
23094     }
23095   else
23096     type = node;
23097
23098   if (!(type && (TREE_CODE (*type) == RECORD_TYPE
23099                  || TREE_CODE (*type) == UNION_TYPE)))
23100     {
23101       warning (OPT_Wattributes, "%qs attribute ignored",
23102                IDENTIFIER_POINTER (name));
23103       *no_add_attrs = true;
23104     }
23105
23106   else if ((is_attribute_p ("ms_struct", name)
23107             && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
23108            || ((is_attribute_p ("gcc_struct", name)
23109                 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
23110     {
23111       warning (OPT_Wattributes, "%qs incompatible attribute ignored",
23112                IDENTIFIER_POINTER (name));
23113       *no_add_attrs = true;
23114     }
23115
23116   return NULL_TREE;
23117 }
23118
23119 static bool
23120 ix86_ms_bitfield_layout_p (const_tree record_type)
23121 {
23122   return (TARGET_MS_BITFIELD_LAYOUT &&
23123           !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
23124     || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
23125 }
23126
23127 /* Returns an expression indicating where the this parameter is
23128    located on entry to the FUNCTION.  */
23129
23130 static rtx
23131 x86_this_parameter (tree function)
23132 {
23133   tree type = TREE_TYPE (function);
23134   bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
23135   int nregs;
23136
23137   if (TARGET_64BIT)
23138     {
23139       const int *parm_regs;
23140
23141       if (ix86_function_type_abi (type) == MS_ABI)
23142         parm_regs = x86_64_ms_abi_int_parameter_registers;
23143       else
23144         parm_regs = x86_64_int_parameter_registers;
23145       return gen_rtx_REG (DImode, parm_regs[aggr]);
23146     }
23147
23148   nregs = ix86_function_regparm (type, function);
23149
23150   if (nregs > 0 && !stdarg_p (type))
23151     {
23152       int regno;
23153
23154       if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
23155         regno = aggr ? DX_REG : CX_REG;
23156       else
23157         {
23158           regno = AX_REG;
23159           if (aggr)
23160             {
23161               regno = DX_REG;
23162               if (nregs == 1)
23163                 return gen_rtx_MEM (SImode,
23164                                     plus_constant (stack_pointer_rtx, 4));
23165             }
23166         }
23167       return gen_rtx_REG (SImode, regno);
23168     }
23169
23170   return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
23171 }
23172
23173 /* Determine whether x86_output_mi_thunk can succeed.  */
23174
23175 static bool
23176 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
23177                          HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
23178                          HOST_WIDE_INT vcall_offset, const_tree function)
23179 {
23180   /* 64-bit can handle anything.  */
23181   if (TARGET_64BIT)
23182     return true;
23183
23184   /* For 32-bit, everything's fine if we have one free register.  */
23185   if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
23186     return true;
23187
23188   /* Need a free register for vcall_offset.  */
23189   if (vcall_offset)
23190     return false;
23191
23192   /* Need a free register for GOT references.  */
23193   if (flag_pic && !(*targetm.binds_local_p) (function))
23194     return false;
23195
23196   /* Otherwise ok.  */
23197   return true;
23198 }
23199
23200 /* Output the assembler code for a thunk function.  THUNK_DECL is the
23201    declaration for the thunk function itself, FUNCTION is the decl for
23202    the target function.  DELTA is an immediate constant offset to be
23203    added to THIS.  If VCALL_OFFSET is nonzero, the word at
23204    *(*this + vcall_offset) should be added to THIS.  */
23205
23206 static void
23207 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
23208                      tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
23209                      HOST_WIDE_INT vcall_offset, tree function)
23210 {
23211   rtx xops[3];
23212   rtx this_param = x86_this_parameter (function);
23213   rtx this_reg, tmp;
23214
23215   /* If VCALL_OFFSET, we'll need THIS in a register.  Might as well
23216      pull it in now and let DELTA benefit.  */
23217   if (REG_P (this_param))
23218     this_reg = this_param;
23219   else if (vcall_offset)
23220     {
23221       /* Put the this parameter into %eax.  */
23222       xops[0] = this_param;
23223       xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG);
23224       if (TARGET_64BIT)
23225         output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
23226       else
23227         output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
23228     }
23229   else
23230     this_reg = NULL_RTX;
23231
23232   /* Adjust the this parameter by a fixed constant.  */
23233   if (delta)
23234     {
23235       xops[0] = GEN_INT (delta);
23236       xops[1] = this_reg ? this_reg : this_param;
23237       if (TARGET_64BIT)
23238         {
23239           if (!x86_64_general_operand (xops[0], DImode))
23240             {
23241               tmp = gen_rtx_REG (DImode, R10_REG);
23242               xops[1] = tmp;
23243               output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
23244               xops[0] = tmp;
23245               xops[1] = this_param;
23246             }
23247           output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
23248         }
23249       else
23250         output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
23251     }
23252
23253   /* Adjust the this parameter by a value stored in the vtable.  */
23254   if (vcall_offset)
23255     {
23256       if (TARGET_64BIT)
23257         tmp = gen_rtx_REG (DImode, R10_REG);
23258       else
23259         {
23260           int tmp_regno = CX_REG;
23261           if (lookup_attribute ("fastcall",
23262                                 TYPE_ATTRIBUTES (TREE_TYPE (function))))
23263             tmp_regno = AX_REG;
23264           tmp = gen_rtx_REG (SImode, tmp_regno);
23265         }
23266
23267       xops[0] = gen_rtx_MEM (Pmode, this_reg);
23268       xops[1] = tmp;
23269       if (TARGET_64BIT)
23270         output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
23271       else
23272         output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
23273
23274       /* Adjust the this parameter.  */
23275       xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
23276       if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
23277         {
23278           rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
23279           xops[0] = GEN_INT (vcall_offset);
23280           xops[1] = tmp2;
23281           output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
23282           xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
23283         }
23284       xops[1] = this_reg;
23285       if (TARGET_64BIT)
23286         output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
23287       else
23288         output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
23289     }
23290
23291   /* If necessary, drop THIS back to its stack slot.  */
23292   if (this_reg && this_reg != this_param)
23293     {
23294       xops[0] = this_reg;
23295       xops[1] = this_param;
23296       if (TARGET_64BIT)
23297         output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
23298       else
23299         output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
23300     }
23301
23302   xops[0] = XEXP (DECL_RTL (function), 0);
23303   if (TARGET_64BIT)
23304     {
23305       if (!flag_pic || (*targetm.binds_local_p) (function))
23306         output_asm_insn ("jmp\t%P0", xops);
23307       /* All thunks should be in the same object as their target,
23308          and thus binds_local_p should be true.  */
23309       else if (TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
23310         gcc_unreachable ();
23311       else
23312         {
23313           tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
23314           tmp = gen_rtx_CONST (Pmode, tmp);
23315           tmp = gen_rtx_MEM (QImode, tmp);
23316           xops[0] = tmp;
23317           output_asm_insn ("jmp\t%A0", xops);
23318         }
23319     }
23320   else
23321     {
23322       if (!flag_pic || (*targetm.binds_local_p) (function))
23323         output_asm_insn ("jmp\t%P0", xops);
23324       else
23325 #if TARGET_MACHO
23326         if (TARGET_MACHO)
23327           {
23328             rtx sym_ref = XEXP (DECL_RTL (function), 0);
23329             tmp = (gen_rtx_SYMBOL_REF
23330                    (Pmode,
23331                     machopic_indirection_name (sym_ref, /*stub_p=*/true)));
23332             tmp = gen_rtx_MEM (QImode, tmp);
23333             xops[0] = tmp;
23334             output_asm_insn ("jmp\t%0", xops);
23335           }
23336         else
23337 #endif /* TARGET_MACHO */
23338         {
23339           tmp = gen_rtx_REG (SImode, CX_REG);
23340           output_set_got (tmp, NULL_RTX);
23341
23342           xops[1] = tmp;
23343           output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
23344           output_asm_insn ("jmp\t{*}%1", xops);
23345         }
23346     }
23347 }
23348
23349 static void
23350 x86_file_start (void)
23351 {
23352   default_file_start ();
23353 #if TARGET_MACHO
23354   darwin_file_start ();
23355 #endif
23356   if (X86_FILE_START_VERSION_DIRECTIVE)
23357     fputs ("\t.version\t\"01.01\"\n", asm_out_file);
23358   if (X86_FILE_START_FLTUSED)
23359     fputs ("\t.global\t__fltused\n", asm_out_file);
23360   if (ix86_asm_dialect == ASM_INTEL)
23361     fputs ("\t.intel_syntax noprefix\n", asm_out_file);
23362 }
23363
23364 int
23365 x86_field_alignment (tree field, int computed)
23366 {
23367   enum machine_mode mode;
23368   tree type = TREE_TYPE (field);
23369
23370   if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
23371     return computed;
23372   mode = TYPE_MODE (strip_array_types (type));
23373   if (mode == DFmode || mode == DCmode
23374       || GET_MODE_CLASS (mode) == MODE_INT
23375       || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
23376     return MIN (32, computed);
23377   return computed;
23378 }
23379
23380 /* Output assembler code to FILE to increment profiler label # LABELNO
23381    for profiling a function entry.  */
23382 void
23383 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
23384 {
23385   if (TARGET_64BIT)
23386     {
23387 #ifndef NO_PROFILE_COUNTERS
23388       fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
23389 #endif
23390
23391       if (DEFAULT_ABI == SYSV_ABI && flag_pic)
23392         fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
23393       else
23394         fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
23395     }
23396   else if (flag_pic)
23397     {
23398 #ifndef NO_PROFILE_COUNTERS
23399       fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
23400                LPREFIX, labelno, PROFILE_COUNT_REGISTER);
23401 #endif
23402       fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
23403     }
23404   else
23405     {
23406 #ifndef NO_PROFILE_COUNTERS
23407       fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
23408                PROFILE_COUNT_REGISTER);
23409 #endif
23410       fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
23411     }
23412 }
23413
23414 /* We don't have exact information about the insn sizes, but we may assume
23415    quite safely that we are informed about all 1 byte insns and memory
23416    address sizes.  This is enough to eliminate unnecessary padding in
23417    99% of cases.  */
23418
23419 static int
23420 min_insn_size (rtx insn)
23421 {
23422   int l = 0;
23423
23424   if (!INSN_P (insn) || !active_insn_p (insn))
23425     return 0;
23426
23427   /* Discard alignments we've emit and jump instructions.  */
23428   if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
23429       && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
23430     return 0;
23431   if (JUMP_P (insn)
23432       && (GET_CODE (PATTERN (insn)) == ADDR_VEC
23433           || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
23434     return 0;
23435
23436   /* Important case - calls are always 5 bytes.
23437      It is common to have many calls in the row.  */
23438   if (CALL_P (insn)
23439       && symbolic_reference_mentioned_p (PATTERN (insn))
23440       && !SIBLING_CALL_P (insn))
23441     return 5;
23442   if (get_attr_length (insn) <= 1)
23443     return 1;
23444
23445   /* For normal instructions we may rely on the sizes of addresses
23446      and the presence of symbol to require 4 bytes of encoding.
23447      This is not the case for jumps where references are PC relative.  */
23448   if (!JUMP_P (insn))
23449     {
23450       l = get_attr_length_address (insn);
23451       if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
23452         l = 4;
23453     }
23454   if (l)
23455     return 1+l;
23456   else
23457     return 2;
23458 }
23459
23460 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
23461    window.  */
23462
23463 static void
23464 ix86_avoid_jump_misspredicts (void)
23465 {
23466   rtx insn, start = get_insns ();
23467   int nbytes = 0, njumps = 0;
23468   int isjump = 0;
23469
23470   /* Look for all minimal intervals of instructions containing 4 jumps.
23471      The intervals are bounded by START and INSN.  NBYTES is the total
23472      size of instructions in the interval including INSN and not including
23473      START.  When the NBYTES is smaller than 16 bytes, it is possible
23474      that the end of START and INSN ends up in the same 16byte page.
23475
23476      The smallest offset in the page INSN can start is the case where START
23477      ends on the offset 0.  Offset of INSN is then NBYTES - sizeof (INSN).
23478      We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
23479      */
23480   for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23481     {
23482
23483       nbytes += min_insn_size (insn);
23484       if (dump_file)
23485         fprintf(dump_file, "Insn %i estimated to %i bytes\n",
23486                 INSN_UID (insn), min_insn_size (insn));
23487       if ((JUMP_P (insn)
23488            && GET_CODE (PATTERN (insn)) != ADDR_VEC
23489            && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
23490           || CALL_P (insn))
23491         njumps++;
23492       else
23493         continue;
23494
23495       while (njumps > 3)
23496         {
23497           start = NEXT_INSN (start);
23498           if ((JUMP_P (start)
23499                && GET_CODE (PATTERN (start)) != ADDR_VEC
23500                && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
23501               || CALL_P (start))
23502             njumps--, isjump = 1;
23503           else
23504             isjump = 0;
23505           nbytes -= min_insn_size (start);
23506         }
23507       gcc_assert (njumps >= 0);
23508       if (dump_file)
23509         fprintf (dump_file, "Interval %i to %i has %i bytes\n",
23510                 INSN_UID (start), INSN_UID (insn), nbytes);
23511
23512       if (njumps == 3 && isjump && nbytes < 16)
23513         {
23514           int padsize = 15 - nbytes + min_insn_size (insn);
23515
23516           if (dump_file)
23517             fprintf (dump_file, "Padding insn %i by %i bytes!\n",
23518                      INSN_UID (insn), padsize);
23519           emit_insn_before (gen_align (GEN_INT (padsize)), insn);
23520         }
23521     }
23522 }
23523
23524 /* AMD Athlon works faster
23525    when RET is not destination of conditional jump or directly preceded
23526    by other jump instruction.  We avoid the penalty by inserting NOP just
23527    before the RET instructions in such cases.  */
23528 static void
23529 ix86_pad_returns (void)
23530 {
23531   edge e;
23532   edge_iterator ei;
23533
23534   FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
23535     {
23536       basic_block bb = e->src;
23537       rtx ret = BB_END (bb);
23538       rtx prev;
23539       bool replace = false;
23540
23541       if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
23542           || !maybe_hot_bb_p (bb))
23543         continue;
23544       for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
23545         if (active_insn_p (prev) || LABEL_P (prev))
23546           break;
23547       if (prev && LABEL_P (prev))
23548         {
23549           edge e;
23550           edge_iterator ei;
23551
23552           FOR_EACH_EDGE (e, ei, bb->preds)
23553             if (EDGE_FREQUENCY (e) && e->src->index >= 0
23554                 && !(e->flags & EDGE_FALLTHRU))
23555               replace = true;
23556         }
23557       if (!replace)
23558         {
23559           prev = prev_active_insn (ret);
23560           if (prev
23561               && ((JUMP_P (prev) && any_condjump_p (prev))
23562                   || CALL_P (prev)))
23563             replace = true;
23564           /* Empty functions get branch mispredict even when the jump destination
23565              is not visible to us.  */
23566           if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
23567             replace = true;
23568         }
23569       if (replace)
23570         {
23571           emit_insn_before (gen_return_internal_long (), ret);
23572           delete_insn (ret);
23573         }
23574     }
23575 }
23576
23577 /* Implement machine specific optimizations.  We implement padding of returns
23578    for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window.  */
23579 static void
23580 ix86_reorg (void)
23581 {
23582   if (TARGET_PAD_RETURNS && optimize && !optimize_size)
23583     ix86_pad_returns ();
23584   if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
23585     ix86_avoid_jump_misspredicts ();
23586 }
23587
23588 /* Return nonzero when QImode register that must be represented via REX prefix
23589    is used.  */
23590 bool
23591 x86_extended_QIreg_mentioned_p (rtx insn)
23592 {
23593   int i;
23594   extract_insn_cached (insn);
23595   for (i = 0; i < recog_data.n_operands; i++)
23596     if (REG_P (recog_data.operand[i])
23597         && REGNO (recog_data.operand[i]) >= 4)
23598        return true;
23599   return false;
23600 }
23601
23602 /* Return nonzero when P points to register encoded via REX prefix.
23603    Called via for_each_rtx.  */
23604 static int
23605 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
23606 {
23607    unsigned int regno;
23608    if (!REG_P (*p))
23609      return 0;
23610    regno = REGNO (*p);
23611    return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
23612 }
23613
23614 /* Return true when INSN mentions register that must be encoded using REX
23615    prefix.  */
23616 bool
23617 x86_extended_reg_mentioned_p (rtx insn)
23618 {
23619   return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
23620 }
23621
23622 /* Generate an unsigned DImode/SImode to FP conversion.  This is the same code
23623    optabs would emit if we didn't have TFmode patterns.  */
23624
23625 void
23626 x86_emit_floatuns (rtx operands[2])
23627 {
23628   rtx neglab, donelab, i0, i1, f0, in, out;
23629   enum machine_mode mode, inmode;
23630
23631   inmode = GET_MODE (operands[1]);
23632   gcc_assert (inmode == SImode || inmode == DImode);
23633
23634   out = operands[0];
23635   in = force_reg (inmode, operands[1]);
23636   mode = GET_MODE (out);
23637   neglab = gen_label_rtx ();
23638   donelab = gen_label_rtx ();
23639   f0 = gen_reg_rtx (mode);
23640
23641   emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
23642
23643   expand_float (out, in, 0);
23644
23645   emit_jump_insn (gen_jump (donelab));
23646   emit_barrier ();
23647
23648   emit_label (neglab);
23649
23650   i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
23651                             1, OPTAB_DIRECT);
23652   i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
23653                             1, OPTAB_DIRECT);
23654   i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
23655
23656   expand_float (f0, i0, 0);
23657
23658   emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
23659
23660   emit_label (donelab);
23661 }
23662 \f
23663 /* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
23664    with all elements equal to VAR.  Return true if successful.  */
23665
23666 static bool
23667 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
23668                                    rtx target, rtx val)
23669 {
23670   enum machine_mode smode, wsmode, wvmode;
23671   rtx x;
23672
23673   switch (mode)
23674     {
23675     case V2SImode:
23676     case V2SFmode:
23677       if (!mmx_ok)
23678         return false;
23679       /* FALLTHRU */
23680
23681     case V2DFmode:
23682     case V2DImode:
23683     case V4SFmode:
23684     case V4SImode:
23685       val = force_reg (GET_MODE_INNER (mode), val);
23686       x = gen_rtx_VEC_DUPLICATE (mode, val);
23687       emit_insn (gen_rtx_SET (VOIDmode, target, x));
23688       return true;
23689
23690     case V4HImode:
23691       if (!mmx_ok)
23692         return false;
23693       if (TARGET_SSE || TARGET_3DNOW_A)
23694         {
23695           val = gen_lowpart (SImode, val);
23696           x = gen_rtx_TRUNCATE (HImode, val);
23697           x = gen_rtx_VEC_DUPLICATE (mode, x);
23698           emit_insn (gen_rtx_SET (VOIDmode, target, x));
23699           return true;
23700         }
23701       else
23702         {
23703           smode = HImode;
23704           wsmode = SImode;
23705           wvmode = V2SImode;
23706           goto widen;
23707         }
23708
23709     case V8QImode:
23710       if (!mmx_ok)
23711         return false;
23712       smode = QImode;
23713       wsmode = HImode;
23714       wvmode = V4HImode;
23715       goto widen;
23716     case V8HImode:
23717       if (TARGET_SSE2)
23718         {
23719           rtx tmp1, tmp2;
23720           /* Extend HImode to SImode using a paradoxical SUBREG.  */
23721           tmp1 = gen_reg_rtx (SImode);
23722           emit_move_insn (tmp1, gen_lowpart (SImode, val));
23723           /* Insert the SImode value as low element of V4SImode vector. */
23724           tmp2 = gen_reg_rtx (V4SImode);
23725           tmp1 = gen_rtx_VEC_MERGE (V4SImode,
23726                                     gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
23727                                     CONST0_RTX (V4SImode),
23728                                     const1_rtx);
23729           emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
23730           /* Cast the V4SImode vector back to a V8HImode vector.  */
23731           tmp1 = gen_reg_rtx (V8HImode);
23732           emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
23733           /* Duplicate the low short through the whole low SImode word.  */
23734           emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
23735           /* Cast the V8HImode vector back to a V4SImode vector.  */
23736           tmp2 = gen_reg_rtx (V4SImode);
23737           emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
23738           /* Replicate the low element of the V4SImode vector.  */
23739           emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
23740           /* Cast the V2SImode back to V8HImode, and store in target.  */
23741           emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
23742           return true;
23743         }
23744       smode = HImode;
23745       wsmode = SImode;
23746       wvmode = V4SImode;
23747       goto widen;
23748     case V16QImode:
23749       if (TARGET_SSE2)
23750         {
23751           rtx tmp1, tmp2;
23752           /* Extend QImode to SImode using a paradoxical SUBREG.  */
23753           tmp1 = gen_reg_rtx (SImode);
23754           emit_move_insn (tmp1, gen_lowpart (SImode, val));
23755           /* Insert the SImode value as low element of V4SImode vector. */
23756           tmp2 = gen_reg_rtx (V4SImode);
23757           tmp1 = gen_rtx_VEC_MERGE (V4SImode,
23758                                     gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
23759                                     CONST0_RTX (V4SImode),
23760                                     const1_rtx);
23761           emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
23762           /* Cast the V4SImode vector back to a V16QImode vector.  */
23763           tmp1 = gen_reg_rtx (V16QImode);
23764           emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
23765           /* Duplicate the low byte through the whole low SImode word.  */
23766           emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
23767           emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
23768           /* Cast the V16QImode vector back to a V4SImode vector.  */
23769           tmp2 = gen_reg_rtx (V4SImode);
23770           emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
23771           /* Replicate the low element of the V4SImode vector.  */
23772           emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
23773           /* Cast the V2SImode back to V16QImode, and store in target.  */
23774           emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
23775           return true;
23776         }
23777       smode = QImode;
23778       wsmode = HImode;
23779       wvmode = V8HImode;
23780       goto widen;
23781     widen:
23782       /* Replicate the value once into the next wider mode and recurse.  */
23783       val = convert_modes (wsmode, smode, val, true);
23784       x = expand_simple_binop (wsmode, ASHIFT, val,
23785                                GEN_INT (GET_MODE_BITSIZE (smode)),
23786                                NULL_RTX, 1, OPTAB_LIB_WIDEN);
23787       val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
23788
23789       x = gen_reg_rtx (wvmode);
23790       if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
23791         gcc_unreachable ();
23792       emit_move_insn (target, gen_lowpart (mode, x));
23793       return true;
23794
23795     default:
23796       return false;
23797     }
23798 }
23799
23800 /* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
23801    whose ONE_VAR element is VAR, and other elements are zero.  Return true
23802    if successful.  */
23803
23804 static bool
23805 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
23806                                      rtx target, rtx var, int one_var)
23807 {
23808   enum machine_mode vsimode;
23809   rtx new_target;
23810   rtx x, tmp;
23811   bool use_vector_set = false;
23812
23813   switch (mode)
23814     {
23815     case V2DImode:
23816       use_vector_set = TARGET_64BIT && TARGET_SSE4_1;
23817       break;
23818     case V16QImode:
23819     case V4SImode:
23820     case V4SFmode:
23821       use_vector_set = TARGET_SSE4_1;
23822       break;
23823     case V8HImode:
23824       use_vector_set = TARGET_SSE2;
23825       break;
23826     case V4HImode:
23827       use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
23828       break;
23829     default:
23830       break;
23831     }
23832
23833   if (use_vector_set)
23834     {
23835       emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
23836       var = force_reg (GET_MODE_INNER (mode), var);
23837       ix86_expand_vector_set (mmx_ok, target, var, one_var);
23838       return true;
23839     }
23840
23841   switch (mode)
23842     {
23843     case V2SFmode:
23844     case V2SImode:
23845       if (!mmx_ok)
23846         return false;
23847       /* FALLTHRU */
23848
23849     case V2DFmode:
23850     case V2DImode:
23851       if (one_var != 0)
23852         return false;
23853       var = force_reg (GET_MODE_INNER (mode), var);
23854       x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
23855       emit_insn (gen_rtx_SET (VOIDmode, target, x));
23856       return true;
23857
23858     case V4SFmode:
23859     case V4SImode:
23860       if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
23861         new_target = gen_reg_rtx (mode);
23862       else
23863         new_target = target;
23864       var = force_reg (GET_MODE_INNER (mode), var);
23865       x = gen_rtx_VEC_DUPLICATE (mode, var);
23866       x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
23867       emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
23868       if (one_var != 0)
23869         {
23870           /* We need to shuffle the value to the correct position, so
23871              create a new pseudo to store the intermediate result.  */
23872
23873           /* With SSE2, we can use the integer shuffle insns.  */
23874           if (mode != V4SFmode && TARGET_SSE2)
23875             {
23876               emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
23877                                             GEN_INT (1),
23878                                             GEN_INT (one_var == 1 ? 0 : 1),
23879                                             GEN_INT (one_var == 2 ? 0 : 1),
23880                                             GEN_INT (one_var == 3 ? 0 : 1)));
23881               if (target != new_target)
23882                 emit_move_insn (target, new_target);
23883               return true;
23884             }
23885
23886           /* Otherwise convert the intermediate result to V4SFmode and
23887              use the SSE1 shuffle instructions.  */
23888           if (mode != V4SFmode)
23889             {
23890               tmp = gen_reg_rtx (V4SFmode);
23891               emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
23892             }
23893           else
23894             tmp = new_target;
23895
23896           emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
23897                                        GEN_INT (1),
23898                                        GEN_INT (one_var == 1 ? 0 : 1),
23899                                        GEN_INT (one_var == 2 ? 0+4 : 1+4),
23900                                        GEN_INT (one_var == 3 ? 0+4 : 1+4)));
23901
23902           if (mode != V4SFmode)
23903             emit_move_insn (target, gen_lowpart (V4SImode, tmp));
23904           else if (tmp != target)
23905             emit_move_insn (target, tmp);
23906         }
23907       else if (target != new_target)
23908         emit_move_insn (target, new_target);
23909       return true;
23910
23911     case V8HImode:
23912     case V16QImode:
23913       vsimode = V4SImode;
23914       goto widen;
23915     case V4HImode:
23916     case V8QImode:
23917       if (!mmx_ok)
23918         return false;
23919       vsimode = V2SImode;
23920       goto widen;
23921     widen:
23922       if (one_var != 0)
23923         return false;
23924
23925       /* Zero extend the variable element to SImode and recurse.  */
23926       var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
23927
23928       x = gen_reg_rtx (vsimode);
23929       if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
23930                                                 var, one_var))
23931         gcc_unreachable ();
23932
23933       emit_move_insn (target, gen_lowpart (mode, x));
23934       return true;
23935
23936     default:
23937       return false;
23938     }
23939 }
23940
23941 /* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
23942    consisting of the values in VALS.  It is known that all elements
23943    except ONE_VAR are constants.  Return true if successful.

         MODE is the mode of the vector being built and MMX_OK is passed
         through to ix86_expand_vector_set.  A copy of VALS with element
         ONE_VAR replaced by zero is loaded as a constant vector and the
         variable element is then inserted into it.  False is returned for
         the two-element modes and for any mode not listed in the switch;
         no emit_* routine has been called by this function in that case.  */
23944
23945 static bool
23946 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
23947                                  rtx target, rtx vals, int one_var)
23948 {
23949   rtx var = XVECEXP (vals, 0, one_var);
23950   enum machine_mode wmode;
23951   rtx const_vec, x;
23952
        /* Build the constant part: VALS with the variable element zeroed.
           VALS itself is left untouched because a copy is modified.  */
23953   const_vec = copy_rtx (vals);
23954   XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
23955   const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
23956
23957   switch (mode)
23958     {
23959     case V2DFmode:
23960     case V2DImode:
23961     case V2SFmode:
23962     case V2SImode:
23963       /* For the two element vectors, it's just as easy to use
23964          the general case.  */
23965       return false;
23966
23967     case V4SFmode:
23968     case V4SImode:
23969     case V8HImode:
23970     case V4HImode:
            /* Handled after the switch: load the constants, then set the
               variable element directly.  */
23971       break;
23972
23973     case V16QImode:
            /* With SSE4.1 the byte is set directly after the switch;
               otherwise go through an HImode element of a V8HImode view.  */
23974       if (TARGET_SSE4_1)
23975         break;
23976       wmode = V8HImode;
23977       goto widen;
23978     case V8QImode:
23979       wmode = V4HImode;
23980       goto widen;
23981     widen:
23982       /* There's no way to set one QImode entry easily.  Combine
23983          the variable value with its adjacent constant value, and
23984          promote to an HImode set.  */
            /* one_var ^ 1 is the other byte of the HImode pair that
               contains ONE_VAR.  */
23985       x = XVECEXP (vals, 0, one_var ^ 1);
23986       if (one_var & 1)
23987         {
                /* Odd byte: the variable is shifted into bits 8..15 and the
                   constant neighbour supplies bits 0..7.  */
23988           var = convert_modes (HImode, QImode, var, true);
23989           var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
23990                                      NULL_RTX, 1, OPTAB_LIB_WIDEN);
23991           x = GEN_INT (INTVAL (x) & 0xff);
23992         }
23993       else
23994         {
                /* Even byte: the variable stays in bits 0..7 and the
                   constant neighbour is shifted into bits 8..15.  */
23995           var = convert_modes (HImode, QImode, var, true);
23996           x = gen_int_mode (INTVAL (x) << 8, HImode);
23997         }
            /* The IOR is skipped when the constant half is zero.  */
23998       if (x != const0_rtx)
23999         var = expand_simple_binop (HImode, IOR, var, x, var,
24000                                    1, OPTAB_LIB_WIDEN);
24001
            /* Load the constant vector viewed in WMODE, overwrite HImode
               element ONE_VAR / 2 with the combined value, and copy the
               result back to TARGET in the original mode.  */
24002       x = gen_reg_rtx (wmode);
24003       emit_move_insn (x, gen_lowpart (wmode, const_vec));
24004       ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
24005
24006       emit_move_insn (target, gen_lowpart (mode, x));
24007       return true;
24008
24009     default:
24010       return false;
24011     }
24012
        /* Modes that broke out of the switch: load the constant vector and
           then set the single variable element.  */
24013   emit_move_insn (target, const_vec);
24014   ix86_expand_vector_set (mmx_ok, target, var, one_var);
24015   return true;
24016 }
24017
24018 /* A subroutine of ix86_expand_vector_init_general.  Use vector
24019    concatenate to handle the most general case: all values variable,
24020    and none identical.

         OPS holds the N inputs and TARGET, of mode MODE, receives the
         result.  N must be 2 or 4; anything else hits gcc_unreachable.
         For N == 2 the two inputs are forced into registers (the forced
         values are written back into OPS) and a single VEC_CONCAT is
         emitted.  For N == 4 adjacent pairs are first combined into
         half-width vectors and the function then calls itself with N == 2.  */
24021
24022 static void
24023 ix86_expand_vector_init_concat (enum machine_mode mode,
24024                                 rtx target, rtx *ops, int n)
24025 {
24026   enum machine_mode cmode, hmode = VOIDmode;
24027   rtx first[4], second[2];
24028   rtvec v;
24029   int i, j;
24030
24031   switch (n)
24032     {
24033     case 2:
            /* CMODE is the mode of each of the two operands being
               concatenated.  */
24034       switch (mode)
24035         {
24036         case V4SImode:
24037           cmode = V2SImode;
24038           break;
24039         case V4SFmode:
24040           cmode = V2SFmode;
24041           break;
24042         case V2DImode:
24043           cmode = DImode;
24044           break;
24045         case V2SImode:
24046           cmode = SImode;
24047           break;
24048         case V2DFmode:
24049           cmode = DFmode;
24050           break;
24051         case V2SFmode:
24052           cmode = SFmode;
24053           break;
24054         default:
24055           gcc_unreachable ();
24056         }
24057
            /* ops[1] is forced into a register before ops[0].  */
24058       if (!register_operand (ops[1], cmode))
24059         ops[1] = force_reg (cmode, ops[1]);
24060       if (!register_operand (ops[0], cmode))
24061         ops[0] = force_reg (cmode, ops[0]);
24062       emit_insn (gen_rtx_SET (VOIDmode, target,
24063                               gen_rtx_VEC_CONCAT (mode, ops[0],
24064                                                   ops[1])));
24065       break;
24066
24067     case 4:
            /* CMODE is the mode of the half-width vectors built from each
               pair of inputs.  */
24068       switch (mode)
24069         {
24070         case V4SImode:
24071           cmode = V2SImode;
24072           break;
24073         case V4SFmode:
24074           cmode = V2SFmode;
24075           break;
24076         default:
24077           gcc_unreachable ();
24078         }
24079       goto half;
24080
24081 half:
24082       /* FIXME: We process inputs backward to help RA.  PR 36222.  */
            /* Pair (ops[i - 1], ops[i]) is initialized into first[j],
               starting with the last pair.  */
24083       i = n - 1;
24084       j = (n >> 1) - 1;
24085       for (; i > 0; i -= 2, j--)
24086         {
24087           first[j] = gen_reg_rtx (cmode);
24088           v = gen_rtvec (2, ops[i - 1], ops[i]);
24089           ix86_expand_vector_init (false, first[j],
24090                                    gen_rtx_PARALLEL (cmode, v));
24091         }
24092
24093       n >>= 1;
            /* With the N values accepted by the outer switch, N is 2 at this
               point, so the branch below is not taken; HMODE is never
               assigned anything but VOIDmode in this function and the
               assertion would fail if it were.  */
24094       if (n > 2)
24095         {
24096           gcc_assert (hmode != VOIDmode);
24097           for (i = j = 0; i < n; i += 2, j++)
24098             {
24099               second[j] = gen_reg_rtx (hmode);
24100               ix86_expand_vector_init_concat (hmode, second [j],
24101                                               &first [i], 2);
24102             }
24103           n >>= 1;
24104           ix86_expand_vector_init_concat (mode, target, second, n);
24105         }
24106       else
24107         ix86_expand_vector_init_concat (mode, target, first, n);
24108       break;
24109
24110     default:
24111       gcc_unreachable ();
24112     }
24113 }
24114
24115 /* A subroutine of ix86_expand_vector_init_general.  Use vector
24116    interleave to handle the most general case: all values variable,
24117    and none identical.

         MODE must be V8HImode or V16QImode; any other mode hits
         gcc_unreachable.  N is the number of element pairs: for each
         I < N the elements OPS[2*I] and OPS[2*I + 1] are read, so OPS holds
         2*N elements on entry.  OPS is reused as scratch storage for the
         intermediate vectors and is therefore clobbered.  The result is
         stored into TARGET.  */
24118
24119 static void
24120 ix86_expand_vector_init_interleave (enum machine_mode mode,
24121                                     rtx target, rtx *ops, int n)
24122 {
24123   enum machine_mode first_imode, second_imode, third_imode;
24124   int i, j;
24125   rtx op0, op1;
24126   rtx (*gen_load_even) (rtx, rtx, rtx);
24127   rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
24128   rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
24129
        /* Select the per-mode generators and the successively wider integer
           vector modes used by the interleave stages.  */
24130   switch (mode)
24131     {
24132     case V8HImode:
24133       gen_load_even = gen_vec_setv8hi;
24134       gen_interleave_first_low = gen_vec_interleave_lowv4si;
24135       gen_interleave_second_low = gen_vec_interleave_lowv2di;
24136       first_imode = V4SImode;
24137       second_imode = V2DImode;
24138       third_imode = VOIDmode;
24139       break;
24140     case V16QImode:
24141       gen_load_even = gen_vec_setv16qi;
24142       gen_interleave_first_low = gen_vec_interleave_lowv8hi;
24143       gen_interleave_second_low = gen_vec_interleave_lowv4si;
24144       first_imode = V8HImode;
24145       second_imode = V4SImode;
24146       third_imode = V2DImode;
24147       break;
24148     default:
24149       gcc_unreachable ();
24150     }
24151
        /* Stage 1: pack each pair of scalars into the two lowest elements of
           a fresh vector and store it, viewed in FIRST_IMODE, in ops[i].
           Writing ops[i] is safe because later iterations only read
           ops[2*i] and above.  */
24152   for (i = 0; i < n; i++)
24153     {
24154       /* Extend the odd element (counting from one) to SImode using a
               paradoxical SUBREG.  */
24155       op0 = gen_reg_rtx (SImode);
24156       emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
24157
24158       /* Insert the SImode value as low element of V4SImode vector. */
24159       op1 = gen_reg_rtx (V4SImode);
24160       op0 = gen_rtx_VEC_MERGE (V4SImode,
24161                                gen_rtx_VEC_DUPLICATE (V4SImode,
24162                                                       op0),
24163                                CONST0_RTX (V4SImode),
24164                                const1_rtx);
24165       emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
24166
24167       /* Cast the V4SImode vector back to a vector in original mode.  */
24168       op0 = gen_reg_rtx (mode);
24169       emit_move_insn (op0, gen_lowpart (mode, op1));
24170
24171       /* Load even elements into the second position.  */
24172       emit_insn ((*gen_load_even) (op0, ops [i + i + 1],
24173                                    const1_rtx));
24174
24175       /* Cast vector to FIRST_IMODE vector.  */
24176       ops[i] = gen_reg_rtx (first_imode);
24177       emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
24178     }
24179
24180   /* Interleave low FIRST_IMODE vectors.  */
        /* Stage 2: N vectors are combined pairwise, leaving N / 2 vectors in
           ops[0 .. N/2 - 1].  */
24181   for (i = j = 0; i < n; i += 2, j++)
24182     {
24183       op0 = gen_reg_rtx (first_imode);
24184       emit_insn ((*gen_interleave_first_low) (op0, ops[i], ops[i + 1]));
24185
24186       /* Cast FIRST_IMODE vector to SECOND_IMODE vector.  */
24187       ops[j] = gen_reg_rtx (second_imode);
24188       emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
24189     }
24190
24191   /* Interleave low SECOND_IMODE vectors.  */
24192   switch (second_imode)
24193     {
24194     case V4SImode:
            /* SECOND_IMODE is V4SImode only for V16QImode input, which needs
               one more pairwise stage before the final V2DImode
               interleave.  */
24195       for (i = j = 0; i < n / 2; i += 2, j++)
24196         {
24197           op0 = gen_reg_rtx (second_imode);
24198           emit_insn ((*gen_interleave_second_low) (op0, ops[i],
24199                                                    ops[i + 1]));
24200
24201           /* Cast the SECOND_IMODE vector to the THIRD_IMODE
24202              vector.  */
24203           ops[j] = gen_reg_rtx (third_imode);
24204           emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
24205         }
            /* Retarget the generator and mode so that the shared V2DImode
               code below performs the last interleave.  */
24206       second_imode = V2DImode;
24207       gen_interleave_second_low = gen_vec_interleave_lowv2di;
24208       /* FALLTHRU */
24209
24210     case V2DImode:
            /* Final stage: the two remaining vectors are ops[0] and
               ops[1].  */
24211       op0 = gen_reg_rtx (second_imode);
24212       emit_insn ((*gen_interleave_second_low) (op0, ops[0],
24213                                                ops[1]));
24214
24215       /* Cast the SECOND_IMODE vector back to a vector in the original
24216          mode.  */
24217       emit_insn (gen_rtx_SET (VOIDmode, target,
24218                               gen_lowpart (mode, op0)));
24219       break;
24220
24221     default:
24222       gcc_unreachable ();
24223     }
24224 }
24225
24226 /* A subroutine of ix86_expand_vector_init.  Handle the most general case:
24227    all values variable, and none identical.

         VALS supplies the elements and TARGET, of mode MODE, receives the
         vector.  Depending on MODE and the enabled instruction sets the work
         is handed to ix86_expand_vector_init_concat or
         ix86_expand_vector_init_interleave; otherwise the elements are
         packed into word_mode integers with shifts and ORs and the words are
         then moved into TARGET.  A mode not listed in the switch hits
         gcc_unreachable.  */
24228
24229 static void
24230 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
24231                                  rtx target, rtx vals)
24232 {
24233   rtx ops[16];
24234   int n, i;
24235
24236   switch (mode)
24237     {
24238     case V2SFmode:
24239     case V2SImode:
            /* Without MMX permission and without SSE, use the word-packing
               fallback below.  */
24240       if (!mmx_ok && !TARGET_SSE)
24241         break;
24242       /* FALLTHRU */
24243
24244     case V4SFmode:
24245     case V4SImode:
24246     case V2DFmode:
24247     case V2DImode:
24248       n = GET_MODE_NUNITS (mode);
24249       for (i = 0; i < n; i++)
24250         ops[i] = XVECEXP (vals, 0, i);
24251       ix86_expand_vector_init_concat (mode, target, ops, n);
24252       return;
24253
24254     case V16QImode:
24255       if (!TARGET_SSE4_1)
24256         break;
24257       /* FALLTHRU */
24258
24259     case V8HImode:
24260       if (!TARGET_SSE2)
24261         break;
24262
            /* The interleave helper takes the number of element pairs,
               hence N >> 1.  */
24263       n = GET_MODE_NUNITS (mode);
24264       for (i = 0; i < n; i++)
24265         ops[i] = XVECEXP (vals, 0, i);
24266       ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
24267       return;
24268
24269     case V4HImode:
24270     case V8QImode:
24271       break;
24272
24273     default:
24274       gcc_unreachable ();
24275     }
24276
        /* Fallback: assemble the vector one word_mode integer at a time.
           The declarations in this block shadow the outer I.  */
24277     {
24278       int i, j, n_elts, n_words, n_elt_per_word;
24279       enum machine_mode inner_mode;
24280       rtx words[4], shift;
24281
24282       inner_mode = GET_MODE_INNER (mode);
24283       n_elts = GET_MODE_NUNITS (mode);
24284       n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
24285       n_elt_per_word = n_elts / n_words;
24286       shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
24287
24288       for (i = 0; i < n_words; ++i)
24289         {
24290           rtx word = NULL_RTX;
24291
                /* Elements of word I are visited from the highest index down
                   to the lowest, so after the shifts the lowest-indexed
                   element ends up in the least significant bits.  */
24292           for (j = 0; j < n_elt_per_word; ++j)
24293             {
24294               rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
24295               elt = convert_modes (word_mode, inner_mode, elt, true);
24296
24297               if (j == 0)
24298                 word = elt;
24299               else
24300                 {
24301                   word = expand_simple_binop (word_mode, ASHIFT, word, shift,
24302                                               word, 1, OPTAB_LIB_WIDEN);
24303                   word = expand_simple_binop (word_mode, IOR, word, elt,
24304                                               word, 1, OPTAB_LIB_WIDEN);
24305                 }
24306             }
24307
24308           words[i] = word;
24309         }
24310
24311       if (n_words == 1)
24312         emit_move_insn (target, gen_lowpart (mode, words[0]));
24313       else if (n_words == 2)
24314         {
                /* Fill a fresh register half by half, then copy it to TARGET.
                   NOTE(review): the clobber presumably tells later passes that
                   TMP is completely overwritten by the two partial stores --
                   confirm.  */
24315           rtx tmp = gen_reg_rtx (mode);
24316           emit_clobber (tmp);
24317           emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
24318           emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
24319           emit_move_insn (target, tmp);
24320         }
24321       else if (n_words == 4)
24322         {
                /* Treat the four words as the elements of a V4SImode vector
                   and build that vector recursively.  */
24323           rtx tmp = gen_reg_rtx (V4SImode);
24324           gcc_assert (word_mode == SImode);
24325           vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
24326           ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
24327           emit_move_insn (target, gen_lowpart (mode, tmp));
24328         }
24329       else
24330         gcc_unreachable ();
24331     }
24332 }
24333
24334 /* Initialize vector TARGET via VALS.  Suppress the use of MMX
24335    instructions unless MMX_OK is true.

         The vector mode is taken from TARGET.  The elements of VALS are
         classified first, and the cheapest applicable strategy is then
         tried in this order: an all-constant vector move, a broadcast when
         all elements are equal, the single-variable helpers when exactly one
         element is not constant, and finally the fully general expander.  */
24336
24337 void
24338 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
24339 {
24340   enum machine_mode mode = GET_MODE (target);
24341   enum machine_mode inner_mode = GET_MODE_INNER (mode);
24342   int n_elts = GET_MODE_NUNITS (mode);
24343   int n_var = 0, one_var = -1;
24344   bool all_same = true, all_const_zero = true;
24345   int i;
24346   rtx x;
24347
        /* Classify the elements.  N_VAR counts those that are not CONST_INT,
           CONST_DOUBLE or CONST_FIXED, and ONE_VAR records the index of the
           last such element.  ALL_CONST_ZERO stays true only while every
           constant element is the zero of INNER_MODE.  ALL_SAME stays true
           only while every element is rtx_equal_p to element 0.  */
24348   for (i = 0; i < n_elts; ++i)
24349     {
24350       x = XVECEXP (vals, 0, i);
24351       if (!(CONST_INT_P (x)
24352             || GET_CODE (x) == CONST_DOUBLE
24353             || GET_CODE (x) == CONST_FIXED))
24354         n_var++, one_var = i;
24355       else if (x != CONST0_RTX (inner_mode))
24356         all_const_zero = false;
24357       if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
24358         all_same = false;
24359     }
24360
24361   /* Constants are best loaded from the constant pool.  */
24362   if (n_var == 0)
24363     {
24364       emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
24365       return;
24366     }
24367
24368   /* If all values are identical, broadcast the value.  */
24369   if (all_same
24370       && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
24371                                             XVECEXP (vals, 0, 0)))
24372     return;
24373
24374   /* Values where only one field is non-constant are best loaded from
24375      the pool and overwritten via move later.  */
24376   if (n_var == 1)
24377     {
            /* When every constant is zero, first try the helper for a vector
               that is zero except for one element.  */
24378       if (all_const_zero
24379           && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
24380                                                   XVECEXP (vals, 0, one_var),
24381                                                   one_var))
24382         return;
24383
24384       if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
24385         return;
24386     }
24387
        /* Nothing cheaper applied, or the helper declined by returning
           false.  */
24388   ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
24389 }
24390
      /* Store VAL into element ELT of the vector TARGET; the strategy is
         selected by the mode of TARGET.  MMX_OK permits the expansions that
         are guarded by it (the V2SF/V2SI concat and the V4HI vec_merge).
         Modes and target configurations without a direct expansion spill
         TARGET to a stack temporary, store VAL into the slot of element ELT
         and reload TARGET from the temporary.  */
24391 void
24392 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
24393 {
24394   enum machine_mode mode = GET_MODE (target);
24395   enum machine_mode inner_mode = GET_MODE_INNER (mode);
24396   bool use_vec_merge = false;
24397   rtx tmp;
24398
24399   switch (mode)
24400     {
24401     case V2SFmode:
24402     case V2SImode:
24403       if (mmx_ok)
24404         {
                /* TMP receives the element that is kept.  VEC_CONCAT takes
                   the low element first, so VAL goes first when ELT is 0 and
                   second otherwise -- the same operand order as the
                   V2DF/V2DI case below.  */
24405           tmp = gen_reg_rtx (GET_MODE_INNER (mode));
24406           ix86_expand_vector_extract (true, tmp, target, 1 - elt);
24407           if (elt == 0)
24408             tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
24409           else
24410             tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
24411           emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
24412           return;
24413         }
24414       break;
24415
24416     case V2DImode:
24417       use_vec_merge = TARGET_SSE4_1;
24418       if (use_vec_merge)
24419         break;
            /* FALLTHRU */
24420
24421     case V2DFmode:
24422       {
24423         rtx op0, op1;
24424
24425         /* For the two element vectors, we implement a VEC_CONCAT with
24426            the extraction of the other element.  */
24427
24428         tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
24429         tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
24430
24431         if (elt == 0)
24432           op0 = val, op1 = tmp;
24433         else
24434           op0 = tmp, op1 = val;
24435
24436         tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
24437         emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
24438       }
24439       return;
24440
24441     case V4SFmode:
24442       use_vec_merge = TARGET_SSE4_1;
24443       if (use_vec_merge)
24444         break;
24445
            /* Without SSE4.1 only element 0 can be merged directly; the
               other elements are reached with shuffles around a set of
               element 0.  */
24446       switch (elt)
24447         {
24448         case 0:
24449           use_vec_merge = true;
24450           break;
24451
24452         case 1:
24453           /* tmp = target = A B C D */
24454           tmp = copy_to_reg (target);
24455           /* target = A A B B */
24456           emit_insn (gen_sse_unpcklps (target, target, target));
24457           /* target = X A B B */
24458           ix86_expand_vector_set (false, target, val, 0);
24459           /* target = A X C D  */
24460           emit_insn (gen_sse_shufps_1 (target, target, tmp,
24461                                        GEN_INT (1), GEN_INT (0),
24462                                        GEN_INT (2+4), GEN_INT (3+4)));
24463           return;
24464
24465         case 2:
24466           /* tmp = target = A B C D */
24467           tmp = copy_to_reg (target);
24468           /* tmp = X B C D */
24469           ix86_expand_vector_set (false, tmp, val, 0);
24470           /* target = A B X D */
24471           emit_insn (gen_sse_shufps_1 (target, target, tmp,
24472                                        GEN_INT (0), GEN_INT (1),
24473                                        GEN_INT (0+4), GEN_INT (3+4)));
24474           return;
24475
24476         case 3:
24477           /* tmp = target = A B C D */
24478           tmp = copy_to_reg (target);
24479           /* tmp = X B C D */
24480           ix86_expand_vector_set (false, tmp, val, 0);
24481           /* target = A B C X */
24482           emit_insn (gen_sse_shufps_1 (target, target, tmp,
24483                                        GEN_INT (0), GEN_INT (1),
24484                                        GEN_INT (2+4), GEN_INT (0+4)));
24485           return;
24486
24487         default:
24488           gcc_unreachable ();
24489         }
24490       break;
24491
24492     case V4SImode:
24493       use_vec_merge = TARGET_SSE4_1;
24494       if (use_vec_merge)
24495         break;
24496
24497       /* Element 0 handled by vec_merge below.  */
24498       if (elt == 0)
24499         {
24500           use_vec_merge = true;
24501           break;
24502         }
24503
24504       if (TARGET_SSE2)
24505         {
24506           /* With SSE2, use integer shuffles to swap element 0 and ELT,
24507              store into element 0, then shuffle them back.  */
24508
24509           rtx order[4];
24510
                /* ORDER starts as the identity selection 0 1 2 3 with entries
                   0 and ELT exchanged; applying it twice restores the
                   original element order.  */
24511           order[0] = GEN_INT (elt);
24512           order[1] = const1_rtx;
24513           order[2] = const2_rtx;
24514           order[3] = GEN_INT (3);
24515           order[elt] = const0_rtx;
24516
24517           emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
24518                                         order[1], order[2], order[3]));
24519
24520           ix86_expand_vector_set (false, target, val, 0);
24521
24522           emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
24523                                         order[1], order[2], order[3]));
24524         }
24525       else
24526         {
24527           /* For SSE1, we have to reuse the V4SF code.  */
24528           ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
24529                                   gen_lowpart (SFmode, val), elt);
24530         }
24531       return;
24532
24533     case V8HImode:
24534       use_vec_merge = TARGET_SSE2;
24535       break;
24536     case V4HImode:
24537       use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
24538       break;
24539
24540     case V16QImode:
24541       use_vec_merge = TARGET_SSE4_1;
24542       break;
24543
24544     case V8QImode:
24545     default:
24546       break;
24547     }
24548
24549   if (use_vec_merge)
24550     {
            /* Merge a broadcast of VAL into TARGET; the mask 1 << ELT takes
               only element ELT from the broadcast.  */
24551       tmp = gen_rtx_VEC_DUPLICATE (mode, val);
24552       tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
24553       emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
24554     }
24555   else
24556     {
            /* No direct expansion: go through a stack temporary.  */
24557       rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
24558
24559       emit_move_insn (mem, target);
24560
24561       tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
24562       emit_move_insn (tmp, val);
24563
24564       emit_move_insn (target, mem);
24565     }
24566 }
24567
      /* Extract element ELT of the vector VEC into TARGET; the strategy is
         selected by the mode of VEC.  MMX_OK permits the expansions guarded
         by it (V2SI/V2SF and V4HI).  When a direct VEC_SELECT is not used,
         VEC is spilled to a stack temporary and the element is loaded from
         its slot.  */
24568 void
24569 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
24570 {
24571   enum machine_mode mode = GET_MODE (vec);
24572   enum machine_mode inner_mode = GET_MODE_INNER (mode);
24573   bool use_vec_extr = false;
24574   rtx tmp;
24575
24576   switch (mode)
24577     {
24578     case V2SImode:
24579     case V2SFmode:
24580       if (!mmx_ok)
24581         break;
24582       /* FALLTHRU */
24583
24584     case V2DFmode:
24585     case V2DImode:
24586       use_vec_extr = true;
24587       break;
24588
24589     case V4SFmode:
24590       use_vec_extr = TARGET_SSE4_1;
24591       if (use_vec_extr)
24592         break;
24593
            /* Without SSE4.1, build in TMP a vector that has the wanted
               element in position 0 and extract element 0 from it.  */
24594       switch (elt)
24595         {
24596         case 0:
24597           tmp = vec;
24598           break;
24599
24600         case 1:
24601         case 3:
24602           tmp = gen_reg_rtx (mode);
24603           emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
24604                                        GEN_INT (elt), GEN_INT (elt),
24605                                        GEN_INT (elt+4), GEN_INT (elt+4)));
24606           break;
24607
24608         case 2:
24609           tmp = gen_reg_rtx (mode);
24610           emit_insn (gen_sse_unpckhps (tmp, vec, vec));
24611           break;
24612
24613         default:
24614           gcc_unreachable ();
24615         }
24616       vec = tmp;
24617       use_vec_extr = true;
24618       elt = 0;
24619       break;
24620
24621     case V4SImode:
24622       use_vec_extr = TARGET_SSE4_1;
24623       if (use_vec_extr)
24624         break;
24625
24626       if (TARGET_SSE2)
24627         {
                /* Same approach as the V4SFmode case, using the integer
                   shuffle and unpack patterns.  */
24628           switch (elt)
24629             {
24630             case 0:
24631               tmp = vec;
24632               break;
24633
24634             case 1:
24635             case 3:
24636               tmp = gen_reg_rtx (mode);
24637               emit_insn (gen_sse2_pshufd_1 (tmp, vec,
24638                                             GEN_INT (elt), GEN_INT (elt),
24639                                             GEN_INT (elt), GEN_INT (elt)));
24640               break;
24641
24642             case 2:
24643               tmp = gen_reg_rtx (mode);
24644               emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
24645               break;
24646
24647             default:
24648               gcc_unreachable ();
24649             }
24650           vec = tmp;
24651           use_vec_extr = true;
24652           elt = 0;
24653         }
24654       else
24655         {
24656           /* For SSE1, we have to reuse the V4SF code.  */
24657           ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
24658                                       gen_lowpart (V4SFmode, vec), elt);
24659           return;
24660         }
24661       break;
24662
24663     case V8HImode:
24664       use_vec_extr = TARGET_SSE2;
24665       break;
24666     case V4HImode:
24667       use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
24668       break;
24669
24670     case V16QImode:
24671       use_vec_extr = TARGET_SSE4_1;
24672       break;
24673
24674     case V8QImode:
24675       /* ??? Could extract the appropriate HImode element and shift.  */
24676     default:
24677       break;
24678     }
24679
24680   if (use_vec_extr)
24681     {
            /* Select the single element ELT of VEC.  */
24682       tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
24683       tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
24684
24685       /* Let the rtl optimizers know about the zero extension performed.  */
24686       if (inner_mode == QImode || inner_mode == HImode)
24687         {
                /* The destination is widened to SImode to match the
                   ZERO_EXTEND source.  */
24688           tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
24689           target = gen_lowpart (SImode, target);
24690         }
24691
24692       emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
24693     }
24694   else
24695     {
            /* No direct expansion: spill VEC and load the element from its
               byte offset within the temporary.  */
24696       rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
24697
24698       emit_move_insn (mem, vec);
24699
24700       tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
24701       emit_move_insn (target, tmp);
24702     }
24703 }
24704
24705 /* Expand a vector reduction on V4SFmode for SSE1.  FN is the binary
24706    pattern to reduce; DEST is the destination; IN is the input vector.

         FN is called as FN (result, operand1, operand2) and the insn it
         returns is emitted.  Three fresh V4SFmode pseudos hold the
         intermediate values; FN is applied twice.  */
24707
24708 void
24709 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
24710 {
24711   rtx tmp1, tmp2, tmp3;
24712
24713   tmp1 = gen_reg_rtx (V4SFmode);
24714   tmp2 = gen_reg_rtx (V4SFmode);
24715   tmp3 = gen_reg_rtx (V4SFmode);
24716
        /* First step: combine IN with the result of movhlps on IN.
           NOTE(review): movhlps presumably brings the high two elements of
           IN down to the low two positions of TMP1 -- confirm against the
           sse_movhlps pattern.  */
24717   emit_insn (gen_sse_movhlps (tmp1, in, in));
24718   emit_insn (fn (tmp2, tmp1, in));
24719
        /* Second step: the selectors 1, 1, 1+4, 1+4 with TMP2 as both
           inputs have the same shape ix86_expand_vector_extract uses to
           bring element ELT to position 0, here with ELT == 1.  The result
           is combined with TMP2 into DEST.  */
24720   emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
24721                                GEN_INT (1), GEN_INT (1),
24722                                GEN_INT (1+4), GEN_INT (1+4)));
24723   emit_insn (fn (dest, tmp2, tmp3));
24724 }
24725 \f
24726 /* Target hook for scalar_mode_supported_p.  Decimal float modes are
         always accepted, TFmode is accepted exactly when TARGET_64BIT is
         set, and every remaining mode is left to
         default_scalar_mode_supported_p.  */
24727 static bool
24728 ix86_scalar_mode_supported_p (enum machine_mode mode)
24729 {
24730   if (DECIMAL_FLOAT_MODE_P (mode))
24731     return true;
24732
24733   if (mode != TFmode)
24734     return default_scalar_mode_supported_p (mode);
24735   return TARGET_64BIT;
24736 }
24737
24738 /* Implements target hook vector_mode_supported_p.  MODE is supported
         when it is a valid register mode for at least one enabled vector
         extension; the pairs are tested in the order SSE, SSE2, MMX,
         3DNow!.  */
24739 static bool
24740 ix86_vector_mode_supported_p (enum machine_mode mode)
24741 {
24742   return ((TARGET_SSE && VALID_SSE_REG_MODE (mode))
24743           || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
24744           || (TARGET_MMX && VALID_MMX_REG_MODE (mode))
24745           || (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode)));
24751 }
24752
24753 /* Target hook for c_mode_for_suffix.  Suffix 'q' maps to TFmode when
         TARGET_64BIT is set and suffix 'w' maps to XFmode when TARGET_MMX is
         set; every other combination yields VOIDmode.  */
24754 static enum machine_mode
24755 ix86_c_mode_for_suffix (char suffix)
24756 {
24757   enum machine_mode result = VOIDmode;
24758   if (suffix == 'q' && TARGET_64BIT)
24759     result = TFmode;
24760   else if (suffix == 'w' && TARGET_MMX)
24761     result = XFmode;
24762   return result;
24763 }
24764
24765 /* Worker function for TARGET_MD_ASM_CLOBBERS.
24766
24767    Prepend "flags" and then "fpsr" to the CLOBBERS list and return the
24768    extended list; OUTPUTS and INPUTS are ignored.  This is done in the
         new i386 backend to maintain source compatibility with the old
         cc0-based compiler.  */
24769
24770 static tree
24771 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
24772                       tree inputs ATTRIBUTE_UNUSED,
24773                       tree clobbers)
24774 {
24775   tree with_flags, with_fpsr;
24776
24777   with_flags = tree_cons (NULL_TREE, build_string (5, "flags"), clobbers);
24778   with_fpsr = tree_cons (NULL_TREE, build_string (4, "fpsr"), with_flags);
24779   return with_fpsr;
24780 }
24781
24782 /* Implements target vector targetm.asm.encode_section_info.  This
24783    is not used by netware.  After the default processing, the symbol of
         a static or external VAR_DECL for which ix86_in_large_data_p holds
         is additionally flagged with SYMBOL_FLAG_FAR_ADDR.  */
24784
24785 static void ATTRIBUTE_UNUSED
24786 ix86_encode_section_info (tree decl, rtx rtl, int first)
24787 {
24788   default_encode_section_info (decl, rtl, first);
24789   if (TREE_CODE (decl) != VAR_DECL
24790       || (!TREE_STATIC (decl) && !DECL_EXTERNAL (decl)))
24791     return;
24792   if (ix86_in_large_data_p (decl))
24793     SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
24794 }
24795
24796 /* Worker function for REVERSE_CONDITION.  Comparisons in CCFPmode or
         CCFPUmode are reversed with reverse_condition_maybe_unordered; all
         other modes use plain reverse_condition.  */
24797
24798 enum rtx_code
24799 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
24800 {
24801   if (mode == CCFPmode || mode == CCFPUmode)
24802     return reverse_condition_maybe_unordered (code);
24803   return reverse_condition (code);
24804 }
24805
24806 /* Output code to perform an x87 FP register move, from OPERANDS[1]
24807    to OPERANDS[0].  The assembler template to use is returned.  INSN is
         searched for a REG_DEAD note on the source register, which selects
         between the templates.  The destination must be a register or a
         memory reference; with a memory destination the source must be a
         register.  */
24808
24809 const char *
24810 output_387_reg_move (rtx insn, rtx *operands)
24811 {
24812   rtx dest = operands[0];
24813   rtx src = operands[1];
24814
24815   if (REG_P (dest))
24816     {
24817       if (REG_P (src) && find_regno_note (insn, REG_DEAD, REGNO (src)))
24818         {
24819           /* The source register dies in INSN.  */
24820           if (REGNO (dest) != FIRST_STACK_REG)
24821             return "fstp\t%y0";
24822           return output_387_ffreep (operands, 0);
24823         }
24824       return STACK_TOP_P (dest) ? "fld%z1\t%y1" : "fst\t%y0";
24825     }
24826
24827   if (!MEM_P (dest))
24828     gcc_unreachable ();
24829
24830   gcc_assert (REG_P (src));
24831   if (find_regno_note (insn, REG_DEAD, REGNO (src)))
24832     return "fstp%z0\t%y0";
24833
24834   /* There is no non-popping store to memory for XFmode.
24835      So if we need one, follow the store with a load.  */
24836   if (GET_MODE (dest) == XFmode)
24837     return "fstp%z0\t%y0\n\tfld%z0\t%y0";
24838   return "fst%z0\t%y0";
24842 }
24843
24844 /* Output code to perform a conditional jump to LABEL, if C2 flag in
24845    FP status register is set.  */
24846
24847 void
24848 ix86_emit_fp_unordered_jump (rtx label)
24849 {
24850   rtx reg = gen_reg_rtx (HImode);
24851   rtx temp;
24852
24853   emit_insn (gen_x86_fnstsw_1 (reg));
24854
24855   if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_size))
24856     {
24857       emit_insn (gen_x86_sahf_1 (reg));
24858
24859       temp = gen_rtx_REG (CCmode, FLAGS_REG);
24860       temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
24861     }
24862   else
24863     {
24864       emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
24865
24866       temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24867       temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
24868     }
24869
24870   temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
24871                               gen_rtx_LABEL_REF (VOIDmode, label),
24872                               pc_rtx);
24873   temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
24874
24875   emit_jump_insn (temp);
24876   predict_jump (REG_BR_PROB_BASE * 10 / 100);
24877 }
24878
24879 /* Output code to perform a log1p XFmode calculation.  */
24880
24881 void ix86_emit_i387_log1p (rtx op0, rtx op1)
24882 {
24883   rtx label1 = gen_label_rtx ();
24884   rtx label2 = gen_label_rtx ();
24885
24886   rtx tmp = gen_reg_rtx (XFmode);
24887   rtx tmp2 = gen_reg_rtx (XFmode);
24888
24889   emit_insn (gen_absxf2 (tmp, op1));
24890   emit_insn (gen_cmpxf (tmp,
24891     CONST_DOUBLE_FROM_REAL_VALUE (
24892        REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
24893        XFmode)));
24894   emit_jump_insn (gen_bge (label1));
24895
24896   emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
24897   emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
24898   emit_jump (label2);
24899
24900   emit_label (label1);
24901   emit_move_insn (tmp, CONST1_RTX (XFmode));
24902   emit_insn (gen_addxf3 (tmp, op1, tmp));
24903   emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
24904   emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
24905
24906   emit_label (label2);
24907 }
24908
24909 /* Output code to perform a Newton-Rhapson approximation of a single precision
24910    floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm].  */
24911
24912 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
24913 {
24914   rtx x0, x1, e0, e1, two;
24915
24916   x0 = gen_reg_rtx (mode);
24917   e0 = gen_reg_rtx (mode);
24918   e1 = gen_reg_rtx (mode);
24919   x1 = gen_reg_rtx (mode);
24920
24921   two = CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode);
24922
24923   if (VECTOR_MODE_P (mode))
24924     two = ix86_build_const_vector (SFmode, true, two);
24925
24926   two = force_reg (mode, two);
24927
24928   /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */
24929
24930   /* x0 = rcp(b) estimate */
24931   emit_insn (gen_rtx_SET (VOIDmode, x0,
24932                           gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
24933                                           UNSPEC_RCP)));
24934   /* e0 = x0 * b */
24935   emit_insn (gen_rtx_SET (VOIDmode, e0,
24936                           gen_rtx_MULT (mode, x0, b)));
24937   /* e1 = 2. - e0 */
24938   emit_insn (gen_rtx_SET (VOIDmode, e1,
24939                           gen_rtx_MINUS (mode, two, e0)));
24940   /* x1 = x0 * e1 */
24941   emit_insn (gen_rtx_SET (VOIDmode, x1,
24942                           gen_rtx_MULT (mode, x0, e1)));
24943   /* res = a * x1 */
24944   emit_insn (gen_rtx_SET (VOIDmode, res,
24945                           gen_rtx_MULT (mode, a, x1)));
24946 }
24947
24948 /* Output code to perform a Newton-Rhapson approximation of a
24949    single precision floating point [reciprocal] square root.  */
24950
24951 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
24952                          bool recip)
24953 {
24954   rtx x0, e0, e1, e2, e3, mthree, mhalf;
24955   REAL_VALUE_TYPE r;
24956
24957   x0 = gen_reg_rtx (mode);
24958   e0 = gen_reg_rtx (mode);
24959   e1 = gen_reg_rtx (mode);
24960   e2 = gen_reg_rtx (mode);
24961   e3 = gen_reg_rtx (mode);
24962
24963   real_from_integer (&r, VOIDmode, -3, -1, 0);
24964   mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
24965
24966   real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
24967   mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
24968
24969   if (VECTOR_MODE_P (mode))
24970     {
24971       mthree = ix86_build_const_vector (SFmode, true, mthree);
24972       mhalf = ix86_build_const_vector (SFmode, true, mhalf);
24973     }
24974
24975   /* sqrt(a)  = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
24976      rsqrt(a) = -0.5     * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
24977
24978   /* x0 = rsqrt(a) estimate */
24979   emit_insn (gen_rtx_SET (VOIDmode, x0,
24980                           gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
24981                                           UNSPEC_RSQRT)));
24982
24983   /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0).  */
24984   if (!recip)
24985     {
24986       rtx zero, mask;
24987
24988       zero = gen_reg_rtx (mode);
24989       mask = gen_reg_rtx (mode);
24990
24991       zero = force_reg (mode, CONST0_RTX(mode));
24992       emit_insn (gen_rtx_SET (VOIDmode, mask,
24993                               gen_rtx_NE (mode, zero, a)));
24994
24995       emit_insn (gen_rtx_SET (VOIDmode, x0,
24996                               gen_rtx_AND (mode, x0, mask)));
24997     }
24998
24999   /* e0 = x0 * a */
25000   emit_insn (gen_rtx_SET (VOIDmode, e0,
25001                           gen_rtx_MULT (mode, x0, a)));
25002   /* e1 = e0 * x0 */
25003   emit_insn (gen_rtx_SET (VOIDmode, e1,
25004                           gen_rtx_MULT (mode, e0, x0)));
25005
25006   /* e2 = e1 - 3. */
25007   mthree = force_reg (mode, mthree);
25008   emit_insn (gen_rtx_SET (VOIDmode, e2,
25009                           gen_rtx_PLUS (mode, e1, mthree)));
25010
25011   mhalf = force_reg (mode, mhalf);
25012   if (recip)
25013     /* e3 = -.5 * x0 */
25014     emit_insn (gen_rtx_SET (VOIDmode, e3,
25015                             gen_rtx_MULT (mode, x0, mhalf)));
25016   else
25017     /* e3 = -.5 * e0 */
25018     emit_insn (gen_rtx_SET (VOIDmode, e3,
25019                             gen_rtx_MULT (mode, e0, mhalf)));
25020   /* ret = e2 * e3 */
25021   emit_insn (gen_rtx_SET (VOIDmode, res,
25022                           gen_rtx_MULT (mode, e2, e3)));
25023 }
25024
25025 /* Solaris implementation of TARGET_ASM_NAMED_SECTION.  */
25026
25027 static void ATTRIBUTE_UNUSED
25028 i386_solaris_elf_named_section (const char *name, unsigned int flags,
25029                                 tree decl)
25030 {
25031   /* With Binutils 2.15, the "@unwind" marker must be specified on
25032      every occurrence of the ".eh_frame" section, not just the first
25033      one.  */
25034   if (TARGET_64BIT
25035       && strcmp (name, ".eh_frame") == 0)
25036     {
25037       fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
25038                flags & SECTION_WRITE ? "aw" : "a");
25039       return;
25040     }
25041   default_elf_asm_named_section (name, flags, decl);
25042 }
25043
25044 /* Return the mangling of TYPE if it is an extended fundamental type.  */
25045
25046 static const char *
25047 ix86_mangle_type (const_tree type)
25048 {
25049   type = TYPE_MAIN_VARIANT (type);
25050
25051   if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
25052       && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
25053     return NULL;
25054
25055   switch (TYPE_MODE (type))
25056     {
25057     case TFmode:
25058       /* __float128 is "g".  */
25059       return "g";
25060     case XFmode:
25061       /* "long double" or __float80 is "e".  */
25062       return "e";
25063     default:
25064       return NULL;
25065     }
25066 }
25067
25068 /* For 32-bit code we can save PIC register setup by using
25069    __stack_chk_fail_local hidden function instead of calling
25070    __stack_chk_fail directly.  64-bit code doesn't need to setup any PIC
25071    register, so it is better to call __stack_chk_fail directly.  */
25072
25073 static tree
25074 ix86_stack_protect_fail (void)
25075 {
25076   return TARGET_64BIT
25077          ? default_external_stack_protect_fail ()
25078          : default_hidden_stack_protect_fail ();
25079 }
25080
25081 /* Select a format to encode pointers in exception handling data.  CODE
25082    is 0 for data, 1 for code labels, 2 for function pointers.  GLOBAL is
25083    true if the symbol may be affected by dynamic relocations.
25084
25085    ??? All x86 object file formats are capable of representing this.
25086    After all, the relocation needed is the same as for the call insn.
25087    Whether or not a particular assembler allows us to enter such, I
25088    guess we'll have to see.  */
25089 int
25090 asm_preferred_eh_data_format (int code, int global)
25091 {
25092   if (flag_pic)
25093     {
25094       int type = DW_EH_PE_sdata8;
25095       if (!TARGET_64BIT
25096           || ix86_cmodel == CM_SMALL_PIC
25097           || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
25098         type = DW_EH_PE_sdata4;
25099       return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
25100     }
25101   if (ix86_cmodel == CM_SMALL
25102       || (ix86_cmodel == CM_MEDIUM && code))
25103     return DW_EH_PE_udata4;
25104   return DW_EH_PE_absptr;
25105 }
25106 \f
25107 /* Expand copysign from SIGN to the positive value ABS_VALUE
25108    storing in RESULT.  If MASK is non-null, it shall be a mask to mask out
25109    the sign-bit.  */
25110 static void
25111 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
25112 {
25113   enum machine_mode mode = GET_MODE (sign);
25114   rtx sgn = gen_reg_rtx (mode);
25115   if (mask == NULL_RTX)
25116     {
25117       mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
25118       if (!VECTOR_MODE_P (mode))
25119         {
25120           /* We need to generate a scalar mode mask in this case.  */
25121           rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
25122           tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
25123           mask = gen_reg_rtx (mode);
25124           emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
25125         }
25126     }
25127   else
25128     mask = gen_rtx_NOT (mode, mask);
25129   emit_insn (gen_rtx_SET (VOIDmode, sgn,
25130                           gen_rtx_AND (mode, mask, sign)));
25131   emit_insn (gen_rtx_SET (VOIDmode, result,
25132                           gen_rtx_IOR (mode, abs_value, sgn)));
25133 }
25134
25135 /* Expand fabs (OP0) and return a new rtx that holds the result.  The
25136    mask for masking out the sign-bit is stored in *SMASK, if that is
25137    non-null.  */
25138 static rtx
25139 ix86_expand_sse_fabs (rtx op0, rtx *smask)
25140 {
25141   enum machine_mode mode = GET_MODE (op0);
25142   rtx xa, mask;
25143
25144   xa = gen_reg_rtx (mode);
25145   mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
25146   if (!VECTOR_MODE_P (mode))
25147     {
25148       /* We need to generate a scalar mode mask in this case.  */
25149       rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
25150       tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
25151       mask = gen_reg_rtx (mode);
25152       emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
25153     }
25154   emit_insn (gen_rtx_SET (VOIDmode, xa,
25155                           gen_rtx_AND (mode, op0, mask)));
25156
25157   if (smask)
25158     *smask = mask;
25159
25160   return xa;
25161 }
25162
25163 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
25164    swapping the operands if SWAP_OPERANDS is true.  The expanded
25165    code is a forward jump to a newly created label in case the
25166    comparison is true.  The generated label rtx is returned.  */
25167 static rtx
25168 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
25169                                   bool swap_operands)
25170 {
25171   rtx label, tmp;
25172
25173   if (swap_operands)
25174     {
25175       tmp = op0;
25176       op0 = op1;
25177       op1 = tmp;
25178     }
25179
25180   label = gen_label_rtx ();
25181   tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
25182   emit_insn (gen_rtx_SET (VOIDmode, tmp,
25183                           gen_rtx_COMPARE (CCFPUmode, op0, op1)));
25184   tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
25185   tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
25186                               gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
25187   tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
25188   JUMP_LABEL (tmp) = label;
25189
25190   return label;
25191 }
25192
25193 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
25194    using comparison code CODE.  Operands are swapped for the comparison if
25195    SWAP_OPERANDS is true.  Returns a rtx for the generated mask.  */
25196 static rtx
25197 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
25198                               bool swap_operands)
25199 {
25200   enum machine_mode mode = GET_MODE (op0);
25201   rtx mask = gen_reg_rtx (mode);
25202
25203   if (swap_operands)
25204     {
25205       rtx tmp = op0;
25206       op0 = op1;
25207       op1 = tmp;
25208     }
25209
25210   if (mode == DFmode)
25211     emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
25212                                     gen_rtx_fmt_ee (code, mode, op0, op1)));
25213   else
25214     emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
25215                                    gen_rtx_fmt_ee (code, mode, op0, op1)));
25216
25217   return mask;
25218 }
25219
25220 /* Generate and return a rtx of mode MODE for 2**n where n is the number
25221    of bits of the mantissa of MODE, which must be one of DFmode or SFmode.  */
25222 static rtx
25223 ix86_gen_TWO52 (enum machine_mode mode)
25224 {
25225   REAL_VALUE_TYPE TWO52r;
25226   rtx TWO52;
25227
25228   real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
25229   TWO52 = const_double_from_real_value (TWO52r, mode);
25230   TWO52 = force_reg (mode, TWO52);
25231
25232   return TWO52;
25233 }
25234
25235 /* Expand SSE sequence for computing lround from OP1 storing
25236    into OP0.  */
25237 void
25238 ix86_expand_lround (rtx op0, rtx op1)
25239 {
25240   /* C code for the stuff we're doing below:
25241        tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
25242        return (long)tmp;
25243    */
25244   enum machine_mode mode = GET_MODE (op1);
25245   const struct real_format *fmt;
25246   REAL_VALUE_TYPE pred_half, half_minus_pred_half;
25247   rtx adj;
25248
25249   /* load nextafter (0.5, 0.0) */
25250   fmt = REAL_MODE_FORMAT (mode);
25251   real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
25252   REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
25253
25254   /* adj = copysign (0.5, op1) */
25255   adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
25256   ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
25257
25258   /* adj = op1 + adj */
25259   adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
25260
25261   /* op0 = (imode)adj */
25262   expand_fix (op0, adj, 0);
25263 }
25264
25265 /* Expand SSE2 sequence for computing lround from OPERAND1 storing
25266    into OPERAND0.  */
25267 void
25268 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
25269 {
25270   /* C code for the stuff we're doing below (for do_floor):
25271         xi = (long)op1;
25272         xi -= (double)xi > op1 ? 1 : 0;
25273         return xi;
25274    */
25275   enum machine_mode fmode = GET_MODE (op1);
25276   enum machine_mode imode = GET_MODE (op0);
25277   rtx ireg, freg, label, tmp;
25278
25279   /* reg = (long)op1 */
25280   ireg = gen_reg_rtx (imode);
25281   expand_fix (ireg, op1, 0);
25282
25283   /* freg = (double)reg */
25284   freg = gen_reg_rtx (fmode);
25285   expand_float (freg, ireg, 0);
25286
25287   /* ireg = (freg > op1) ? ireg - 1 : ireg */
25288   label = ix86_expand_sse_compare_and_jump (UNLE,
25289                                             freg, op1, !do_floor);
25290   tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
25291                              ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
25292   emit_move_insn (ireg, tmp);
25293
25294   emit_label (label);
25295   LABEL_NUSES (label) = 1;
25296
25297   emit_move_insn (op0, ireg);
25298 }
25299
25300 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
25301    result in OPERAND0.  */
25302 void
25303 ix86_expand_rint (rtx operand0, rtx operand1)
25304 {
25305   /* C code for the stuff we're doing below:
25306         xa = fabs (operand1);
25307         if (!isless (xa, 2**52))
25308           return operand1;
25309         xa = xa + 2**52 - 2**52;
25310         return copysign (xa, operand1);
25311    */
25312   enum machine_mode mode = GET_MODE (operand0);
25313   rtx res, xa, label, TWO52, mask;
25314
25315   res = gen_reg_rtx (mode);
25316   emit_move_insn (res, operand1);
25317
25318   /* xa = abs (operand1) */
25319   xa = ix86_expand_sse_fabs (res, &mask);
25320
25321   /* if (!isless (xa, TWO52)) goto label; */
25322   TWO52 = ix86_gen_TWO52 (mode);
25323   label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
25324
25325   xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
25326   xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
25327
25328   ix86_sse_copysign_to_positive (res, xa, res, mask);
25329
25330   emit_label (label);
25331   LABEL_NUSES (label) = 1;
25332
25333   emit_move_insn (operand0, res);
25334 }
25335
25336 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
25337    into OPERAND0.  */
25338 void
25339 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
25340 {
25341   /* C code for the stuff we expand below.
25342         double xa = fabs (x), x2;
25343         if (!isless (xa, TWO52))
25344           return x;
25345         xa = xa + TWO52 - TWO52;
25346         x2 = copysign (xa, x);
25347      Compensate.  Floor:
25348         if (x2 > x)
25349           x2 -= 1;
25350      Compensate.  Ceil:
25351         if (x2 < x)
25352           x2 -= -1;
25353         return x2;
25354    */
25355   enum machine_mode mode = GET_MODE (operand0);
25356   rtx xa, TWO52, tmp, label, one, res, mask;
25357
25358   TWO52 = ix86_gen_TWO52 (mode);
25359
25360   /* Temporary for holding the result, initialized to the input
25361      operand to ease control flow.  */
25362   res = gen_reg_rtx (mode);
25363   emit_move_insn (res, operand1);
25364
25365   /* xa = abs (operand1) */
25366   xa = ix86_expand_sse_fabs (res, &mask);
25367
25368   /* if (!isless (xa, TWO52)) goto label; */
25369   label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
25370
25371   /* xa = xa + TWO52 - TWO52; */
25372   xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
25373   xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
25374
25375   /* xa = copysign (xa, operand1) */
25376   ix86_sse_copysign_to_positive (xa, xa, res, mask);
25377
25378   /* generate 1.0 or -1.0 */
25379   one = force_reg (mode,
25380                    const_double_from_real_value (do_floor
25381                                                  ? dconst1 : dconstm1, mode));
25382
25383   /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
25384   tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
25385   emit_insn (gen_rtx_SET (VOIDmode, tmp,
25386                           gen_rtx_AND (mode, one, tmp)));
25387   /* We always need to subtract here to preserve signed zero.  */
25388   tmp = expand_simple_binop (mode, MINUS,
25389                              xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
25390   emit_move_insn (res, tmp);
25391
25392   emit_label (label);
25393   LABEL_NUSES (label) = 1;
25394
25395   emit_move_insn (operand0, res);
25396 }
25397
25398 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
25399    into OPERAND0.  */
25400 void
25401 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
25402 {
25403   /* C code for the stuff we expand below.
25404         double xa = fabs (x), x2;
25405         if (!isless (xa, TWO52))
25406           return x;
25407         x2 = (double)(long)x;
25408      Compensate.  Floor:
25409         if (x2 > x)
25410           x2 -= 1;
25411      Compensate.  Ceil:
25412         if (x2 < x)
25413           x2 += 1;
25414         if (HONOR_SIGNED_ZEROS (mode))
25415           return copysign (x2, x);
25416         return x2;
25417    */
25418   enum machine_mode mode = GET_MODE (operand0);
25419   rtx xa, xi, TWO52, tmp, label, one, res, mask;
25420
25421   TWO52 = ix86_gen_TWO52 (mode);
25422
25423   /* Temporary for holding the result, initialized to the input
25424      operand to ease control flow.  */
25425   res = gen_reg_rtx (mode);
25426   emit_move_insn (res, operand1);
25427
25428   /* xa = abs (operand1) */
25429   xa = ix86_expand_sse_fabs (res, &mask);
25430
25431   /* if (!isless (xa, TWO52)) goto label; */
25432   label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
25433
25434   /* xa = (double)(long)x */
25435   xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
25436   expand_fix (xi, res, 0);
25437   expand_float (xa, xi, 0);
25438
25439   /* generate 1.0 */
25440   one = force_reg (mode, const_double_from_real_value (dconst1, mode));
25441
25442   /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
25443   tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
25444   emit_insn (gen_rtx_SET (VOIDmode, tmp,
25445                           gen_rtx_AND (mode, one, tmp)));
25446   tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
25447                              xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
25448   emit_move_insn (res, tmp);
25449
25450   if (HONOR_SIGNED_ZEROS (mode))
25451     ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
25452
25453   emit_label (label);
25454   LABEL_NUSES (label) = 1;
25455
25456   emit_move_insn (operand0, res);
25457 }
25458
25459 /* Expand SSE sequence for computing round from OPERAND1 storing
25460    into OPERAND0.  Sequence that works without relying on DImode truncation
25461    via cvttsd2siq that is only available on 64bit targets.  */
25462 void
25463 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
25464 {
25465   /* C code for the stuff we expand below.
25466         double xa = fabs (x), xa2, x2;
25467         if (!isless (xa, TWO52))
25468           return x;
25469      Using the absolute value and copying back sign makes
25470      -0.0 -> -0.0 correct.
25471         xa2 = xa + TWO52 - TWO52;
25472      Compensate.
25473         dxa = xa2 - xa;
25474         if (dxa <= -0.5)
25475           xa2 += 1;
25476         else if (dxa > 0.5)
25477           xa2 -= 1;
25478         x2 = copysign (xa2, x);
25479         return x2;
25480    */
25481   enum machine_mode mode = GET_MODE (operand0);
25482   rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
25483
25484   TWO52 = ix86_gen_TWO52 (mode);
25485
25486   /* Temporary for holding the result, initialized to the input
25487      operand to ease control flow.  */
25488   res = gen_reg_rtx (mode);
25489   emit_move_insn (res, operand1);
25490
25491   /* xa = abs (operand1) */
25492   xa = ix86_expand_sse_fabs (res, &mask);
25493
25494   /* if (!isless (xa, TWO52)) goto label; */
25495   label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
25496
25497   /* xa2 = xa + TWO52 - TWO52; */
25498   xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
25499   xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
25500
25501   /* dxa = xa2 - xa; */
25502   dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
25503
25504   /* generate 0.5, 1.0 and -0.5 */
25505   half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
25506   one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
25507   mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
25508                                0, OPTAB_DIRECT);
25509
25510   /* Compensate.  */
25511   tmp = gen_reg_rtx (mode);
25512   /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
25513   tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
25514   emit_insn (gen_rtx_SET (VOIDmode, tmp,
25515                           gen_rtx_AND (mode, one, tmp)));
25516   xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
25517   /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
25518   tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
25519   emit_insn (gen_rtx_SET (VOIDmode, tmp,
25520                           gen_rtx_AND (mode, one, tmp)));
25521   xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
25522
25523   /* res = copysign (xa2, operand1) */
25524   ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
25525
25526   emit_label (label);
25527   LABEL_NUSES (label) = 1;
25528
25529   emit_move_insn (operand0, res);
25530 }
25531
25532 /* Expand SSE sequence for computing trunc from OPERAND1 storing
25533    into OPERAND0.  */
25534 void
25535 ix86_expand_trunc (rtx operand0, rtx operand1)
25536 {
25537   /* C code for SSE variant we expand below.
25538         double xa = fabs (x), x2;
25539         if (!isless (xa, TWO52))
25540           return x;
25541         x2 = (double)(long)x;
25542         if (HONOR_SIGNED_ZEROS (mode))
25543           return copysign (x2, x);
25544         return x2;
25545    */
25546   enum machine_mode mode = GET_MODE (operand0);
25547   rtx xa, xi, TWO52, label, res, mask;
25548
25549   TWO52 = ix86_gen_TWO52 (mode);
25550
25551   /* Temporary for holding the result, initialized to the input
25552      operand to ease control flow.  */
25553   res = gen_reg_rtx (mode);
25554   emit_move_insn (res, operand1);
25555
25556   /* xa = abs (operand1) */
25557   xa = ix86_expand_sse_fabs (res, &mask);
25558
25559   /* if (!isless (xa, TWO52)) goto label; */
25560   label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
25561
25562   /* x = (double)(long)x */
25563   xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
25564   expand_fix (xi, res, 0);
25565   expand_float (res, xi, 0);
25566
25567   if (HONOR_SIGNED_ZEROS (mode))
25568     ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
25569
25570   emit_label (label);
25571   LABEL_NUSES (label) = 1;
25572
25573   emit_move_insn (operand0, res);
25574 }
25575
25576 /* Expand SSE sequence for computing trunc from OPERAND1 storing
25577    into OPERAND0.  */
25578 void
25579 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
25580 {
25581   enum machine_mode mode = GET_MODE (operand0);
25582   rtx xa, mask, TWO52, label, one, res, smask, tmp;
25583
25584   /* C code for SSE variant we expand below.
25585         double xa = fabs (x), x2;
25586         if (!isless (xa, TWO52))
25587           return x;
25588         xa2 = xa + TWO52 - TWO52;
25589      Compensate:
25590         if (xa2 > xa)
25591           xa2 -= 1.0;
25592         x2 = copysign (xa2, x);
25593         return x2;
25594    */
25595
25596   TWO52 = ix86_gen_TWO52 (mode);
25597
25598   /* Temporary for holding the result, initialized to the input
25599      operand to ease control flow.  */
25600   res = gen_reg_rtx (mode);
25601   emit_move_insn (res, operand1);
25602
25603   /* xa = abs (operand1) */
25604   xa = ix86_expand_sse_fabs (res, &smask);
25605
25606   /* if (!isless (xa, TWO52)) goto label; */
25607   label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
25608
25609   /* res = xa + TWO52 - TWO52; */
25610   tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
25611   tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
25612   emit_move_insn (res, tmp);
25613
25614   /* generate 1.0 */
25615   one = force_reg (mode, const_double_from_real_value (dconst1, mode));
25616
25617   /* Compensate: res = xa2 - (res > xa ? 1 : 0)  */
25618   mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
25619   emit_insn (gen_rtx_SET (VOIDmode, mask,
25620                           gen_rtx_AND (mode, mask, one)));
25621   tmp = expand_simple_binop (mode, MINUS,
25622                              res, mask, NULL_RTX, 0, OPTAB_DIRECT);
25623   emit_move_insn (res, tmp);
25624
25625   /* res = copysign (res, operand1) */
25626   ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
25627
25628   emit_label (label);
25629   LABEL_NUSES (label) = 1;
25630
25631   emit_move_insn (operand0, res);
25632 }
25633
25634 /* Expand SSE sequence for computing round from OPERAND1 storing
25635    into OPERAND0.  */
25636 void
25637 ix86_expand_round (rtx operand0, rtx operand1)
25638 {
25639   /* C code for the stuff we're doing below:
25640         double xa = fabs (x);
25641         if (!isless (xa, TWO52))
25642           return x;
25643         xa = (double)(long)(xa + nextafter (0.5, 0.0));
25644         return copysign (xa, x);
25645    */
25646   enum machine_mode mode = GET_MODE (operand0);
25647   rtx res, TWO52, xa, label, xi, half, mask;
25648   const struct real_format *fmt;
25649   REAL_VALUE_TYPE pred_half, half_minus_pred_half;
25650
25651   /* Temporary for holding the result, initialized to the input
25652      operand to ease control flow.  */
25653   res = gen_reg_rtx (mode);
25654   emit_move_insn (res, operand1);
25655
25656   TWO52 = ix86_gen_TWO52 (mode);
25657   xa = ix86_expand_sse_fabs (res, &mask);
25658   label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
25659
25660   /* load nextafter (0.5, 0.0) */
25661   fmt = REAL_MODE_FORMAT (mode);
25662   real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
25663   REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
25664
25665   /* xa = xa + 0.5 */
25666   half = force_reg (mode, const_double_from_real_value (pred_half, mode));
25667   xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
25668
25669   /* xa = (double)(int64_t)xa */
25670   xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
25671   expand_fix (xi, xa, 0);
25672   expand_float (xa, xi, 0);
25673
25674   /* res = copysign (xa, operand1) */
25675   ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
25676
25677   emit_label (label);
25678   LABEL_NUSES (label) = 1;
25679
25680   emit_move_insn (operand0, res);
25681 }
25682
25683 \f
25684 /* Validate whether a SSE5 instruction is valid or not.
25685    OPERANDS is the array of operands.
25686    NUM is the number of operands.
25687    USES_OC0 is true if the instruction uses OC0 and provides 4 variants.
25688    NUM_MEMORY is the maximum number of memory operands to accept.  */
25689
25690 bool
25691 ix86_sse5_valid_op_p (rtx operands[], rtx insn ATTRIBUTE_UNUSED, int num,
25692                       bool uses_oc0, int num_memory)
25693 {
25694   int mem_mask;
25695   int mem_count;
25696   int i;
25697
25698   /* Count the number of memory arguments */
25699   mem_mask = 0;
25700   mem_count = 0;
25701   for (i = 0; i < num; i++)
25702     {
25703       enum machine_mode mode = GET_MODE (operands[i]);
25704       if (register_operand (operands[i], mode))
25705         ;
25706
25707       else if (memory_operand (operands[i], mode))
25708         {
25709           mem_mask |= (1 << i);
25710           mem_count++;
25711         }
25712
25713       else
25714         {
25715           rtx pattern = PATTERN (insn);
25716
25717           /* allow 0 for pcmov */
25718           if (GET_CODE (pattern) != SET
25719               || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE
25720               || i < 2
25721               || operands[i] != CONST0_RTX (mode))
25722             return false;
25723         }
25724     }
25725
25726   /* Special case pmacsdq{l,h} where we allow the 3rd argument to be
25727      a memory operation.  */
25728   if (num_memory < 0)
25729     {
25730       num_memory = -num_memory;
25731       if ((mem_mask & (1 << (num-1))) != 0)
25732         {
25733           mem_mask &= ~(1 << (num-1));
25734           mem_count--;
25735         }
25736     }
25737
25738   /* If there were no memory operations, allow the insn */
25739   if (mem_mask == 0)
25740     return true;
25741
25742   /* Do not allow the destination register to be a memory operand.  */
25743   else if (mem_mask & (1 << 0))
25744     return false;
25745
25746   /* If there are too many memory operations, disallow the instruction.  While
25747      the hardware only allows 1 memory reference, before register allocation
25748      for some insns, we allow two memory operations sometimes in order to allow
25749      code like the following to be optimized:
25750
25751         float fmadd (float *a, float *b, float *c) { return (*a * *b) + *c; }
25752
25753     or similar cases that are vectorized into using the fmaddss
25754     instruction.  */
25755   else if (mem_count > num_memory)
25756     return false;
25757
25758   /* Don't allow more than one memory operation if not optimizing.  */
25759   else if (mem_count > 1 && !optimize)
25760     return false;
25761
25762   else if (num == 4 && mem_count == 1)
25763     {
25764       /* formats (destination is the first argument), example fmaddss:
25765          xmm1, xmm1, xmm2, xmm3/mem
25766          xmm1, xmm1, xmm2/mem, xmm3
25767          xmm1, xmm2, xmm3/mem, xmm1
25768          xmm1, xmm2/mem, xmm3, xmm1 */
25769       if (uses_oc0)
25770         return ((mem_mask == (1 << 1))
25771                 || (mem_mask == (1 << 2))
25772                 || (mem_mask == (1 << 3)));
25773
25774       /* format, example pmacsdd:
25775          xmm1, xmm2, xmm3/mem, xmm1 */
25776       else
25777         return (mem_mask == (1 << 2));
25778     }
25779
25780   else if (num == 4 && num_memory == 2)
25781     {
25782       /* If there are two memory operations, we can load one of the memory ops
25783          into the destination register.  This is for optimizing the
25784          multiply/add ops, which the combiner has optimized both the multiply
25785          and the add insns to have a memory operation.  We have to be careful
25786          that the destination doesn't overlap with the inputs.  */
25787       rtx op0 = operands[0];
25788
25789       if (reg_mentioned_p (op0, operands[1])
25790           || reg_mentioned_p (op0, operands[2])
25791           || reg_mentioned_p (op0, operands[3]))
25792         return false;
25793
25794       /* formats (destination is the first argument), example fmaddss:
25795          xmm1, xmm1, xmm2, xmm3/mem
25796          xmm1, xmm1, xmm2/mem, xmm3
25797          xmm1, xmm2, xmm3/mem, xmm1
25798          xmm1, xmm2/mem, xmm3, xmm1
25799
25800          For the oc0 case, we will load either operands[1] or operands[3] into
25801          operands[0], so any combination of 2 memory operands is ok.  */
25802       if (uses_oc0)
25803         return true;
25804
25805       /* format, example pmacsdd:
25806          xmm1, xmm2, xmm3/mem, xmm1
25807
25808          For the integer multiply/add instructions be more restrictive and
25809          require operands[2] and operands[3] to be the memory operands.  */
25810       else
25811         return (mem_mask == ((1 << 2) | (1 << 3)));
25812     }
25813
25814   else if (num == 3 && num_memory == 1)
25815     {
25816       /* formats, example protb:
25817          xmm1, xmm2, xmm3/mem
25818          xmm1, xmm2/mem, xmm3 */
25819       if (uses_oc0)
25820         return ((mem_mask == (1 << 1)) || (mem_mask == (1 << 2)));
25821
25822       /* format, example comeq:
25823          xmm1, xmm2, xmm3/mem */
25824       else
25825         return (mem_mask == (1 << 2));
25826     }
25827
25828   else
25829     gcc_unreachable ();
25830
25831   return false;
25832 }
25833
25834 \f
25835 /* Fixup an SSE5 instruction that has 2 memory input references into a form the
25836    hardware will allow by using the destination register to load one of the
25837    memory operations.  Presently this is used by the multiply/add routines to
25838    allow 2 memory references.  */
25839
25840 void
25841 ix86_expand_sse5_multiple_memory (rtx operands[],
25842                                   int num,
25843                                   enum machine_mode mode)
25844 {
25845   rtx op0 = operands[0];
25846   if (num != 4
25847       || memory_operand (op0, mode)
25848       || reg_mentioned_p (op0, operands[1])
25849       || reg_mentioned_p (op0, operands[2])
25850       || reg_mentioned_p (op0, operands[3]))
25851     gcc_unreachable ();
25852
25853   /* For 2 memory operands, pick either operands[1] or operands[3] to move into
25854      the destination register.  */
25855   if (memory_operand (operands[1], mode))
25856     {
25857       emit_move_insn (op0, operands[1]);
25858       operands[1] = op0;
25859     }
25860   else if (memory_operand (operands[3], mode))
25861     {
25862       emit_move_insn (op0, operands[3]);
25863       operands[3] = op0;
25864     }
25865   else
25866     gcc_unreachable ();
25867
25868   return;
25869 }
25870
25871 \f
25872 /* Table of valid machine attributes.  */
25873 static const struct attribute_spec ix86_attribute_table[] =
25874 {
25875   /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
25876   /* Stdcall attribute says callee is responsible for popping arguments
25877      if they are not variable.  */
25878   { "stdcall",   0, 0, false, true,  true,  ix86_handle_cconv_attribute },
25879   /* Fastcall attribute says callee is responsible for popping arguments
25880      if they are not variable.  */
25881   { "fastcall",  0, 0, false, true,  true,  ix86_handle_cconv_attribute },
25882   /* Cdecl attribute says the callee is a normal C declaration */
25883   { "cdecl",     0, 0, false, true,  true,  ix86_handle_cconv_attribute },
25884   /* Regparm attribute specifies how many integer arguments are to be
25885      passed in registers.  */
25886   { "regparm",   1, 1, false, true,  true,  ix86_handle_cconv_attribute },
25887   /* Sseregparm attribute says we are using x86_64 calling conventions
25888      for FP arguments.  */
25889   { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
25890   /* force_align_arg_pointer says this function realigns the stack at entry.  */
25891   { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
25892     false, true,  true, ix86_handle_cconv_attribute },
25893 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
25894   { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
25895   { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
25896   { "shared",    0, 0, true,  false, false, ix86_handle_shared_attribute },
25897 #endif
25898   { "ms_struct", 0, 0, false, false,  false, ix86_handle_struct_attribute },
25899   { "gcc_struct", 0, 0, false, false,  false, ix86_handle_struct_attribute },
25900 #ifdef SUBTARGET_ATTRIBUTE_TABLE
25901   SUBTARGET_ATTRIBUTE_TABLE,
25902 #endif
25903   { NULL,        0, 0, false, false, false, NULL }
25904 };
25905
25906 /* Implement targetm.vectorize.builtin_vectorization_cost.  */
25907 static int
25908 x86_builtin_vectorization_cost (bool runtime_test)
25909 {
25910   /* If the branch of the runtime test is taken - i.e. - the vectorized
25911      version is skipped - this incurs a misprediction cost (because the
25912      vectorized version is expected to be the fall-through).  So we subtract
25913      the latency of a mispredicted branch from the costs that are incured
25914      when the vectorized version is executed.
25915
25916      TODO: The values in individual target tables have to be tuned or new
25917      fields may be needed. For eg. on K8, the default branch path is the
25918      not-taken path. If the taken path is predicted correctly, the minimum
25919      penalty of going down the taken-path is 1 cycle. If the taken-path is
25920      not predicted correctly, then the minimum penalty is 10 cycles.  */
25921
25922   if (runtime_test)
25923     {
25924       return (-(ix86_cost->cond_taken_branch_cost));
25925     }
25926   else
25927     return 0;
25928 }
25929
25930 /* Initialize the GCC target structure.  */
25931 #undef TARGET_RETURN_IN_MEMORY
25932 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
25933
25934 #undef TARGET_ATTRIBUTE_TABLE
25935 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
25936 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
25937 #  undef TARGET_MERGE_DECL_ATTRIBUTES
25938 #  define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
25939 #endif
25940
25941 #undef TARGET_COMP_TYPE_ATTRIBUTES
25942 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
25943
25944 #undef TARGET_INIT_BUILTINS
25945 #define TARGET_INIT_BUILTINS ix86_init_builtins
25946 #undef TARGET_EXPAND_BUILTIN
25947 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
25948
25949 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
25950 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
25951   ix86_builtin_vectorized_function
25952
25953 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
25954 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
25955
25956 #undef TARGET_BUILTIN_RECIPROCAL
25957 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
25958
25959 #undef TARGET_ASM_FUNCTION_EPILOGUE
25960 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
25961
25962 #undef TARGET_ENCODE_SECTION_INFO
25963 #ifndef SUBTARGET_ENCODE_SECTION_INFO
25964 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
25965 #else
25966 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
25967 #endif
25968
25969 #undef TARGET_ASM_OPEN_PAREN
25970 #define TARGET_ASM_OPEN_PAREN ""
25971 #undef TARGET_ASM_CLOSE_PAREN
25972 #define TARGET_ASM_CLOSE_PAREN ""
25973
25974 #undef TARGET_ASM_ALIGNED_HI_OP
25975 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
25976 #undef TARGET_ASM_ALIGNED_SI_OP
25977 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
25978 #ifdef ASM_QUAD
25979 #undef TARGET_ASM_ALIGNED_DI_OP
25980 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
25981 #endif
25982
25983 #undef TARGET_ASM_UNALIGNED_HI_OP
25984 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
25985 #undef TARGET_ASM_UNALIGNED_SI_OP
25986 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
25987 #undef TARGET_ASM_UNALIGNED_DI_OP
25988 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
25989
25990 #undef TARGET_SCHED_ADJUST_COST
25991 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
25992 #undef TARGET_SCHED_ISSUE_RATE
25993 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
25994 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
25995 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
25996   ia32_multipass_dfa_lookahead
25997
25998 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
25999 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
26000
26001 #ifdef HAVE_AS_TLS
26002 #undef TARGET_HAVE_TLS
26003 #define TARGET_HAVE_TLS true
26004 #endif
26005 #undef TARGET_CANNOT_FORCE_CONST_MEM
26006 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
26007 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
26008 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
26009
26010 #undef TARGET_DELEGITIMIZE_ADDRESS
26011 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
26012
26013 #undef TARGET_MS_BITFIELD_LAYOUT_P
26014 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
26015
26016 #if TARGET_MACHO
26017 #undef TARGET_BINDS_LOCAL_P
26018 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
26019 #endif
26020 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
26021 #undef TARGET_BINDS_LOCAL_P
26022 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
26023 #endif
26024
26025 #undef TARGET_ASM_OUTPUT_MI_THUNK
26026 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
26027 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
26028 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
26029
26030 #undef TARGET_ASM_FILE_START
26031 #define TARGET_ASM_FILE_START x86_file_start
26032
26033 #undef TARGET_DEFAULT_TARGET_FLAGS
26034 #define TARGET_DEFAULT_TARGET_FLAGS     \
26035   (TARGET_DEFAULT                       \
26036    | TARGET_SUBTARGET_DEFAULT           \
26037    | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
26038
26039 #undef TARGET_HANDLE_OPTION
26040 #define TARGET_HANDLE_OPTION ix86_handle_option
26041
26042 #undef TARGET_RTX_COSTS
26043 #define TARGET_RTX_COSTS ix86_rtx_costs
26044 #undef TARGET_ADDRESS_COST
26045 #define TARGET_ADDRESS_COST ix86_address_cost
26046
26047 #undef TARGET_FIXED_CONDITION_CODE_REGS
26048 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
26049 #undef TARGET_CC_MODES_COMPATIBLE
26050 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
26051
26052 #undef TARGET_MACHINE_DEPENDENT_REORG
26053 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
26054
26055 #undef TARGET_BUILD_BUILTIN_VA_LIST
26056 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
26057
26058 #undef TARGET_EXPAND_BUILTIN_VA_START
26059 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
26060
26061 #undef TARGET_MD_ASM_CLOBBERS
26062 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
26063
26064 #undef TARGET_PROMOTE_PROTOTYPES
26065 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
26066 #undef TARGET_STRUCT_VALUE_RTX
26067 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
26068 #undef TARGET_SETUP_INCOMING_VARARGS
26069 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
26070 #undef TARGET_MUST_PASS_IN_STACK
26071 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
26072 #undef TARGET_PASS_BY_REFERENCE
26073 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
26074 #undef TARGET_INTERNAL_ARG_POINTER
26075 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
26076 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
26077 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
26078 #undef TARGET_STRICT_ARGUMENT_NAMING
26079 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
26080
26081 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
26082 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
26083
26084 #undef TARGET_SCALAR_MODE_SUPPORTED_P
26085 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
26086
26087 #undef TARGET_VECTOR_MODE_SUPPORTED_P
26088 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
26089
26090 #undef TARGET_C_MODE_FOR_SUFFIX
26091 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
26092
26093 #ifdef HAVE_AS_TLS
26094 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
26095 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
26096 #endif
26097
26098 #ifdef SUBTARGET_INSERT_ATTRIBUTES
26099 #undef TARGET_INSERT_ATTRIBUTES
26100 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
26101 #endif
26102
26103 #undef TARGET_MANGLE_TYPE
26104 #define TARGET_MANGLE_TYPE ix86_mangle_type
26105
26106 #undef TARGET_STACK_PROTECT_FAIL
26107 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
26108
26109 #undef TARGET_FUNCTION_VALUE
26110 #define TARGET_FUNCTION_VALUE ix86_function_value
26111
26112 #undef TARGET_SECONDARY_RELOAD
26113 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
26114
26115 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
26116 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST x86_builtin_vectorization_cost
26117
26118 struct gcc_target targetm = TARGET_INITIALIZER;
26119 \f
26120 #include "gt-i386.h"