gcc/config/sparc/sparc.c

   1 /* Subroutines for insn-output.c for SPARC.
   2    Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
   3    1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   4    Free Software Foundation, Inc.
   5    Contributed by Michael Tiemann (tiemann@cygnus.com)
   6    64-bit SPARC-V9 support by Michael Tiemann, Jim Wilson, and Doug Evans,
   7    at Cygnus Support.
   8
   9 This file is part of GCC.
  10
  11 GCC is free software; you can redistribute it and/or modify
  12 it under the terms of the GNU General Public License as published by
  13 the Free Software Foundation; either version 3, or (at your option)
  14 any later version.
  15
  16 GCC is distributed in the hope that it will be useful,
  17 but WITHOUT ANY WARRANTY; without even the implied warranty of
  18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  19 GNU General Public License for more details.
  20
  21 You should have received a copy of the GNU General Public License
  22 along with GCC; see the file COPYING3.  If not see
  23 <http://www.gnu.org/licenses/>.  */
  24
  25 #include "config.h"
  26 #include "system.h"
  27 #include "coretypes.h"
  28 #include "tm.h"
  29 #include "tree.h"
  30 #include "rtl.h"
  31 #include "regs.h"
  32 #include "hard-reg-set.h"
  33 #include "insn-config.h"
  34 #include "insn-codes.h"
  35 #include "conditions.h"
  36 #include "output.h"
  37 #include "insn-attr.h"
  38 #include "flags.h"
  39 #include "function.h"
  40 #include "except.h"
  41 #include "expr.h"
  42 #include "optabs.h"
  43 #include "recog.h"
  44 #include "diagnostic-core.h"
  45 #include "toplev.h"
  46 #include "ggc.h"
  47 #include "tm_p.h"
  48 #include "debug.h"
  49 #include "target.h"
  50 #include "target-def.h"
  51 #include "cfglayout.h"
  52 #include "gimple.h"
  53 #include "langhooks.h"
  54 #include "reload.h"
  55 #include "params.h"
  56 #include "df.h"
  57 #include "dwarf2out.h"
  58
  59 /* Processor costs.  Every *_costs table below lists its entries in this order.  */
  60 static const
  61 struct processor_costs cypress_costs = { /* sparc_costs starts out pointing here */
  62   COSTS_N_INSNS (2), /* int load */
  63   COSTS_N_INSNS (2), /* int signed load */
  64   COSTS_N_INSNS (2), /* int zeroed load */
  65   COSTS_N_INSNS (2), /* float load */
  66   COSTS_N_INSNS (5), /* fmov, fneg, fabs */
  67   COSTS_N_INSNS (5), /* fadd, fsub */
  68   COSTS_N_INSNS (1), /* fcmp */
  69   COSTS_N_INSNS (1), /* fmov, fmovr (presumably the conditional FP moves - TODO confirm) */
  70   COSTS_N_INSNS (7), /* fmul */
  71   COSTS_N_INSNS (37), /* fdivs */
  72   COSTS_N_INSNS (37), /* fdivd */
  73   COSTS_N_INSNS (63), /* fsqrts */
  74   COSTS_N_INSNS (63), /* fsqrtd */
  75   COSTS_N_INSNS (1), /* imul */
  76   COSTS_N_INSNS (1), /* imulX */
  77   0, /* imul bit factor (plain integer, not COSTS_N_INSNS) */
  78   COSTS_N_INSNS (1), /* idiv */
  79   COSTS_N_INSNS (1), /* idivX */
  80   COSTS_N_INSNS (1), /* movcc/movr */
  81   0, /* shift penalty (plain integer, not COSTS_N_INSNS) */
  82 };
  83
  84 static const
  85 struct processor_costs supersparc_costs = { /* SuperSPARC; same entry order as cypress_costs */
  86   COSTS_N_INSNS (1), /* int load */
  87   COSTS_N_INSNS (1), /* int signed load */
  88   COSTS_N_INSNS (1), /* int zeroed load */
  89   COSTS_N_INSNS (0), /* float load; NOTE(review): zero, unlike the integer loads - confirm intended */
  90   COSTS_N_INSNS (3), /* fmov, fneg, fabs */
  91   COSTS_N_INSNS (3), /* fadd, fsub */
  92   COSTS_N_INSNS (3), /* fcmp */
  93   COSTS_N_INSNS (1), /* fmov, fmovr (presumably the conditional FP moves - TODO confirm) */
  94   COSTS_N_INSNS (3), /* fmul */
  95   COSTS_N_INSNS (6), /* fdivs */
  96   COSTS_N_INSNS (9), /* fdivd */
  97   COSTS_N_INSNS (12), /* fsqrts */
  98   COSTS_N_INSNS (12), /* fsqrtd */
  99   COSTS_N_INSNS (4), /* imul */
 100   COSTS_N_INSNS (4), /* imulX */
 101   0, /* imul bit factor (plain integer, not COSTS_N_INSNS) */
 102   COSTS_N_INSNS (4), /* idiv */
 103   COSTS_N_INSNS (4), /* idivX */
 104   COSTS_N_INSNS (1), /* movcc/movr */
 105   1, /* shift penalty (plain integer, not COSTS_N_INSNS) */
 106 };
 107
 108 static const
 109 struct processor_costs hypersparc_costs = { /* HyperSPARC; same entry order as cypress_costs */
 110   COSTS_N_INSNS (1), /* int load */
 111   COSTS_N_INSNS (1), /* int signed load */
 112   COSTS_N_INSNS (1), /* int zeroed load */
 113   COSTS_N_INSNS (1), /* float load */
 114   COSTS_N_INSNS (1), /* fmov, fneg, fabs */
 115   COSTS_N_INSNS (1), /* fadd, fsub */
 116   COSTS_N_INSNS (1), /* fcmp */
 117   COSTS_N_INSNS (1), /* fmov, fmovr (presumably the conditional FP moves - TODO confirm) */
 118   COSTS_N_INSNS (1), /* fmul */
 119   COSTS_N_INSNS (8), /* fdivs */
 120   COSTS_N_INSNS (12), /* fdivd */
 121   COSTS_N_INSNS (17), /* fsqrts */
 122   COSTS_N_INSNS (17), /* fsqrtd */
 123   COSTS_N_INSNS (17), /* imul */
 124   COSTS_N_INSNS (17), /* imulX */
 125   0, /* imul bit factor (plain integer, not COSTS_N_INSNS) */
 126   COSTS_N_INSNS (17), /* idiv */
 127   COSTS_N_INSNS (17), /* idivX */
 128   COSTS_N_INSNS (1), /* movcc/movr */
 129   0, /* shift penalty (plain integer, not COSTS_N_INSNS) */
 130 };
 131
 132 static const
 133 struct processor_costs sparclet_costs = { /* SPARClet; same entry order as cypress_costs */
 134   COSTS_N_INSNS (3), /* int load */
 135   COSTS_N_INSNS (3), /* int signed load */
 136   COSTS_N_INSNS (1), /* int zeroed load */
 137   COSTS_N_INSNS (1), /* float load */
 138   COSTS_N_INSNS (1), /* fmov, fneg, fabs */
 139   COSTS_N_INSNS (1), /* fadd, fsub */
 140   COSTS_N_INSNS (1), /* fcmp */
 141   COSTS_N_INSNS (1), /* fmov, fmovr (presumably the conditional FP moves - TODO confirm) */
 142   COSTS_N_INSNS (1), /* fmul */
 143   COSTS_N_INSNS (1), /* fdivs */
 144   COSTS_N_INSNS (1), /* fdivd */
 145   COSTS_N_INSNS (1), /* fsqrts */
 146   COSTS_N_INSNS (1), /* fsqrtd */
 147   COSTS_N_INSNS (5), /* imul */
 148   COSTS_N_INSNS (5), /* imulX */
 149   0, /* imul bit factor (plain integer, not COSTS_N_INSNS) */
 150   COSTS_N_INSNS (5), /* idiv */
 151   COSTS_N_INSNS (5), /* idivX */
 152   COSTS_N_INSNS (1), /* movcc/movr */
 153   0, /* shift penalty (plain integer, not COSTS_N_INSNS) */
 154 };
 155
 156 static const
 157 struct processor_costs ultrasparc_costs = { /* UltraSPARC; same entry order as cypress_costs */
 158   COSTS_N_INSNS (2), /* int load */
 159   COSTS_N_INSNS (3), /* int signed load */
 160   COSTS_N_INSNS (2), /* int zeroed load */
 161   COSTS_N_INSNS (2), /* float load */
 162   COSTS_N_INSNS (1), /* fmov, fneg, fabs */
 163   COSTS_N_INSNS (4), /* fadd, fsub */
 164   COSTS_N_INSNS (1), /* fcmp */
 165   COSTS_N_INSNS (2), /* fmov, fmovr (presumably the conditional FP moves - TODO confirm) */
 166   COSTS_N_INSNS (4), /* fmul */
 167   COSTS_N_INSNS (13), /* fdivs */
 168   COSTS_N_INSNS (23), /* fdivd */
 169   COSTS_N_INSNS (13), /* fsqrts */
 170   COSTS_N_INSNS (23), /* fsqrtd */
 171   COSTS_N_INSNS (4), /* imul */
 172   COSTS_N_INSNS (4), /* imulX */
 173   2, /* imul bit factor (plain integer, not COSTS_N_INSNS) */
 174   COSTS_N_INSNS (37), /* idiv */
 175   COSTS_N_INSNS (68), /* idivX */
 176   COSTS_N_INSNS (2), /* movcc/movr */
 177   2, /* shift penalty (plain integer, not COSTS_N_INSNS) */
 178 };
 179
 180 static const
 181 struct processor_costs ultrasparc3_costs = { /* UltraSPARC III; same entry order as cypress_costs */
 182   COSTS_N_INSNS (2), /* int load */
 183   COSTS_N_INSNS (3), /* int signed load */
 184   COSTS_N_INSNS (3), /* int zeroed load */
 185   COSTS_N_INSNS (2), /* float load */
 186   COSTS_N_INSNS (3), /* fmov, fneg, fabs */
 187   COSTS_N_INSNS (4), /* fadd, fsub */
 188   COSTS_N_INSNS (5), /* fcmp */
 189   COSTS_N_INSNS (3), /* fmov, fmovr (presumably the conditional FP moves - TODO confirm) */
 190   COSTS_N_INSNS (4), /* fmul */
 191   COSTS_N_INSNS (17), /* fdivs */
 192   COSTS_N_INSNS (20), /* fdivd */
 193   COSTS_N_INSNS (20), /* fsqrts */
 194   COSTS_N_INSNS (29), /* fsqrtd */
 195   COSTS_N_INSNS (6), /* imul */
 196   COSTS_N_INSNS (6), /* imulX */
 197   0, /* imul bit factor (plain integer, not COSTS_N_INSNS) */
 198   COSTS_N_INSNS (40), /* idiv */
 199   COSTS_N_INSNS (71), /* idivX */
 200   COSTS_N_INSNS (2), /* movcc/movr */
 201   0, /* shift penalty (plain integer, not COSTS_N_INSNS) */
 202 };
 203
 204 static const
 205 struct processor_costs niagara_costs = { /* Niagara (UltraSPARC T1); same entry order as cypress_costs */
 206   COSTS_N_INSNS (3), /* int load */
 207   COSTS_N_INSNS (3), /* int signed load */
 208   COSTS_N_INSNS (3), /* int zeroed load */
 209   COSTS_N_INSNS (9), /* float load */
 210   COSTS_N_INSNS (8), /* fmov, fneg, fabs */
 211   COSTS_N_INSNS (8), /* fadd, fsub */
 212   COSTS_N_INSNS (26), /* fcmp */
 213   COSTS_N_INSNS (8), /* fmov, fmovr (presumably the conditional FP moves - TODO confirm) */
 214   COSTS_N_INSNS (29), /* fmul */
 215   COSTS_N_INSNS (54), /* fdivs */
 216   COSTS_N_INSNS (83), /* fdivd */
 217   COSTS_N_INSNS (100), /* fsqrts - not implemented in hardware */
 218   COSTS_N_INSNS (100), /* fsqrtd - not implemented in hardware */
 219   COSTS_N_INSNS (11), /* imul */
 220   COSTS_N_INSNS (11), /* imulX */
 221   0, /* imul bit factor (plain integer, not COSTS_N_INSNS) */
 222   COSTS_N_INSNS (72), /* idiv */
 223   COSTS_N_INSNS (72), /* idivX */
 224   COSTS_N_INSNS (1), /* movcc/movr */
 225   0, /* shift penalty (plain integer, not COSTS_N_INSNS) */
 226 };
 227
 228 static const
 229 struct processor_costs niagara2_costs = { /* Niagara 2; same entry order as cypress_costs */
 230   COSTS_N_INSNS (3), /* int load */
 231   COSTS_N_INSNS (3), /* int signed load */
 232   COSTS_N_INSNS (3), /* int zeroed load */
 233   COSTS_N_INSNS (3), /* float load */
 234   COSTS_N_INSNS (6), /* fmov, fneg, fabs */
 235   COSTS_N_INSNS (6), /* fadd, fsub */
 236   COSTS_N_INSNS (6), /* fcmp */
 237   COSTS_N_INSNS (6), /* fmov, fmovr (presumably the conditional FP moves - TODO confirm) */
 238   COSTS_N_INSNS (6), /* fmul */
 239   COSTS_N_INSNS (19), /* fdivs */
 240   COSTS_N_INSNS (33), /* fdivd */
 241   COSTS_N_INSNS (19), /* fsqrts */
 242   COSTS_N_INSNS (33), /* fsqrtd */
 243   COSTS_N_INSNS (5), /* imul */
 244   COSTS_N_INSNS (5), /* imulX */
 245   0, /* imul bit factor (plain integer, not COSTS_N_INSNS) */
 246   COSTS_N_INSNS (31), /* idiv, average of 12 - 41 cycle range */
 247   COSTS_N_INSNS (31), /* idivX, average of 12 - 41 cycle range */
 248   COSTS_N_INSNS (1), /* movcc/movr */
 249   0, /* shift penalty (plain integer, not COSTS_N_INSNS) */
 250 };
 251
 252 const struct processor_costs *sparc_costs = &cypress_costs; /* points at the Cypress table initially */
 253
 254 #ifdef HAVE_AS_RELAX_OPTION
 255 /* If 'as' and 'ld' are relaxing tail call insns into branch always, use
 256    "or %o7,%g0,X; call Y; or X,%g0,%o7" always, so that it can be optimized.
 257    With sethi/jmp, neither 'as' nor 'ld' has an easy way to find out whether
 258    anything branches to a point between the sethi and the jmp.  */
 259 #define LEAF_SIBCALL_SLOT_RESERVED_P 1
 260 #else
 261 #define LEAF_SIBCALL_SLOT_RESERVED_P \
 262   ((TARGET_ARCH64 && !TARGET_CM_MEDLOW) || flag_pic)
 263 #endif
 264
 265 /* Global variables for machine-dependent things.  */
 266
 267 /* Size of frame.  Needed to emit return insns from leaf procedures.
 268    ACTUAL_FSIZE is set by sparc_compute_frame_size(), which is called during the
 269    reload pass.  This is important as the value is later used for scheduling
 270    (to see what can go in a delay slot).
 271    APPARENT_FSIZE is the size of the stack less the register save area and less
 272    the outgoing argument area.  It is used when saving call-preserved regs.  */
 273 static HOST_WIDE_INT apparent_fsize;
 274 static HOST_WIDE_INT actual_fsize;
 275
 276 /* Number of live general or floating point registers that need to be
 277    saved (as 4-byte quantities).  */
 278 static int num_gfregs;
 279
 280 /* The alias set for prologue/epilogue register save/restore; allocated in sparc_option_override.  */
 281 static GTY(()) alias_set_type sparc_sr_alias_set;
 282
 283 /* The alias set for the structure return value; allocated in sparc_option_override.  */
 284 static GTY(()) alias_set_type struct_value_alias_set;
 285
 286 /* Vector to say how input registers are mapped to output registers.
 287    HARD_FRAME_POINTER_REGNUM cannot be remapped by this table to
 288    eliminate it.  You must use -fomit-frame-pointer to get that.  */
 289 char leaf_reg_remap[] = /* -1 entries presumably mean "no remapping" - TODO confirm */
 290 { 0, 1, 2, 3, 4, 5, 6, 7, /* 0-7 map to themselves */
 291   -1, -1, -1, -1, -1, -1, 14, -1, /* 8-15: only 14 maps (to itself); NOTE(review): -1 in a plain char reads as 255 if char is unsigned */
 292   -1, -1, -1, -1, -1, -1, -1, -1, /* 16-23: none */
 293   8, 9, 10, 11, 12, 13, -1, 15, /* 24-29 -> 8-13, 31 -> 15, 30 has no entry */
 294
 295   32, 33, 34, 35, 36, 37, 38, 39, /* 32-100 map to themselves */
 296   40, 41, 42, 43, 44, 45, 46, 47,
 297   48, 49, 50, 51, 52, 53, 54, 55,
 298   56, 57, 58, 59, 60, 61, 62, 63,
 299   64, 65, 66, 67, 68, 69, 70, 71,
 300   72, 73, 74, 75, 76, 77, 78, 79,
 301   80, 81, 82, 83, 84, 85, 86, 87,
 302   88, 89, 90, 91, 92, 93, 94, 95,
 303   96, 97, 98, 99, 100};
 304
 305 /* Vector, indexed by hard register number, which contains 1
 306    for a register that is allowable in a candidate for leaf
 307    function treatment.  */
 308 char sparc_leaf_regs[] = /* the 0 entries sit at exactly the indices where leaf_reg_remap holds -1 */
 309 { 1, 1, 1, 1, 1, 1, 1, 1, /* 0-7 */
 310   0, 0, 0, 0, 0, 0, 1, 0, /* 8-15: only 14 allowed */
 311   0, 0, 0, 0, 0, 0, 0, 0, /* 16-23: none allowed */
 312   1, 1, 1, 1, 1, 1, 0, 1, /* 24-31: all but 30 */
 313   1, 1, 1, 1, 1, 1, 1, 1, /* 32-100: all allowed */
 314   1, 1, 1, 1, 1, 1, 1, 1,
 315   1, 1, 1, 1, 1, 1, 1, 1,
 316   1, 1, 1, 1, 1, 1, 1, 1,
 317   1, 1, 1, 1, 1, 1, 1, 1,
 318   1, 1, 1, 1, 1, 1, 1, 1,
 319   1, 1, 1, 1, 1, 1, 1, 1,
 320   1, 1, 1, 1, 1, 1, 1, 1,
 321   1, 1, 1, 1, 1};
 322
 323 struct GTY(()) machine_function /* per-function data, reached through cfun->machine */
 324 {
 325   /* Some local-dynamic TLS symbol name.  */
 326   const char *some_ld_name;
 327
 328   /* Nonzero if the current function is a leaf and uses only leaf regs,
 329      so that the SPARC leaf function optimization can be applied.
 330      Private version of current_function_uses_only_leaf_regs; see
 331      sparc_expand_prologue for the rationale.  */
 332   int leaf_function_p;
 333
 334   /* True if the data calculated by sparc_expand_prologue are valid.  */
 335   bool prologue_data_valid_p;
 336 };
 337
 338 #define sparc_leaf_function_p  cfun->machine->leaf_function_p /* shorthand for the current function's field */
 339 #define sparc_prologue_data_valid_p  cfun->machine->prologue_data_valid_p
 340
 341 /* Register we pretend the frame pointer is allocated to.
 342    Normally, this is %fp, but if we are in a leaf procedure, this
 343    is %sp+"something".  We record "something" separately as it may
 344    be too big for reg+constant addressing.  */
 345 static rtx frame_base_reg;
 346 static HOST_WIDE_INT frame_base_offset; /* the "something" described above */
 347
 348 /* 1 if the next opcode is to be specially indented; starts out as 0.  */
 349 int sparc_indent_opcode = 0;
 350
 351 static bool sparc_handle_option (size_t, const char *, int);
 352 static void sparc_option_override (void);
 353 static void sparc_init_modes (void);
 354 static void scan_record_type (tree, int *, int *, int *);
 355 static int function_arg_slotno (const CUMULATIVE_ARGS *, enum machine_mode,
 356                                 tree, int, int, int *, int *);
 357
 358 static int supersparc_adjust_cost (rtx, rtx, rtx, int);
 359 static int hypersparc_adjust_cost (rtx, rtx, rtx, int);
 360
 361 static void sparc_emit_set_const32 (rtx, rtx);
 362 static void sparc_emit_set_const64 (rtx, rtx);
 363 static void sparc_output_addr_vec (rtx);
 364 static void sparc_output_addr_diff_vec (rtx);
 365 static void sparc_output_deferred_case_vectors (void);
 366 static bool sparc_legitimate_address_p (enum machine_mode, rtx, bool);
 367 static rtx sparc_builtin_saveregs (void);
 368 static int epilogue_renumber (rtx *, int);
 369 static bool sparc_assemble_integer (rtx, unsigned int, int);
 370 static int set_extends (rtx);
 371 static void load_pic_register (void);
 372 static int save_or_restore_regs (int, int, rtx, int, int);
 373 static void emit_save_or_restore_regs (int);
 374 static void sparc_asm_function_prologue (FILE *, HOST_WIDE_INT);
 375 static void sparc_asm_function_epilogue (FILE *, HOST_WIDE_INT);
 376 static void sparc_solaris_elf_asm_named_section (const char *, unsigned int,
 377                                                  tree) ATTRIBUTE_UNUSED;
 378 static int sparc_adjust_cost (rtx, rtx, rtx, int);
 379 static int sparc_issue_rate (void);
 380 static void sparc_sched_init (FILE *, int, int);
 381 static int sparc_use_sched_lookahead (void);
 382
 383 static void emit_soft_tfmode_libcall (const char *, int, rtx *);
 384 static void emit_soft_tfmode_binop (enum rtx_code, rtx *);
 385 static void emit_soft_tfmode_unop (enum rtx_code, rtx *);
 386 static void emit_soft_tfmode_cvt (enum rtx_code, rtx *);
 387 static void emit_hard_tfmode_operation (enum rtx_code, rtx *);
 388
 389 static bool sparc_function_ok_for_sibcall (tree, tree);
 390 static void sparc_init_libfuncs (void);
 391 static void sparc_init_builtins (void);
 392 static void sparc_vis_init_builtins (void);
 393 static rtx sparc_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
 394 static tree sparc_fold_builtin (tree, int, tree *, bool);
 395 static int sparc_vis_mul8x16 (int, int);
 396 static tree sparc_handle_vis_mul8x16 (int, tree, tree, tree);
 397 static void sparc_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
 398                                    HOST_WIDE_INT, tree);
 399 static bool sparc_can_output_mi_thunk (const_tree, HOST_WIDE_INT,
 400                                        HOST_WIDE_INT, const_tree);
 401 static struct machine_function * sparc_init_machine_status (void);
 402 static bool sparc_cannot_force_const_mem (rtx);
 403 static rtx sparc_tls_get_addr (void);
 404 static rtx sparc_tls_got (void);
 405 static const char *get_some_local_dynamic_name (void);
 406 static int get_some_local_dynamic_name_1 (rtx *, void *);
 407 static bool sparc_rtx_costs (rtx, int, int, int *, bool);
 408 static bool sparc_promote_prototypes (const_tree);
 409 static rtx sparc_function_value (const_tree, const_tree, bool);
 410 static rtx sparc_libcall_value (enum machine_mode, const_rtx);
 411 static bool sparc_function_value_regno_p (const unsigned int);
 412 static rtx sparc_struct_value_rtx (tree, int);
 413 static enum machine_mode sparc_promote_function_mode (const_tree, enum machine_mode,
 414                                                       int *, const_tree, int);
 415 static bool sparc_return_in_memory (const_tree, const_tree);
 416 static bool sparc_strict_argument_naming (CUMULATIVE_ARGS *);
 417 static void sparc_va_start (tree, rtx);
 418 static tree sparc_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
 419 static bool sparc_vector_mode_supported_p (enum machine_mode);
 420 static bool sparc_tls_referenced_p (rtx);
 421 static rtx sparc_legitimize_tls_address (rtx);
 422 static rtx sparc_legitimize_pic_address (rtx, rtx);
 423 static rtx sparc_legitimize_address (rtx, rtx, enum machine_mode);
 424 static bool sparc_mode_dependent_address_p (const_rtx);
 425 static bool sparc_pass_by_reference (CUMULATIVE_ARGS *,
 426                                      enum machine_mode, const_tree, bool);
 427 static int sparc_arg_partial_bytes (CUMULATIVE_ARGS *,
 428                                     enum machine_mode, tree, bool);
 429 static void sparc_dwarf_handle_frame_unspec (const char *, rtx, int);
 430 static void sparc_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
 431 static void sparc_file_end (void);
 432 static bool sparc_frame_pointer_required (void);
 433 static bool sparc_can_eliminate (const int, const int);
 434 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
 435 static const char *sparc_mangle_type (const_tree);
 436 #endif
 437 static void sparc_trampoline_init (rtx, tree, rtx);
 438 static unsigned int sparc_units_per_simd_word (enum machine_mode);
 439 \f
 440 #ifdef SUBTARGET_ATTRIBUTE_TABLE
 441 /* Table of valid machine attributes; only built when the subtarget supplies entries.  */
 442 static const struct attribute_spec sparc_attribute_table[] =
 443 {
 444   /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
 445   SUBTARGET_ATTRIBUTE_TABLE,
 446   { NULL,        0, 0, false, false, false, NULL } /* presumably the end-of-table marker - TODO confirm */
 447 };
 448 #endif
 449 \f
 450 /* Option handling.  */
 451
 452 /* Parsed code model: SPARC_DEFAULT_CMODEL unless sparc_option_override picks another.  */
 453 enum cmodel sparc_cmodel;
 454
 455 char sparc_hard_reg_printed[8];
 456
 457 struct sparc_cpu_select sparc_select[] =
 458 {
 459   /* switch     name,           tune    arch */
 460   { (char *)0,  "default",      1,      1 }, /* string filled in from TARGET_CPU_DEFAULT by sparc_option_override */
 461   { (char *)0,  "-mcpu=",       1,      1 }, /* string set by sparc_handle_option for OPT_mcpu_ */
 462   { (char *)0,  "-mtune=",      1,      0 }, /* string set by sparc_handle_option for OPT_mtune_ */
 463   { 0, 0, 0, 0 } /* null name stops the scan in sparc_option_override */
 464 };
 465
 466 /* CPU type.  This is set from TARGET_CPU_DEFAULT and -m{cpu,tune}=xxx.  */
 467 enum processor_type sparc_cpu;
 468
 469 /* Whether an FPU option was specified (set by sparc_handle_option).  */
 470 static bool fpu_option_set = false;
 471
 472 /* Initialize the GCC target structure.  */
 473
 474 /* The default is to use .half rather than .short for aligned HI objects.  */
 475 #undef TARGET_ASM_ALIGNED_HI_OP
 476 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
 477
 478 #undef TARGET_ASM_UNALIGNED_HI_OP
 479 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uahalf\t"
 480 #undef TARGET_ASM_UNALIGNED_SI_OP
 481 #define TARGET_ASM_UNALIGNED_SI_OP "\t.uaword\t"
 482 #undef TARGET_ASM_UNALIGNED_DI_OP
 483 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaxword\t"
 484
 485 /* The target hook has to handle DI-mode values.  */
 486 #undef TARGET_ASM_INTEGER
 487 #define TARGET_ASM_INTEGER sparc_assemble_integer
 488
 489 #undef TARGET_ASM_FUNCTION_PROLOGUE
 490 #define TARGET_ASM_FUNCTION_PROLOGUE sparc_asm_function_prologue
 491 #undef TARGET_ASM_FUNCTION_EPILOGUE
 492 #define TARGET_ASM_FUNCTION_EPILOGUE sparc_asm_function_epilogue
 493
 494 #undef TARGET_SCHED_ADJUST_COST
 495 #define TARGET_SCHED_ADJUST_COST sparc_adjust_cost
 496 #undef TARGET_SCHED_ISSUE_RATE
 497 #define TARGET_SCHED_ISSUE_RATE sparc_issue_rate
 498 #undef TARGET_SCHED_INIT
 499 #define TARGET_SCHED_INIT sparc_sched_init
 500 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
 501 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD sparc_use_sched_lookahead
 502
 503 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
 504 #define TARGET_FUNCTION_OK_FOR_SIBCALL sparc_function_ok_for_sibcall
 505
 506 #undef TARGET_INIT_LIBFUNCS
 507 #define TARGET_INIT_LIBFUNCS sparc_init_libfuncs
 508 #undef TARGET_INIT_BUILTINS
 509 #define TARGET_INIT_BUILTINS sparc_init_builtins
 510
 511 #undef TARGET_LEGITIMIZE_ADDRESS
 512 #define TARGET_LEGITIMIZE_ADDRESS sparc_legitimize_address
 513 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
 514 #define TARGET_MODE_DEPENDENT_ADDRESS_P sparc_mode_dependent_address_p
 515
 516 #undef TARGET_EXPAND_BUILTIN
 517 #define TARGET_EXPAND_BUILTIN sparc_expand_builtin
 518 #undef TARGET_FOLD_BUILTIN
 519 #define TARGET_FOLD_BUILTIN sparc_fold_builtin
 520
 521 #if TARGET_TLS
 522 #undef TARGET_HAVE_TLS
 523 #define TARGET_HAVE_TLS true
 524 #endif
 525
 526 #undef TARGET_CANNOT_FORCE_CONST_MEM
 527 #define TARGET_CANNOT_FORCE_CONST_MEM sparc_cannot_force_const_mem
 528
 529 #undef TARGET_ASM_OUTPUT_MI_THUNK
 530 #define TARGET_ASM_OUTPUT_MI_THUNK sparc_output_mi_thunk
 531 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
 532 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK sparc_can_output_mi_thunk
 533
 534 #undef TARGET_RTX_COSTS
 535 #define TARGET_RTX_COSTS sparc_rtx_costs
 536 #undef TARGET_ADDRESS_COST
 537 #define TARGET_ADDRESS_COST hook_int_rtx_bool_0
 538
 539 #undef TARGET_PROMOTE_FUNCTION_MODE
 540 #define TARGET_PROMOTE_FUNCTION_MODE sparc_promote_function_mode
 541
 542 #undef TARGET_PROMOTE_PROTOTYPES
 543 #define TARGET_PROMOTE_PROTOTYPES sparc_promote_prototypes
 544
 545 #undef TARGET_FUNCTION_VALUE
 546 #define TARGET_FUNCTION_VALUE sparc_function_value
 547 #undef TARGET_LIBCALL_VALUE
 548 #define TARGET_LIBCALL_VALUE sparc_libcall_value
 549 #undef TARGET_FUNCTION_VALUE_REGNO_P
 550 #define TARGET_FUNCTION_VALUE_REGNO_P sparc_function_value_regno_p
 551
 552 #undef TARGET_STRUCT_VALUE_RTX
 553 #define TARGET_STRUCT_VALUE_RTX sparc_struct_value_rtx
 554 #undef TARGET_RETURN_IN_MEMORY
 555 #define TARGET_RETURN_IN_MEMORY sparc_return_in_memory
 556 #undef TARGET_MUST_PASS_IN_STACK
 557 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
 558 #undef TARGET_PASS_BY_REFERENCE
 559 #define TARGET_PASS_BY_REFERENCE sparc_pass_by_reference
 560 #undef TARGET_ARG_PARTIAL_BYTES
 561 #define TARGET_ARG_PARTIAL_BYTES sparc_arg_partial_bytes
 562
 563 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
 564 #define TARGET_EXPAND_BUILTIN_SAVEREGS sparc_builtin_saveregs
 565 #undef TARGET_STRICT_ARGUMENT_NAMING
 566 #define TARGET_STRICT_ARGUMENT_NAMING sparc_strict_argument_naming
 567
 568 #undef TARGET_EXPAND_BUILTIN_VA_START
 569 #define TARGET_EXPAND_BUILTIN_VA_START sparc_va_start
 570 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
 571 #define TARGET_GIMPLIFY_VA_ARG_EXPR sparc_gimplify_va_arg
 572
 573 #undef TARGET_VECTOR_MODE_SUPPORTED_P
 574 #define TARGET_VECTOR_MODE_SUPPORTED_P sparc_vector_mode_supported_p
 575
 576 #undef TARGET_VECTORIZE_UNITS_PER_SIMD_WORD
 577 #define TARGET_VECTORIZE_UNITS_PER_SIMD_WORD sparc_units_per_simd_word
 578
 579 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
 580 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC sparc_dwarf_handle_frame_unspec
 581
 582 #ifdef SUBTARGET_INSERT_ATTRIBUTES
 583 #undef TARGET_INSERT_ATTRIBUTES
 584 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
 585 #endif
 586
 587 #ifdef SUBTARGET_ATTRIBUTE_TABLE
 588 #undef TARGET_ATTRIBUTE_TABLE
 589 #define TARGET_ATTRIBUTE_TABLE sparc_attribute_table
 590 #endif
 591
 592 #undef TARGET_RELAXED_ORDERING
 593 #define TARGET_RELAXED_ORDERING SPARC_RELAXED_ORDERING
 594
 595 #undef TARGET_DEFAULT_TARGET_FLAGS
 596 #define TARGET_DEFAULT_TARGET_FLAGS TARGET_DEFAULT
 597 #undef TARGET_HANDLE_OPTION
 598 #define TARGET_HANDLE_OPTION sparc_handle_option
 599 #undef TARGET_OPTION_OVERRIDE
 600 #define TARGET_OPTION_OVERRIDE sparc_option_override
 601
 602 #if TARGET_GNU_TLS && defined(HAVE_AS_SPARC_UA_PCREL)
 603 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
 604 #define TARGET_ASM_OUTPUT_DWARF_DTPREL sparc_output_dwarf_dtprel
 605 #endif
 606
 607 #undef TARGET_ASM_FILE_END
 608 #define TARGET_ASM_FILE_END sparc_file_end
 609
 610 #undef TARGET_FRAME_POINTER_REQUIRED
 611 #define TARGET_FRAME_POINTER_REQUIRED sparc_frame_pointer_required
 612
 613 #undef TARGET_CAN_ELIMINATE
 614 #define TARGET_CAN_ELIMINATE sparc_can_eliminate
 615
 616 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
 617 #undef TARGET_MANGLE_TYPE
 618 #define TARGET_MANGLE_TYPE sparc_mangle_type
 619 #endif
 620
 621 #undef TARGET_LEGITIMATE_ADDRESS_P
 622 #define TARGET_LEGITIMATE_ADDRESS_P sparc_legitimate_address_p
 623
 624 #undef TARGET_TRAMPOLINE_INIT
 625 #define TARGET_TRAMPOLINE_INIT sparc_trampoline_init
 626
 627 struct gcc_target targetm = TARGET_INITIALIZER; /* presumably picks up the TARGET_* overrides defined above - TODO confirm */
 628
 629 /* Implement TARGET_HANDLE_OPTION.  Records FPU/CPU choices for later use; always returns true.  */
 630
 631 static bool
 632 sparc_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED)
 633 {
 634   switch (code)
 635     {
 636     case OPT_mfpu:
 637     case OPT_mhard_float:
 638     case OPT_msoft_float:
 639       fpu_option_set = true; /* makes sparc_option_override keep the user's FPU setting over the CPU default */
 640       break;
 641
 642     case OPT_mcpu_:
 643       sparc_select[1].string = arg; /* the "-mcpu=" entry */
 644       break;
 645
 646     case OPT_mtune_:
 647       sparc_select[2].string = arg; /* the "-mtune=" entry */
 648       break;
 649     } /* every other option code is left untouched */
 650
 651   return true;
 652 }
 653
 654 /* Validate and override various options, and do some machine dependent
 655    initialization.  */
 656
 657 static void
 658 sparc_option_override (void)
 659 {
 660   static struct code_model {
 661     const char *const name;
 662     const enum cmodel value;
 663   } const cmodels[] = {
 664     { "32", CM_32 },
 665     { "medlow", CM_MEDLOW },
 666     { "medmid", CM_MEDMID },
 667     { "medany", CM_MEDANY },
 668     { "embmedany", CM_EMBMEDANY },
 669     { NULL, (enum cmodel) 0 }
 670   };
 671   const struct code_model *cmodel;
 672   /* Map TARGET_CPU_DEFAULT to value for -m{arch,tune}=.  */
 673   static struct cpu_default {
 674     const int cpu;
 675     const char *const name;
 676   } const cpu_default[] = {
 677     /* There must be one entry here for each TARGET_CPU value.  */
 678     { TARGET_CPU_sparc, "cypress" },
 679     { TARGET_CPU_sparclet, "tsc701" },
 680     { TARGET_CPU_sparclite, "f930" },
 681     { TARGET_CPU_v8, "v8" },
 682     { TARGET_CPU_hypersparc, "hypersparc" },
 683     { TARGET_CPU_sparclite86x, "sparclite86x" },
 684     { TARGET_CPU_supersparc, "supersparc" },
 685     { TARGET_CPU_v9, "v9" },
 686     { TARGET_CPU_ultrasparc, "ultrasparc" },
 687     { TARGET_CPU_ultrasparc3, "ultrasparc3" },
 688     { TARGET_CPU_niagara, "niagara" },
 689     { TARGET_CPU_niagara2, "niagara2" },
 690     { 0, 0 }
 691   };
 692   const struct cpu_default *def;
 693   /* Table of values for -m{cpu,tune}=, terminated by an entry with a null name.  */
 694   static struct cpu_table {
 695     const char *const name; /* compared against the selector's string */
 696     const enum processor_type processor; /* stored in sparc_cpu when the selector sets tuning */
 697     const int disable; /* target_flags bits cleared when the selector sets the arch */
 698     const int enable; /* target_flags bits set after the clear */
 699   } const cpu_table[] = {
 700     { "v7",         PROCESSOR_V7, MASK_ISA, 0 },
 701     { "cypress",    PROCESSOR_CYPRESS, MASK_ISA, 0 },
 702     { "v8",         PROCESSOR_V8, MASK_ISA, MASK_V8 },
 703     /* TI TMS390Z55 supersparc */
 704     { "supersparc", PROCESSOR_SUPERSPARC, MASK_ISA, MASK_V8 },
 705     { "sparclite",  PROCESSOR_SPARCLITE, MASK_ISA, MASK_SPARCLITE },
 706     /* The Fujitsu MB86930 is the original sparclite chip, with no fpu.
 707        The Fujitsu MB86934 is the recent sparclite chip, with an fpu.  */
 708     { "f930",       PROCESSOR_F930, MASK_ISA|MASK_FPU, MASK_SPARCLITE },
 709     { "f934",       PROCESSOR_F934, MASK_ISA, MASK_SPARCLITE|MASK_FPU },
 710     { "hypersparc", PROCESSOR_HYPERSPARC, MASK_ISA, MASK_V8|MASK_FPU },
 711     { "sparclite86x",  PROCESSOR_SPARCLITE86X, MASK_ISA|MASK_FPU,
 712       MASK_SPARCLITE },
 713     { "sparclet",   PROCESSOR_SPARCLET, MASK_ISA, MASK_SPARCLET },
 714     /* TEMIC sparclet */
 715     { "tsc701",     PROCESSOR_TSC701, MASK_ISA, MASK_SPARCLET },
 716     { "v9",         PROCESSOR_V9, MASK_ISA, MASK_V9 },
 717     /* TI ultrasparc I, II, IIi */
 718     { "ultrasparc", PROCESSOR_ULTRASPARC, MASK_ISA, MASK_V9
 719     /* Although insns using %y are deprecated, it is a clear win on current
 720        ultrasparcs.  */
 721                                                     |MASK_DEPRECATED_V8_INSNS},
 722     /* TI ultrasparc III */
 723     /* ??? Check if %y issue still holds true in ultra3.  */
 724     { "ultrasparc3", PROCESSOR_ULTRASPARC3, MASK_ISA, MASK_V9|MASK_DEPRECATED_V8_INSNS},
 725     /* UltraSPARC T1 */
 726     { "niagara", PROCESSOR_NIAGARA, MASK_ISA, MASK_V9|MASK_DEPRECATED_V8_INSNS},
 727     { "niagara2", PROCESSOR_NIAGARA2, MASK_ISA, MASK_V9}, /* needs its own value: code below tests PROCESSOR_NIAGARA2 */
 728     { 0, (enum processor_type) 0, 0, 0 }
 729   };
 730   const struct cpu_table *cpu;
 731   const struct sparc_cpu_select *sel;
 732   int fpu;
 733
 734 #ifdef SUBTARGET_OVERRIDE_OPTIONS
 735   SUBTARGET_OVERRIDE_OPTIONS;
 736 #endif
 737
 738 #ifndef SPARC_BI_ARCH
 739   /* Check for unsupported architecture size.  */
 740   if (! TARGET_64BIT != DEFAULT_ARCH32_P)
 741     error ("%s is not supported by this configuration",
 742            DEFAULT_ARCH32_P ? "-m64" : "-m32");
 743 #endif
 744
 745   /* We force all 64bit archs to use 128 bit long double */
 746   if (TARGET_64BIT && ! TARGET_LONG_DOUBLE_128)
 747     {
 748       error ("-mlong-double-64 not allowed with -m64");
 749       target_flags |= MASK_LONG_DOUBLE_128;
 750     }
 751
 752   /* Code model selection.  */
 753   sparc_cmodel = SPARC_DEFAULT_CMODEL;
 754
 755 #ifdef SPARC_BI_ARCH
 756   if (TARGET_ARCH32)
 757     sparc_cmodel = CM_32;
 758 #endif
 759
 760   if (sparc_cmodel_string != NULL)
 761     {
 762       if (TARGET_ARCH64)
 763         {
 764           for (cmodel = &cmodels[0]; cmodel->name; cmodel++)
 765             if (strcmp (sparc_cmodel_string, cmodel->name) == 0)
 766               break;
 767           if (cmodel->name == NULL)
 768             error ("bad value (%s) for -mcmodel= switch", sparc_cmodel_string);
 769           else
 770             sparc_cmodel = cmodel->value;
 771         }
 772       else
 773         error ("-mcmodel= is not supported on 32 bit systems");
 774     }
 775
 776   fpu = target_flags & MASK_FPU; /* save current -mfpu status */
 777
 778   /* Set the default CPU.  */
 779   for (def = &cpu_default[0]; def->name; ++def)
 780     if (def->cpu == TARGET_CPU_DEFAULT)
 781       break;
 782   gcc_assert (def->name);
 783   sparc_select[0].string = def->name;
 784
 785   for (sel = &sparc_select[0]; sel->name; ++sel)
 786     {
 787       if (sel->string)
 788         {
 789           for (cpu = &cpu_table[0]; cpu->name; ++cpu)
 790             if (! strcmp (sel->string, cpu->name))
 791               {
 792                 if (sel->set_tune_p)
 793                   sparc_cpu = cpu->processor;
 794
 795                 if (sel->set_arch_p)
 796                   {
 797                     target_flags &= ~cpu->disable;
 798                     target_flags |= cpu->enable;
 799                   }
 800                 break;
 801               }
 802
 803           if (! cpu->name)
 804             error ("bad value (%s) for %s switch", sel->string, sel->name);
 805         }
 806     }
 807
 808   /* If -mfpu or -mno-fpu was explicitly used, don't override with
 809      the processor default.  */
 810   if (fpu_option_set)
 811     target_flags = (target_flags & ~MASK_FPU) | fpu;
 812
 813   /* Don't allow -mvis if FPU is disabled.  */
 814   if (! TARGET_FPU)
 815     target_flags &= ~MASK_VIS;
 816
 817   /* -mvis assumes UltraSPARC+, so we are sure v9 instructions
 818      are available.
 819      -m64 also implies v9.  */
 820   if (TARGET_VIS || TARGET_ARCH64)
 821     {
 822       target_flags |= MASK_V9;
 823       target_flags &= ~(MASK_V8 | MASK_SPARCLET | MASK_SPARCLITE);
 824     }
 825
 826   /* Use the deprecated v8 insns for sparc64 in 32 bit mode.  */
 827   if (TARGET_V9 && TARGET_ARCH32)
 828     target_flags |= MASK_DEPRECATED_V8_INSNS;
 829
 830   /* V8PLUS requires V9, makes no sense in 64 bit mode.  */
 831   if (! TARGET_V9 || TARGET_ARCH64)
 832     target_flags &= ~MASK_V8PLUS;
 833
 834   /* Don't use stack biasing in 32 bit mode.  */
 835   if (TARGET_ARCH32)
 836     target_flags &= ~MASK_STACK_BIAS;
 837
 838   /* Supply a default value for align_functions.  */
 839   if (align_functions == 0
 840       && (sparc_cpu == PROCESSOR_ULTRASPARC
 841           || sparc_cpu == PROCESSOR_ULTRASPARC3
 842           || sparc_cpu == PROCESSOR_NIAGARA
 843           || sparc_cpu == PROCESSOR_NIAGARA2))
 844     align_functions = 32;
 845
 846   /* Validate PCC_STRUCT_RETURN.  */
 847   if (flag_pcc_struct_return == DEFAULT_PCC_STRUCT_RETURN)
 848     flag_pcc_struct_return = (TARGET_ARCH64 ? 0 : 1);
 849
 850   /* Only use .uaxword when compiling for a 64-bit target.  */
 851   if (!TARGET_ARCH64)
 852     targetm.asm_out.unaligned_op.di = NULL;
 853
 854   /* Do various machine dependent initializations.  */
 855   sparc_init_modes ();
 856
 857   /* Acquire unique alias sets for our private stuff.  */
 858   sparc_sr_alias_set = new_alias_set ();
 859   struct_value_alias_set = new_alias_set ();
 860
 861   /* Set up function hooks.  */
 862   init_machine_status = sparc_init_machine_status;
 863
 864   switch (sparc_cpu)
 865     {
 866     case PROCESSOR_V7:
 867     case PROCESSOR_CYPRESS:
 868       sparc_costs = &cypress_costs;
 869       break;
 870     case PROCESSOR_V8:
 871     case PROCESSOR_SPARCLITE:
 872     case PROCESSOR_SUPERSPARC:
 873       sparc_costs = &supersparc_costs;
 874       break;
 875     case PROCESSOR_F930:
 876     case PROCESSOR_F934:
 877     case PROCESSOR_HYPERSPARC:
 878     case PROCESSOR_SPARCLITE86X:
 879       sparc_costs = &hypersparc_costs;
 880       break;
 881     case PROCESSOR_SPARCLET:
 882     case PROCESSOR_TSC701:
 883       sparc_costs = &sparclet_costs;
 884       break;
 885     case PROCESSOR_V9:
 886     case PROCESSOR_ULTRASPARC:
 887       sparc_costs = &ultrasparc_costs;
 888       break;
 889     case PROCESSOR_ULTRASPARC3:
 890       sparc_costs = &ultrasparc3_costs;
 891       break;
 892     case PROCESSOR_NIAGARA:
 893       sparc_costs = &niagara_costs;
 894       break;
 895     case PROCESSOR_NIAGARA2:
 896       sparc_costs = &niagara2_costs;
 897       break;
 898     };
 899
 900 #ifdef TARGET_DEFAULT_LONG_DOUBLE_128
 901   if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
 902     target_flags |= MASK_LONG_DOUBLE_128;
 903 #endif
 904
 905   if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
 906     set_param_value ("simultaneous-prefetches",
 907                      ((sparc_cpu == PROCESSOR_ULTRASPARC
 908                        || sparc_cpu == PROCESSOR_NIAGARA
 909                        || sparc_cpu == PROCESSOR_NIAGARA2)
 910                       ? 2
 911                       : (sparc_cpu == PROCESSOR_ULTRASPARC3
 912                          ? 8 : 3)));
 913   if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
 914     set_param_value ("l1-cache-line-size",
 915                      ((sparc_cpu == PROCESSOR_ULTRASPARC
 916                        || sparc_cpu == PROCESSOR_ULTRASPARC3
 917                        || sparc_cpu == PROCESSOR_NIAGARA
 918                        || sparc_cpu == PROCESSOR_NIAGARA2)
 919                       ? 64 : 32));
 920 }
 921 \f
 922 /* Miscellaneous utilities.  */
 923
 924 /* Nonzero if CODE, a comparison, is suitable for use in v9 conditional move
 925    or branch on register contents instructions.  */
 926
 927 int
 928 v9_regcmp_p (enum rtx_code code)
 929 {
 930   return (code == EQ || code == NE || code == GE || code == LT
 931           || code == LE || code == GT);
 932 }
 933
 934 /* Nonzero if OP is a floating point constant which can
 935    be loaded into an integer register using a single
 936    sethi instruction.  */
 937
 938 int
 939 fp_sethi_p (rtx op)
 940 {
 941   if (GET_CODE (op) == CONST_DOUBLE)
 942     {
 943       REAL_VALUE_TYPE r;
 944       long i;
 945
 946       REAL_VALUE_FROM_CONST_DOUBLE (r, op);
 947       REAL_VALUE_TO_TARGET_SINGLE (r, i);
 948       return !SPARC_SIMM13_P (i) && SPARC_SETHI_P (i);
 949     }
 950
 951   return 0;
 952 }
 953
 954 /* Nonzero if OP is a floating point constant which can
 955    be loaded into an integer register using a single
 956    mov instruction.  */
 957
 958 int
 959 fp_mov_p (rtx op)
 960 {
 961   if (GET_CODE (op) == CONST_DOUBLE)
 962     {
 963       REAL_VALUE_TYPE r;
 964       long i;
 965
 966       REAL_VALUE_FROM_CONST_DOUBLE (r, op);
 967       REAL_VALUE_TO_TARGET_SINGLE (r, i);
 968       return SPARC_SIMM13_P (i);
 969     }
 970
 971   return 0;
 972 }
 973
 974 /* Nonzero if OP is a floating point constant which can
 975    be loaded into an integer register using a high/losum
 976    instruction sequence.  */
 977
 978 int
 979 fp_high_losum_p (rtx op)
 980 {
 981   /* The constraints calling this should only be in
 982      SFmode move insns, so any constant which cannot
 983      be moved using a single insn will do.  */
 984   if (GET_CODE (op) == CONST_DOUBLE)
 985     {
 986       REAL_VALUE_TYPE r;
 987       long i;
 988
 989       REAL_VALUE_FROM_CONST_DOUBLE (r, op);
 990       REAL_VALUE_TO_TARGET_SINGLE (r, i);
 991       return !SPARC_SIMM13_P (i) && !SPARC_SETHI_P (i);
 992     }
 993
 994   return 0;
 995 }
 996
 997 /* Expand a move instruction.  Return true if all work is done.  */
 998
 999 bool
1000 sparc_expand_move (enum machine_mode mode, rtx *operands)
1001 {
1002   /* Handle sets of MEM first.  */
1003   if (GET_CODE (operands[0]) == MEM)
1004     {
1005       /* 0 is a register (or a pair of registers) on SPARC.  */
1006       if (register_or_zero_operand (operands[1], mode))
1007         return false;
1008
1009       if (!reload_in_progress)
1010         {
1011           operands[0] = validize_mem (operands[0]);
1012           operands[1] = force_reg (mode, operands[1]);
1013         }
1014     }
1015
1016   /* Fixup TLS cases.  */
1017   if (TARGET_HAVE_TLS
1018       && CONSTANT_P (operands[1])
1019       && sparc_tls_referenced_p (operands [1]))
1020     {
1021       operands[1] = sparc_legitimize_tls_address (operands[1]);
1022       return false;
1023     }
1024
1025   /* Fixup PIC cases.  */
1026   if (flag_pic && CONSTANT_P (operands[1]))
1027     {
1028       if (pic_address_needs_scratch (operands[1]))
1029         operands[1] = sparc_legitimize_pic_address (operands[1], NULL_RTX);
1030
1031       /* VxWorks does not impose a fixed gap between segments; the run-time
1032          gap can be different from the object-file gap.  We therefore can't
1033          assume X - _GLOBAL_OFFSET_TABLE_ is a link-time constant unless we
1034          are absolutely sure that X is in the same segment as the GOT.
1035          Unfortunately, the flexibility of linker scripts means that we
1036          can't be sure of that in general, so assume that _G_O_T_-relative
1037          accesses are never valid on VxWorks.  */
1038       if (GET_CODE (operands[1]) == LABEL_REF && !TARGET_VXWORKS_RTP)
1039         {
1040           if (mode == SImode)
1041             {
1042               emit_insn (gen_movsi_pic_label_ref (operands[0], operands[1]));
1043               return true;
1044             }
1045
1046           if (mode == DImode)
1047             {
1048               gcc_assert (TARGET_ARCH64);
1049               emit_insn (gen_movdi_pic_label_ref (operands[0], operands[1]));
1050               return true;
1051             }
1052         }
1053
1054       if (symbolic_operand (operands[1], mode))
1055         {
1056           operands[1]
1057             = sparc_legitimize_pic_address (operands[1],
1058                                             reload_in_progress
1059                                             ? operands[0] : NULL_RTX);
1060           return false;
1061         }
1062     }
1063
1064   /* If we are trying to toss an integer constant into FP registers,
1065      or loading a FP or vector constant, force it into memory.  */
1066   if (CONSTANT_P (operands[1])
1067       && REG_P (operands[0])
1068       && (SPARC_FP_REG_P (REGNO (operands[0]))
1069           || SCALAR_FLOAT_MODE_P (mode)
1070           || VECTOR_MODE_P (mode)))
1071     {
1072       /* emit_group_store will send such bogosity to us when it is
1073          not storing directly into memory.  So fix this up to avoid
1074          crashes in output_constant_pool.  */
1075       if (operands [1] == const0_rtx)
1076         operands[1] = CONST0_RTX (mode);
1077
1078       /* We can clear FP registers if TARGET_VIS, and always other regs.  */
1079       if ((TARGET_VIS || REGNO (operands[0]) < SPARC_FIRST_FP_REG)
1080           && const_zero_operand (operands[1], mode))
1081         return false;
1082
1083       if (REGNO (operands[0]) < SPARC_FIRST_FP_REG
1084           /* We are able to build any SF constant in integer registers
1085              with at most 2 instructions.  */
1086           && (mode == SFmode
1087               /* And any DF constant in integer registers.  */
1088               || (mode == DFmode
1089                   && (reload_completed || reload_in_progress))))
1090         return false;
1091
1092       operands[1] = force_const_mem (mode, operands[1]);
1093       if (!reload_in_progress)
1094         operands[1] = validize_mem (operands[1]);
1095       return false;
1096     }
1097
1098   /* Accept non-constants and valid constants unmodified.  */
1099   if (!CONSTANT_P (operands[1])
1100       || GET_CODE (operands[1]) == HIGH
1101       || input_operand (operands[1], mode))
1102     return false;
1103
1104   switch (mode)
1105     {
1106     case QImode:
1107       /* All QImode constants require only one insn, so proceed.  */
1108       break;
1109
1110     case HImode:
1111     case SImode:
1112       sparc_emit_set_const32 (operands[0], operands[1]);
1113       return true;
1114
1115     case DImode:
1116       /* input_operand should have filtered out 32-bit mode.  */
1117       sparc_emit_set_const64 (operands[0], operands[1]);
1118       return true;
1119
1120     default:
1121       gcc_unreachable ();
1122     }
1123
1124   return false;
1125 }
1126
1127 /* Load OP1, a 32-bit constant, into OP0, a register.
1128    We know it can't be done in one insn when we get
1129    here, the move expander guarantees this.  */
1130
1131 static void
1132 sparc_emit_set_const32 (rtx op0, rtx op1)
1133 {
1134   enum machine_mode mode = GET_MODE (op0);
1135   rtx temp;
1136
1137   if (reload_in_progress || reload_completed)
1138     temp = op0;
1139   else
1140     temp = gen_reg_rtx (mode);
1141
1142   if (GET_CODE (op1) == CONST_INT)
1143     {
1144       gcc_assert (!small_int_operand (op1, mode)
1145                   && !const_high_operand (op1, mode));
1146
1147       /* Emit them as real moves instead of a HIGH/LO_SUM,
1148          this way CSE can see everything and reuse intermediate
1149          values if it wants.  */
1150       emit_insn (gen_rtx_SET (VOIDmode, temp,
1151                               GEN_INT (INTVAL (op1)
1152                                 & ~(HOST_WIDE_INT)0x3ff)));
1153
1154       emit_insn (gen_rtx_SET (VOIDmode,
1155                               op0,
1156                               gen_rtx_IOR (mode, temp,
1157                                            GEN_INT (INTVAL (op1) & 0x3ff))));
1158     }
1159   else
1160     {
1161       /* A symbol, emit in the traditional way.  */
1162       emit_insn (gen_rtx_SET (VOIDmode, temp,
1163                               gen_rtx_HIGH (mode, op1)));
1164       emit_insn (gen_rtx_SET (VOIDmode,
1165                               op0, gen_rtx_LO_SUM (mode, temp, op1)));
1166     }
1167 }
1168
1169 /* Load OP1, a symbolic 64-bit constant, into OP0, a DImode register.
1170    If TEMP is nonzero, we are forbidden to use any other scratch
1171    registers.  Otherwise, we are allowed to generate them as needed.
1172
1173    Note that TEMP may have TImode if the code model is TARGET_CM_MEDANY
1174    or TARGET_CM_EMBMEDANY (see the reload_indi and reload_outdi patterns).  */
1175
1176 void
1177 sparc_emit_set_symbolic_const64 (rtx op0, rtx op1, rtx temp)
1178 {
1179   rtx temp1, temp2, temp3, temp4, temp5;
1180   rtx ti_temp = 0;
1181
1182   if (temp && GET_MODE (temp) == TImode)
1183     {
1184       ti_temp = temp;
1185       temp = gen_rtx_REG (DImode, REGNO (temp));
1186     }
1187
1188   /* SPARC-V9 code-model support.  */
1189   switch (sparc_cmodel)
1190     {
1191     case CM_MEDLOW:
1192       /* The range spanned by all instructions in the object is less
1193          than 2^31 bytes (2GB) and the distance from any instruction
1194          to the location of the label _GLOBAL_OFFSET_TABLE_ is less
1195          than 2^31 bytes (2GB).
1196
1197          The executable must be in the low 4TB of the virtual address
1198          space.
1199
1200          sethi  %hi(symbol), %temp1
1201          or     %temp1, %lo(symbol), %reg  */
1202       if (temp)
1203         temp1 = temp;  /* op0 is allowed.  */
1204       else
1205         temp1 = gen_reg_rtx (DImode);
1206
1207       emit_insn (gen_rtx_SET (VOIDmode, temp1, gen_rtx_HIGH (DImode, op1)));
1208       emit_insn (gen_rtx_SET (VOIDmode, op0, gen_rtx_LO_SUM (DImode, temp1, op1)));
1209       break;
1210
1211     case CM_MEDMID:
1212       /* The range spanned by all instructions in the object is less
1213          than 2^31 bytes (2GB) and the distance from any instruction
1214          to the location of the label _GLOBAL_OFFSET_TABLE_ is less
1215          than 2^31 bytes (2GB).
1216
1217          The executable must be in the low 16TB of the virtual address
1218          space.
1219
1220          sethi  %h44(symbol), %temp1
1221          or     %temp1, %m44(symbol), %temp2
1222          sllx   %temp2, 12, %temp3
1223          or     %temp3, %l44(symbol), %reg  */
1224       if (temp)
1225         {
1226           temp1 = op0;
1227           temp2 = op0;
1228           temp3 = temp;  /* op0 is allowed.  */
1229         }
1230       else
1231         {
1232           temp1 = gen_reg_rtx (DImode);
1233           temp2 = gen_reg_rtx (DImode);
1234           temp3 = gen_reg_rtx (DImode);
1235         }
1236
1237       emit_insn (gen_seth44 (temp1, op1));
1238       emit_insn (gen_setm44 (temp2, temp1, op1));
1239       emit_insn (gen_rtx_SET (VOIDmode, temp3,
1240                               gen_rtx_ASHIFT (DImode, temp2, GEN_INT (12))));
1241       emit_insn (gen_setl44 (op0, temp3, op1));
1242       break;
1243
1244     case CM_MEDANY:
1245       /* The range spanned by all instructions in the object is less
1246          than 2^31 bytes (2GB) and the distance from any instruction
1247          to the location of the label _GLOBAL_OFFSET_TABLE_ is less
1248          than 2^31 bytes (2GB).
1249
1250          The executable can be placed anywhere in the virtual address
1251          space.
1252
1253          sethi  %hh(symbol), %temp1
1254          sethi  %lm(symbol), %temp2
1255          or     %temp1, %hm(symbol), %temp3
1256          sllx   %temp3, 32, %temp4
1257          or     %temp4, %temp2, %temp5
1258          or     %temp5, %lo(symbol), %reg  */
1259       if (temp)
1260         {
1261           /* It is possible that one of the registers we got for operands[2]
1262              might coincide with that of operands[0] (which is why we made
1263              it TImode).  Pick the other one to use as our scratch.  */
1264           if (rtx_equal_p (temp, op0))
1265             {
1266               gcc_assert (ti_temp);
1267               temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
1268             }
1269           temp1 = op0;
1270           temp2 = temp;  /* op0 is _not_ allowed, see above.  */
1271           temp3 = op0;
1272           temp4 = op0;
1273           temp5 = op0;
1274         }
1275       else
1276         {
1277           temp1 = gen_reg_rtx (DImode);
1278           temp2 = gen_reg_rtx (DImode);
1279           temp3 = gen_reg_rtx (DImode);
1280           temp4 = gen_reg_rtx (DImode);
1281           temp5 = gen_reg_rtx (DImode);
1282         }
1283
1284       emit_insn (gen_sethh (temp1, op1));
1285       emit_insn (gen_setlm (temp2, op1));
1286       emit_insn (gen_sethm (temp3, temp1, op1));
1287       emit_insn (gen_rtx_SET (VOIDmode, temp4,
1288                               gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
1289       emit_insn (gen_rtx_SET (VOIDmode, temp5,
1290                               gen_rtx_PLUS (DImode, temp4, temp2)));
1291       emit_insn (gen_setlo (op0, temp5, op1));
1292       break;
1293
1294     case CM_EMBMEDANY:
1295       /* Old old old backwards compatibility kruft here.
1296          Essentially it is MEDLOW with a fixed 64-bit
1297          virtual base added to all data segment addresses.
1298          Text-segment stuff is computed like MEDANY, we can't
1299          reuse the code above because the relocation knobs
1300          look different.
1301
1302          Data segment:  sethi   %hi(symbol), %temp1
1303                         add     %temp1, EMBMEDANY_BASE_REG, %temp2
1304                         or      %temp2, %lo(symbol), %reg  */
1305       if (data_segment_operand (op1, GET_MODE (op1)))
1306         {
1307           if (temp)
1308             {
1309               temp1 = temp;  /* op0 is allowed.  */
1310               temp2 = op0;
1311             }
1312           else
1313             {
1314               temp1 = gen_reg_rtx (DImode);
1315               temp2 = gen_reg_rtx (DImode);
1316             }
1317
1318           emit_insn (gen_embmedany_sethi (temp1, op1));
1319           emit_insn (gen_embmedany_brsum (temp2, temp1));
1320           emit_insn (gen_embmedany_losum (op0, temp2, op1));
1321         }
1322
1323       /* Text segment:  sethi   %uhi(symbol), %temp1
1324                         sethi   %hi(symbol), %temp2
1325                         or      %temp1, %ulo(symbol), %temp3
1326                         sllx    %temp3, 32, %temp4
1327                         or      %temp4, %temp2, %temp5
1328                         or      %temp5, %lo(symbol), %reg  */
1329       else
1330         {
1331           if (temp)
1332             {
1333               /* It is possible that one of the registers we got for operands[2]
1334                  might coincide with that of operands[0] (which is why we made
1335                  it TImode).  Pick the other one to use as our scratch.  */
1336               if (rtx_equal_p (temp, op0))
1337                 {
1338                   gcc_assert (ti_temp);
1339                   temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
1340                 }
1341               temp1 = op0;
1342               temp2 = temp;  /* op0 is _not_ allowed, see above.  */
1343               temp3 = op0;
1344               temp4 = op0;
1345               temp5 = op0;
1346             }
1347           else
1348             {
1349               temp1 = gen_reg_rtx (DImode);
1350               temp2 = gen_reg_rtx (DImode);
1351               temp3 = gen_reg_rtx (DImode);
1352               temp4 = gen_reg_rtx (DImode);
1353               temp5 = gen_reg_rtx (DImode);
1354             }
1355
1356           emit_insn (gen_embmedany_textuhi (temp1, op1));
1357           emit_insn (gen_embmedany_texthi  (temp2, op1));
1358           emit_insn (gen_embmedany_textulo (temp3, temp1, op1));
1359           emit_insn (gen_rtx_SET (VOIDmode, temp4,
1360                                   gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
1361           emit_insn (gen_rtx_SET (VOIDmode, temp5,
1362                                   gen_rtx_PLUS (DImode, temp4, temp2)));
1363           emit_insn (gen_embmedany_textlo  (op0, temp5, op1));
1364         }
1365       break;
1366
1367     default:
1368       gcc_unreachable ();
1369     }
1370 }
1371
1372 #if HOST_BITS_PER_WIDE_INT == 32
1373 static void
1374 sparc_emit_set_const64 (rtx op0 ATTRIBUTE_UNUSED, rtx op1 ATTRIBUTE_UNUSED)
1375 {
1376   gcc_unreachable ();
1377 }
1378 #else
1379 /* These avoid problems when cross compiling.  If we do not
1380    go through all this hair then the optimizer will see
1381    invalid REG_EQUAL notes or in some cases none at all.  */
1382 static rtx gen_safe_HIGH64 (rtx, HOST_WIDE_INT);
1383 static rtx gen_safe_SET64 (rtx, HOST_WIDE_INT);
1384 static rtx gen_safe_OR64 (rtx, HOST_WIDE_INT);
1385 static rtx gen_safe_XOR64 (rtx, HOST_WIDE_INT);
1386
1387 /* The optimizer is not to assume anything about exactly
1388    which bits are set for a HIGH, they are unspecified.
1389    Unfortunately this leads to many missed optimizations
1390    during CSE.  We mask out the non-HIGH bits, and matches
1391    a plain movdi, to alleviate this problem.  */
1392 static rtx
1393 gen_safe_HIGH64 (rtx dest, HOST_WIDE_INT val)
1394 {
1395   return gen_rtx_SET (VOIDmode, dest, GEN_INT (val & ~(HOST_WIDE_INT)0x3ff));
1396 }
1397
1398 static rtx
1399 gen_safe_SET64 (rtx dest, HOST_WIDE_INT val)
1400 {
1401   return gen_rtx_SET (VOIDmode, dest, GEN_INT (val));
1402 }
1403
1404 static rtx
1405 gen_safe_OR64 (rtx src, HOST_WIDE_INT val)
1406 {
1407   return gen_rtx_IOR (DImode, src, GEN_INT (val));
1408 }
1409
1410 static rtx
1411 gen_safe_XOR64 (rtx src, HOST_WIDE_INT val)
1412 {
1413   return gen_rtx_XOR (DImode, src, GEN_INT (val));
1414 }
1415
1416 /* Worker routines for 64-bit constant formation on arch64.
1417    One of the key things to be doing in these emissions is
1418    to create as many temp REGs as possible.  This makes it
1419    possible for half-built constants to be used later when
1420    such values are similar to something required later on.
1421    Without doing this, the optimizer cannot see such
1422    opportunities.  */
1423
1424 static void sparc_emit_set_const64_quick1 (rtx, rtx,
1425                                            unsigned HOST_WIDE_INT, int);
1426
1427 static void
1428 sparc_emit_set_const64_quick1 (rtx op0, rtx temp,
1429                                unsigned HOST_WIDE_INT low_bits, int is_neg)
1430 {
1431   unsigned HOST_WIDE_INT high_bits;
1432
1433   if (is_neg)
1434     high_bits = (~low_bits) & 0xffffffff;
1435   else
1436     high_bits = low_bits;
1437
1438   emit_insn (gen_safe_HIGH64 (temp, high_bits));
1439   if (!is_neg)
1440     {
1441       emit_insn (gen_rtx_SET (VOIDmode, op0,
1442                               gen_safe_OR64 (temp, (high_bits & 0x3ff))));
1443     }
1444   else
1445     {
1446       /* If we are XOR'ing with -1, then we should emit a one's complement
1447          instead.  This way the combiner will notice logical operations
1448          such as ANDN later on and substitute.  */
1449       if ((low_bits & 0x3ff) == 0x3ff)
1450         {
1451           emit_insn (gen_rtx_SET (VOIDmode, op0,
1452                                   gen_rtx_NOT (DImode, temp)));
1453         }
1454       else
1455         {
1456           emit_insn (gen_rtx_SET (VOIDmode, op0,
1457                                   gen_safe_XOR64 (temp,
1458                                                   (-(HOST_WIDE_INT)0x400
1459                                                    | (low_bits & 0x3ff)))));
1460         }
1461     }
1462 }
1463
1464 static void sparc_emit_set_const64_quick2 (rtx, rtx, unsigned HOST_WIDE_INT,
1465                                            unsigned HOST_WIDE_INT, int);
1466
1467 static void
1468 sparc_emit_set_const64_quick2 (rtx op0, rtx temp,
1469                                unsigned HOST_WIDE_INT high_bits,
1470                                unsigned HOST_WIDE_INT low_immediate,
1471                                int shift_count)
1472 {
1473   rtx temp2 = op0;
1474
1475   if ((high_bits & 0xfffffc00) != 0)
1476     {
1477       emit_insn (gen_safe_HIGH64 (temp, high_bits));
1478       if ((high_bits & ~0xfffffc00) != 0)
1479         emit_insn (gen_rtx_SET (VOIDmode, op0,
1480                                 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
1481       else
1482         temp2 = temp;
1483     }
1484   else
1485     {
1486       emit_insn (gen_safe_SET64 (temp, high_bits));
1487       temp2 = temp;
1488     }
1489
1490   /* Now shift it up into place.  */
1491   emit_insn (gen_rtx_SET (VOIDmode, op0,
1492                           gen_rtx_ASHIFT (DImode, temp2,
1493                                           GEN_INT (shift_count))));
1494
1495   /* If there is a low immediate part piece, finish up by
1496      putting that in as well.  */
1497   if (low_immediate != 0)
1498     emit_insn (gen_rtx_SET (VOIDmode, op0,
1499                             gen_safe_OR64 (op0, low_immediate)));
1500 }
1501
1502 static void sparc_emit_set_const64_longway (rtx, rtx, unsigned HOST_WIDE_INT,
1503                                             unsigned HOST_WIDE_INT);
1504
1505 /* Full 64-bit constant decomposition.  Even though this is the
1506    'worst' case, we still optimize a few things away.  */
1507 static void
1508 sparc_emit_set_const64_longway (rtx op0, rtx temp,
1509                                 unsigned HOST_WIDE_INT high_bits,
1510                                 unsigned HOST_WIDE_INT low_bits)
1511 {
1512   rtx sub_temp;
1513
1514   if (reload_in_progress || reload_completed)
1515     sub_temp = op0;
1516   else
1517     sub_temp = gen_reg_rtx (DImode);
1518
1519   if ((high_bits & 0xfffffc00) != 0)
1520     {
1521       emit_insn (gen_safe_HIGH64 (temp, high_bits));
1522       if ((high_bits & ~0xfffffc00) != 0)
1523         emit_insn (gen_rtx_SET (VOIDmode,
1524                                 sub_temp,
1525                                 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
1526       else
1527         sub_temp = temp;
1528     }
1529   else
1530     {
1531       emit_insn (gen_safe_SET64 (temp, high_bits));
1532       sub_temp = temp;
1533     }
1534
1535   if (!reload_in_progress && !reload_completed)
1536     {
1537       rtx temp2 = gen_reg_rtx (DImode);
1538       rtx temp3 = gen_reg_rtx (DImode);
1539       rtx temp4 = gen_reg_rtx (DImode);
1540
1541       emit_insn (gen_rtx_SET (VOIDmode, temp4,
1542                               gen_rtx_ASHIFT (DImode, sub_temp,
1543                                               GEN_INT (32))));
1544
1545       emit_insn (gen_safe_HIGH64 (temp2, low_bits));
1546       if ((low_bits & ~0xfffffc00) != 0)
1547         {
1548           emit_insn (gen_rtx_SET (VOIDmode, temp3,
1549                                   gen_safe_OR64 (temp2, (low_bits & 0x3ff))));
1550           emit_insn (gen_rtx_SET (VOIDmode, op0,
1551                                   gen_rtx_PLUS (DImode, temp4, temp3)));
1552         }
1553       else
1554         {
1555           emit_insn (gen_rtx_SET (VOIDmode, op0,
1556                                   gen_rtx_PLUS (DImode, temp4, temp2)));
1557         }
1558     }
1559   else
1560     {
1561       rtx low1 = GEN_INT ((low_bits >> (32 - 12))          & 0xfff);
1562       rtx low2 = GEN_INT ((low_bits >> (32 - 12 - 12))     & 0xfff);
1563       rtx low3 = GEN_INT ((low_bits >> (32 - 12 - 12 - 8)) & 0x0ff);
1564       int to_shift = 12;
1565
1566       /* We are in the middle of reload, so this is really
1567          painful.  However we do still make an attempt to
1568          avoid emitting truly stupid code.  */
1569       if (low1 != const0_rtx)
1570         {
1571           emit_insn (gen_rtx_SET (VOIDmode, op0,
1572                                   gen_rtx_ASHIFT (DImode, sub_temp,
1573                                                   GEN_INT (to_shift))));
1574           emit_insn (gen_rtx_SET (VOIDmode, op0,
1575                                   gen_rtx_IOR (DImode, op0, low1)));
1576           sub_temp = op0;
1577           to_shift = 12;
1578         }
1579       else
1580         {
1581           to_shift += 12;
1582         }
1583       if (low2 != const0_rtx)
1584         {
1585           emit_insn (gen_rtx_SET (VOIDmode, op0,
1586                                   gen_rtx_ASHIFT (DImode, sub_temp,
1587                                                   GEN_INT (to_shift))));
1588           emit_insn (gen_rtx_SET (VOIDmode, op0,
1589                                   gen_rtx_IOR (DImode, op0, low2)));
1590           sub_temp = op0;
1591           to_shift = 8;
1592         }
1593       else
1594         {
1595           to_shift += 8;
1596         }
1597       emit_insn (gen_rtx_SET (VOIDmode, op0,
1598                               gen_rtx_ASHIFT (DImode, sub_temp,
1599                                               GEN_INT (to_shift))));
1600       if (low3 != const0_rtx)
1601         emit_insn (gen_rtx_SET (VOIDmode, op0,
1602                                 gen_rtx_IOR (DImode, op0, low3)));
1603       /* phew...  */
1604     }
1605 }
1606
/* Analyze a 64-bit constant for certain properties.  */
static void analyze_64bit_constant (unsigned HOST_WIDE_INT,
                                    unsigned HOST_WIDE_INT,
                                    int *, int *, int *);

/* The constant is given as two 32-bit halves: HIGH_BITS holds bits
   32-63 and LOW_BITS holds bits 0-31.  Only the low 32 bits of each
   argument are examined.  On return:

     *HBSP   is the position (0-63) of the highest bit that is set,
     *LBSP   is the position (0-63) of the lowest bit that is set,
     *ABBASP is 1 if every bit between those two positions is set as
             well, and 0 otherwise.

   At least one bit must be set; a zero constant should have gone out
   as a single instruction before getting here.  */

static void
analyze_64bit_constant (unsigned HOST_WIDE_INT high_bits,
                        unsigned HOST_WIDE_INT low_bits,
                        int *hbsp, int *lbsp, int *abbasp)
{
  int lowest_bit_set = -1;
  int highest_bit_set = -1;
  int bits_set = 0;
  int i;

  /* Walk the 64 bit positions from the bottom.  Each bit is isolated
     by shifting the unsigned half right, so no signed shift and no
     sign-extended mask is involved.  */
  for (i = 0; i < 64; i++)
    {
      unsigned HOST_WIDE_INT half = i < 32 ? low_bits : high_bits;

      if (((half >> (i % 32)) & 1) == 0)
        continue;

      if (lowest_bit_set == -1)
        lowest_bit_set = i;
      highest_bit_set = i;
      bits_set++;
    }

  /* If there are no bits set this should have gone out
     as one instruction!  */
  gcc_assert (lowest_bit_set != -1 && highest_bit_set != -1);

  *hbsp = highest_bit_set;
  *lbsp = lowest_bit_set;
  /* The run is contiguous exactly when the number of set bits equals
     the width of the span they occupy.  */
  *abbasp = (bits_set == highest_bit_set - lowest_bit_set + 1);
}
1673
1674 static int const64_is_2insns (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT);
1675
1676 static int
1677 const64_is_2insns (unsigned HOST_WIDE_INT high_bits,
1678                    unsigned HOST_WIDE_INT low_bits)
1679 {
1680   int highest_bit_set, lowest_bit_set, all_bits_between_are_set;
1681
1682   if (high_bits == 0
1683       || high_bits == 0xffffffff)
1684     return 1;
1685
1686   analyze_64bit_constant (high_bits, low_bits,
1687                           &highest_bit_set, &lowest_bit_set,
1688                           &all_bits_between_are_set);
1689
1690   if ((highest_bit_set == 63
1691        || lowest_bit_set == 0)
1692       && all_bits_between_are_set != 0)
1693     return 1;
1694
1695   if ((highest_bit_set - lowest_bit_set) < 21)
1696     return 1;
1697
1698   return 0;
1699 }
1700
1701 static unsigned HOST_WIDE_INT create_simple_focus_bits (unsigned HOST_WIDE_INT,
1702                                                         unsigned HOST_WIDE_INT,
1703                                                         int, int);
1704
1705 static unsigned HOST_WIDE_INT
1706 create_simple_focus_bits (unsigned HOST_WIDE_INT high_bits,
1707                           unsigned HOST_WIDE_INT low_bits,
1708                           int lowest_bit_set, int shift)
1709 {
1710   HOST_WIDE_INT hi, lo;
1711
1712   if (lowest_bit_set < 32)
1713     {
1714       lo = (low_bits >> lowest_bit_set) << shift;
1715       hi = ((high_bits << (32 - lowest_bit_set)) << shift);
1716     }
1717   else
1718     {
1719       lo = 0;
1720       hi = ((high_bits >> (lowest_bit_set - 32)) << shift);
1721     }
1722   gcc_assert (! (hi & lo));
1723   return (hi | lo);
1724 }
1725
1726 /* Here we are sure to be arch64 and this is an integer constant
1727    being loaded into a register.  Emit the most efficient
1728    insn sequence possible.  Detection of all the 1-insn cases
1729    has been done already.  */
1730 static void
1731 sparc_emit_set_const64 (rtx op0, rtx op1)
1732 {
1733   unsigned HOST_WIDE_INT high_bits, low_bits;
1734   int lowest_bit_set, highest_bit_set;
1735   int all_bits_between_are_set;
1736   rtx temp = 0;
1737
1738   /* Sanity check that we know what we are working with.  */
1739   gcc_assert (TARGET_ARCH64
1740               && (GET_CODE (op0) == SUBREG
1741                   || (REG_P (op0) && ! SPARC_FP_REG_P (REGNO (op0)))));
1742
1743   if (reload_in_progress || reload_completed)
1744     temp = op0;
1745
1746   if (GET_CODE (op1) != CONST_INT)
1747     {
1748       sparc_emit_set_symbolic_const64 (op0, op1, temp);
1749       return;
1750     }
1751
1752   if (! temp)
1753     temp = gen_reg_rtx (DImode);
1754
1755   high_bits = ((INTVAL (op1) >> 32) & 0xffffffff);
1756   low_bits = (INTVAL (op1) & 0xffffffff);
1757
1758   /* low_bits   bits 0  --> 31
1759      high_bits  bits 32 --> 63  */
1760
1761   analyze_64bit_constant (high_bits, low_bits,
1762                           &highest_bit_set, &lowest_bit_set,
1763                           &all_bits_between_are_set);
1764
1765   /* First try for a 2-insn sequence.  */
1766
1767   /* These situations are preferred because the optimizer can
1768    * do more things with them:
1769    * 1) mov     -1, %reg
1770    *    sllx    %reg, shift, %reg
1771    * 2) mov     -1, %reg
1772    *    srlx    %reg, shift, %reg
1773    * 3) mov     some_small_const, %reg
1774    *    sllx    %reg, shift, %reg
1775    */
1776   if (((highest_bit_set == 63
1777         || lowest_bit_set == 0)
1778        && all_bits_between_are_set != 0)
1779       || ((highest_bit_set - lowest_bit_set) < 12))
1780     {
1781       HOST_WIDE_INT the_const = -1;
1782       int shift = lowest_bit_set;
1783
1784       if ((highest_bit_set != 63
1785            && lowest_bit_set != 0)
1786           || all_bits_between_are_set == 0)
1787         {
1788           the_const =
1789             create_simple_focus_bits (high_bits, low_bits,
1790                                       lowest_bit_set, 0);
1791         }
1792       else if (lowest_bit_set == 0)
1793         shift = -(63 - highest_bit_set);
1794
1795       gcc_assert (SPARC_SIMM13_P (the_const));
1796       gcc_assert (shift != 0);
1797
1798       emit_insn (gen_safe_SET64 (temp, the_const));
1799       if (shift > 0)
1800         emit_insn (gen_rtx_SET (VOIDmode,
1801                                 op0,
1802                                 gen_rtx_ASHIFT (DImode,
1803                                                 temp,
1804                                                 GEN_INT (shift))));
1805       else if (shift < 0)
1806         emit_insn (gen_rtx_SET (VOIDmode,
1807                                 op0,
1808                                 gen_rtx_LSHIFTRT (DImode,
1809                                                   temp,
1810                                                   GEN_INT (-shift))));
1811       return;
1812     }
1813
1814   /* Now a range of 22 or less bits set somewhere.
1815    * 1) sethi   %hi(focus_bits), %reg
1816    *    sllx    %reg, shift, %reg
1817    * 2) sethi   %hi(focus_bits), %reg
1818    *    srlx    %reg, shift, %reg
1819    */
1820   if ((highest_bit_set - lowest_bit_set) < 21)
1821     {
1822       unsigned HOST_WIDE_INT focus_bits =
1823         create_simple_focus_bits (high_bits, low_bits,
1824                                   lowest_bit_set, 10);
1825
1826       gcc_assert (SPARC_SETHI_P (focus_bits));
1827       gcc_assert (lowest_bit_set != 10);
1828
1829       emit_insn (gen_safe_HIGH64 (temp, focus_bits));
1830
1831       /* If lowest_bit_set == 10 then a sethi alone could have done it.  */
1832       if (lowest_bit_set < 10)
1833         emit_insn (gen_rtx_SET (VOIDmode,
1834                                 op0,
1835                                 gen_rtx_LSHIFTRT (DImode, temp,
1836                                                   GEN_INT (10 - lowest_bit_set))));
1837       else if (lowest_bit_set > 10)
1838         emit_insn (gen_rtx_SET (VOIDmode,
1839                                 op0,
1840                                 gen_rtx_ASHIFT (DImode, temp,
1841                                                 GEN_INT (lowest_bit_set - 10))));
1842       return;
1843     }
1844
1845   /* 1) sethi   %hi(low_bits), %reg
1846    *    or      %reg, %lo(low_bits), %reg
1847    * 2) sethi   %hi(~low_bits), %reg
1848    *    xor     %reg, %lo(-0x400 | (low_bits & 0x3ff)), %reg
1849    */
1850   if (high_bits == 0
1851       || high_bits == 0xffffffff)
1852     {
1853       sparc_emit_set_const64_quick1 (op0, temp, low_bits,
1854                                      (high_bits == 0xffffffff));
1855       return;
1856     }
1857
1858   /* Now, try 3-insn sequences.  */
1859
1860   /* 1) sethi   %hi(high_bits), %reg
1861    *    or      %reg, %lo(high_bits), %reg
1862    *    sllx    %reg, 32, %reg
1863    */
1864   if (low_bits == 0)
1865     {
1866       sparc_emit_set_const64_quick2 (op0, temp, high_bits, 0, 32);
1867       return;
1868     }
1869
1870   /* We may be able to do something quick
1871      when the constant is negated, so try that.  */
1872   if (const64_is_2insns ((~high_bits) & 0xffffffff,
1873                          (~low_bits) & 0xfffffc00))
1874     {
1875       /* NOTE: The trailing bits get XOR'd so we need the
1876          non-negated bits, not the negated ones.  */
1877       unsigned HOST_WIDE_INT trailing_bits = low_bits & 0x3ff;
1878
1879       if ((((~high_bits) & 0xffffffff) == 0
1880            && ((~low_bits) & 0x80000000) == 0)
1881           || (((~high_bits) & 0xffffffff) == 0xffffffff
1882               && ((~low_bits) & 0x80000000) != 0))
1883         {
1884           unsigned HOST_WIDE_INT fast_int = (~low_bits & 0xffffffff);
1885
1886           if ((SPARC_SETHI_P (fast_int)
1887                && (~high_bits & 0xffffffff) == 0)
1888               || SPARC_SIMM13_P (fast_int))
1889             emit_insn (gen_safe_SET64 (temp, fast_int));
1890           else
1891             sparc_emit_set_const64 (temp, GEN_INT (fast_int));
1892         }
1893       else
1894         {
1895           rtx negated_const;
1896           negated_const = GEN_INT (((~low_bits) & 0xfffffc00) |
1897                                    (((HOST_WIDE_INT)((~high_bits) & 0xffffffff))<<32));
1898           sparc_emit_set_const64 (temp, negated_const);
1899         }
1900
1901       /* If we are XOR'ing with -1, then we should emit a one's complement
1902          instead.  This way the combiner will notice logical operations
1903          such as ANDN later on and substitute.  */
1904       if (trailing_bits == 0x3ff)
1905         {
1906           emit_insn (gen_rtx_SET (VOIDmode, op0,
1907                                   gen_rtx_NOT (DImode, temp)));
1908         }
1909       else
1910         {
1911           emit_insn (gen_rtx_SET (VOIDmode,
1912                                   op0,
1913                                   gen_safe_XOR64 (temp,
1914                                                   (-0x400 | trailing_bits))));
1915         }
1916       return;
1917     }
1918
1919   /* 1) sethi   %hi(xxx), %reg
1920    *    or      %reg, %lo(xxx), %reg
1921    *    sllx    %reg, yyy, %reg
1922    *
1923    * ??? This is just a generalized version of the low_bits==0
1924    * thing above, FIXME...
1925    */
1926   if ((highest_bit_set - lowest_bit_set) < 32)
1927     {
1928       unsigned HOST_WIDE_INT focus_bits =
1929         create_simple_focus_bits (high_bits, low_bits,
1930                                   lowest_bit_set, 0);
1931
1932       /* We can't get here in this state.  */
1933       gcc_assert (highest_bit_set >= 32 && lowest_bit_set < 32);
1934
1935       /* So what we know is that the set bits straddle the
1936          middle of the 64-bit word.  */
1937       sparc_emit_set_const64_quick2 (op0, temp,
1938                                      focus_bits, 0,
1939                                      lowest_bit_set);
1940       return;
1941     }
1942
1943   /* 1) sethi   %hi(high_bits), %reg
1944    *    or      %reg, %lo(high_bits), %reg
1945    *    sllx    %reg, 32, %reg
1946    *    or      %reg, low_bits, %reg
1947    */
1948   if (SPARC_SIMM13_P(low_bits)
1949       && ((int)low_bits > 0))
1950     {
1951       sparc_emit_set_const64_quick2 (op0, temp, high_bits, low_bits, 32);
1952       return;
1953     }
1954
1955   /* The easiest way when all else fails, is full decomposition.  */
1956   sparc_emit_set_const64_longway (op0, temp, high_bits, low_bits);
1957 }
1958 #endif /* HOST_BITS_PER_WIDE_INT == 32 */
1959
1960 /* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE,
1961    return the mode to be used for the comparison.  For floating-point,
1962    CCFP[E]mode is used.  CC_NOOVmode should be used when the first operand
1963    is a PLUS, MINUS, NEG, or ASHIFT.  CCmode should be used when no special
1964    processing is needed.  */
1965
1966 enum machine_mode
1967 select_cc_mode (enum rtx_code op, rtx x, rtx y ATTRIBUTE_UNUSED)
1968 {
1969   if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
1970     {
1971       switch (op)
1972         {
1973         case EQ:
1974         case NE:
1975         case UNORDERED:
1976         case ORDERED:
1977         case UNLT:
1978         case UNLE:
1979         case UNGT:
1980         case UNGE:
1981         case UNEQ:
1982         case LTGT:
1983           return CCFPmode;
1984
1985         case LT:
1986         case LE:
1987         case GT:
1988         case GE:
1989           return CCFPEmode;
1990
1991         default:
1992           gcc_unreachable ();
1993         }
1994     }
1995   else if (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
1996            || GET_CODE (x) == NEG || GET_CODE (x) == ASHIFT)
1997     {
1998       if (TARGET_ARCH64 && GET_MODE (x) == DImode)
1999         return CCX_NOOVmode;
2000       else
2001         return CC_NOOVmode;
2002     }
2003   else
2004     {
2005       if (TARGET_ARCH64 && GET_MODE (x) == DImode)
2006         return CCXmode;
2007       else
2008         return CCmode;
2009     }
2010 }
2011
2012 /* Emit the compare insn and return the CC reg for a CODE comparison
2013    with operands X and Y.  */
2014
2015 static rtx
2016 gen_compare_reg_1 (enum rtx_code code, rtx x, rtx y)
2017 {
2018   enum machine_mode mode;
2019   rtx cc_reg;
2020
2021   if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
2022     return x;
2023
2024   mode = SELECT_CC_MODE (code, x, y);
2025
2026   /* ??? We don't have movcc patterns so we cannot generate pseudo regs for the
2027      fcc regs (cse can't tell they're really call clobbered regs and will
2028      remove a duplicate comparison even if there is an intervening function
2029      call - it will then try to reload the cc reg via an int reg which is why
2030      we need the movcc patterns).  It is possible to provide the movcc
2031      patterns by using the ldxfsr/stxfsr v9 insns.  I tried it: you need two
2032      registers (say %g1,%g5) and it takes about 6 insns.  A better fix would be
2033      to tell cse that CCFPE mode registers (even pseudos) are call
2034      clobbered.  */
2035
2036   /* ??? This is an experiment.  Rather than making changes to cse which may
2037      or may not be easy/clean, we do our own cse.  This is possible because
2038      we will generate hard registers.  Cse knows they're call clobbered (it
2039      doesn't know the same thing about pseudos). If we guess wrong, no big
2040      deal, but if we win, great!  */
2041
2042   if (TARGET_V9 && GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2043 #if 1 /* experiment */
2044     {
2045       int reg;
2046       /* We cycle through the registers to ensure they're all exercised.  */
2047       static int next_fcc_reg = 0;
2048       /* Previous x,y for each fcc reg.  */
2049       static rtx prev_args[4][2];
2050
2051       /* Scan prev_args for x,y.  */
2052       for (reg = 0; reg < 4; reg++)
2053         if (prev_args[reg][0] == x && prev_args[reg][1] == y)
2054           break;
2055       if (reg == 4)
2056         {
2057           reg = next_fcc_reg;
2058           prev_args[reg][0] = x;
2059           prev_args[reg][1] = y;
2060           next_fcc_reg = (next_fcc_reg + 1) & 3;
2061         }
2062       cc_reg = gen_rtx_REG (mode, reg + SPARC_FIRST_V9_FCC_REG);
2063     }
2064 #else
2065     cc_reg = gen_reg_rtx (mode);
2066 #endif /* ! experiment */
2067   else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2068     cc_reg = gen_rtx_REG (mode, SPARC_FCC_REG);
2069   else
2070     cc_reg = gen_rtx_REG (mode, SPARC_ICC_REG);
2071
2072   /* We shouldn't get there for TFmode if !TARGET_HARD_QUAD.  If we do, this
2073      will only result in an unrecognizable insn so no point in asserting.  */
2074   emit_insn (gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y)));
2075
2076   return cc_reg;
2077 }
2078
2079
2080 /* Emit the compare insn and return the CC reg for the comparison in CMP.  */
2081
2082 rtx
2083 gen_compare_reg (rtx cmp)
2084 {
2085   return gen_compare_reg_1 (GET_CODE (cmp), XEXP (cmp, 0), XEXP (cmp, 1));
2086 }
2087
2088 /* This function is used for v9 only.
2089    DEST is the target of the Scc insn.
2090    CODE is the code for an Scc's comparison.
2091    X and Y are the values we compare.
2092
2093    This function is needed to turn
2094
2095            (set (reg:SI 110)
2096                (gt (reg:CCX 100 %icc)
2097                    (const_int 0)))
2098    into
2099            (set (reg:SI 110)
2100                (gt:DI (reg:CCX 100 %icc)
2101                    (const_int 0)))
2102
2103    IE: The instruction recognizer needs to see the mode of the comparison to
2104    find the right instruction. We could use "gt:DI" right in the
2105    define_expand, but leaving it out allows us to handle DI, SI, etc.  */
2106
2107 static int
2108 gen_v9_scc (rtx dest, enum rtx_code compare_code, rtx x, rtx y)
2109 {
2110   if (! TARGET_ARCH64
2111       && (GET_MODE (x) == DImode
2112           || GET_MODE (dest) == DImode))
2113     return 0;
2114
2115   /* Try to use the movrCC insns.  */
2116   if (TARGET_ARCH64
2117       && GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
2118       && y == const0_rtx
2119       && v9_regcmp_p (compare_code))
2120     {
2121       rtx op0 = x;
2122       rtx temp;
2123
2124       /* Special case for op0 != 0.  This can be done with one instruction if
2125          dest == x.  */
2126
2127       if (compare_code == NE
2128           && GET_MODE (dest) == DImode
2129           && rtx_equal_p (op0, dest))
2130         {
2131           emit_insn (gen_rtx_SET (VOIDmode, dest,
2132                               gen_rtx_IF_THEN_ELSE (DImode,
2133                                        gen_rtx_fmt_ee (compare_code, DImode,
2134                                                        op0, const0_rtx),
2135                                        const1_rtx,
2136                                        dest)));
2137           return 1;
2138         }
2139
2140       if (reg_overlap_mentioned_p (dest, op0))
2141         {
2142           /* Handle the case where dest == x.
2143              We "early clobber" the result.  */
2144           op0 = gen_reg_rtx (GET_MODE (x));
2145           emit_move_insn (op0, x);
2146         }
2147
2148       emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
2149       if (GET_MODE (op0) != DImode)
2150         {
2151           temp = gen_reg_rtx (DImode);
2152           convert_move (temp, op0, 0);
2153         }
2154       else
2155         temp = op0;
2156       emit_insn (gen_rtx_SET (VOIDmode, dest,
2157                           gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
2158                                    gen_rtx_fmt_ee (compare_code, DImode,
2159                                                    temp, const0_rtx),
2160                                    const1_rtx,
2161                                    dest)));
2162       return 1;
2163     }
2164   else
2165     {
2166       x = gen_compare_reg_1 (compare_code, x, y);
2167       y = const0_rtx;
2168
2169       gcc_assert (GET_MODE (x) != CC_NOOVmode
2170                   && GET_MODE (x) != CCX_NOOVmode);
2171
2172       emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
2173       emit_insn (gen_rtx_SET (VOIDmode, dest,
2174                           gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
2175                                    gen_rtx_fmt_ee (compare_code,
2176                                                    GET_MODE (x), x, y),
2177                                     const1_rtx, dest)));
2178       return 1;
2179     }
2180 }
2181
2182
2183 /* Emit an scc insn.  For seq, sne, sgeu, and sltu, we can do this
2184    without jumps using the addx/subx instructions.  */
2185
2186 bool
2187 emit_scc_insn (rtx operands[])
2188 {
2189   rtx tem;
2190   rtx x;
2191   rtx y;
2192   enum rtx_code code;
2193
2194   /* The quad-word fp compare library routines all return nonzero to indicate
2195      true, which is different from the equivalent libgcc routines, so we must
2196      handle them specially here.  */
2197   if (GET_MODE (operands[2]) == TFmode && ! TARGET_HARD_QUAD)
2198     {
2199       operands[1] = sparc_emit_float_lib_cmp (operands[2], operands[3],
2200                                               GET_CODE (operands[1]));
2201       operands[2] = XEXP (operands[1], 0);
2202       operands[3] = XEXP (operands[1], 1);
2203     }
2204
2205   code = GET_CODE (operands[1]);
2206   x = operands[2];
2207   y = operands[3];
2208
2209   /* For seq/sne on v9 we use the same code as v8 (the addx/subx method has
2210      more applications).  The exception to this is "reg != 0" which can
2211      be done in one instruction on v9 (so we do it).  */
2212   if (code == EQ)
2213     {
2214       if (GET_MODE (x) == SImode)
2215         {
2216           rtx pat = gen_seqsi_special (operands[0], x, y);
2217           emit_insn (pat);
2218           return true;
2219         }
2220       else if (GET_MODE (x) == DImode)
2221         {
2222           rtx pat = gen_seqdi_special (operands[0], x, y);
2223           emit_insn (pat);
2224           return true;
2225         }
2226     }
2227
2228   if (code == NE)
2229     {
2230       if (GET_MODE (x) == SImode)
2231         {
2232           rtx pat = gen_snesi_special (operands[0], x, y);
2233           emit_insn (pat);
2234           return true;
2235         }
2236       else if (GET_MODE (x) == DImode)
2237         {
2238           rtx pat = gen_snedi_special (operands[0], x, y);
2239           emit_insn (pat);
2240           return true;
2241         }
2242     }
2243
2244   /* For the rest, on v9 we can use conditional moves.  */
2245
2246   if (TARGET_V9)
2247     {
2248       if (gen_v9_scc (operands[0], code, x, y))
2249         return true;
2250     }
2251
2252   /* We can do LTU and GEU using the addx/subx instructions too.  And
2253      for GTU/LEU, if both operands are registers swap them and fall
2254      back to the easy case.  */
2255   if (code == GTU || code == LEU)
2256     {
2257       if ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
2258           && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG))
2259         {
2260           tem = x;
2261           x = y;
2262           y = tem;
2263           code = swap_condition (code);
2264         }
2265     }
2266
2267   if (code == LTU || code == GEU)
2268     {
2269       emit_insn (gen_rtx_SET (VOIDmode, operands[0],
2270                               gen_rtx_fmt_ee (code, SImode,
2271                                               gen_compare_reg_1 (code, x, y),
2272                                               const0_rtx)));
2273       return true;
2274     }
2275
2276   /* Nope, do branches.  */
2277   return false;
2278 }
2279
2280 /* Emit a conditional jump insn for the v9 architecture using comparison code
2281    CODE and jump target LABEL.
2282    This function exists to take advantage of the v9 brxx insns.  */
2283
2284 static void
2285 emit_v9_brxx_insn (enum rtx_code code, rtx op0, rtx label)
2286 {
2287   emit_jump_insn (gen_rtx_SET (VOIDmode,
2288                            pc_rtx,
2289                            gen_rtx_IF_THEN_ELSE (VOIDmode,
2290                                     gen_rtx_fmt_ee (code, GET_MODE (op0),
2291                                                     op0, const0_rtx),
2292                                     gen_rtx_LABEL_REF (VOIDmode, label),
2293                                     pc_rtx)));
2294 }
2295
2296 void
2297 emit_conditional_branch_insn (rtx operands[])
2298 {
2299   /* The quad-word fp compare library routines all return nonzero to indicate
2300      true, which is different from the equivalent libgcc routines, so we must
2301      handle them specially here.  */
2302   if (GET_MODE (operands[1]) == TFmode && ! TARGET_HARD_QUAD)
2303     {
2304       operands[0] = sparc_emit_float_lib_cmp (operands[1], operands[2],
2305                                               GET_CODE (operands[0]));
2306       operands[1] = XEXP (operands[0], 0);
2307       operands[2] = XEXP (operands[0], 1);
2308     }
2309
2310   if (TARGET_ARCH64 && operands[2] == const0_rtx
2311       && GET_CODE (operands[1]) == REG
2312       && GET_MODE (operands[1]) == DImode)
2313     {
2314       emit_v9_brxx_insn (GET_CODE (operands[0]), operands[1], operands[3]);
2315       return;
2316     }
2317
2318   operands[1] = gen_compare_reg (operands[0]);
2319   operands[2] = const0_rtx;
2320   operands[0] = gen_rtx_fmt_ee (GET_CODE (operands[0]), VOIDmode,
2321                                 operands[1], operands[2]);
2322   emit_jump_insn (gen_cbranchcc4 (operands[0], operands[1], operands[2],
2323                                   operands[3]));
2324 }
2325
2326
2327 /* Generate a DFmode part of a hard TFmode register.
2328    REG is the TFmode hard register, LOW is 1 for the
2329    low 64bit of the register and 0 otherwise.
2330  */
2331 rtx
2332 gen_df_reg (rtx reg, int low)
2333 {
2334   int regno = REGNO (reg);
2335
2336   if ((WORDS_BIG_ENDIAN == 0) ^ (low != 0))
2337     regno += (TARGET_ARCH64 && regno < 32) ? 1 : 2;
2338   return gen_rtx_REG (DFmode, regno);
2339 }
2340 \f
2341 /* Generate a call to FUNC with OPERANDS.  Operand 0 is the return value.
2342    Unlike normal calls, TFmode operands are passed by reference.  It is
2343    assumed that no more than 3 operands are required.  */
2344
2345 static void
2346 emit_soft_tfmode_libcall (const char *func_name, int nargs, rtx *operands)
2347 {
2348   rtx ret_slot = NULL, arg[3], func_sym;
2349   int i;
2350
2351   /* We only expect to be called for conversions, unary, and binary ops.  */
2352   gcc_assert (nargs == 2 || nargs == 3);
2353
2354   for (i = 0; i < nargs; ++i)
2355     {
2356       rtx this_arg = operands[i];
2357       rtx this_slot;
2358
2359       /* TFmode arguments and return values are passed by reference.  */
2360       if (GET_MODE (this_arg) == TFmode)
2361         {
2362           int force_stack_temp;
2363
2364           force_stack_temp = 0;
2365           if (TARGET_BUGGY_QP_LIB && i == 0)
2366             force_stack_temp = 1;
2367
2368           if (GET_CODE (this_arg) == MEM
2369               && ! force_stack_temp)
2370             this_arg = XEXP (this_arg, 0);
2371           else if (CONSTANT_P (this_arg)
2372                    && ! force_stack_temp)
2373             {
2374               this_slot = force_const_mem (TFmode, this_arg);
2375               this_arg = XEXP (this_slot, 0);
2376             }
2377           else
2378             {
2379               this_slot = assign_stack_temp (TFmode, GET_MODE_SIZE (TFmode), 0);
2380
2381               /* Operand 0 is the return value.  We'll copy it out later.  */
2382               if (i > 0)
2383                 emit_move_insn (this_slot, this_arg);
2384               else
2385                 ret_slot = this_slot;
2386
2387               this_arg = XEXP (this_slot, 0);
2388             }
2389         }
2390
2391       arg[i] = this_arg;
2392     }
2393
2394   func_sym = gen_rtx_SYMBOL_REF (Pmode, func_name);
2395
2396   if (GET_MODE (operands[0]) == TFmode)
2397     {
2398       if (nargs == 2)
2399         emit_library_call (func_sym, LCT_NORMAL, VOIDmode, 2,
2400                            arg[0], GET_MODE (arg[0]),
2401                            arg[1], GET_MODE (arg[1]));
2402       else
2403         emit_library_call (func_sym, LCT_NORMAL, VOIDmode, 3,
2404                            arg[0], GET_MODE (arg[0]),
2405                            arg[1], GET_MODE (arg[1]),
2406                            arg[2], GET_MODE (arg[2]));
2407
2408       if (ret_slot)
2409         emit_move_insn (operands[0], ret_slot);
2410     }
2411   else
2412     {
2413       rtx ret;
2414
2415       gcc_assert (nargs == 2);
2416
2417       ret = emit_library_call_value (func_sym, operands[0], LCT_NORMAL,
2418                                      GET_MODE (operands[0]), 1,
2419                                      arg[1], GET_MODE (arg[1]));
2420
2421       if (ret != operands[0])
2422         emit_move_insn (operands[0], ret);
2423     }
2424 }
2425
2426 /* Expand soft-float TFmode calls to sparc abi routines.  */
2427
2428 static void
2429 emit_soft_tfmode_binop (enum rtx_code code, rtx *operands)
2430 {
2431   const char *func;
2432
2433   switch (code)
2434     {
2435     case PLUS:
2436       func = "_Qp_add";
2437       break;
2438     case MINUS:
2439       func = "_Qp_sub";
2440       break;
2441     case MULT:
2442       func = "_Qp_mul";
2443       break;
2444     case DIV:
2445       func = "_Qp_div";
2446       break;
2447     default:
2448       gcc_unreachable ();
2449     }
2450
2451   emit_soft_tfmode_libcall (func, 3, operands);
2452 }
2453
2454 static void
2455 emit_soft_tfmode_unop (enum rtx_code code, rtx *operands)
2456 {
2457   const char *func;
2458
2459   gcc_assert (code == SQRT);
2460   func = "_Qp_sqrt";
2461
2462   emit_soft_tfmode_libcall (func, 2, operands);
2463 }
2464
2465 static void
2466 emit_soft_tfmode_cvt (enum rtx_code code, rtx *operands)
2467 {
2468   const char *func;
2469
2470   switch (code)
2471     {
2472     case FLOAT_EXTEND:
2473       switch (GET_MODE (operands[1]))
2474         {
2475         case SFmode:
2476           func = "_Qp_stoq";
2477           break;
2478         case DFmode:
2479           func = "_Qp_dtoq";
2480           break;
2481         default:
2482           gcc_unreachable ();
2483         }
2484       break;
2485
2486     case FLOAT_TRUNCATE:
2487       switch (GET_MODE (operands[0]))
2488         {
2489         case SFmode:
2490           func = "_Qp_qtos";
2491           break;
2492         case DFmode:
2493           func = "_Qp_qtod";
2494           break;
2495         default:
2496           gcc_unreachable ();
2497         }
2498       break;
2499
2500     case FLOAT:
2501       switch (GET_MODE (operands[1]))
2502         {
2503         case SImode:
2504           func = "_Qp_itoq";
2505           if (TARGET_ARCH64)
2506             operands[1] = gen_rtx_SIGN_EXTEND (DImode, operands[1]);
2507           break;
2508         case DImode:
2509           func = "_Qp_xtoq";
2510           break;
2511         default:
2512           gcc_unreachable ();
2513         }
2514       break;
2515
2516     case UNSIGNED_FLOAT:
2517       switch (GET_MODE (operands[1]))
2518         {
2519         case SImode:
2520           func = "_Qp_uitoq";
2521           if (TARGET_ARCH64)
2522             operands[1] = gen_rtx_ZERO_EXTEND (DImode, operands[1]);
2523           break;
2524         case DImode:
2525           func = "_Qp_uxtoq";
2526           break;
2527         default:
2528           gcc_unreachable ();
2529         }
2530       break;
2531
2532     case FIX:
2533       switch (GET_MODE (operands[0]))
2534         {
2535         case SImode:
2536           func = "_Qp_qtoi";
2537           break;
2538         case DImode:
2539           func = "_Qp_qtox";
2540           break;
2541         default:
2542           gcc_unreachable ();
2543         }
2544       break;
2545
2546     case UNSIGNED_FIX:
2547       switch (GET_MODE (operands[0]))
2548         {
2549         case SImode:
2550           func = "_Qp_qtoui";
2551           break;
2552         case DImode:
2553           func = "_Qp_qtoux";
2554           break;
2555         default:
2556           gcc_unreachable ();
2557         }
2558       break;
2559
2560     default:
2561       gcc_unreachable ();
2562     }
2563
2564   emit_soft_tfmode_libcall (func, 2, operands);
2565 }
2566
2567 /* Expand a hard-float tfmode operation.  All arguments must be in
2568    registers.  */
2569
2570 static void
2571 emit_hard_tfmode_operation (enum rtx_code code, rtx *operands)
2572 {
2573   rtx op, dest;
2574
2575   if (GET_RTX_CLASS (code) == RTX_UNARY)
2576     {
2577       operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
2578       op = gen_rtx_fmt_e (code, GET_MODE (operands[0]), operands[1]);
2579     }
2580   else
2581     {
2582       operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
2583       operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
2584       op = gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
2585                            operands[1], operands[2]);
2586     }
2587
2588   if (register_operand (operands[0], VOIDmode))
2589     dest = operands[0];
2590   else
2591     dest = gen_reg_rtx (GET_MODE (operands[0]));
2592
2593   emit_insn (gen_rtx_SET (VOIDmode, dest, op));
2594
2595   if (dest != operands[0])
2596     emit_move_insn (operands[0], dest);
2597 }
2598
2599 void
2600 emit_tfmode_binop (enum rtx_code code, rtx *operands)
2601 {
2602   if (TARGET_HARD_QUAD)
2603     emit_hard_tfmode_operation (code, operands);
2604   else
2605     emit_soft_tfmode_binop (code, operands);
2606 }
2607
2608 void
2609 emit_tfmode_unop (enum rtx_code code, rtx *operands)
2610 {
2611   if (TARGET_HARD_QUAD)
2612     emit_hard_tfmode_operation (code, operands);
2613   else
2614     emit_soft_tfmode_unop (code, operands);
2615 }
2616
2617 void
2618 emit_tfmode_cvt (enum rtx_code code, rtx *operands)
2619 {
2620   if (TARGET_HARD_QUAD)
2621     emit_hard_tfmode_operation (code, operands);
2622   else
2623     emit_soft_tfmode_cvt (code, operands);
2624 }
2625 \f
2626 /* Return nonzero if a branch/jump/call instruction will be emitting
2627    nop into its delay slot.  */
2628
2629 int
2630 empty_delay_slot (rtx insn)
2631 {
2632   rtx seq;
2633
2634   /* If no previous instruction (should not happen), return true.  */
2635   if (PREV_INSN (insn) == NULL)
2636     return 1;
2637
2638   seq = NEXT_INSN (PREV_INSN (insn));
2639   if (GET_CODE (PATTERN (seq)) == SEQUENCE)
2640     return 0;
2641
2642   return 1;
2643 }
2644
2645 /* Return nonzero if TRIAL can go into the call delay slot.  */
2646
2647 int
2648 tls_call_delay (rtx trial)
2649 {
2650   rtx pat;
2651
2652   /* Binutils allows
2653        call __tls_get_addr, %tgd_call (foo)
2654         add %l7, %o0, %o0, %tgd_add (foo)
2655      while Sun as/ld does not.  */
2656   if (TARGET_GNU_TLS || !TARGET_TLS)
2657     return 1;
2658
2659   pat = PATTERN (trial);
2660
2661   /* We must reject tgd_add{32|64}, i.e.
2662        (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSGD)))
2663      and tldm_add{32|64}, i.e.
2664        (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSLDM)))
2665      for Sun as/ld.  */
2666   if (GET_CODE (pat) == SET
2667       && GET_CODE (SET_SRC (pat)) == PLUS)
2668     {
2669       rtx unspec = XEXP (SET_SRC (pat), 1);
2670
2671       if (GET_CODE (unspec) == UNSPEC
2672           && (XINT (unspec, 1) == UNSPEC_TLSGD
2673               || XINT (unspec, 1) == UNSPEC_TLSLDM))
2674         return 0;
2675     }
2676
2677   return 1;
2678 }
2679
2680 /* Return nonzero if TRIAL, an insn, can be combined with a 'restore'
2681    instruction.  RETURN_P is true if the v9 variant 'return' is to be
2682    considered in the test too.
2683
2684    TRIAL must be a SET whose destination is a REG appropriate for the
2685    'restore' instruction or, if RETURN_P is true, for the 'return'
2686    instruction.  */
2687
2688 static int
2689 eligible_for_restore_insn (rtx trial, bool return_p)
2690 {
2691   rtx pat = PATTERN (trial);
2692   rtx src = SET_SRC (pat);
2693
2694   /* The 'restore src,%g0,dest' pattern for word mode and below.  */
2695   if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
2696       && arith_operand (src, GET_MODE (src)))
2697     {
2698       if (TARGET_ARCH64)
2699         return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
2700       else
2701         return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (SImode);
2702     }
2703
2704   /* The 'restore src,%g0,dest' pattern for double-word mode.  */
2705   else if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
2706            && arith_double_operand (src, GET_MODE (src)))
2707     return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
2708
2709   /* The 'restore src,%g0,dest' pattern for float if no FPU.  */
2710   else if (! TARGET_FPU && register_operand (src, SFmode))
2711     return 1;
2712
2713   /* The 'restore src,%g0,dest' pattern for double if no FPU.  */
2714   else if (! TARGET_FPU && TARGET_ARCH64 && register_operand (src, DFmode))
2715     return 1;
2716
2717   /* If we have the 'return' instruction, anything that does not use
2718      local or output registers and can go into a delay slot wins.  */
2719   else if (return_p && TARGET_V9 && ! epilogue_renumber (&pat, 1)
2720            && (get_attr_in_uncond_branch_delay (trial)
2721                == IN_UNCOND_BRANCH_DELAY_TRUE))
2722     return 1;
2723
2724   /* The 'restore src1,src2,dest' pattern for SImode.  */
2725   else if (GET_CODE (src) == PLUS
2726            && register_operand (XEXP (src, 0), SImode)
2727            && arith_operand (XEXP (src, 1), SImode))
2728     return 1;
2729
2730   /* The 'restore src1,src2,dest' pattern for DImode.  */
2731   else if (GET_CODE (src) == PLUS
2732            && register_operand (XEXP (src, 0), DImode)
2733            && arith_double_operand (XEXP (src, 1), DImode))
2734     return 1;
2735
2736   /* The 'restore src1,%lo(src2),dest' pattern.  */
2737   else if (GET_CODE (src) == LO_SUM
2738            && ! TARGET_CM_MEDMID
2739            && ((register_operand (XEXP (src, 0), SImode)
2740                 && immediate_operand (XEXP (src, 1), SImode))
2741                || (TARGET_ARCH64
2742                    && register_operand (XEXP (src, 0), DImode)
2743                    && immediate_operand (XEXP (src, 1), DImode))))
2744     return 1;
2745
2746   /* The 'restore src,src,dest' pattern.  */
2747   else if (GET_CODE (src) == ASHIFT
2748            && (register_operand (XEXP (src, 0), SImode)
2749                || register_operand (XEXP (src, 0), DImode))
2750            && XEXP (src, 1) == const1_rtx)
2751     return 1;
2752
2753   return 0;
2754 }
2755
2756 /* Return nonzero if TRIAL can go into the function return's
2757    delay slot.  */
2758
2759 int
2760 eligible_for_return_delay (rtx trial)
2761 {
2762   rtx pat;
2763
2764   if (GET_CODE (trial) != INSN || GET_CODE (PATTERN (trial)) != SET)
2765     return 0;
2766
2767   if (get_attr_length (trial) != 1)
2768     return 0;
2769
2770   /* If there are any call-saved registers, we should scan TRIAL if it
2771      does not reference them.  For now just make it easy.  */
2772   if (num_gfregs)
2773     return 0;
2774
2775   /* If the function uses __builtin_eh_return, the eh_return machinery
2776      occupies the delay slot.  */
2777   if (crtl->calls_eh_return)
2778     return 0;
2779
2780   /* In the case of a true leaf function, anything can go into the slot.  */
2781   if (sparc_leaf_function_p)
2782     return get_attr_in_uncond_branch_delay (trial)
2783            == IN_UNCOND_BRANCH_DELAY_TRUE;
2784
2785   pat = PATTERN (trial);
2786
2787   /* Otherwise, only operations which can be done in tandem with
2788      a `restore' or `return' insn can go into the delay slot.  */
2789   if (GET_CODE (SET_DEST (pat)) != REG
2790       || (REGNO (SET_DEST (pat)) >= 8 && REGNO (SET_DEST (pat)) < 24))
2791     return 0;
2792
2793   /* If this instruction sets up floating point register and we have a return
2794      instruction, it can probably go in.  But restore will not work
2795      with FP_REGS.  */
2796   if (REGNO (SET_DEST (pat)) >= 32)
2797     return (TARGET_V9
2798             && ! epilogue_renumber (&pat, 1)
2799             && (get_attr_in_uncond_branch_delay (trial)
2800                 == IN_UNCOND_BRANCH_DELAY_TRUE));
2801
2802   return eligible_for_restore_insn (trial, true);
2803 }
2804
2805 /* Return nonzero if TRIAL can go into the sibling call's
2806    delay slot.  */
2807
2808 int
2809 eligible_for_sibcall_delay (rtx trial)
2810 {
2811   rtx pat;
2812
2813   if (GET_CODE (trial) != INSN || GET_CODE (PATTERN (trial)) != SET)
2814     return 0;
2815
2816   if (get_attr_length (trial) != 1)
2817     return 0;
2818
2819   pat = PATTERN (trial);
2820
2821   if (sparc_leaf_function_p)
2822     {
2823       /* If the tail call is done using the call instruction,
2824          we have to restore %o7 in the delay slot.  */
2825       if (LEAF_SIBCALL_SLOT_RESERVED_P)
2826         return 0;
2827
2828       /* %g1 is used to build the function address */
2829       if (reg_mentioned_p (gen_rtx_REG (Pmode, 1), pat))
2830         return 0;
2831
2832       return 1;
2833     }
2834
2835   /* Otherwise, only operations which can be done in tandem with
2836      a `restore' insn can go into the delay slot.  */
2837   if (GET_CODE (SET_DEST (pat)) != REG
2838       || (REGNO (SET_DEST (pat)) >= 8 && REGNO (SET_DEST (pat)) < 24)
2839       || REGNO (SET_DEST (pat)) >= 32)
2840     return 0;
2841
2842   /* If it mentions %o7, it can't go in, because sibcall will clobber it
2843      in most cases.  */
2844   if (reg_mentioned_p (gen_rtx_REG (Pmode, 15), pat))
2845     return 0;
2846
2847   return eligible_for_restore_insn (trial, false);
2848 }
2849
/* Return 1 if the address recorded for UID1 minus the address recorded
   for UID2 lies within [-1023, 1022], and 0 otherwise.  */

int
short_branch (int uid1, int uid2)
{
  int distance = INSN_ADDRESSES (uid1) - INSN_ADDRESSES (uid2);

  /* Leave a few words of "slop".  */
  return (distance >= -1023 && distance <= 1022) ? 1 : 0;
}
2861
2862 /* Return nonzero if REG is not used after INSN.
2863    We assume REG is a reload reg, and therefore does
2864    not live past labels or calls or jumps.  */
2865 int
2866 reg_unused_after (rtx reg, rtx insn)
2867 {
2868   enum rtx_code code, prev_code = UNKNOWN;
2869
2870   while ((insn = NEXT_INSN (insn)))
2871     {
2872       if (prev_code == CALL_INSN && call_used_regs[REGNO (reg)])
2873         return 1;
2874
2875       code = GET_CODE (insn);
2876       if (GET_CODE (insn) == CODE_LABEL)
2877         return 1;
2878
2879       if (INSN_P (insn))
2880         {
2881           rtx set = single_set (insn);
2882           int in_src = set && reg_overlap_mentioned_p (reg, SET_SRC (set));
2883           if (set && in_src)
2884             return 0;
2885           if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
2886             return 1;
2887           if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
2888             return 0;
2889         }
2890       prev_code = code;
2891     }
2892   return 1;
2893 }
2894 \f
2895 /* Determine if it's legal to put X into the constant pool.  This
2896    is not possible if X contains the address of a symbol that is
2897    not constant (TLS) or not known at final link time (PIC).  */
2898
2899 static bool
2900 sparc_cannot_force_const_mem (rtx x)
2901 {
2902   switch (GET_CODE (x))
2903     {
2904     case CONST_INT:
2905     case CONST_DOUBLE:
2906     case CONST_VECTOR:
2907       /* Accept all non-symbolic constants.  */
2908       return false;
2909
2910     case LABEL_REF:
2911       /* Labels are OK iff we are non-PIC.  */
2912       return flag_pic != 0;
2913
2914     case SYMBOL_REF:
2915       /* 'Naked' TLS symbol references are never OK,
2916          non-TLS symbols are OK iff we are non-PIC.  */
2917       if (SYMBOL_REF_TLS_MODEL (x))
2918         return true;
2919       else
2920         return flag_pic != 0;
2921
2922     case CONST:
2923       return sparc_cannot_force_const_mem (XEXP (x, 0));
2924     case PLUS:
2925     case MINUS:
2926       return sparc_cannot_force_const_mem (XEXP (x, 0))
2927          || sparc_cannot_force_const_mem (XEXP (x, 1));
2928     case UNSPEC:
2929       return true;
2930     default:
2931       gcc_unreachable ();
2932     }
2933 }
2934 \f
/* PIC support.  */

/* NOTE(review): pic_helper_needed and pic_helper_symbol are not
   referenced in this part of the file; confirm their roles at their
   uses before relying on them.  */
static GTY(()) bool pic_helper_needed = false;
static GTY(()) rtx pic_helper_symbol;

/* SYMBOL_REF for "_GLOBAL_OFFSET_TABLE_".  sparc_tls_got creates it on
   demand when it is still unset, and check_pic compares operands
   against it.  */
static GTY(()) rtx global_offset_table;
2939
2940 /* Ensure that we are not using patterns that are not OK with PIC.  */
2941
2942 int
2943 check_pic (int i)
2944 {
2945   switch (flag_pic)
2946     {
2947     case 1:
2948       gcc_assert (GET_CODE (recog_data.operand[i]) != SYMBOL_REF
2949                   && (GET_CODE (recog_data.operand[i]) != CONST
2950                   || (GET_CODE (XEXP (recog_data.operand[i], 0)) == MINUS
2951                       && (XEXP (XEXP (recog_data.operand[i], 0), 0)
2952                           == global_offset_table)
2953                       && (GET_CODE (XEXP (XEXP (recog_data.operand[i], 0), 1))
2954                           == CONST))));
2955     case 2:
2956     default:
2957       return 1;
2958     }
2959 }
2960
2961 /* Return true if X is an address which needs a temporary register when
2962    reloaded while generating PIC code.  */
2963
2964 int
2965 pic_address_needs_scratch (rtx x)
2966 {
2967   /* An address which is a symbolic plus a non SMALL_INT needs a temp reg.  */
2968   if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS
2969       && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
2970       && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
2971       && ! SMALL_INT (XEXP (XEXP (x, 0), 1)))
2972     return 1;
2973
2974   return 0;
2975 }
2976
2977 /* Determine if a given RTX is a valid constant.  We already know this
2978    satisfies CONSTANT_P.  */
2979
2980 bool
2981 legitimate_constant_p (rtx x)
2982 {
2983   switch (GET_CODE (x))
2984     {
2985     case CONST:
2986     case SYMBOL_REF:
2987       if (sparc_tls_referenced_p (x))
2988         return false;
2989       break;
2990
2991     case CONST_DOUBLE:
2992       if (GET_MODE (x) == VOIDmode)
2993         return true;
2994
2995       /* Floating point constants are generally not ok.
2996          The only exception is 0.0 in VIS.  */
2997       if (TARGET_VIS
2998           && SCALAR_FLOAT_MODE_P (GET_MODE (x))
2999           && const_zero_operand (x, GET_MODE (x)))
3000         return true;
3001
3002       return false;
3003
3004     case CONST_VECTOR:
3005       /* Vector constants are generally not ok.
3006          The only exception is 0 in VIS.  */
3007       if (TARGET_VIS
3008           && const_zero_operand (x, GET_MODE (x)))
3009         return true;
3010
3011       return false;
3012
3013     default:
3014       break;
3015     }
3016
3017   return true;
3018 }
3019
3020 /* Determine if a given RTX is a valid constant address.  */
3021
3022 bool
3023 constant_address_p (rtx x)
3024 {
3025   switch (GET_CODE (x))
3026     {
3027     case LABEL_REF:
3028     case CONST_INT:
3029     case HIGH:
3030       return true;
3031
3032     case CONST:
3033       if (flag_pic && pic_address_needs_scratch (x))
3034         return false;
3035       return legitimate_constant_p (x);
3036
3037     case SYMBOL_REF:
3038       return !flag_pic && legitimate_constant_p (x);
3039
3040     default:
3041       return false;
3042     }
3043 }
3044
3045 /* Nonzero if the constant value X is a legitimate general operand
3046    when generating PIC code.  It is given that flag_pic is on and
3047    that X satisfies CONSTANT_P or is a CONST_DOUBLE.  */
3048
3049 bool
3050 legitimate_pic_operand_p (rtx x)
3051 {
3052   if (pic_address_needs_scratch (x))
3053     return false;
3054   if (sparc_tls_referenced_p (x))
3055     return false;
3056   return true;
3057 }
3058
3059 /* Return nonzero if ADDR is a valid memory address.
3060    STRICT specifies whether strict register checking applies.  */
3061
3062 static bool
3063 sparc_legitimate_address_p (enum machine_mode mode, rtx addr, bool strict)
3064 {
3065   rtx rs1 = NULL, rs2 = NULL, imm1 = NULL;
3066
3067   if (REG_P (addr) || GET_CODE (addr) == SUBREG)
3068     rs1 = addr;
3069   else if (GET_CODE (addr) == PLUS)
3070     {
3071       rs1 = XEXP (addr, 0);
3072       rs2 = XEXP (addr, 1);
3073
3074       /* Canonicalize.  REG comes first, if there are no regs,
3075          LO_SUM comes first.  */
3076       if (!REG_P (rs1)
3077           && GET_CODE (rs1) != SUBREG
3078           && (REG_P (rs2)
3079               || GET_CODE (rs2) == SUBREG
3080               || (GET_CODE (rs2) == LO_SUM && GET_CODE (rs1) != LO_SUM)))
3081         {
3082           rs1 = XEXP (addr, 1);
3083           rs2 = XEXP (addr, 0);
3084         }
3085
3086       if ((flag_pic == 1
3087            && rs1 == pic_offset_table_rtx
3088            && !REG_P (rs2)
3089            && GET_CODE (rs2) != SUBREG
3090            && GET_CODE (rs2) != LO_SUM
3091            && GET_CODE (rs2) != MEM
3092            && !(GET_CODE (rs2) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs2))
3093            && (! symbolic_operand (rs2, VOIDmode) || mode == Pmode)
3094            && (GET_CODE (rs2) != CONST_INT || SMALL_INT (rs2)))
3095           || ((REG_P (rs1)
3096                || GET_CODE (rs1) == SUBREG)
3097               && RTX_OK_FOR_OFFSET_P (rs2)))
3098         {
3099           imm1 = rs2;
3100           rs2 = NULL;
3101         }
3102       else if ((REG_P (rs1) || GET_CODE (rs1) == SUBREG)
3103                && (REG_P (rs2) || GET_CODE (rs2) == SUBREG))
3104         {
3105           /* We prohibit REG + REG for TFmode when there are no quad move insns
3106              and we consequently need to split.  We do this because REG+REG
3107              is not an offsettable address.  If we get the situation in reload
3108              where source and destination of a movtf pattern are both MEMs with
3109              REG+REG address, then only one of them gets converted to an
3110              offsettable address.  */
3111           if (mode == TFmode
3112               && ! (TARGET_FPU && TARGET_ARCH64 && TARGET_HARD_QUAD))
3113             return 0;
3114
3115           /* We prohibit REG + REG on ARCH32 if not optimizing for
3116              DFmode/DImode because then mem_min_alignment is likely to be zero
3117              after reload and the  forced split would lack a matching splitter
3118              pattern.  */
3119           if (TARGET_ARCH32 && !optimize
3120               && (mode == DFmode || mode == DImode))
3121             return 0;
3122         }
3123       else if (USE_AS_OFFSETABLE_LO10
3124                && GET_CODE (rs1) == LO_SUM
3125                && TARGET_ARCH64
3126                && ! TARGET_CM_MEDMID
3127                && RTX_OK_FOR_OLO10_P (rs2))
3128         {
3129           rs2 = NULL;
3130           imm1 = XEXP (rs1, 1);
3131           rs1 = XEXP (rs1, 0);
3132           if (!CONSTANT_P (imm1)
3133               || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
3134             return 0;
3135         }
3136     }
3137   else if (GET_CODE (addr) == LO_SUM)
3138     {
3139       rs1 = XEXP (addr, 0);
3140       imm1 = XEXP (addr, 1);
3141
3142       if (!CONSTANT_P (imm1)
3143           || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
3144         return 0;
3145
3146       /* We can't allow TFmode in 32-bit mode, because an offset greater
3147          than the alignment (8) may cause the LO_SUM to overflow.  */
3148       if (mode == TFmode && TARGET_ARCH32)
3149         return 0;
3150     }
3151   else if (GET_CODE (addr) == CONST_INT && SMALL_INT (addr))
3152     return 1;
3153   else
3154     return 0;
3155
3156   if (GET_CODE (rs1) == SUBREG)
3157     rs1 = SUBREG_REG (rs1);
3158   if (!REG_P (rs1))
3159     return 0;
3160
3161   if (rs2)
3162     {
3163       if (GET_CODE (rs2) == SUBREG)
3164         rs2 = SUBREG_REG (rs2);
3165       if (!REG_P (rs2))
3166         return 0;
3167     }
3168
3169   if (strict)
3170     {
3171       if (!REGNO_OK_FOR_BASE_P (REGNO (rs1))
3172           || (rs2 && !REGNO_OK_FOR_BASE_P (REGNO (rs2))))
3173         return 0;
3174     }
3175   else
3176     {
3177       if ((REGNO (rs1) >= 32
3178            && REGNO (rs1) != FRAME_POINTER_REGNUM
3179            && REGNO (rs1) < FIRST_PSEUDO_REGISTER)
3180           || (rs2
3181               && (REGNO (rs2) >= 32
3182                   && REGNO (rs2) != FRAME_POINTER_REGNUM
3183                   && REGNO (rs2) < FIRST_PSEUDO_REGISTER)))
3184         return 0;
3185     }
3186   return 1;
3187 }
3188
3189 /* Construct the SYMBOL_REF for the tls_get_offset function.  */
3190
3191 static GTY(()) rtx sparc_tls_symbol;
3192
3193 static rtx
3194 sparc_tls_get_addr (void)
3195 {
3196   if (!sparc_tls_symbol)
3197     sparc_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_addr");
3198
3199   return sparc_tls_symbol;
3200 }
3201
3202 static rtx
3203 sparc_tls_got (void)
3204 {
3205   rtx temp;
3206   if (flag_pic)
3207     {
3208       crtl->uses_pic_offset_table = 1;
3209       return pic_offset_table_rtx;
3210     }
3211
3212   if (!global_offset_table)
3213     global_offset_table = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
3214   temp = gen_reg_rtx (Pmode);
3215   emit_move_insn (temp, global_offset_table);
3216   return temp;
3217 }
3218
3219 /* Return true if X contains a thread-local symbol.  */
3220
3221 static bool
3222 sparc_tls_referenced_p (rtx x)
3223 {
3224   if (!TARGET_HAVE_TLS)
3225     return false;
3226
3227   if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
3228     x = XEXP (XEXP (x, 0), 0);
3229
3230   if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x))
3231     return true;
3232
3233   /* That's all we handle in sparc_legitimize_tls_address for now.  */
3234   return false;
3235 }
3236
3237 /* ADDR contains a thread-local SYMBOL_REF.  Generate code to compute
3238    this (thread-local) address.  */
3239
3240 static rtx
3241 sparc_legitimize_tls_address (rtx addr)
3242 {
3243   rtx temp1, temp2, temp3, ret, o0, got, insn;
3244
3245   gcc_assert (can_create_pseudo_p ());
3246
3247   if (GET_CODE (addr) == SYMBOL_REF)
3248     switch (SYMBOL_REF_TLS_MODEL (addr))
3249       {
3250       case TLS_MODEL_GLOBAL_DYNAMIC:
3251         start_sequence ();
3252         temp1 = gen_reg_rtx (SImode);
3253         temp2 = gen_reg_rtx (SImode);
3254         ret = gen_reg_rtx (Pmode);
3255         o0 = gen_rtx_REG (Pmode, 8);
3256         got = sparc_tls_got ();
3257         emit_insn (gen_tgd_hi22 (temp1, addr));
3258         emit_insn (gen_tgd_lo10 (temp2, temp1, addr));
3259         if (TARGET_ARCH32)
3260           {
3261             emit_insn (gen_tgd_add32 (o0, got, temp2, addr));
3262             insn = emit_call_insn (gen_tgd_call32 (o0, sparc_tls_get_addr (),
3263                                                    addr, const1_rtx));
3264           }
3265         else
3266           {
3267             emit_insn (gen_tgd_add64 (o0, got, temp2, addr));
3268             insn = emit_call_insn (gen_tgd_call64 (o0, sparc_tls_get_addr (),
3269                                                    addr, const1_rtx));
3270           }
3271         CALL_INSN_FUNCTION_USAGE (insn)
3272           = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, o0),
3273                                CALL_INSN_FUNCTION_USAGE (insn));
3274         insn = get_insns ();
3275         end_sequence ();
3276         emit_libcall_block (insn, ret, o0, addr);
3277         break;
3278
3279       case TLS_MODEL_LOCAL_DYNAMIC:
3280         start_sequence ();
3281         temp1 = gen_reg_rtx (SImode);
3282         temp2 = gen_reg_rtx (SImode);
3283         temp3 = gen_reg_rtx (Pmode);
3284         ret = gen_reg_rtx (Pmode);
3285         o0 = gen_rtx_REG (Pmode, 8);
3286         got = sparc_tls_got ();
3287         emit_insn (gen_tldm_hi22 (temp1));
3288         emit_insn (gen_tldm_lo10 (temp2, temp1));
3289         if (TARGET_ARCH32)
3290           {
3291             emit_insn (gen_tldm_add32 (o0, got, temp2));
3292             insn = emit_call_insn (gen_tldm_call32 (o0, sparc_tls_get_addr (),
3293                                                     const1_rtx));
3294           }
3295         else
3296           {
3297             emit_insn (gen_tldm_add64 (o0, got, temp2));
3298             insn = emit_call_insn (gen_tldm_call64 (o0, sparc_tls_get_addr (),
3299                                                     const1_rtx));
3300           }
3301         CALL_INSN_FUNCTION_USAGE (insn)
3302           = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, o0),
3303                                CALL_INSN_FUNCTION_USAGE (insn));
3304         insn = get_insns ();
3305         end_sequence ();
3306         emit_libcall_block (insn, temp3, o0,
3307                             gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
3308                                             UNSPEC_TLSLD_BASE));
3309         temp1 = gen_reg_rtx (SImode);
3310         temp2 = gen_reg_rtx (SImode);
3311         emit_insn (gen_tldo_hix22 (temp1, addr));
3312         emit_insn (gen_tldo_lox10 (temp2, temp1, addr));
3313         if (TARGET_ARCH32)
3314           emit_insn (gen_tldo_add32 (ret, temp3, temp2, addr));
3315         else
3316           emit_insn (gen_tldo_add64 (ret, temp3, temp2, addr));
3317         break;
3318
3319       case TLS_MODEL_INITIAL_EXEC:
3320         temp1 = gen_reg_rtx (SImode);
3321         temp2 = gen_reg_rtx (SImode);
3322         temp3 = gen_reg_rtx (Pmode);
3323         got = sparc_tls_got ();
3324         emit_insn (gen_tie_hi22 (temp1, addr));
3325         emit_insn (gen_tie_lo10 (temp2, temp1, addr));
3326         if (TARGET_ARCH32)
3327           emit_insn (gen_tie_ld32 (temp3, got, temp2, addr));
3328         else
3329           emit_insn (gen_tie_ld64 (temp3, got, temp2, addr));
3330         if (TARGET_SUN_TLS)
3331           {
3332             ret = gen_reg_rtx (Pmode);
3333             if (TARGET_ARCH32)
3334               emit_insn (gen_tie_add32 (ret, gen_rtx_REG (Pmode, 7),
3335                                         temp3, addr));
3336             else
3337               emit_insn (gen_tie_add64 (ret, gen_rtx_REG (Pmode, 7),
3338                                         temp3, addr));
3339           }
3340         else
3341           ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp3);
3342         break;
3343
3344       case TLS_MODEL_LOCAL_EXEC:
3345         temp1 = gen_reg_rtx (Pmode);
3346         temp2 = gen_reg_rtx (Pmode);
3347         if (TARGET_ARCH32)
3348           {
3349             emit_insn (gen_tle_hix22_sp32 (temp1, addr));
3350             emit_insn (gen_tle_lox10_sp32 (temp2, temp1, addr));
3351           }
3352         else
3353           {
3354             emit_insn (gen_tle_hix22_sp64 (temp1, addr));
3355             emit_insn (gen_tle_lox10_sp64 (temp2, temp1, addr));
3356           }
3357         ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp2);
3358         break;
3359
3360       default:
3361         gcc_unreachable ();
3362       }
3363
3364   else if (GET_CODE (addr) == CONST)
3365     {
3366       rtx base, offset;
3367
3368       gcc_assert (GET_CODE (XEXP (addr, 0)) == PLUS);
3369
3370       base = sparc_legitimize_tls_address (XEXP (XEXP (addr, 0), 0));
3371       offset = XEXP (XEXP (addr, 0), 1);
3372
3373       base = force_operand (base, NULL_RTX);
3374       if (!(GET_CODE (offset) == CONST_INT && SMALL_INT (offset)))
3375         offset = force_reg (Pmode, offset);
3376       ret = gen_rtx_PLUS (Pmode, base, offset);
3377     }
3378
3379   else
3380     gcc_unreachable ();  /* for now ... */
3381
3382   return ret;
3383 }
3384
3385 /* Legitimize PIC addresses.  If the address is already position-independent,
3386    we return ORIG.  Newly generated position-independent addresses go into a
3387    reg.  This is REG if nonzero, otherwise we allocate register(s) as
3388    necessary.  */
3389
3390 static rtx
3391 sparc_legitimize_pic_address (rtx orig, rtx reg)
3392 {
3393   bool gotdata_op = false;
3394
3395   if (GET_CODE (orig) == SYMBOL_REF
3396       /* See the comment in sparc_expand_move.  */
3397       || (TARGET_VXWORKS_RTP && GET_CODE (orig) == LABEL_REF))
3398     {
3399       rtx pic_ref, address;
3400       rtx insn;
3401
3402       if (reg == 0)
3403         {
3404           gcc_assert (! reload_in_progress && ! reload_completed);
3405           reg = gen_reg_rtx (Pmode);
3406         }
3407
3408       if (flag_pic == 2)
3409         {
3410           /* If not during reload, allocate another temp reg here for loading
3411              in the address, so that these instructions can be optimized
3412              properly.  */
3413           rtx temp_reg = ((reload_in_progress || reload_completed)
3414                           ? reg : gen_reg_rtx (Pmode));
3415
3416           /* Must put the SYMBOL_REF inside an UNSPEC here so that cse
3417              won't get confused into thinking that these two instructions
3418              are loading in the true address of the symbol.  If in the
3419              future a PIC rtx exists, that should be used instead.  */
3420           if (TARGET_ARCH64)
3421             {
3422               emit_insn (gen_movdi_high_pic (temp_reg, orig));
3423               emit_insn (gen_movdi_lo_sum_pic (temp_reg, temp_reg, orig));
3424             }
3425           else
3426             {
3427               emit_insn (gen_movsi_high_pic (temp_reg, orig));
3428               emit_insn (gen_movsi_lo_sum_pic (temp_reg, temp_reg, orig));
3429             }
3430           address = temp_reg;
3431           gotdata_op = true;
3432         }
3433       else
3434         address = orig;
3435
3436       crtl->uses_pic_offset_table = 1;
3437       if (gotdata_op)
3438         {
3439           if (TARGET_ARCH64)
3440             insn = emit_insn (gen_movdi_pic_gotdata_op (reg,
3441                                                         pic_offset_table_rtx,
3442                                                         address, orig));
3443           else
3444             insn = emit_insn (gen_movsi_pic_gotdata_op (reg,
3445                                                         pic_offset_table_rtx,
3446                                                         address, orig));
3447         }
3448       else
3449         {
3450           pic_ref = gen_const_mem (Pmode,
3451                                    gen_rtx_PLUS (Pmode,
3452                                                  pic_offset_table_rtx, address));
3453           insn = emit_move_insn (reg, pic_ref);
3454         }
3455       /* Put a REG_EQUAL note on this insn, so that it can be optimized
3456          by loop.  */
3457       set_unique_reg_note (insn, REG_EQUAL, orig);
3458       return reg;
3459     }
3460   else if (GET_CODE (orig) == CONST)
3461     {
3462       rtx base, offset;
3463
3464       if (GET_CODE (XEXP (orig, 0)) == PLUS
3465           && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
3466         return orig;
3467
3468       if (reg == 0)
3469         {
3470           gcc_assert (! reload_in_progress && ! reload_completed);
3471           reg = gen_reg_rtx (Pmode);
3472         }
3473
3474       gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
3475       base = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 0), reg);
3476       offset = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
3477                                              base == reg ? NULL_RTX : reg);
3478
3479       if (GET_CODE (offset) == CONST_INT)
3480         {
3481           if (SMALL_INT (offset))
3482             return plus_constant (base, INTVAL (offset));
3483           else if (! reload_in_progress && ! reload_completed)
3484             offset = force_reg (Pmode, offset);
3485           else
3486             /* If we reach here, then something is seriously wrong.  */
3487             gcc_unreachable ();
3488         }
3489       return gen_rtx_PLUS (Pmode, base, offset);
3490     }
3491   else if (GET_CODE (orig) == LABEL_REF)
3492     /* ??? Why do we do this?  */
3493     /* Now movsi_pic_label_ref uses it, but we ought to be checking that
3494        the register is live instead, in case it is eliminated.  */
3495     crtl->uses_pic_offset_table = 1;
3496
3497   return orig;
3498 }
3499
3500 /* Try machine-dependent ways of modifying an illegitimate address X
3501    to be legitimate.  If we find one, return the new, valid address.
3502
3503    OLDX is the address as it was before break_out_memory_refs was called.
3504    In some cases it is useful to look at this to decide what needs to be done.
3505
3506    MODE is the mode of the operand pointed to by X.
3507
3508    On SPARC, change REG+N into REG+REG, and REG+(X*Y) into REG+REG.  */
3509
3510 static rtx
3511 sparc_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
3512                           enum machine_mode mode)
3513 {
3514   rtx orig_x = x;
3515
3516   if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT)
3517     x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
3518                       force_operand (XEXP (x, 0), NULL_RTX));
3519   if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == MULT)
3520     x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
3521                       force_operand (XEXP (x, 1), NULL_RTX));
3522   if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS)
3523     x = gen_rtx_PLUS (Pmode, force_operand (XEXP (x, 0), NULL_RTX),
3524                       XEXP (x, 1));
3525   if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == PLUS)
3526     x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
3527                       force_operand (XEXP (x, 1), NULL_RTX));
3528
3529   if (x != orig_x && sparc_legitimate_address_p (mode, x, FALSE))
3530     return x;
3531
3532   if (sparc_tls_referenced_p (x))
3533     x = sparc_legitimize_tls_address (x);
3534   else if (flag_pic)
3535     x = sparc_legitimize_pic_address (x, NULL_RTX);
3536   else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 1)))
3537     x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
3538                       copy_to_mode_reg (Pmode, XEXP (x, 1)));
3539   else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 0)))
3540     x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
3541                       copy_to_mode_reg (Pmode, XEXP (x, 0)));
3542   else if (GET_CODE (x) == SYMBOL_REF
3543            || GET_CODE (x) == CONST
3544            || GET_CODE (x) == LABEL_REF)
3545     x = copy_to_suggested_reg (x, NULL_RTX, Pmode);
3546
3547   return x;
3548 }
3549
3550 /* SPARC implementation of LEGITIMIZE_RELOAD_ADDRESS.  Returns a value to
3551    replace the input X, or the original X if no replacement is called for.
3552    The output parameter *WIN is 1 if the calling macro should goto WIN,
3553    0 if it should not.
3554
3555    For SPARC, we wish to handle addresses by splitting them into
3556    HIGH+LO_SUM pairs, retaining the LO_SUM in the memory reference.
3557    This cuts the number of extra insns by one.
3558
3559    Do nothing when generating PIC code and the address is a symbolic
3560    operand or requires a scratch register.  */
3561
3562 rtx
3563 sparc_legitimize_reload_address (rtx x, enum machine_mode mode,
3564                                  int opnum, int type,
3565                                  int ind_levels ATTRIBUTE_UNUSED, int *win)
3566 {
3567   /* Decompose SImode constants into HIGH+LO_SUM.  */
3568   if (CONSTANT_P (x)
3569       && (mode != TFmode || TARGET_ARCH64)
3570       && GET_MODE (x) == SImode
3571       && GET_CODE (x) != LO_SUM
3572       && GET_CODE (x) != HIGH
3573       && sparc_cmodel <= CM_MEDLOW
3574       && !(flag_pic
3575            && (symbolic_operand (x, Pmode) || pic_address_needs_scratch (x))))
3576     {
3577       x = gen_rtx_LO_SUM (GET_MODE (x), gen_rtx_HIGH (GET_MODE (x), x), x);
3578       push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
3579                    BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
3580                    opnum, (enum reload_type)type);
3581       *win = 1;
3582       return x;
3583     }
3584
3585   /* We have to recognize what we have already generated above.  */
3586   if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 0)) == HIGH)
3587     {
3588       push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
3589                    BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
3590                    opnum, (enum reload_type)type);
3591       *win = 1;
3592       return x;
3593     }
3594
3595   *win = 0;
3596   return x;
3597 }
3598
3599 /* Return true if ADDR (a legitimate address expression)
3600    has an effect that depends on the machine mode it is used for.
3601
3602    In PIC mode,
3603
3604       (mem:HI [%l7+a])
3605
3606    is not equivalent to
3607
3608       (mem:QI [%l7+a]) (mem:QI [%l7+a+1])
3609
3610    because [%l7+a+1] is interpreted as the address of (a+1).  */
3611
3612
3613 static bool
3614 sparc_mode_dependent_address_p (const_rtx addr)
3615 {
3616   if (flag_pic && GET_CODE (addr) == PLUS)
3617     {
3618       rtx op0 = XEXP (addr, 0);
3619       rtx op1 = XEXP (addr, 1);
3620       if (op0 == pic_offset_table_rtx
3621           && SYMBOLIC_CONST (op1))
3622         return true;
3623     }
3624
3625   return false;
3626 }
3627
3628 #ifdef HAVE_GAS_HIDDEN
3629 # define USE_HIDDEN_LINKONCE 1
3630 #else
3631 # define USE_HIDDEN_LINKONCE 0
3632 #endif
3633
3634 static void
3635 get_pc_thunk_name (char name[32], unsigned int regno)
3636 {
3637   const char *pic_name = reg_names[regno];
3638
3639   /* Skip the leading '%' as that cannot be used in a
3640      symbol name.  */
3641   pic_name += 1;
3642
3643   if (USE_HIDDEN_LINKONCE)
3644     sprintf (name, "__sparc_get_pc_thunk.%s", pic_name);
3645   else
3646     ASM_GENERATE_INTERNAL_LABEL (name, "LADDPC", regno);
3647 }
3648
3649 /* Emit code to load the PIC register.  */
3650
3651 static void
3652 load_pic_register (void)
3653 {
3654   int orig_flag_pic = flag_pic;
3655
3656   if (TARGET_VXWORKS_RTP)
3657     {
3658       emit_insn (gen_vxworks_load_got ());
3659       emit_use (pic_offset_table_rtx);
3660       return;
3661     }
3662
3663   /* If we haven't initialized the special PIC symbols, do so now.  */
3664   if (!pic_helper_needed)
3665     {
3666       char name[32];
3667
3668       pic_helper_needed = true;
3669
3670       get_pc_thunk_name (name, REGNO (pic_offset_table_rtx));
3671       pic_helper_symbol = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
3672
3673       global_offset_table = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
3674     }
3675
3676   flag_pic = 0;
3677   if (TARGET_ARCH64)
3678     emit_insn (gen_load_pcrel_symdi (pic_offset_table_rtx, global_offset_table,
3679                                      pic_helper_symbol));
3680   else
3681     emit_insn (gen_load_pcrel_symsi (pic_offset_table_rtx, global_offset_table,
3682                                      pic_helper_symbol));
3683   flag_pic = orig_flag_pic;
3684
3685   /* Need to emit this whether or not we obey regdecls,
3686      since setjmp/longjmp can cause life info to screw up.
3687      ??? In the case where we don't obey regdecls, this is not sufficient
3688      since we may not fall out the bottom.  */
3689   emit_use (pic_offset_table_rtx);
3690 }
3691
3692 /* Emit a call instruction with the pattern given by PAT.  ADDR is the
3693    address of the call target.  */
3694
3695 void
3696 sparc_emit_call_insn (rtx pat, rtx addr)
3697 {
3698   rtx insn;
3699
3700   insn = emit_call_insn (pat);
3701
3702   /* The PIC register is live on entry to VxWorks PIC PLT entries.  */
3703   if (TARGET_VXWORKS_RTP
3704       && flag_pic
3705       && GET_CODE (addr) == SYMBOL_REF
3706       && (SYMBOL_REF_DECL (addr)
3707           ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
3708           : !SYMBOL_REF_LOCAL_P (addr)))
3709     {
3710       use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
3711       crtl->uses_pic_offset_table = 1;
3712     }
3713 }
3714 \f
3715 /* Return 1 if RTX is a MEM which is known to be aligned to at
3716    least a DESIRED byte boundary.  */
3717
3718 int
3719 mem_min_alignment (rtx mem, int desired)
3720 {
3721   rtx addr, base, offset;
3722
3723   /* If it's not a MEM we can't accept it.  */
3724   if (GET_CODE (mem) != MEM)
3725     return 0;
3726
3727   /* Obviously...  */
3728   if (!TARGET_UNALIGNED_DOUBLES
3729       && MEM_ALIGN (mem) / BITS_PER_UNIT >= (unsigned)desired)
3730     return 1;
3731
3732   /* ??? The rest of the function predates MEM_ALIGN so
3733      there is probably a bit of redundancy.  */
3734   addr = XEXP (mem, 0);
3735   base = offset = NULL_RTX;
3736   if (GET_CODE (addr) == PLUS)
3737     {
3738       if (GET_CODE (XEXP (addr, 0)) == REG)
3739         {
3740           base = XEXP (addr, 0);
3741
3742           /* What we are saying here is that if the base
3743              REG is aligned properly, the compiler will make
3744              sure any REG based index upon it will be so
3745              as well.  */
3746           if (GET_CODE (XEXP (addr, 1)) == CONST_INT)
3747             offset = XEXP (addr, 1);
3748           else
3749             offset = const0_rtx;
3750         }
3751     }
3752   else if (GET_CODE (addr) == REG)
3753     {
3754       base = addr;
3755       offset = const0_rtx;
3756     }
3757
3758   if (base != NULL_RTX)
3759     {
3760       int regno = REGNO (base);
3761
3762       if (regno != HARD_FRAME_POINTER_REGNUM && regno != STACK_POINTER_REGNUM)
3763         {
3764           /* Check if the compiler has recorded some information
3765              about the alignment of the base REG.  If reload has
3766              completed, we already matched with proper alignments.
3767              If not running global_alloc, reload might give us
3768              unaligned pointer to local stack though.  */
3769           if (((cfun != 0
3770                 && REGNO_POINTER_ALIGN (regno) >= desired * BITS_PER_UNIT)
3771                || (optimize && reload_completed))
3772               && (INTVAL (offset) & (desired - 1)) == 0)
3773             return 1;
3774         }
3775       else
3776         {
3777           if (((INTVAL (offset) - SPARC_STACK_BIAS) & (desired - 1)) == 0)
3778             return 1;
3779         }
3780     }
3781   else if (! TARGET_UNALIGNED_DOUBLES
3782            || CONSTANT_P (addr)
3783            || GET_CODE (addr) == LO_SUM)
3784     {
3785       /* Anything else we know is properly aligned unless TARGET_UNALIGNED_DOUBLES
3786          is true, in which case we can only assume that an access is aligned if
3787          it is to a constant address, or the address involves a LO_SUM.  */
3788       return 1;
3789     }
3790
3791   /* An obviously unaligned address.  */
3792   return 0;
3793 }
3794
3795 \f
/* Vectors to keep interesting information about registers where it can easily
   be got.  We used to use the actual mode value as the bit number, but there
   are more than 32 modes now.  Instead we use two tables: one indexed by
   hard register number, and one indexed by mode.  */

/* The purpose of sparc_mode_class is to shrink the range of modes so that
   they all fit (as bit numbers) in a 32-bit word (again).  Each real mode is
   mapped into one sparc_mode_class mode.  The enumerators are used as bit
   positions by the *_MODES masks below, so their order is significant.  */

enum sparc_mode_class {
  S_MODE, D_MODE, T_MODE, O_MODE,
  SF_MODE, DF_MODE, TF_MODE, OF_MODE,
  CC_MODE, CCFP_MODE
};

/* Modes for single-word and smaller quantities.  */
#define S_MODES ((1 << (int) S_MODE) | (1 << (int) SF_MODE))

/* Modes for double-word and smaller quantities.  */
#define D_MODES (S_MODES | (1 << (int) D_MODE) | (1 << (int) DF_MODE))

/* Modes for quad-word and smaller quantities.  */
#define T_MODES (D_MODES | (1 << (int) T_MODE) | (1 << (int) TF_MODE))

/* Modes for 8-word and smaller quantities.  */
#define O_MODES (T_MODES | (1 << (int) O_MODE) | (1 << (int) OF_MODE))

/* Modes for single-float quantities.  We must allow any single word or
   smaller quantity.  This is because the fix/float conversion instructions
   take integer inputs/outputs from the float registers.  */
#define SF_MODES (S_MODES)

/* Modes for double-float and smaller quantities.  */
#define DF_MODES (D_MODES)

/* Modes for quad-float and smaller quantities.  Note that, unlike T_MODES,
   this mask does not contain the T_MODE bit.  */
#define TF_MODES (DF_MODES | (1 << (int) TF_MODE))

/* Modes for quad-float pairs and smaller quantities.  */
#define OF_MODES (TF_MODES | (1 << (int) OF_MODE))

/* Modes for double-float only quantities.  */
#define DF_MODES_NO_S ((1 << (int) D_MODE) | (1 << (int) DF_MODE))

/* Modes for quad-float and double-float only quantities.  */
#define TF_MODES_NO_S (DF_MODES_NO_S | (1 << (int) TF_MODE))

/* Modes for quad-float pairs and double-float only quantities.  */
#define OF_MODES_NO_S (TF_MODES_NO_S | (1 << (int) OF_MODE))

/* Modes for condition codes.  */
#define CC_MODES (1 << (int) CC_MODE)
#define CCFP_MODES (1 << (int) CCFP_MODE)
3849
3850 /* Value is 1 if register/mode pair is acceptable on sparc.
3851    The funny mixture of D and T modes is because integer operations
3852    do not specially operate on tetra quantities, so non-quad-aligned
3853    registers can hold quadword quantities (except %o4 and %i4 because
3854    they cross fixed registers).  */
3855
3856 /* This points to either the 32 bit or the 64 bit version.  */
3857 const int *hard_regno_mode_classes;
3858
3859 static const int hard_32bit_mode_classes[] = {
3860   S_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
3861   T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
3862   T_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
3863   T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
3864
3865   OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
3866   OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
3867   OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
3868   OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
3869
3870   /* FP regs f32 to f63.  Only the even numbered registers actually exist,
3871      and none can hold SFmode/SImode values.  */
3872   OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
3873   OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
3874   OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
3875   OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
3876
3877   /* %fcc[0123] */
3878   CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
3879
3880   /* %icc */
3881   CC_MODES
3882 };
3883
3884 static const int hard_64bit_mode_classes[] = {
3885   D_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
3886   O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
3887   T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
3888   O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
3889
3890   OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
3891   OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
3892   OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
3893   OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
3894
3895   /* FP regs f32 to f63.  Only the even numbered registers actually exist,
3896      and none can hold SFmode/SImode values.  */
3897   OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
3898   OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
3899   OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
3900   OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
3901
3902   /* %fcc[0123] */
3903   CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
3904
3905   /* %icc */
3906   CC_MODES
3907 };
3908
3909 int sparc_mode_class [NUM_MACHINE_MODES];
3910
3911 enum reg_class sparc_regno_reg_class[FIRST_PSEUDO_REGISTER];
3912
3913 static void
3914 sparc_init_modes (void)
3915 {
3916   int i;
3917
3918   for (i = 0; i < NUM_MACHINE_MODES; i++)
3919     {
3920       switch (GET_MODE_CLASS (i))
3921         {
3922         case MODE_INT:
3923         case MODE_PARTIAL_INT:
3924         case MODE_COMPLEX_INT:
3925           if (GET_MODE_SIZE (i) <= 4)
3926             sparc_mode_class[i] = 1 << (int) S_MODE;
3927           else if (GET_MODE_SIZE (i) == 8)
3928             sparc_mode_class[i] = 1 << (int) D_MODE;
3929           else if (GET_MODE_SIZE (i) == 16)
3930             sparc_mode_class[i] = 1 << (int) T_MODE;
3931           else if (GET_MODE_SIZE (i) == 32)
3932             sparc_mode_class[i] = 1 << (int) O_MODE;
3933           else
3934             sparc_mode_class[i] = 0;
3935           break;
3936         case MODE_VECTOR_INT:
3937           if (GET_MODE_SIZE (i) <= 4)
3938             sparc_mode_class[i] = 1 << (int)SF_MODE;
3939           else if (GET_MODE_SIZE (i) == 8)
3940             sparc_mode_class[i] = 1 << (int)DF_MODE;
3941           break;
3942         case MODE_FLOAT:
3943         case MODE_COMPLEX_FLOAT:
3944           if (GET_MODE_SIZE (i) <= 4)
3945             sparc_mode_class[i] = 1 << (int) SF_MODE;
3946           else if (GET_MODE_SIZE (i) == 8)
3947             sparc_mode_class[i] = 1 << (int) DF_MODE;
3948           else if (GET_MODE_SIZE (i) == 16)
3949             sparc_mode_class[i] = 1 << (int) TF_MODE;
3950           else if (GET_MODE_SIZE (i) == 32)
3951             sparc_mode_class[i] = 1 << (int) OF_MODE;
3952           else
3953             sparc_mode_class[i] = 0;
3954           break;
3955         case MODE_CC:
3956           if (i == (int) CCFPmode || i == (int) CCFPEmode)
3957             sparc_mode_class[i] = 1 << (int) CCFP_MODE;
3958           else
3959             sparc_mode_class[i] = 1 << (int) CC_MODE;
3960           break;
3961         default:
3962           sparc_mode_class[i] = 0;
3963           break;
3964         }
3965     }
3966
3967   if (TARGET_ARCH64)
3968     hard_regno_mode_classes = hard_64bit_mode_classes;
3969   else
3970     hard_regno_mode_classes = hard_32bit_mode_classes;
3971
3972   /* Initialize the array used by REGNO_REG_CLASS.  */
3973   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3974     {
3975       if (i < 16 && TARGET_V8PLUS)
3976         sparc_regno_reg_class[i] = I64_REGS;
3977       else if (i < 32 || i == FRAME_POINTER_REGNUM)
3978         sparc_regno_reg_class[i] = GENERAL_REGS;
3979       else if (i < 64)
3980         sparc_regno_reg_class[i] = FP_REGS;
3981       else if (i < 96)
3982         sparc_regno_reg_class[i] = EXTRA_FP_REGS;
3983       else if (i < 100)
3984         sparc_regno_reg_class[i] = FPCC_REGS;
3985       else
3986         sparc_regno_reg_class[i] = NO_REGS;
3987     }
3988 }
3989 \f
3990 /* Compute the frame size required by the function.  This function is called
3991    during the reload pass and also by sparc_expand_prologue.  */
3992
3993 HOST_WIDE_INT
3994 sparc_compute_frame_size (HOST_WIDE_INT size, int leaf_function_p)
3995 {
3996   int outgoing_args_size = (crtl->outgoing_args_size
3997                             + REG_PARM_STACK_SPACE (current_function_decl));
3998   int n_regs = 0;  /* N_REGS is the number of 4-byte regs saved thus far.  */
3999   int i;
4000
4001   if (TARGET_ARCH64)
4002     {
4003       for (i = 0; i < 8; i++)
4004         if (df_regs_ever_live_p (i) && ! call_used_regs[i])
4005           n_regs += 2;
4006     }
4007   else
4008     {
4009       for (i = 0; i < 8; i += 2)
4010         if ((df_regs_ever_live_p (i) && ! call_used_regs[i])
4011             || (df_regs_ever_live_p (i+1) && ! call_used_regs[i+1]))
4012           n_regs += 2;
4013     }
4014
4015   for (i = 32; i < (TARGET_V9 ? 96 : 64); i += 2)
4016     if ((df_regs_ever_live_p (i) && ! call_used_regs[i])
4017         || (df_regs_ever_live_p (i+1) && ! call_used_regs[i+1]))
4018       n_regs += 2;
4019
4020   /* Set up values for use in prologue and epilogue.  */
4021   num_gfregs = n_regs;
4022
4023   if (leaf_function_p
4024       && n_regs == 0
4025       && size == 0
4026       && crtl->outgoing_args_size == 0)
4027     actual_fsize = apparent_fsize = 0;
4028   else
4029     {
4030       /* We subtract STARTING_FRAME_OFFSET, remember it's negative.  */
4031       apparent_fsize = (size - STARTING_FRAME_OFFSET + 7) & -8;
4032       apparent_fsize += n_regs * 4;
4033       actual_fsize = apparent_fsize + ((outgoing_args_size + 7) & -8);
4034     }
4035
4036   /* Make sure nothing can clobber our register windows.
4037      If a SAVE must be done, or there is a stack-local variable,
4038      the register window area must be allocated.  */
4039   if (! leaf_function_p || size > 0)
4040     actual_fsize += FIRST_PARM_OFFSET (current_function_decl);
4041
4042   return SPARC_STACK_ALIGN (actual_fsize);
4043 }
4044
4045 /* Output any necessary .register pseudo-ops.  */
4046
4047 void
4048 sparc_output_scratch_registers (FILE *file ATTRIBUTE_UNUSED)
4049 {
4050 #ifdef HAVE_AS_REGISTER_PSEUDO_OP
4051   int i;
4052
4053   if (TARGET_ARCH32)
4054     return;
4055
4056   /* Check if %g[2367] were used without
4057      .register being printed for them already.  */
4058   for (i = 2; i < 8; i++)
4059     {
4060       if (df_regs_ever_live_p (i)
4061           && ! sparc_hard_reg_printed [i])
4062         {
4063           sparc_hard_reg_printed [i] = 1;
4064           /* %g7 is used as TLS base register, use #ignore
4065              for it instead of #scratch.  */
4066           fprintf (file, "\t.register\t%%g%d, #%s\n", i,
4067                    i == 7 ? "ignore" : "scratch");
4068         }
4069       if (i == 3) i = 5;
4070     }
4071 #endif
4072 }
4073
4074 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
4075
4076 #if PROBE_INTERVAL > 4096
4077 #error Cannot use indexed addressing mode for stack probing
4078 #endif
4079
4080 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
4081    inclusive.  These are offsets from the current stack pointer.
4082
4083    Note that we don't use the REG+REG addressing mode for the probes because
4084    of the stack bias in 64-bit mode.  And it doesn't really buy us anything
4085    so the advantages of having a single code win here.  */
4086
4087 static void
4088 sparc_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
4089 {
4090   rtx g1 = gen_rtx_REG (Pmode, 1);
4091
4092   /* See if we have a constant small number of probes to generate.  If so,
4093      that's the easy case.  */
4094   if (size <= PROBE_INTERVAL)
4095     {
4096       emit_move_insn (g1, GEN_INT (first));
4097       emit_insn (gen_rtx_SET (VOIDmode, g1,
4098                               gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
4099       emit_stack_probe (plus_constant (g1, -size));
4100     }
4101
4102   /* The run-time loop is made up of 10 insns in the generic case while the
4103      compile-time loop is made up of 4+2*(n-2) insns for n # of intervals.  */
4104   else if (size <= 5 * PROBE_INTERVAL)
4105     {
4106       HOST_WIDE_INT i;
4107
4108       emit_move_insn (g1, GEN_INT (first + PROBE_INTERVAL));
4109       emit_insn (gen_rtx_SET (VOIDmode, g1,
4110                               gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
4111       emit_stack_probe (g1);
4112
4113       /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
4114          it exceeds SIZE.  If only two probes are needed, this will not
4115          generate any code.  Then probe at FIRST + SIZE.  */
4116       for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
4117         {
4118           emit_insn (gen_rtx_SET (VOIDmode, g1,
4119                                   plus_constant (g1, -PROBE_INTERVAL)));
4120           emit_stack_probe (g1);
4121         }
4122
4123       emit_stack_probe (plus_constant (g1, (i - PROBE_INTERVAL) - size));
4124     }
4125
4126   /* Otherwise, do the same as above, but in a loop.  Note that we must be
4127      extra careful with variables wrapping around because we might be at
4128      the very top (or the very bottom) of the address space and we have
4129      to be able to handle this case properly; in particular, we use an
4130      equality test for the loop condition.  */
4131   else
4132     {
4133       HOST_WIDE_INT rounded_size;
4134       rtx g4 = gen_rtx_REG (Pmode, 4);
4135
4136       emit_move_insn (g1, GEN_INT (first));
4137
4138
4139       /* Step 1: round SIZE to the previous multiple of the interval.  */
4140
4141       rounded_size = size & -PROBE_INTERVAL;
4142       emit_move_insn (g4, GEN_INT (rounded_size));
4143
4144
4145       /* Step 2: compute initial and final value of the loop counter.  */
4146
4147       /* TEST_ADDR = SP + FIRST.  */
4148       emit_insn (gen_rtx_SET (VOIDmode, g1,
4149                               gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
4150
4151       /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE.  */
4152       emit_insn (gen_rtx_SET (VOIDmode, g4, gen_rtx_MINUS (Pmode, g1, g4)));
4153
4154
4155       /* Step 3: the loop
4156
4157          while (TEST_ADDR != LAST_ADDR)
4158            {
4159              TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
4160              probe at TEST_ADDR
4161            }
4162
4163          probes at FIRST + N * PROBE_INTERVAL for values of N from 1
4164          until it is equal to ROUNDED_SIZE.  */
4165
4166       if (TARGET_64BIT)
4167         emit_insn (gen_probe_stack_rangedi (g1, g1, g4));
4168       else
4169         emit_insn (gen_probe_stack_rangesi (g1, g1, g4));
4170
4171
4172       /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
4173          that SIZE is equal to ROUNDED_SIZE.  */
4174
4175       if (size != rounded_size)
4176         emit_stack_probe (plus_constant (g4, rounded_size - size));
4177     }
4178
4179   /* Make sure nothing is scheduled before we are done.  */
4180   emit_insn (gen_blockage ());
4181 }
4182
4183 /* Probe a range of stack addresses from REG1 to REG2 inclusive.  These are
4184    absolute addresses.  */
4185
4186 const char *
4187 output_probe_stack_range (rtx reg1, rtx reg2)
4188 {
4189   static int labelno = 0;
4190   char loop_lab[32], end_lab[32];
4191   rtx xops[2];
4192
4193   ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
4194   ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
4195
4196   ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
4197
4198    /* Jump to END_LAB if TEST_ADDR == LAST_ADDR.  */
4199   xops[0] = reg1;
4200   xops[1] = reg2;
4201   output_asm_insn ("cmp\t%0, %1", xops);
4202   if (TARGET_ARCH64)
4203     fputs ("\tbe,pn\t%xcc,", asm_out_file);
4204   else
4205     fputs ("\tbe\t", asm_out_file);
4206   assemble_name_raw (asm_out_file, end_lab);
4207   fputc ('\n', asm_out_file);
4208
4209   /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL.  */
4210   xops[1] = GEN_INT (-PROBE_INTERVAL);
4211   output_asm_insn (" add\t%0, %1, %0", xops);
4212
4213   /* Probe at TEST_ADDR and branch.  */
4214   if (TARGET_ARCH64)
4215     fputs ("\tba,pt\t%xcc,", asm_out_file);
4216   else
4217     fputs ("\tba\t", asm_out_file);
4218   assemble_name_raw (asm_out_file, loop_lab);
4219   fputc ('\n', asm_out_file);
4220   xops[1] = GEN_INT (SPARC_STACK_BIAS);
4221   output_asm_insn (" st\t%%g0, [%0+%1]", xops);
4222
4223   ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
4224
4225   return "";
4226 }
4227
4228 /* Save/restore call-saved registers from LOW to HIGH at BASE+OFFSET
4229    as needed.  LOW should be double-word aligned for 32-bit registers.
4230    Return the new OFFSET.  */
4231
4232 #define SORR_SAVE    0
4233 #define SORR_RESTORE 1
4234
4235 static int
4236 save_or_restore_regs (int low, int high, rtx base, int offset, int action)
4237 {
4238   rtx mem, insn;
4239   int i;
4240
4241   if (TARGET_ARCH64 && high <= 32)
4242     {
4243       for (i = low; i < high; i++)
4244         {
4245           if (df_regs_ever_live_p (i) && ! call_used_regs[i])
4246             {
4247               mem = gen_rtx_MEM (DImode, plus_constant (base, offset));
4248               set_mem_alias_set (mem, sparc_sr_alias_set);
4249               if (action == SORR_SAVE)
4250                 {
4251                   insn = emit_move_insn (mem, gen_rtx_REG (DImode, i));
4252                   RTX_FRAME_RELATED_P (insn) = 1;
4253                 }
4254               else  /* action == SORR_RESTORE */
4255                 emit_move_insn (gen_rtx_REG (DImode, i), mem);
4256               offset += 8;
4257             }
4258         }
4259     }
4260   else
4261     {
4262       for (i = low; i < high; i += 2)
4263         {
4264           bool reg0 = df_regs_ever_live_p (i) && ! call_used_regs[i];
4265           bool reg1 = df_regs_ever_live_p (i+1) && ! call_used_regs[i+1];
4266           enum machine_mode mode;
4267           int regno;
4268
4269           if (reg0 && reg1)
4270             {
4271               mode = i < 32 ? DImode : DFmode;
4272               regno = i;
4273             }
4274           else if (reg0)
4275             {
4276               mode = i < 32 ? SImode : SFmode;
4277               regno = i;
4278             }
4279           else if (reg1)
4280             {
4281               mode = i < 32 ? SImode : SFmode;
4282               regno = i + 1;
4283               offset += 4;
4284             }
4285           else
4286             continue;
4287
4288           mem = gen_rtx_MEM (mode, plus_constant (base, offset));
4289           set_mem_alias_set (mem, sparc_sr_alias_set);
4290           if (action == SORR_SAVE)
4291             {
4292               insn = emit_move_insn (mem, gen_rtx_REG (mode, regno));
4293               RTX_FRAME_RELATED_P (insn) = 1;
4294             }
4295           else  /* action == SORR_RESTORE */
4296             emit_move_insn (gen_rtx_REG (mode, regno), mem);
4297
4298           /* Always preserve double-word alignment.  */
4299           offset = (offset + 7) & -8;
4300         }
4301     }
4302
4303   return offset;
4304 }
4305
4306 /* Emit code to save call-saved registers.  */
4307
4308 static void
4309 emit_save_or_restore_regs (int action)
4310 {
4311   HOST_WIDE_INT offset;
4312   rtx base;
4313
4314   offset = frame_base_offset - apparent_fsize;
4315
4316   if (offset < -4096 || offset + num_gfregs * 4 > 4095)
4317     {
4318       /* ??? This might be optimized a little as %g1 might already have a
4319          value close enough that a single add insn will do.  */
4320       /* ??? Although, all of this is probably only a temporary fix
4321          because if %g1 can hold a function result, then
4322          sparc_expand_epilogue will lose (the result will be
4323          clobbered).  */
4324       base = gen_rtx_REG (Pmode, 1);
4325       emit_move_insn (base, GEN_INT (offset));
4326       emit_insn (gen_rtx_SET (VOIDmode,
4327                               base,
4328                               gen_rtx_PLUS (Pmode, frame_base_reg, base)));
4329       offset = 0;
4330     }
4331   else
4332     base = frame_base_reg;
4333
4334   offset = save_or_restore_regs (0, 8, base, offset, action);
4335   save_or_restore_regs (32, TARGET_V9 ? 96 : 64, base, offset, action);
4336 }
4337
4338 /* Generate a save_register_window insn.  */
4339
4340 static rtx
4341 gen_save_register_window (rtx increment)
4342 {
4343   if (TARGET_ARCH64)
4344     return gen_save_register_windowdi (increment);
4345   else
4346     return gen_save_register_windowsi (increment);
4347 }
4348
4349 /* Generate an increment for the stack pointer.  */
4350
4351 static rtx
4352 gen_stack_pointer_inc (rtx increment)
4353 {
4354   return gen_rtx_SET (VOIDmode,
4355                       stack_pointer_rtx,
4356                       gen_rtx_PLUS (Pmode,
4357                                     stack_pointer_rtx,
4358                                     increment));
4359 }
4360
4361 /* Generate a decrement for the stack pointer.  */
4362
4363 static rtx
4364 gen_stack_pointer_dec (rtx decrement)
4365 {
4366   return gen_rtx_SET (VOIDmode,
4367                       stack_pointer_rtx,
4368                       gen_rtx_MINUS (Pmode,
4369                                      stack_pointer_rtx,
4370                                      decrement));
4371 }
4372
4373 /* Expand the function prologue.  The prologue is responsible for reserving
4374    storage for the frame, saving the call-saved registers and loading the
4375    PIC register if needed.  */
4376
4377 void
4378 sparc_expand_prologue (void)
4379 {
4380   rtx insn;
4381   int i;
4382
4383   /* Compute a snapshot of current_function_uses_only_leaf_regs.  Relying
4384      on the final value of the flag means deferring the prologue/epilogue
4385      expansion until just before the second scheduling pass, which is too
4386      late to emit multiple epilogues or return insns.
4387
4388      Of course we are making the assumption that the value of the flag
4389      will not change between now and its final value.  Of the three parts
4390      of the formula, only the last one can reasonably vary.  Let's take a
4391      closer look, after assuming that the first two ones are set to true
4392      (otherwise the last value is effectively silenced).
4393
4394      If only_leaf_regs_used returns false, the global predicate will also
4395      be false so the actual frame size calculated below will be positive.
4396      As a consequence, the save_register_window insn will be emitted in
4397      the instruction stream; now this insn explicitly references %fp
4398      which is not a leaf register so only_leaf_regs_used will always
4399      return false subsequently.
4400
4401      If only_leaf_regs_used returns true, we hope that the subsequent
4402      optimization passes won't cause non-leaf registers to pop up.  For
4403      example, the regrename pass has special provisions to not rename to
4404      non-leaf registers in a leaf function.  */
4405   sparc_leaf_function_p
4406     = optimize > 0 && leaf_function_p () && only_leaf_regs_used ();
4407
4408   /* Need to use actual_fsize, since we are also allocating
4409      space for our callee (and our own register save area).  */
4410   actual_fsize
4411     = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);
4412
4413   /* Advertise that the data calculated just above are now valid.  */
4414   sparc_prologue_data_valid_p = true;
4415
4416   if (flag_stack_usage)
4417     current_function_static_stack_size = actual_fsize;
4418
4419   if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK && actual_fsize)
4420     sparc_emit_probe_stack_range (STACK_CHECK_PROTECT, actual_fsize);
4421
4422   if (sparc_leaf_function_p)
4423     {
4424       frame_base_reg = stack_pointer_rtx;
4425       frame_base_offset = actual_fsize + SPARC_STACK_BIAS;
4426     }
4427   else
4428     {
4429       frame_base_reg = hard_frame_pointer_rtx;
4430       frame_base_offset = SPARC_STACK_BIAS;
4431     }
4432
4433   if (actual_fsize == 0)
4434     /* do nothing.  */ ;
4435   else if (sparc_leaf_function_p)
4436     {
4437       if (actual_fsize <= 4096)
4438         insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-actual_fsize)));
4439       else if (actual_fsize <= 8192)
4440         {
4441           insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
4442           /* %sp is still the CFA register.  */
4443           RTX_FRAME_RELATED_P (insn) = 1;
4444           insn
4445             = emit_insn (gen_stack_pointer_inc (GEN_INT (4096-actual_fsize)));
4446         }
4447       else
4448         {
4449           rtx reg = gen_rtx_REG (Pmode, 1);
4450           emit_move_insn (reg, GEN_INT (-actual_fsize));
4451           insn = emit_insn (gen_stack_pointer_inc (reg));
4452           add_reg_note (insn, REG_FRAME_RELATED_EXPR,
4453                         gen_stack_pointer_inc (GEN_INT (-actual_fsize)));
4454         }
4455
4456       RTX_FRAME_RELATED_P (insn) = 1;
4457     }
4458   else
4459     {
4460       if (actual_fsize <= 4096)
4461         insn = emit_insn (gen_save_register_window (GEN_INT (-actual_fsize)));
4462       else if (actual_fsize <= 8192)
4463         {
4464           insn = emit_insn (gen_save_register_window (GEN_INT (-4096)));
4465           /* %sp is not the CFA register anymore.  */
4466           emit_insn (gen_stack_pointer_inc (GEN_INT (4096-actual_fsize)));
4467         }
4468       else
4469         {
4470           rtx reg = gen_rtx_REG (Pmode, 1);
4471           emit_move_insn (reg, GEN_INT (-actual_fsize));
4472           insn = emit_insn (gen_save_register_window (reg));
4473         }
4474
4475       RTX_FRAME_RELATED_P (insn) = 1;
4476       for (i=0; i < XVECLEN (PATTERN (insn), 0); i++)
4477         RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, i)) = 1;
4478     }
4479
4480   if (num_gfregs)
4481     emit_save_or_restore_regs (SORR_SAVE);
4482
4483   /* Load the PIC register if needed.  */
4484   if (flag_pic && crtl->uses_pic_offset_table)
4485     load_pic_register ();
4486 }
4487
4488 /* This function generates the assembly code for function entry, which boils
4489    down to emitting the necessary .register directives.  */
4490
4491 static void
4492 sparc_asm_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
4493 {
4494   /* Check that the assumption we made in sparc_expand_prologue is valid.  */
4495   gcc_assert (sparc_leaf_function_p == current_function_uses_only_leaf_regs);
4496
4497   sparc_output_scratch_registers (file);
4498 }
4499
4500 /* Expand the function epilogue, either normal or part of a sibcall.
4501    We emit all the instructions except the return or the call.  */
4502
4503 void
4504 sparc_expand_epilogue (void)
4505 {
4506   if (num_gfregs)
4507     emit_save_or_restore_regs (SORR_RESTORE);
4508
4509   if (actual_fsize == 0)
4510     /* do nothing.  */ ;
4511   else if (sparc_leaf_function_p)
4512     {
4513       if (actual_fsize <= 4096)
4514         emit_insn (gen_stack_pointer_dec (GEN_INT (- actual_fsize)));
4515       else if (actual_fsize <= 8192)
4516         {
4517           emit_insn (gen_stack_pointer_dec (GEN_INT (-4096)));
4518           emit_insn (gen_stack_pointer_dec (GEN_INT (4096 - actual_fsize)));
4519         }
4520       else
4521         {
4522           rtx reg = gen_rtx_REG (Pmode, 1);
4523           emit_move_insn (reg, GEN_INT (-actual_fsize));
4524           emit_insn (gen_stack_pointer_dec (reg));
4525         }
4526     }
4527 }
4528
4529 /* Return true if it is appropriate to emit `return' instructions in the
4530    body of a function.  */
4531
4532 bool
4533 sparc_can_use_return_insn_p (void)
4534 {
4535   return sparc_prologue_data_valid_p
4536          && (actual_fsize == 0 || !sparc_leaf_function_p);
4537 }
4538
4539 /* This function generates the assembly code for function exit.  */
4540
4541 static void
4542 sparc_asm_function_epilogue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
4543 {
4544   /* If the last two instructions of a function are "call foo; dslot;"
4545      the return address might point to the first instruction in the next
4546      function and we have to output a dummy nop for the sake of sane
4547      backtraces in such cases.  This is pointless for sibling calls since
4548      the return address is explicitly adjusted.  */
4549
4550   rtx insn, last_real_insn;
4551
4552   insn = get_last_insn ();
4553
4554   last_real_insn = prev_real_insn (insn);
4555   if (last_real_insn
4556       && GET_CODE (last_real_insn) == INSN
4557       && GET_CODE (PATTERN (last_real_insn)) == SEQUENCE)
4558     last_real_insn = XVECEXP (PATTERN (last_real_insn), 0, 0);
4559
4560   if (last_real_insn
4561       && CALL_P (last_real_insn)
4562       && !SIBLING_CALL_P (last_real_insn))
4563     fputs("\tnop\n", file);
4564
4565   sparc_output_deferred_case_vectors ();
4566 }
4567
4568 /* Output a 'restore' instruction.  */
4569
4570 static void
4571 output_restore (rtx pat)
4572 {
4573   rtx operands[3];
4574
4575   if (! pat)
4576     {
4577       fputs ("\t restore\n", asm_out_file);
4578       return;
4579     }
4580
4581   gcc_assert (GET_CODE (pat) == SET);
4582
4583   operands[0] = SET_DEST (pat);
4584   pat = SET_SRC (pat);
4585
4586   switch (GET_CODE (pat))
4587     {
4588       case PLUS:
4589         operands[1] = XEXP (pat, 0);
4590         operands[2] = XEXP (pat, 1);
4591         output_asm_insn (" restore %r1, %2, %Y0", operands);
4592         break;
4593       case LO_SUM:
4594         operands[1] = XEXP (pat, 0);
4595         operands[2] = XEXP (pat, 1);
4596         output_asm_insn (" restore %r1, %%lo(%a2), %Y0", operands);
4597         break;
4598       case ASHIFT:
4599         operands[1] = XEXP (pat, 0);
4600         gcc_assert (XEXP (pat, 1) == const1_rtx);
4601         output_asm_insn (" restore %r1, %r1, %Y0", operands);
4602         break;
4603       default:
4604         operands[1] = pat;
4605         output_asm_insn (" restore %%g0, %1, %Y0", operands);
4606         break;
4607     }
4608 }
4609
4610 /* Output a return.  */
4611
4612 const char *
4613 output_return (rtx insn)
4614 {
4615   if (sparc_leaf_function_p)
4616     {
4617       /* This is a leaf function so we don't have to bother restoring the
4618          register window, which frees us from dealing with the convoluted
4619          semantics of restore/return.  We simply output the jump to the
4620          return address and the insn in the delay slot (if any).  */
4621
4622       gcc_assert (! crtl->calls_eh_return);
4623
4624       return "jmp\t%%o7+%)%#";
4625     }
4626   else
4627     {
4628       /* This is a regular function so we have to restore the register window.
4629          We may have a pending insn for the delay slot, which will be either
4630          combined with the 'restore' instruction or put in the delay slot of
4631          the 'return' instruction.  */
4632
4633       if (crtl->calls_eh_return)
4634         {
4635           /* If the function uses __builtin_eh_return, the eh_return
4636              machinery occupies the delay slot.  */
4637           gcc_assert (! final_sequence);
4638
4639           if (! flag_delayed_branch)
4640             fputs ("\tadd\t%fp, %g1, %fp\n", asm_out_file);
4641
4642           if (TARGET_V9)
4643             fputs ("\treturn\t%i7+8\n", asm_out_file);
4644           else
4645             fputs ("\trestore\n\tjmp\t%o7+8\n", asm_out_file);
4646
4647           if (flag_delayed_branch)
4648             fputs ("\t add\t%sp, %g1, %sp\n", asm_out_file);
4649           else
4650             fputs ("\t nop\n", asm_out_file);
4651         }
4652       else if (final_sequence)
4653         {
4654           rtx delay, pat;
4655
4656           delay = NEXT_INSN (insn);
4657           gcc_assert (delay);
4658
4659           pat = PATTERN (delay);
4660
4661           if (TARGET_V9 && ! epilogue_renumber (&pat, 1))
4662             {
4663               epilogue_renumber (&pat, 0);
4664               return "return\t%%i7+%)%#";
4665             }
4666           else
4667             {
4668               output_asm_insn ("jmp\t%%i7+%)", NULL);
4669               output_restore (pat);
4670               PATTERN (delay) = gen_blockage ();
4671               INSN_CODE (delay) = -1;
4672             }
4673         }
4674       else
4675         {
4676           /* The delay slot is empty.  */
4677           if (TARGET_V9)
4678             return "return\t%%i7+%)\n\t nop";
4679           else if (flag_delayed_branch)
4680             return "jmp\t%%i7+%)\n\t restore";
4681           else
4682             return "restore\n\tjmp\t%%o7+%)\n\t nop";
4683         }
4684     }
4685
4686   return "";
4687 }
4688
4689 /* Output a sibling call.  */
4690
4691 const char *
4692 output_sibcall (rtx insn, rtx call_operand)
4693 {
4694   rtx operands[1];
4695
4696   gcc_assert (flag_delayed_branch);
4697
4698   operands[0] = call_operand;
4699
4700   if (sparc_leaf_function_p)
4701     {
4702       /* This is a leaf function so we don't have to bother restoring the
4703          register window.  We simply output the jump to the function and
4704          the insn in the delay slot (if any).  */
4705
4706       gcc_assert (!(LEAF_SIBCALL_SLOT_RESERVED_P && final_sequence));
4707
4708       if (final_sequence)
4709         output_asm_insn ("sethi\t%%hi(%a0), %%g1\n\tjmp\t%%g1 + %%lo(%a0)%#",
4710                          operands);
4711       else
4712         /* Use or with rs2 %%g0 instead of mov, so that as/ld can optimize
4713            it into branch if possible.  */
4714         output_asm_insn ("or\t%%o7, %%g0, %%g1\n\tcall\t%a0, 0\n\t or\t%%g1, %%g0, %%o7",
4715                          operands);
4716     }
4717   else
4718     {
4719       /* This is a regular function so we have to restore the register window.
4720          We may have a pending insn for the delay slot, which will be combined
4721          with the 'restore' instruction.  */
4722
4723       output_asm_insn ("call\t%a0, 0", operands);
4724
4725       if (final_sequence)
4726         {
4727           rtx delay = NEXT_INSN (insn);
4728           gcc_assert (delay);
4729
4730           output_restore (PATTERN (delay));
4731
4732           PATTERN (delay) = gen_blockage ();
4733           INSN_CODE (delay) = -1;
4734         }
4735       else
4736         output_restore (NULL_RTX);
4737     }
4738
4739   return "";
4740 }
4741 \f
4742 /* Functions for handling argument passing.
4743
4744    For 32-bit, the first 6 args are normally in registers and the rest are
4745    pushed.  Any arg that starts within the first 6 words is at least
4746    partially passed in a register unless its data type forbids.
4747
4748    For 64-bit, the argument registers are laid out as an array of 16 elements
4749    and arguments are added sequentially.  The first 6 int args and up to the
4750    first 16 fp args (depending on size) are passed in regs.
4751
4752    Slot    Stack   Integral   Float   Float in structure   Double   Long Double
4753    ----    -----   --------   -----   ------------------   ------   -----------
4754     15   [SP+248]              %f31       %f30,%f31         %d30
4755     14   [SP+240]              %f29       %f28,%f29         %d28       %q28
4756     13   [SP+232]              %f27       %f26,%f27         %d26
4757     12   [SP+224]              %f25       %f24,%f25         %d24       %q24
4758     11   [SP+216]              %f23       %f22,%f23         %d22
4759     10   [SP+208]              %f21       %f20,%f21         %d20       %q20
4760      9   [SP+200]              %f19       %f18,%f19         %d18
4761      8   [SP+192]              %f17       %f16,%f17         %d16       %q16
4762      7   [SP+184]              %f15       %f14,%f15         %d14
4763      6   [SP+176]              %f13       %f12,%f13         %d12       %q12
4764      5   [SP+168]     %o5      %f11       %f10,%f11         %d10
4765      4   [SP+160]     %o4       %f9        %f8,%f9           %d8        %q8
4766      3   [SP+152]     %o3       %f7        %f6,%f7           %d6
4767      2   [SP+144]     %o2       %f5        %f4,%f5           %d4        %q4
4768      1   [SP+136]     %o1       %f3        %f2,%f3           %d2
4769      0   [SP+128]     %o0       %f1        %f0,%f1           %d0        %q0
4770
4771    Here SP = %sp if -mno-stack-bias or %sp+stack_bias otherwise.
4772
4773    Integral arguments are always passed as 64-bit quantities appropriately
4774    extended.
4775
4776    Passing of floating point values is handled as follows.
4777    If a prototype is in scope:
4778      If the value is in a named argument (i.e. not a stdarg function or a
4779      value not part of the `...') then the value is passed in the appropriate
4780      fp reg.
4781      If the value is part of the `...' and is passed in one of the first 6
4782      slots then the value is passed in the appropriate int reg.
4783      If the value is part of the `...' and is not passed in one of the first 6
4784      slots then the value is passed in memory.
4785    If a prototype is not in scope:
4786      If the value is one of the first 6 arguments the value is passed in the
4787      appropriate integer reg and the appropriate fp reg.
4788      If the value is not one of the first 6 arguments the value is passed in
4789      the appropriate fp reg and in memory.
4790
4791
4792    Summary of the calling conventions implemented by GCC on the SPARC:
4793
4794    32-bit ABI:
4795                                 size      argument     return value
4796
4797       small integer              <4       int. reg.      int. reg.
4798       word                        4       int. reg.      int. reg.
4799       double word                 8       int. reg.      int. reg.
4800
4801       _Complex small integer     <8       int. reg.      int. reg.
4802       _Complex word               8       int. reg.      int. reg.
4803       _Complex double word       16        memory        int. reg.
4804
4805       vector integer            <=8       int. reg.       FP reg.
4806       vector integer             >8        memory         memory
4807
4808       float                       4       int. reg.       FP reg.
4809       double                      8       int. reg.       FP reg.
4810       long double                16        memory         memory
4811
4812       _Complex float              8        memory         FP reg.
4813       _Complex double            16        memory         FP reg.
4814       _Complex long double       32        memory         FP reg.
4815
4816       vector float              any        memory         memory
4817
4818       aggregate                 any        memory         memory
4819
4820
4821
4822     64-bit ABI:
4823                                 size      argument     return value
4824
4825       small integer              <8       int. reg.      int. reg.
4826       word                        8       int. reg.      int. reg.
4827       double word                16       int. reg.      int. reg.
4828
4829       _Complex small integer    <16       int. reg.      int. reg.
4830       _Complex word              16       int. reg.      int. reg.
4831       _Complex double word       32        memory        int. reg.
4832
4833       vector integer           <=16        FP reg.        FP reg.
4834       vector integer       16<s<=32        memory         FP reg.
4835       vector integer            >32        memory         memory
4836
4837       float                       4        FP reg.        FP reg.
4838       double                      8        FP reg.        FP reg.
4839       long double                16        FP reg.        FP reg.
4840
4841       _Complex float              8        FP reg.        FP reg.
4842       _Complex double            16        FP reg.        FP reg.
4843       _Complex long double       32        memory         FP reg.
4844
4845       vector float             <=16        FP reg.        FP reg.
4846       vector float         16<s<=32        memory         FP reg.
4847       vector float              >32        memory         memory
4848
4849       aggregate                <=16         reg.           reg.
4850       aggregate            16<s<=32        memory          reg.
4851       aggregate                 >32        memory         memory
4852
4853
4854
4855 Note #1: complex floating-point types follow the extended SPARC ABIs as
4856 implemented by the Sun compiler.
4857
4858 Note #2: integral vector types follow the scalar floating-point types
4859 conventions to match what is implemented by the Sun VIS SDK.
4860
4861 Note #3: floating-point vector types follow the aggregate types
4862 conventions.  */
4863
4864
/* Upper bound on the number of integer registers used for arguments.  */
#define SPARC_INT_ARG_MAX 6

/* Upper bound on the number of floating-point registers used for
   arguments.  */
#define SPARC_FP_ARG_MAX 16

/* Number of words needed to hold SIZE, rounded up.  SIZE is expressed in
   the same units as UNITS_PER_WORD.  */
#define ROUND_ADVANCE(SIZE) \
  (((SIZE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
4871
4872 /* Handle the INIT_CUMULATIVE_ARGS macro.
4873    Initialize a variable CUM of type CUMULATIVE_ARGS
4874    for a call to a function whose data type is FNTYPE.
4875    For a library call, FNTYPE is 0.  */
4876
4877 void
4878 init_cumulative_args (struct sparc_args *cum, tree fntype,
4879                       rtx libname ATTRIBUTE_UNUSED,
4880                       tree fndecl ATTRIBUTE_UNUSED)
4881 {
4882   cum->words = 0;
4883   cum->prototype_p = fntype && TYPE_ARG_TYPES (fntype);
4884   cum->libcall_p = fntype == 0;
4885 }
4886
4887 /* Handle the TARGET_PROMOTE_PROTOTYPES target hook.
4888    When a prototype says `char' or `short', really pass an `int'.  */
4889
4890 static bool
4891 sparc_promote_prototypes (const_tree fntype ATTRIBUTE_UNUSED)
4892 {
4893   return TARGET_ARCH32 ? true : false;
4894 }
4895
4896 /* Handle promotion of pointer and integer arguments.  */
4897
4898 static enum machine_mode
4899 sparc_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
4900                              enum machine_mode mode,
4901                              int *punsignedp ATTRIBUTE_UNUSED,
4902                              const_tree fntype ATTRIBUTE_UNUSED,
4903                              int for_return ATTRIBUTE_UNUSED)
4904 {
4905   if (POINTER_TYPE_P (type))
4906     {
4907       *punsignedp = POINTERS_EXTEND_UNSIGNED;
4908       return Pmode;
4909     }
4910
4911   /* For TARGET_ARCH64 we need this, as we don't have instructions
4912      for arithmetic operations which do zero/sign extension at the same time,
4913      so without this we end up with a srl/sra after every assignment to an
4914      user variable,  which means very very bad code.  */
4915   if (TARGET_ARCH64
4916       && GET_MODE_CLASS (mode) == MODE_INT
4917       && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
4918     return word_mode;
4919
4920   return mode;
4921 }
4922
4923 /* Handle the TARGET_STRICT_ARGUMENT_NAMING target hook.  */
4924
4925 static bool
4926 sparc_strict_argument_naming (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED)
4927 {
4928   return TARGET_ARCH64 ? true : false;
4929 }
4930
4931 /* Scan the record type TYPE and return the following predicates:
4932     - INTREGS_P: the record contains at least one field or sub-field
4933       that is eligible for promotion in integer registers.
4934     - FP_REGS_P: the record contains at least one field or sub-field
4935       that is eligible for promotion in floating-point registers.
4936     - PACKED_P: the record contains at least one field that is packed.
4937
4938    Sub-fields are not taken into account for the PACKED_P predicate.  */
4939
4940 static void
4941 scan_record_type (tree type, int *intregs_p, int *fpregs_p, int *packed_p)
4942 {
4943   tree field;
4944
4945   for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
4946     {
4947       if (TREE_CODE (field) == FIELD_DECL)
4948         {
4949           if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
4950             scan_record_type (TREE_TYPE (field), intregs_p, fpregs_p, 0);
4951           else if ((FLOAT_TYPE_P (TREE_TYPE (field))
4952                    || TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
4953                   && TARGET_FPU)
4954             *fpregs_p = 1;
4955           else
4956             *intregs_p = 1;
4957
4958           if (packed_p && DECL_PACKED (field))
4959             *packed_p = 1;
4960         }
4961     }
4962 }
4963
4964 /* Compute the slot number to pass an argument in.
4965    Return the slot number or -1 if passing on the stack.
4966
4967    CUM is a variable of type CUMULATIVE_ARGS which gives info about
4968     the preceding args and about the function being called.
4969    MODE is the argument's machine mode.
4970    TYPE is the data type of the argument (as a tree).
4971     This is null for libcalls where that information may
4972     not be available.
4973    NAMED is nonzero if this argument is a named parameter
4974     (otherwise it is an extra parameter matching an ellipsis).
4975    INCOMING_P is zero for FUNCTION_ARG, nonzero for FUNCTION_INCOMING_ARG.
4976    *PREGNO records the register number to use if scalar type.
4977    *PPADDING records the amount of padding needed in words.  */
4978
4979 static int
4980 function_arg_slotno (const struct sparc_args *cum, enum machine_mode mode,
4981                      tree type, int named, int incoming_p,
4982                      int *pregno, int *ppadding)
4983 {
4984   int regbase = (incoming_p
4985                  ? SPARC_INCOMING_INT_ARG_FIRST
4986                  : SPARC_OUTGOING_INT_ARG_FIRST);
4987   int slotno = cum->words;
4988   enum mode_class mclass;
4989   int regno;
4990
4991   *ppadding = 0;
4992
4993   if (type && TREE_ADDRESSABLE (type))
4994     return -1;
4995
4996   if (TARGET_ARCH32
4997       && mode == BLKmode
4998       && type
4999       && TYPE_ALIGN (type) % PARM_BOUNDARY != 0)
5000     return -1;
5001
5002   /* For SPARC64, objects requiring 16-byte alignment get it.  */
5003   if (TARGET_ARCH64
5004       && (type ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode)) >= 128
5005       && (slotno & 1) != 0)
5006     slotno++, *ppadding = 1;
5007
5008   mclass = GET_MODE_CLASS (mode);
5009   if (type && TREE_CODE (type) == VECTOR_TYPE)
5010     {
5011       /* Vector types deserve special treatment because they are
5012          polymorphic wrt their mode, depending upon whether VIS
5013          instructions are enabled.  */
5014       if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
5015         {
5016           /* The SPARC port defines no floating-point vector modes.  */
5017           gcc_assert (mode == BLKmode);
5018         }
5019       else
5020         {
5021           /* Integral vector types should either have a vector
5022              mode or an integral mode, because we are guaranteed
5023              by pass_by_reference that their size is not greater
5024              than 16 bytes and TImode is 16-byte wide.  */
5025           gcc_assert (mode != BLKmode);
5026
5027           /* Vector integers are handled like floats according to
5028              the Sun VIS SDK.  */
5029           mclass = MODE_FLOAT;
5030         }
5031     }
5032
5033   switch (mclass)
5034     {
5035     case MODE_FLOAT:
5036     case MODE_COMPLEX_FLOAT:
5037     case MODE_VECTOR_INT:
5038       if (TARGET_ARCH64 && TARGET_FPU && named)
5039         {
5040           if (slotno >= SPARC_FP_ARG_MAX)
5041             return -1;
5042           regno = SPARC_FP_ARG_FIRST + slotno * 2;
5043           /* Arguments filling only one single FP register are
5044              right-justified in the outer double FP register.  */
5045           if (GET_MODE_SIZE (mode) <= 4)
5046             regno++;
5047           break;
5048         }
5049       /* fallthrough */
5050
5051     case MODE_INT:
5052     case MODE_COMPLEX_INT:
5053       if (slotno >= SPARC_INT_ARG_MAX)
5054         return -1;
5055       regno = regbase + slotno;
5056       break;
5057
5058     case MODE_RANDOM:
5059       if (mode == VOIDmode)
5060         /* MODE is VOIDmode when generating the actual call.  */
5061         return -1;
5062
5063       gcc_assert (mode == BLKmode);
5064
5065       if (TARGET_ARCH32
5066           || !type
5067           || (TREE_CODE (type) != VECTOR_TYPE
5068               && TREE_CODE (type) != RECORD_TYPE))
5069         {
5070           if (slotno >= SPARC_INT_ARG_MAX)
5071             return -1;
5072           regno = regbase + slotno;
5073         }
5074       else  /* TARGET_ARCH64 && type */
5075         {
5076           int intregs_p = 0, fpregs_p = 0, packed_p = 0;
5077
5078           /* First see what kinds of registers we would need.  */
5079           if (TREE_CODE (type) == VECTOR_TYPE)
5080             fpregs_p = 1;
5081           else
5082             scan_record_type (type, &intregs_p, &fpregs_p, &packed_p);
5083
5084           /* The ABI obviously doesn't specify how packed structures
5085              are passed.  These are defined to be passed in int regs
5086              if possible, otherwise memory.  */
5087           if (packed_p || !named)
5088             fpregs_p = 0, intregs_p = 1;
5089
5090           /* If all arg slots are filled, then must pass on stack.  */
5091           if (fpregs_p && slotno >= SPARC_FP_ARG_MAX)
5092             return -1;
5093
5094           /* If there are only int args and all int arg slots are filled,
5095              then must pass on stack.  */
5096           if (!fpregs_p && intregs_p && slotno >= SPARC_INT_ARG_MAX)
5097             return -1;
5098
5099           /* Note that even if all int arg slots are filled, fp members may
5100              still be passed in regs if such regs are available.
5101              *PREGNO isn't set because there may be more than one, it's up
5102              to the caller to compute them.  */
5103           return slotno;
5104         }
5105       break;
5106
5107     default :
5108       gcc_unreachable ();
5109     }
5110
5111   *pregno = regno;
5112   return slotno;
5113 }
5114
5115 /* Handle recursive register counting for structure field layout.  */
5116
5117 struct function_arg_record_value_parms
5118 {
5119   rtx ret;              /* return expression being built.  */
5120   int slotno;           /* slot number of the argument.  */
5121   int named;            /* whether the argument is named.  */
5122   int regbase;          /* regno of the base register.  */
5123   int stack;            /* 1 if part of the argument is on the stack.  */
5124   int intoffset;        /* offset of the first pending integer field.  */
5125   unsigned int nregs;   /* number of words passed in registers.  */
5126 };
5127
5128 static void function_arg_record_value_3
5129  (HOST_WIDE_INT, struct function_arg_record_value_parms *);
5130 static void function_arg_record_value_2
5131  (const_tree, HOST_WIDE_INT, struct function_arg_record_value_parms *, bool);
5132 static void function_arg_record_value_1
5133  (const_tree, HOST_WIDE_INT, struct function_arg_record_value_parms *, bool);
5134 static rtx function_arg_record_value (const_tree, enum machine_mode, int, int, int);
5135 static rtx function_arg_union_value (int, enum machine_mode, int, int);
5136
5137 /* A subroutine of function_arg_record_value.  Traverse the structure
5138    recursively and determine how many registers will be required.  */
5139
5140 static void
5141 function_arg_record_value_1 (const_tree type, HOST_WIDE_INT startbitpos,
5142                              struct function_arg_record_value_parms *parms,
5143                              bool packed_p)
5144 {
5145   tree field;
5146
5147   /* We need to compute how many registers are needed so we can
5148      allocate the PARALLEL but before we can do that we need to know
5149      whether there are any packed fields.  The ABI obviously doesn't
5150      specify how structures are passed in this case, so they are
5151      defined to be passed in int regs if possible, otherwise memory,
5152      regardless of whether there are fp values present.  */
5153
5154   if (! packed_p)
5155     for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5156       {
5157         if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
5158           {
5159             packed_p = true;
5160             break;
5161           }
5162       }
5163
5164   /* Compute how many registers we need.  */
5165   for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5166     {
5167       if (TREE_CODE (field) == FIELD_DECL)
5168         {
5169           HOST_WIDE_INT bitpos = startbitpos;
5170
5171           if (DECL_SIZE (field) != 0)
5172             {
5173               if (integer_zerop (DECL_SIZE (field)))
5174                 continue;
5175
5176               if (host_integerp (bit_position (field), 1))
5177                 bitpos += int_bit_position (field);
5178             }
5179
5180           /* ??? FIXME: else assume zero offset.  */
5181
5182           if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
5183             function_arg_record_value_1 (TREE_TYPE (field),
5184                                          bitpos,
5185                                          parms,
5186                                          packed_p);
5187           else if ((FLOAT_TYPE_P (TREE_TYPE (field))
5188                     || TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
5189                    && TARGET_FPU
5190                    && parms->named
5191                    && ! packed_p)
5192             {
5193               if (parms->intoffset != -1)
5194                 {
5195                   unsigned int startbit, endbit;
5196                   int intslots, this_slotno;
5197
5198                   startbit = parms->intoffset & -BITS_PER_WORD;
5199                   endbit   = (bitpos + BITS_PER_WORD - 1) & -BITS_PER_WORD;
5200
5201                   intslots = (endbit - startbit) / BITS_PER_WORD;
5202                   this_slotno = parms->slotno + parms->intoffset
5203                     / BITS_PER_WORD;
5204
5205                   if (intslots > 0 && intslots > SPARC_INT_ARG_MAX - this_slotno)
5206                     {
5207                       intslots = MAX (0, SPARC_INT_ARG_MAX - this_slotno);
5208                       /* We need to pass this field on the stack.  */
5209                       parms->stack = 1;
5210                     }
5211
5212                   parms->nregs += intslots;
5213                   parms->intoffset = -1;
5214                 }
5215
5216               /* There's no need to check this_slotno < SPARC_FP_ARG MAX.
5217                  If it wasn't true we wouldn't be here.  */
5218               if (TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE
5219                   && DECL_MODE (field) == BLKmode)
5220                 parms->nregs += TYPE_VECTOR_SUBPARTS (TREE_TYPE (field));
5221               else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE)
5222                 parms->nregs += 2;
5223               else
5224                 parms->nregs += 1;
5225             }
5226           else
5227             {
5228               if (parms->intoffset == -1)
5229                 parms->intoffset = bitpos;
5230             }
5231         }
5232     }
5233 }
5234
5235 /* A subroutine of function_arg_record_value.  Assign the bits of the
5236    structure between parms->intoffset and bitpos to integer registers.  */
5237
5238 static void
5239 function_arg_record_value_3 (HOST_WIDE_INT bitpos,
5240                              struct function_arg_record_value_parms *parms)
5241 {
5242   enum machine_mode mode;
5243   unsigned int regno;
5244   unsigned int startbit, endbit;
5245   int this_slotno, intslots, intoffset;
5246   rtx reg;
5247
5248   if (parms->intoffset == -1)
5249     return;
5250
5251   intoffset = parms->intoffset;
5252   parms->intoffset = -1;
5253
5254   startbit = intoffset & -BITS_PER_WORD;
5255   endbit = (bitpos + BITS_PER_WORD - 1) & -BITS_PER_WORD;
5256   intslots = (endbit - startbit) / BITS_PER_WORD;
5257   this_slotno = parms->slotno + intoffset / BITS_PER_WORD;
5258
5259   intslots = MIN (intslots, SPARC_INT_ARG_MAX - this_slotno);
5260   if (intslots <= 0)
5261     return;
5262
5263   /* If this is the trailing part of a word, only load that much into
5264      the register.  Otherwise load the whole register.  Note that in
5265      the latter case we may pick up unwanted bits.  It's not a problem
5266      at the moment but may wish to revisit.  */
5267
5268   if (intoffset % BITS_PER_WORD != 0)
5269     mode = smallest_mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
5270                                    MODE_INT);
5271   else
5272     mode = word_mode;
5273
5274   intoffset /= BITS_PER_UNIT;
5275   do
5276     {
5277       regno = parms->regbase + this_slotno;
5278       reg = gen_rtx_REG (mode, regno);
5279       XVECEXP (parms->ret, 0, parms->stack + parms->nregs)
5280         = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
5281
5282       this_slotno += 1;
5283       intoffset = (intoffset | (UNITS_PER_WORD-1)) + 1;
5284       mode = word_mode;
5285       parms->nregs += 1;
5286       intslots -= 1;
5287     }
5288   while (intslots > 0);
5289 }
5290
5291 /* A subroutine of function_arg_record_value.  Traverse the structure
5292    recursively and assign bits to floating point registers.  Track which
5293    bits in between need integer registers; invoke function_arg_record_value_3
5294    to make that happen.  */
5295
5296 static void
5297 function_arg_record_value_2 (const_tree type, HOST_WIDE_INT startbitpos,
5298                              struct function_arg_record_value_parms *parms,
5299                              bool packed_p)
5300 {
5301   tree field;
5302
5303   if (! packed_p)
5304     for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5305       {
5306         if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
5307           {
5308             packed_p = true;
5309             break;
5310           }
5311       }
5312
5313   for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5314     {
5315       if (TREE_CODE (field) == FIELD_DECL)
5316         {
5317           HOST_WIDE_INT bitpos = startbitpos;
5318
5319           if (DECL_SIZE (field) != 0)
5320             {
5321               if (integer_zerop (DECL_SIZE (field)))
5322                 continue;
5323
5324               if (host_integerp (bit_position (field), 1))
5325                 bitpos += int_bit_position (field);
5326             }
5327
5328           /* ??? FIXME: else assume zero offset.  */
5329
5330           if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
5331             function_arg_record_value_2 (TREE_TYPE (field),
5332                                          bitpos,
5333                                          parms,
5334                                          packed_p);
5335           else if ((FLOAT_TYPE_P (TREE_TYPE (field))
5336                     || TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
5337                    && TARGET_FPU
5338                    && parms->named
5339                    && ! packed_p)
5340             {
5341               int this_slotno = parms->slotno + bitpos / BITS_PER_WORD;
5342               int regno, nregs, pos;
5343               enum machine_mode mode = DECL_MODE (field);
5344               rtx reg;
5345
5346               function_arg_record_value_3 (bitpos, parms);
5347
5348               if (TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE
5349                   && mode == BLKmode)
5350                 {
5351                   mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
5352                   nregs = TYPE_VECTOR_SUBPARTS (TREE_TYPE (field));
5353                 }
5354               else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE)
5355                 {
5356                   mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
5357                   nregs = 2;
5358                 }
5359               else
5360                 nregs = 1;
5361
5362               regno = SPARC_FP_ARG_FIRST + this_slotno * 2;
5363               if (GET_MODE_SIZE (mode) <= 4 && (bitpos & 32) != 0)
5364                 regno++;
5365               reg = gen_rtx_REG (mode, regno);
5366               pos = bitpos / BITS_PER_UNIT;
5367               XVECEXP (parms->ret, 0, parms->stack + parms->nregs)
5368                 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos));
5369               parms->nregs += 1;
5370               while (--nregs > 0)
5371                 {
5372                   regno += GET_MODE_SIZE (mode) / 4;
5373                   reg = gen_rtx_REG (mode, regno);
5374                   pos += GET_MODE_SIZE (mode);
5375                   XVECEXP (parms->ret, 0, parms->stack + parms->nregs)
5376                     = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos));
5377                   parms->nregs += 1;
5378                 }
5379             }
5380           else
5381             {
5382               if (parms->intoffset == -1)
5383                 parms->intoffset = bitpos;
5384             }
5385         }
5386     }
5387 }
5388
5389 /* Used by function_arg and sparc_function_value_1 to implement the complex
5390    conventions of the 64-bit ABI for passing and returning structures.
5391    Return an expression valid as a return value for the FUNCTION_ARG
5392    and TARGET_FUNCTION_VALUE.
5393
5394    TYPE is the data type of the argument (as a tree).
5395     This is null for libcalls where that information may
5396     not be available.
5397    MODE is the argument's machine mode.
5398    SLOTNO is the index number of the argument's slot in the parameter array.
5399    NAMED is nonzero if this argument is a named parameter
5400     (otherwise it is an extra parameter matching an ellipsis).
5401    REGBASE is the regno of the base register for the parameter array.  */
5402
5403 static rtx
5404 function_arg_record_value (const_tree type, enum machine_mode mode,
5405                            int slotno, int named, int regbase)
5406 {
5407   HOST_WIDE_INT typesize = int_size_in_bytes (type);
5408   struct function_arg_record_value_parms parms;
5409   unsigned int nregs;
5410
5411   parms.ret = NULL_RTX;
5412   parms.slotno = slotno;
5413   parms.named = named;
5414   parms.regbase = regbase;
5415   parms.stack = 0;
5416
5417   /* Compute how many registers we need.  */
5418   parms.nregs = 0;
5419   parms.intoffset = 0;
5420   function_arg_record_value_1 (type, 0, &parms, false);
5421
5422   /* Take into account pending integer fields.  */
5423   if (parms.intoffset != -1)
5424     {
5425       unsigned int startbit, endbit;
5426       int intslots, this_slotno;
5427
5428       startbit = parms.intoffset & -BITS_PER_WORD;
5429       endbit = (typesize*BITS_PER_UNIT + BITS_PER_WORD - 1) & -BITS_PER_WORD;
5430       intslots = (endbit - startbit) / BITS_PER_WORD;
5431       this_slotno = slotno + parms.intoffset / BITS_PER_WORD;
5432
5433       if (intslots > 0 && intslots > SPARC_INT_ARG_MAX - this_slotno)
5434         {
5435           intslots = MAX (0, SPARC_INT_ARG_MAX - this_slotno);
5436           /* We need to pass this field on the stack.  */
5437           parms.stack = 1;
5438         }
5439
5440       parms.nregs += intslots;
5441     }
5442   nregs = parms.nregs;
5443
5444   /* Allocate the vector and handle some annoying special cases.  */
5445   if (nregs == 0)
5446     {
5447       /* ??? Empty structure has no value?  Duh?  */
5448       if (typesize <= 0)
5449         {
5450           /* Though there's nothing really to store, return a word register
5451              anyway so the rest of gcc doesn't go nuts.  Returning a PARALLEL
5452              leads to breakage due to the fact that there are zero bytes to
5453              load.  */
5454           return gen_rtx_REG (mode, regbase);
5455         }
5456       else
5457         {
5458           /* ??? C++ has structures with no fields, and yet a size.  Give up
5459              for now and pass everything back in integer registers.  */
5460           nregs = (typesize + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5461         }
5462       if (nregs + slotno > SPARC_INT_ARG_MAX)
5463         nregs = SPARC_INT_ARG_MAX - slotno;
5464     }
5465   gcc_assert (nregs != 0);
5466
5467   parms.ret = gen_rtx_PARALLEL (mode, rtvec_alloc (parms.stack + nregs));
5468
5469   /* If at least one field must be passed on the stack, generate
5470      (parallel [(expr_list (nil) ...) ...]) so that all fields will
5471      also be passed on the stack.  We can't do much better because the
5472      semantics of TARGET_ARG_PARTIAL_BYTES doesn't handle the case
5473      of structures for which the fields passed exclusively in registers
5474      are not at the beginning of the structure.  */
5475   if (parms.stack)
5476     XVECEXP (parms.ret, 0, 0)
5477       = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
5478
5479   /* Fill in the entries.  */
5480   parms.nregs = 0;
5481   parms.intoffset = 0;
5482   function_arg_record_value_2 (type, 0, &parms, false);
5483   function_arg_record_value_3 (typesize * BITS_PER_UNIT, &parms);
5484
5485   gcc_assert (parms.nregs == nregs);
5486
5487   return parms.ret;
5488 }
5489
5490 /* Used by function_arg and sparc_function_value_1 to implement the conventions
5491    of the 64-bit ABI for passing and returning unions.
5492    Return an expression valid as a return value for the FUNCTION_ARG
5493    and TARGET_FUNCTION_VALUE.
5494
5495    SIZE is the size in bytes of the union.
5496    MODE is the argument's machine mode.
5497    REGNO is the hard register the union will be passed in.  */
5498
5499 static rtx
5500 function_arg_union_value (int size, enum machine_mode mode, int slotno,
5501                           int regno)
5502 {
5503   int nwords = ROUND_ADVANCE (size), i;
5504   rtx regs;
5505
5506   /* See comment in previous function for empty structures.  */
5507   if (nwords == 0)
5508     return gen_rtx_REG (mode, regno);
5509
5510   if (slotno == SPARC_INT_ARG_MAX - 1)
5511     nwords = 1;
5512
5513   regs = gen_rtx_PARALLEL (mode, rtvec_alloc (nwords));
5514
5515   for (i = 0; i < nwords; i++)
5516     {
5517       /* Unions are passed left-justified.  */
5518       XVECEXP (regs, 0, i)
5519         = gen_rtx_EXPR_LIST (VOIDmode,
5520                              gen_rtx_REG (word_mode, regno),
5521                              GEN_INT (UNITS_PER_WORD * i));
5522       regno++;
5523     }
5524
5525   return regs;
5526 }
5527
5528 /* Used by function_arg and sparc_function_value_1 to implement the conventions
5529    for passing and returning large (BLKmode) vectors.
5530    Return an expression valid as a return value for the FUNCTION_ARG
5531    and TARGET_FUNCTION_VALUE.
5532
5533    SIZE is the size in bytes of the vector (at least 8 bytes).
5534    REGNO is the FP hard register the vector will be passed in.  */
5535
5536 static rtx
5537 function_arg_vector_value (int size, int regno)
5538 {
5539   int i, nregs = size / 8;
5540   rtx regs;
5541
5542   regs = gen_rtx_PARALLEL (BLKmode, rtvec_alloc (nregs));
5543
5544   for (i = 0; i < nregs; i++)
5545     {
5546       XVECEXP (regs, 0, i)
5547         = gen_rtx_EXPR_LIST (VOIDmode,
5548                              gen_rtx_REG (DImode, regno + 2*i),
5549                              GEN_INT (i*8));
5550     }
5551
5552   return regs;
5553 }
5554
5555 /* Handle the FUNCTION_ARG macro.
5556    Determine where to put an argument to a function.
5557    Value is zero to push the argument on the stack,
5558    or a hard register in which to store the argument.
5559
5560    CUM is a variable of type CUMULATIVE_ARGS which gives info about
5561     the preceding args and about the function being called.
5562    MODE is the argument's machine mode.
5563    TYPE is the data type of the argument (as a tree).
5564     This is null for libcalls where that information may
5565     not be available.
5566    NAMED is nonzero if this argument is a named parameter
5567     (otherwise it is an extra parameter matching an ellipsis).
5568    INCOMING_P is zero for FUNCTION_ARG, nonzero for FUNCTION_INCOMING_ARG.  */
5569
5570 rtx
5571 function_arg (const struct sparc_args *cum, enum machine_mode mode,
5572               tree type, int named, int incoming_p)
5573 {
5574   int regbase = (incoming_p
5575                  ? SPARC_INCOMING_INT_ARG_FIRST
5576                  : SPARC_OUTGOING_INT_ARG_FIRST);
5577   int slotno, regno, padding;
5578   enum mode_class mclass = GET_MODE_CLASS (mode);
5579
5580   slotno = function_arg_slotno (cum, mode, type, named, incoming_p,
5581                                 &regno, &padding);
5582   if (slotno == -1)
5583     return 0;
5584
5585   /* Vector types deserve special treatment because they are polymorphic wrt
5586      their mode, depending upon whether VIS instructions are enabled.  */
5587   if (type && TREE_CODE (type) == VECTOR_TYPE)
5588     {
5589       HOST_WIDE_INT size = int_size_in_bytes (type);
5590       gcc_assert ((TARGET_ARCH32 && size <= 8)
5591                   || (TARGET_ARCH64 && size <= 16));
5592
5593       if (mode == BLKmode)
5594         return function_arg_vector_value (size,
5595                                           SPARC_FP_ARG_FIRST + 2*slotno);
5596       else
5597         mclass = MODE_FLOAT;
5598     }
5599
5600   if (TARGET_ARCH32)
5601     return gen_rtx_REG (mode, regno);
5602
5603   /* Structures up to 16 bytes in size are passed in arg slots on the stack
5604      and are promoted to registers if possible.  */
5605   if (type && TREE_CODE (type) == RECORD_TYPE)
5606     {
5607       HOST_WIDE_INT size = int_size_in_bytes (type);
5608       gcc_assert (size <= 16);
5609
5610       return function_arg_record_value (type, mode, slotno, named, regbase);
5611     }
5612
5613   /* Unions up to 16 bytes in size are passed in integer registers.  */
5614   else if (type && TREE_CODE (type) == UNION_TYPE)
5615     {
5616       HOST_WIDE_INT size = int_size_in_bytes (type);
5617       gcc_assert (size <= 16);
5618
5619       return function_arg_union_value (size, mode, slotno, regno);
5620     }
5621
5622   /* v9 fp args in reg slots beyond the int reg slots get passed in regs
5623      but also have the slot allocated for them.
5624      If no prototype is in scope fp values in register slots get passed
5625      in two places, either fp regs and int regs or fp regs and memory.  */
5626   else if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
5627            && SPARC_FP_REG_P (regno))
5628     {
5629       rtx reg = gen_rtx_REG (mode, regno);
5630       if (cum->prototype_p || cum->libcall_p)
5631         {
5632           /* "* 2" because fp reg numbers are recorded in 4 byte
5633              quantities.  */
5634 #if 0
5635           /* ??? This will cause the value to be passed in the fp reg and
5636              in the stack.  When a prototype exists we want to pass the
5637              value in the reg but reserve space on the stack.  That's an
5638              optimization, and is deferred [for a bit].  */
5639           if ((regno - SPARC_FP_ARG_FIRST) >= SPARC_INT_ARG_MAX * 2)
5640             return gen_rtx_PARALLEL (mode,
5641                             gen_rtvec (2,
5642                                        gen_rtx_EXPR_LIST (VOIDmode,
5643                                                 NULL_RTX, const0_rtx),
5644                                        gen_rtx_EXPR_LIST (VOIDmode,
5645                                                 reg, const0_rtx)));
5646           else
5647 #else
5648           /* ??? It seems that passing back a register even when past
5649              the area declared by REG_PARM_STACK_SPACE will allocate
5650              space appropriately, and will not copy the data onto the
5651              stack, exactly as we desire.
5652
5653              This is due to locate_and_pad_parm being called in
5654              expand_call whenever reg_parm_stack_space > 0, which
5655              while beneficial to our example here, would seem to be
5656              in error from what had been intended.  Ho hum...  -- r~ */
5657 #endif
5658             return reg;
5659         }
5660       else
5661         {
5662           rtx v0, v1;
5663
5664           if ((regno - SPARC_FP_ARG_FIRST) < SPARC_INT_ARG_MAX * 2)
5665             {
5666               int intreg;
5667
5668               /* On incoming, we don't need to know that the value
5669                  is passed in %f0 and %i0, and it confuses other parts
5670                  causing needless spillage even on the simplest cases.  */
5671               if (incoming_p)
5672                 return reg;
5673
5674               intreg = (SPARC_OUTGOING_INT_ARG_FIRST
5675                         + (regno - SPARC_FP_ARG_FIRST) / 2);
5676
5677               v0 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
5678               v1 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (mode, intreg),
5679                                       const0_rtx);
5680               return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
5681             }
5682           else
5683             {
5684               v0 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
5685               v1 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
5686               return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
5687             }
5688         }
5689     }
5690
5691   /* All other aggregate types are passed in an integer register in a mode
5692      corresponding to the size of the type.  */
5693   else if (type && AGGREGATE_TYPE_P (type))
5694     {
5695       HOST_WIDE_INT size = int_size_in_bytes (type);
5696       gcc_assert (size <= 16);
5697
5698       mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
5699     }
5700
5701   return gen_rtx_REG (mode, regno);
5702 }
5703
5704 /* For an arg passed partly in registers and partly in memory,
5705    this is the number of bytes of registers used.
5706    For args passed entirely in registers or entirely in memory, zero.
5707
5708    Any arg that starts in the first 6 regs but won't entirely fit in them
5709    needs partial registers on v8.  On v9, structures with integer
5710    values in arg slots 5,6 will be passed in %o5 and SP+176, and complex fp
5711    values that begin in the last fp reg [where "last fp reg" varies with the
5712    mode] will be split between that reg and memory.  */
5713
5714 static int
5715 sparc_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5716                          tree type, bool named)
5717 {
5718   int slotno, regno, padding;
5719
5720   /* We pass 0 for incoming_p here, it doesn't matter.  */
5721   slotno = function_arg_slotno (cum, mode, type, named, 0, &regno, &padding);
5722
5723   if (slotno == -1)
5724     return 0;
5725
5726   if (TARGET_ARCH32)
5727     {
5728       if ((slotno + (mode == BLKmode
5729                      ? ROUND_ADVANCE (int_size_in_bytes (type))
5730                      : ROUND_ADVANCE (GET_MODE_SIZE (mode))))
5731           > SPARC_INT_ARG_MAX)
5732         return (SPARC_INT_ARG_MAX - slotno) * UNITS_PER_WORD;
5733     }
5734   else
5735     {
5736       /* We are guaranteed by pass_by_reference that the size of the
5737          argument is not greater than 16 bytes, so we only need to return
5738          one word if the argument is partially passed in registers.  */
5739
5740       if (type && AGGREGATE_TYPE_P (type))
5741         {
5742           int size = int_size_in_bytes (type);
5743
5744           if (size > UNITS_PER_WORD
5745               && slotno == SPARC_INT_ARG_MAX - 1)
5746             return UNITS_PER_WORD;
5747         }
5748       else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_INT
5749                || (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
5750                    && ! (TARGET_FPU && named)))
5751         {
5752           /* The complex types are passed as packed types.  */
5753           if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
5754               && slotno == SPARC_INT_ARG_MAX - 1)
5755             return UNITS_PER_WORD;
5756         }
5757       else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5758         {
5759           if ((slotno + GET_MODE_SIZE (mode) / UNITS_PER_WORD)
5760               > SPARC_FP_ARG_MAX)
5761             return UNITS_PER_WORD;
5762         }
5763     }
5764
5765   return 0;
5766 }
5767
5768 /* Handle the TARGET_PASS_BY_REFERENCE target hook.
5769    Specify whether to pass the argument by reference.  */
5770
5771 static bool
5772 sparc_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
5773                          enum machine_mode mode, const_tree type,
5774                          bool named ATTRIBUTE_UNUSED)
5775 {
5776   if (TARGET_ARCH32)
5777     /* Original SPARC 32-bit ABI says that structures and unions,
5778        and quad-precision floats are passed by reference.  For Pascal,
5779        also pass arrays by reference.  All other base types are passed
5780        in registers.
5781
5782        Extended ABI (as implemented by the Sun compiler) says that all
5783        complex floats are passed by reference.  Pass complex integers
5784        in registers up to 8 bytes.  More generally, enforce the 2-word
5785        cap for passing arguments in registers.
5786
5787        Vector ABI (as implemented by the Sun VIS SDK) says that vector
5788        integers are passed like floats of the same size, that is in
5789        registers up to 8 bytes.  Pass all vector floats by reference
5790        like structure and unions.  */
5791     return ((type && (AGGREGATE_TYPE_P (type) || VECTOR_FLOAT_TYPE_P (type)))
5792             || mode == SCmode
5793             /* Catch CDImode, TFmode, DCmode and TCmode.  */
5794             || GET_MODE_SIZE (mode) > 8
5795             || (type
5796                 && TREE_CODE (type) == VECTOR_TYPE
5797                 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
5798   else
5799     /* Original SPARC 64-bit ABI says that structures and unions
5800        smaller than 16 bytes are passed in registers, as well as
5801        all other base types.
5802
5803        Extended ABI (as implemented by the Sun compiler) says that
5804        complex floats are passed in registers up to 16 bytes.  Pass
5805        all complex integers in registers up to 16 bytes.  More generally,
5806        enforce the 2-word cap for passing arguments in registers.
5807
5808        Vector ABI (as implemented by the Sun VIS SDK) says that vector
5809        integers are passed like floats of the same size, that is in
5810        registers (up to 16 bytes).  Pass all vector floats like structure
5811        and unions.  */
5812     return ((type
5813              && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == VECTOR_TYPE)
5814              && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 16)
5815             /* Catch CTImode and TCmode.  */
5816             || GET_MODE_SIZE (mode) > 16);
5817 }
5818
5819 /* Handle the FUNCTION_ARG_ADVANCE macro.
5820    Update the data in CUM to advance over an argument
5821    of mode MODE and data type TYPE.
5822    TYPE is null for libcalls where that information may not be available.  */
5823
5824 void
5825 function_arg_advance (struct sparc_args *cum, enum machine_mode mode,
5826                       tree type, int named)
5827 {
5828   int regno, padding;
5829
5830   /* We pass 0 for incoming_p here, it doesn't matter.  */
5831   function_arg_slotno (cum, mode, type, named, 0, &regno, &padding);
5832
5833   /* If argument requires leading padding, add it.  */
5834   cum->words += padding;
5835
5836   if (TARGET_ARCH32)
5837     {
5838       cum->words += (mode != BLKmode
5839                      ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
5840                      : ROUND_ADVANCE (int_size_in_bytes (type)));
5841     }
5842   else
5843     {
5844       if (type && AGGREGATE_TYPE_P (type))
5845         {
5846           int size = int_size_in_bytes (type);
5847
5848           if (size <= 8)
5849             ++cum->words;
5850           else if (size <= 16)
5851             cum->words += 2;
5852           else /* passed by reference */
5853             ++cum->words;
5854         }
5855       else
5856         {
5857           cum->words += (mode != BLKmode
5858                          ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
5859                          : ROUND_ADVANCE (int_size_in_bytes (type)));
5860         }
5861     }
5862 }
5863
5864 /* Handle the FUNCTION_ARG_PADDING macro.
5865    For the 64 bit ABI structs are always stored left shifted in their
5866    argument slot.  */
5867
5868 enum direction
5869 function_arg_padding (enum machine_mode mode, const_tree type)
5870 {
5871   if (TARGET_ARCH64 && type != 0 && AGGREGATE_TYPE_P (type))
5872     return upward;
5873
5874   /* Fall back to the default.  */
5875   return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
5876 }
5877
5878 /* Handle the TARGET_RETURN_IN_MEMORY target hook.
5879    Specify whether to return the return value in memory.  */
5880
5881 static bool
5882 sparc_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
5883 {
5884   if (TARGET_ARCH32)
5885     /* Original SPARC 32-bit ABI says that structures and unions,
5886        and quad-precision floats are returned in memory.  All other
5887        base types are returned in registers.
5888
5889        Extended ABI (as implemented by the Sun compiler) says that
5890        all complex floats are returned in registers (8 FP registers
5891        at most for '_Complex long double').  Return all complex integers
5892        in registers (4 at most for '_Complex long long').
5893
5894        Vector ABI (as implemented by the Sun VIS SDK) says that vector
5895        integers are returned like floats of the same size, that is in
5896        registers up to 8 bytes and in memory otherwise.  Return all
5897        vector floats in memory like structure and unions; note that
5898        they always have BLKmode like the latter.  */
5899     return (TYPE_MODE (type) == BLKmode
5900             || TYPE_MODE (type) == TFmode
5901             || (TREE_CODE (type) == VECTOR_TYPE
5902                 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
5903   else
5904     /* Original SPARC 64-bit ABI says that structures and unions
5905        smaller than 32 bytes are returned in registers, as well as
5906        all other base types.
5907
5908        Extended ABI (as implemented by the Sun compiler) says that all
5909        complex floats are returned in registers (8 FP registers at most
5910        for '_Complex long double').  Return all complex integers in
5911        registers (4 at most for '_Complex TItype').
5912
5913        Vector ABI (as implemented by the Sun VIS SDK) says that vector
5914        integers are returned like floats of the same size, that is in
5915        registers.  Return all vector floats like structure and unions;
5916        note that they always have BLKmode like the latter.  */
5917     return ((TYPE_MODE (type) == BLKmode
5918              && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 32));
5919 }
5920
5921 /* Handle the TARGET_STRUCT_VALUE target hook.
5922    Return where to find the structure return value address.  */
5923
5924 static rtx
5925 sparc_struct_value_rtx (tree fndecl, int incoming)
5926 {
5927   if (TARGET_ARCH64)
5928     return 0;
5929   else
5930     {
5931       rtx mem;
5932
5933       if (incoming)
5934         mem = gen_rtx_MEM (Pmode, plus_constant (frame_pointer_rtx,
5935                                                  STRUCT_VALUE_OFFSET));
5936       else
5937         mem = gen_rtx_MEM (Pmode, plus_constant (stack_pointer_rtx,
5938                                                  STRUCT_VALUE_OFFSET));
5939
5940       /* Only follow the SPARC ABI for fixed-size structure returns.
5941          Variable size structure returns are handled per the normal
5942          procedures in GCC. This is enabled by -mstd-struct-return */
5943       if (incoming == 2
5944           && sparc_std_struct_return
5945           && TYPE_SIZE_UNIT (TREE_TYPE (fndecl))
5946           && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (fndecl))) == INTEGER_CST)
5947         {
5948           /* We must check and adjust the return address, as it is
5949              optional as to whether the return object is really
5950              provided.  */
5951           rtx ret_rtx = gen_rtx_REG (Pmode, 31);
5952           rtx scratch = gen_reg_rtx (SImode);
5953           rtx endlab = gen_label_rtx ();
5954
5955           /* Calculate the return object size */
5956           tree size = TYPE_SIZE_UNIT (TREE_TYPE (fndecl));
5957           rtx size_rtx = GEN_INT (TREE_INT_CST_LOW (size) & 0xfff);
5958           /* Construct a temporary return value */
5959           rtx temp_val = assign_stack_local (Pmode, TREE_INT_CST_LOW (size), 0);
5960
5961           /* Implement SPARC 32-bit psABI callee returns struck checking
5962              requirements:
5963
5964               Fetch the instruction where we will return to and see if
5965              it's an unimp instruction (the most significant 10 bits
5966              will be zero).  */
5967           emit_move_insn (scratch, gen_rtx_MEM (SImode,
5968                                                 plus_constant (ret_rtx, 8)));
5969           /* Assume the size is valid and pre-adjust */
5970           emit_insn (gen_add3_insn (ret_rtx, ret_rtx, GEN_INT (4)));
5971           emit_cmp_and_jump_insns (scratch, size_rtx, EQ, const0_rtx, SImode, 0, endlab);
5972           emit_insn (gen_sub3_insn (ret_rtx, ret_rtx, GEN_INT (4)));
5973           /* Assign stack temp:
5974              Write the address of the memory pointed to by temp_val into
5975              the memory pointed to by mem */
5976           emit_move_insn (mem, XEXP (temp_val, 0));
5977           emit_label (endlab);
5978         }
5979
5980       set_mem_alias_set (mem, struct_value_alias_set);
5981       return mem;
5982     }
5983 }
5984
5985 /* Worker for TARGET_FUNCTION_VALUE and TARGET_LIBCALL_VALUE.  TYPE may be
5986    null (libcalls); MODE is the value's mode.  For v9, return values follow
5987    the argument rules, except that up to 32 bytes may go in registers.  */
5988
5989 static rtx
5990 sparc_function_value_1 (const_tree type, enum machine_mode mode,
5991                         bool outgoing)
5992 {
5993   /* Beware that the two values are swapped here wrt function_arg.  */
5994   int regbase = (outgoing
5995                  ? SPARC_INCOMING_INT_ARG_FIRST
5996                  : SPARC_OUTGOING_INT_ARG_FIRST);
5997   enum mode_class mclass = GET_MODE_CLASS (mode);
5998   int regno;
5999
6000   /* Vector types deserve special treatment because they are polymorphic wrt
6001      their mode, depending upon whether VIS instructions are enabled.  */
6002   if (type && TREE_CODE (type) == VECTOR_TYPE)
6003     {
6004       HOST_WIDE_INT size = int_size_in_bytes (type);
6005       gcc_assert ((TARGET_ARCH32 && size <= 8)
6006                   || (TARGET_ARCH64 && size <= 32));
6007
6008       if (mode == BLKmode)
6009         return function_arg_vector_value (size,
6010                                           SPARC_FP_ARG_FIRST);
6011       else
6012         mclass = MODE_FLOAT;  /* Handled like a float value below.  */
6013     }
6014
6015   if (TARGET_ARCH64 && type)
6016     {
6017       /* Structures up to 32 bytes in size are returned in registers.  */
6018       if (TREE_CODE (type) == RECORD_TYPE)
6019         {
6020           HOST_WIDE_INT size = int_size_in_bytes (type);
6021           gcc_assert (size <= 32);
6022
6023           return function_arg_record_value (type, mode, 0, 1, regbase);
6024         }
6025
6026       /* Unions up to 32 bytes in size are returned in integer registers.  */
6027       else if (TREE_CODE (type) == UNION_TYPE)
6028         {
6029           HOST_WIDE_INT size = int_size_in_bytes (type);
6030           gcc_assert (size <= 32);
6031
6032           return function_arg_union_value (size, mode, 0, regbase);
6033         }
6034
6035       /* Objects that require it are returned in FP registers.  */
6036       else if (mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
6037         ;  /* Nothing to adjust here.  */
6038
6039       /* All other aggregate types are returned in an integer register in a
6040          mode corresponding to the size of the type.  */
6041       else if (AGGREGATE_TYPE_P (type))
6042         {
6043           /* Ask for the integer mode exactly as wide as the aggregate; the
6044              BLKmode test below catches the case where there is none.  */
6045           HOST_WIDE_INT size = int_size_in_bytes (type);
6046           gcc_assert (size <= 32);
6047
6048           mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
6049
6050           /* ??? We probably should have made the same ABI change in
6051              3.4.0 as the one we made for unions.   The latter was
6052              required by the SCD though, while the former is not
6053              specified, so we favored compatibility and efficiency.
6054
6055              Now we're stuck for aggregates larger than 16 bytes,
6056              because OImode vanished in the meantime.  Let's not
6057              try to be unduly clever, and simply follow the ABI
6058              for unions in that case.  */
6059           if (mode == BLKmode)
6060             return function_arg_union_value (size, mode, 0, regbase);
6061           else
6062             mclass = MODE_INT;
6063         }
6064
6065       /* This must match sparc_promote_function_mode.
6066          ??? Maybe 32-bit pointers should actually remain in Pmode?  */
6067       else if (mclass == MODE_INT && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
6068         mode = word_mode;
6069     }
6070   /* Float-class values go in the first FP arg register if there is an FPU.  */
6071   if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT) && TARGET_FPU)
6072     regno = SPARC_FP_ARG_FIRST;
6073   else
6074     regno = regbase;
6075
6076   return gen_rtx_REG (mode, regno);
6077 }
6078
6079 /* Implement TARGET_FUNCTION_VALUE: look up the machine mode of VALTYPE
6080    and let sparc_function_value_1 choose the register.  OUTGOING is passed
6081    through unchanged; FN_DECL_OR_TYPE is not consulted.  */
6082
6083 static rtx
6084 sparc_function_value (const_tree valtype,
6085                       const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
6086                       bool outgoing)
6087 {
6088   enum machine_mode valmode = TYPE_MODE (valtype);
6089   return sparc_function_value_1 (valtype, valmode, outgoing);
6090 }
6091
6092 /* Implement TARGET_LIBCALL_VALUE.  No tree type is supplied (NULL_TREE),
6093    so sparc_function_value_1 decides from MODE alone; FUN is not used.  */
6094
6095 static rtx
6096 sparc_libcall_value (enum machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
6097 {
6098   return sparc_function_value_1 (NULL_TREE, mode, /* outgoing */ false);
6099 }
6100
6101 /* Implement FUNCTION_VALUE_REGNO_P: true for hard register 8 (the first
6102    "output" reg, integer values) and 32 (the first floating point reg).  */
6103
6104 static bool
6105 sparc_function_value_regno_p (const unsigned int regno)
6106 {
6107   const bool is_int_ret = (regno == 8), is_fp_ret = (regno == 32);
6108   return is_int_ret || is_fp_ret;
6109 }
6110
6111 /* Do what is necessary for `va_start'.  Each integer argument register
6112    from number crtl->args.info.words up to SPARC_INT_ARG_MAX is stored at
6113    frame_pointer + FIRST_PARM_OFFSET (0) + UNITS_PER_WORD * its number.
6114    The address of the first of these slots, i.e. of the first unnamed
6115    parameter, is returned.  */
6116
6117 static rtx
6118 sparc_builtin_saveregs (void)
6119 {
6120   const int first_anon = crtl->args.info.words;
6121   int slot = first_anon;
6122
6123   while (slot < SPARC_INT_ARG_MAX)
6124     {
6125       rtx offset = GEN_INT (FIRST_PARM_OFFSET (0) + UNITS_PER_WORD * slot);
6126       rtx home = gen_rtx_PLUS (Pmode, frame_pointer_rtx, offset);
6127       rtx arg_reg = gen_rtx_REG (word_mode,
6128                                  SPARC_INCOMING_INT_ARG_FIRST + slot);
6129
6130       emit_move_insn (gen_rtx_MEM (word_mode, home), arg_reg);
6131       slot++;
6132     }
6133
6134   return gen_rtx_PLUS (Pmode,
6135                        frame_pointer_rtx,
6136                        GEN_INT (FIRST_PARM_OFFSET (0)
6137                                 + UNITS_PER_WORD * first_anon));
6138 }
6139
6140 /* Implement `va_start' for stdarg.  The incoming NEXTARG is ignored; the
6141    address produced by expand_builtin_saveregs is used in its place.  */
6142
6143 static void
6144 sparc_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
6145 {
6146   std_expand_builtin_va_start (valist, expand_builtin_saveregs ());
6147 }
6148
6149 /* Implement `va_arg' for stdarg; the result is an indirect ref to the arg.  */
6150
6151 static tree
6152 sparc_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
6153                        gimple_seq *post_p)
6154 {
6155   HOST_WIDE_INT size, rsize, align;
6156   tree addr, incr;
6157   bool indirect;
6158   tree ptrtype = build_pointer_type (type);
6159   /* An argument passed by reference takes one word: a pointer to it.  */
6160   if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
6161     {
6162       indirect = true;
6163       size = rsize = UNITS_PER_WORD;
6164       align = 0;
6165     }
6166   else
6167     {
6168       indirect = false;
6169       size = int_size_in_bytes (type);
6170       rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
6171       align = 0;
6172       /* RSIZE above is SIZE rounded up to a whole number of words.  */
6173       if (TARGET_ARCH64)
6174         {
6175           /* For SPARC64, objects requiring 16-byte alignment get it.  */
6176           if (TYPE_ALIGN (type) >= 2 * (unsigned) BITS_PER_WORD)
6177             align = 2 * UNITS_PER_WORD;
6178
6179           /* SPARC-V9 ABI states that structures up to 16 bytes in size
6180              are left-justified in their slots.  */
6181           if (AGGREGATE_TYPE_P (type))
6182             {
6183               if (size == 0)
6184                 size = rsize = UNITS_PER_WORD;
6185               else
6186                 size = rsize;
6187             }
6188         }
6189     }
6190   /* INCR is the slot address: VALIST, rounded up to ALIGN when nonzero.  */
6191   incr = valist;
6192   if (align)
6193     {
6194       incr = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, incr,
6195                           size_int (align - 1));
6196       incr = fold_convert (sizetype, incr);
6197       incr = fold_build2 (BIT_AND_EXPR, sizetype, incr,
6198                           size_int (-align));
6199       incr = fold_convert (ptr_type_node, incr);
6200     }
6201
6202   gimplify_expr (&incr, pre_p, post_p, is_gimple_val, fb_rvalue);
6203   addr = incr;
6204   /* Big-endian: a value smaller than its slot sits at the slot's end.  */
6205   if (BYTES_BIG_ENDIAN && size < rsize)
6206     addr = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, incr,
6207                         size_int (rsize - size));
6208   /* For a by-reference argument, first load the pointer from the slot.  */
6209   if (indirect)
6210     {
6211       addr = fold_convert (build_pointer_type (ptrtype), addr);
6212       addr = build_va_arg_indirect_ref (addr);
6213     }
6214
6215   /* If the address isn't aligned properly for the type, we need a temporary.
6216      FIXME: This is inefficient, usually we can do this in registers.  */
6217   else if (align == 0 && TYPE_ALIGN (type) > BITS_PER_WORD)
6218     {
6219       tree tmp = create_tmp_var (type, "va_arg_tmp");
6220       tree dest_addr = build_fold_addr_expr (tmp);
6221       tree copy = build_call_expr (implicit_built_in_decls[BUILT_IN_MEMCPY],
6222                                    3, dest_addr, addr, size_int (rsize));
6223       TREE_ADDRESSABLE (tmp) = 1;
6224       gimplify_and_add (copy, pre_p);
6225       addr = dest_addr;
6226     }
6227
6228   else
6229     addr = fold_convert (ptrtype, addr);
6230   /* Step VALIST past the slot; the store is queued on POST_P.  */
6231   incr
6232     = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, incr, size_int (rsize));
6233   gimplify_assign (valist, incr, post_p);
6234
6235   return build_va_arg_indirect_ref (addr);
6236 }
6237 \f
6238 /* Implement the TARGET_VECTOR_MODE_SUPPORTED_P target hook: MODE is
6239    supported when it is a vector mode and VIS is enabled.  */
6240
6241 static bool
6242 sparc_vector_mode_supported_p (enum machine_mode mode)
6243 {
6244   return TARGET_VIS && VECTOR_MODE_P (mode);
6245 }
6246 \f
6247 /* Implement TARGET_VECTORIZE_UNITS_PER_SIMD_WORD: 8 with VIS, else one word.  */
6248
6249 static unsigned int
6250 sparc_units_per_simd_word (enum machine_mode mode ATTRIBUTE_UNUSED)
6251 {
6252   return !TARGET_VIS ? UNITS_PER_WORD : 8;
6253 }
6254 \f
6255 /* Build the assembler template for an unconditional branch.  LABEL is the
6256    operand number of the label, DEST the destination insn (i.e. the label)
6257    and INSN the source.  On V9, "ba,pt %xcc" is chosen when insn addresses
6258    are available and the distance is within the limit tested below; else
6259    plain "b" is used.  The result is held in a static buffer.  */
6260
6261 const char *
6262 output_ubranch (rtx dest, int label, rtx insn)
6263 {
6264   static char string[64];
6265   const char *opcode = "b%*\t";
6266   char *tail;
6267
6268   if (TARGET_V9 && INSN_ADDRESSES_SET_P ())
6269     {
6270       int from = INSN_ADDRESSES (INSN_UID (insn));
6271       int to = INSN_ADDRESSES (INSN_UID (dest));
6272       int delta = to - from;
6273
6274       /* Leave some instructions for "slop".  */
6275       if (-260000 <= delta && delta < 260000)
6276         opcode = "ba%*,pt\t%%xcc, ";
6277     }
6278
6279   /* Append "%l<LABEL>%(" after the opcode.  */
6280   strcpy (string, opcode);
6281   tail = string + strlen (string);
6282   *tail++ = '%';
6283   *tail++ = 'l';
6284   *tail++ = '0' + label;
6285   *tail++ = '%';
6286   *tail++ = '(';
6287   *tail = '\0';
6288
6289   return string;
6290 }
6291
6292 /* Return the string to output a conditional branch to LABEL, which is
6293    the operand number of the label.  OP is the conditional expression.
6294    XEXP (OP, 0) is assumed to be a condition code register (integer or
6295    floating point) and its mode specifies what kind of comparison we made.
6296
6297    DEST is the destination insn (i.e. the label), INSN is the source.
6298
6299    REVERSED is nonzero if we should reverse the sense of the comparison.
6300    ANNUL is nonzero if we should generate an annulling branch.  The
6301    returned template lives in a static buffer reused by every call.  */
6302
6303 const char *
6304 output_cbranch (rtx op, rtx dest, int label, int reversed, int annul,
6305                 rtx insn)
6306 {
6307   static char string[64];
6308   enum rtx_code code = GET_CODE (op);
6309   rtx cc_reg = XEXP (op, 0);
6310   enum machine_mode mode = GET_MODE (cc_reg);
6311   const char *labelno, *branch;
6312   int spaces = 8, far;
6313   char *p;
6314
6315   /* v9 branches are limited to +-1MB.  If it is too far away,
6316      change
6317
6318      bne,pt %xcc, .LC30
6319
6320      to
6321
6322      be,pn %xcc, .+12
6323       nop
6324      ba .LC30
6325
6326      and
6327
6328      fbne,a,pn %fcc2, .LC29
6329
6330      to
6331
6332      fbe,pt %fcc2, .+16
6333       nop
6334      ba .LC29  */
6335
6336   far = TARGET_V9 && (get_attr_length (insn) >= 3);
6337   if (reversed ^ far)
6338     {
6339       /* Reversal of FP compares takes care -- an ordered compare
6340          becomes an unordered compare and vice versa.  */
6341       if (mode == CCFPmode || mode == CCFPEmode)
6342         code = reverse_condition_maybe_unordered (code);
6343       else
6344         code = reverse_condition (code);
6345     }
6346
6347   /* Start by writing the branch condition.  */
6348   if (mode == CCFPmode || mode == CCFPEmode)
6349     {
6350       switch (code)
6351         {
6352         case NE:
6353           branch = "fbne";
6354           break;
6355         case EQ:
6356           branch = "fbe";
6357           break;
6358         case GE:
6359           branch = "fbge";
6360           break;
6361         case GT:
6362           branch = "fbg";
6363           break;
6364         case LE:
6365           branch = "fble";
6366           break;
6367         case LT:
6368           branch = "fbl";
6369           break;
6370         case UNORDERED:
6371           branch = "fbu";
6372           break;
6373         case ORDERED:
6374           branch = "fbo";
6375           break;
6376         case UNGT:
6377           branch = "fbug";
6378           break;
6379         case UNLT:
6380           branch = "fbul";
6381           break;
6382         case UNEQ:
6383           branch = "fbue";
6384           break;
6385         case UNGE:
6386           branch = "fbuge";
6387           break;
6388         case UNLE:
6389           branch = "fbule";
6390           break;
6391         case LTGT:
6392           branch = "fblg";
6393           break;
6394
6395         default:
6396           gcc_unreachable ();
6397         }
6398
6399       /* ??? !v9: FP branches cannot be preceded by another floating point
6400          insn.  Because there is currently no concept of pre-delay slots,
6401          we can fix this only by always emitting a nop before a floating
6402          point branch.  */
6403
6404       string[0] = '\0';
6405       if (! TARGET_V9)
6406         strcpy (string, "nop\n\t");
6407       strcat (string, branch);
6408     }
6409   else
6410     {
6411       switch (code)
6412         {
6413         case NE:
6414           branch = "bne";
6415           break;
6416         case EQ:
6417           branch = "be";
6418           break;
6419         case GE:
6420           if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
6421             branch = "bpos";
6422           else
6423             branch = "bge";
6424           break;
6425         case GT:
6426           branch = "bg";
6427           break;
6428         case LE:
6429           branch = "ble";
6430           break;
6431         case LT:
6432           if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
6433             branch = "bneg";
6434           else
6435             branch = "bl";
6436           break;
6437         case GEU:
6438           branch = "bgeu";
6439           break;
6440         case GTU:
6441           branch = "bgu";
6442           break;
6443         case LEU:
6444           branch = "bleu";
6445           break;
6446         case LTU:
6447           branch = "blu";
6448           break;
6449
6450         default:
6451           gcc_unreachable ();
6452         }
6453       strcpy (string, branch);
6454     }
6455   spaces -= strlen (branch);
6456   p = strchr (string, '\0');
6457
6458   /* Now add the annulling, the label, and a possible noop.  */
6459   if (annul && ! far)
6460     {
6461       strcpy (p, ",a");
6462       p += 2;
6463       spaces -= 2;
6464     }
6465
6466   if (TARGET_V9)
6467     {
6468       rtx note;
6469       int v8 = 0;
6470
6471       if (! far && insn && INSN_ADDRESSES_SET_P ())
6472         {
6473           int delta = (INSN_ADDRESSES (INSN_UID (dest))
6474                        - INSN_ADDRESSES (INSN_UID (insn)));
6475           /* Leave some instructions for "slop".  */
6476           if (delta < -260000 || delta >= 260000)
6477             v8 = 1;
6478         }
6479
6480       if (mode == CCFPmode || mode == CCFPEmode)
6481         {
6482           static char v9_fcc_labelno[] = "%%fccX, ";
6483           /* Set the char indicating the number of the fcc reg to use.  */
6484           v9_fcc_labelno[5] = REGNO (cc_reg) - SPARC_FIRST_V9_FCC_REG + '0';
6485           labelno = v9_fcc_labelno;
6486           if (v8)
6487             {
6488               gcc_assert (REGNO (cc_reg) == SPARC_FCC_REG);
6489               labelno = "";
6490             }
6491         }
6492       else if (mode == CCXmode || mode == CCX_NOOVmode)
6493         {
6494           labelno = "%%xcc, ";
6495           gcc_assert (! v8);
6496         }
6497       else
6498         {
6499           labelno = "%%icc, ";
6500           if (v8)
6501             labelno = "";
6502         }
6503       /* Predict only in the V9 form (LABELNO non-empty); FAR inverts it.  */
6504       if (*labelno && insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
6505         {
6506           strcpy (p,
6507                   ((INTVAL (XEXP (note, 0)) >= REG_BR_PROB_BASE / 2) ^ far)
6508                   ? ",pt" : ",pn");
6509           p += 3;
6510           spaces -= 3;
6511         }
6512     }
6513   else
6514     labelno = "";
6515   /* Use a tab after the mnemonic unless it already fills 8 columns.  */
6516   if (spaces > 0)
6517     *p++ = '\t';
6518   else
6519     *p++ = ' ';
6520   strcpy (p, labelno);
6521   p = strchr (p, '\0');
6522   if (far)
6523     {
6524       strcpy (p, ".+12\n\t nop\n\tb\t");
6525       /* Skip the next insn too (.+12 becomes .+16) if requested or
6526          if we know that it will be a nop.  */
6527       if (annul || ! final_sequence)
6528         p[3] = '6';
6529       p += 14;  /* Length of the text just copied.  */
6530     }
6531   *p++ = '%';
6532   *p++ = 'l';
6533   *p++ = label + '0';
6534   *p++ = '%';
6535   *p++ = '#';
6536   *p = '\0';
6537
6538   return string;
6539 }
6540
6541 /* Emit a library call comparison between floating point X and Y.
6542    COMPARISON is the operator to compare with (EQ, NE, GT, etc).
6543    Return the new operator to be used in the comparison sequence.
6544
6545    TARGET_ARCH64 uses _Qp_* functions, which use pointers to TFmode
6546    values as arguments instead of the TFmode registers themselves,
6547    that's why we cannot call emit_float_lib_cmp.  */
6548
6549 rtx
6550 sparc_emit_float_lib_cmp (rtx x, rtx y, enum rtx_code comparison)
6551 {
6552   const char *qpfunc;
6553   rtx slot0, slot1, result, tem, tem2, libfunc;
6554   enum machine_mode mode;
6555   enum rtx_code new_comparison;
6556
6557   switch (comparison)
6558     {
6559     case EQ:
6560       qpfunc = (TARGET_ARCH64 ? "_Qp_feq" : "_Q_feq");
6561       break;
6562
6563     case NE:
6564       qpfunc = (TARGET_ARCH64 ? "_Qp_fne" : "_Q_fne");
6565       break;
6566
6567     case GT:
6568       qpfunc = (TARGET_ARCH64 ? "_Qp_fgt" : "_Q_fgt");
6569       break;
6570
6571     case GE:
6572       qpfunc = (TARGET_ARCH64 ? "_Qp_fge" : "_Q_fge");
6573       break;
6574
6575     case LT:
6576       qpfunc = (TARGET_ARCH64 ? "_Qp_flt" : "_Q_flt");
6577       break;
6578
6579     case LE:
6580       qpfunc = (TARGET_ARCH64 ? "_Qp_fle" : "_Q_fle");
6581       break;
6582
6583     case ORDERED:
6584     case UNORDERED:
6585     case UNGT:
6586     case UNLT:
6587     case UNEQ:
6588     case UNGE:
6589     case UNLE:
6590     case LTGT:
6591       qpfunc = (TARGET_ARCH64 ? "_Qp_cmp" : "_Q_cmp");
6592       break;
6593
6594     default:
6595       gcc_unreachable ();
6596     }
6597   /* The _Qp_ routines take addresses, so spill non-MEM operands first.  */
6598   if (TARGET_ARCH64)
6599     {
6600       if (MEM_P (x))
6601         slot0 = x;
6602       else
6603         {
6604           slot0 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode), 0);
6605           emit_move_insn (slot0, x);
6606         }
6607
6608       if (MEM_P (y))
6609         slot1 = y;
6610       else
6611         {
6612           slot1 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode), 0);
6613           emit_move_insn (slot1, y);
6614         }
6615
6616       libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
6617       emit_library_call (libfunc, LCT_NORMAL,
6618                          DImode, 2,
6619                          XEXP (slot0, 0), Pmode,
6620                          XEXP (slot1, 0), Pmode);
6621       mode = DImode;
6622     }
6623   else
6624     {
6625       libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
6626       emit_library_call (libfunc, LCT_NORMAL,
6627                          SImode, 2,
6628                          x, TFmode, y, TFmode);
6629       mode = SImode;
6630     }
6631
6632
6633   /* Immediately move the result of the libcall into a pseudo
6634      register so reload doesn't clobber the value if it needs
6635      the return register for a spill reg.  */
6636   result = gen_reg_rtx (mode);
6637   emit_move_insn (result, hard_libcall_value (mode, libfunc));
6638   /* The tests below decode the _cmp result as 0 =, 1 <, 2 >, 3 unordered.  */
6639   switch (comparison)
6640     {
6641     default:  /* Dedicated predicate routine: nonzero means true.  */
6642       return gen_rtx_NE (VOIDmode, result, const0_rtx);
6643     case ORDERED:
6644     case UNORDERED:
6645       new_comparison = (comparison == UNORDERED ? EQ : NE);
6646       return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, GEN_INT(3));
6647     case UNGT:
6648     case UNGE:
6649       new_comparison = (comparison == UNGT ? GT : NE);
6650       return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, const1_rtx);
6651     case UNLE:
6652       return gen_rtx_NE (VOIDmode, result, const2_rtx);
6653     case UNLT:
6654       tem = gen_reg_rtx (mode);
6655       if (TARGET_ARCH32)
6656         emit_insn (gen_andsi3 (tem, result, const1_rtx));
6657       else
6658         emit_insn (gen_anddi3 (tem, result, const1_rtx));
6659       return gen_rtx_NE (VOIDmode, tem, const0_rtx);
6660     case UNEQ:
6661     case LTGT:
6662       tem = gen_reg_rtx (mode);
6663       if (TARGET_ARCH32)
6664         emit_insn (gen_addsi3 (tem, result, const1_rtx));
6665       else
6666         emit_insn (gen_adddi3 (tem, result, const1_rtx));
6667       tem2 = gen_reg_rtx (mode);
6668       if (TARGET_ARCH32)
6669         emit_insn (gen_andsi3 (tem2, tem, const2_rtx));
6670       else
6671         emit_insn (gen_anddi3 (tem2, tem, const2_rtx));
6672       new_comparison = (comparison == UNEQ ? EQ : NE);
6673       return gen_rtx_fmt_ee (new_comparison, VOIDmode, tem2, const0_rtx);
6674     }
6675
6676   gcc_unreachable ();
6677 }
6678
6679 /* Generate an unsigned DImode to FP conversion.  This is the same code
6680    optabs would emit if we didn't have TFmode patterns.  */
6681
6682 void
6683 sparc_emit_floatunsdi (rtx *operands, enum machine_mode mode)
6684 {
6685   rtx neglab, donelab, i0, i1, f0, in, out;
6686
6687   out = operands[0];
6688   in = force_reg (DImode, operands[1]);
6689   neglab = gen_label_rtx ();
6690   donelab = gen_label_rtx ();
6691   i0 = gen_reg_rtx (DImode);
6692   i1 = gen_reg_rtx (DImode);
6693   f0 = gen_reg_rtx (mode);
6694   /* IN negative as a signed value means its top bit is set: go to NEGLAB.  */
6695   emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab);
6696   /* Otherwise the signed conversion can be used directly.  */
6697   emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
6698   emit_jump_insn (gen_jump (donelab));
6699   emit_barrier ();
6700
6701   emit_label (neglab);
6702   /* Halve IN, OR the shifted-out bit back into bit 0, convert, double.  */
6703   emit_insn (gen_lshrdi3 (i0, in, const1_rtx));
6704   emit_insn (gen_anddi3 (i1, in, const1_rtx));
6705   emit_insn (gen_iordi3 (i0, i0, i1));
6706   emit_insn (gen_rtx_SET (VOIDmode, f0, gen_rtx_FLOAT (mode, i0)));
6707   emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
6708
6709   emit_label (donelab);
6710 }
6711
6712 /* Generate an FP to unsigned DImode conversion.  This is the same code
6713    optabs would emit if we didn't have TFmode patterns.  */
6714
6715 void
6716 sparc_emit_fixunsdi (rtx *operands, enum machine_mode mode)
6717 {
6718   rtx neglab, donelab, i0, i1, f0, in, out, limit;
6719
6720   out = operands[0];
6721   in = force_reg (mode, operands[1]);
6722   neglab = gen_label_rtx ();
6723   donelab = gen_label_rtx ();
6724   i0 = gen_reg_rtx (DImode);
6725   i1 = gen_reg_rtx (DImode);
6726   limit = gen_reg_rtx (mode);
6727   f0 = gen_reg_rtx (mode);
6728   /* LIMIT is 2^63; inputs at or above it take the NEGLAB path.  */
6729   emit_move_insn (limit,
6730                   CONST_DOUBLE_FROM_REAL_VALUE (
6731                     REAL_VALUE_ATOF ("9223372036854775808.0", mode), mode));
6732   emit_cmp_and_jump_insns (in, limit, GE, NULL_RTX, mode, 0, neglab);
6733   /* Below LIMIT the signed conversion is used directly.  */
6734   emit_insn (gen_rtx_SET (VOIDmode,
6735                           out,
6736                           gen_rtx_FIX (DImode, gen_rtx_FIX (mode, in))));
6737   emit_jump_insn (gen_jump (donelab));
6738   emit_barrier ();
6739
6740   emit_label (neglab);
6741   /* Convert IN - LIMIT instead, then flip bit 63 of the result with XOR.  */
6742   emit_insn (gen_rtx_SET (VOIDmode, f0, gen_rtx_MINUS (mode, in, limit)));
6743   emit_insn (gen_rtx_SET (VOIDmode,
6744                           i0,
6745                           gen_rtx_FIX (DImode, gen_rtx_FIX (mode, f0))));
6746   emit_insn (gen_movdi (i1, const1_rtx));
6747   emit_insn (gen_ashldi3 (i1, i1, GEN_INT (63)));
6748   emit_insn (gen_xordi3 (out, i0, i1));
6749
6750   emit_label (donelab);
6751 }
6752
6753 /* Return the string to output a conditional branch to LABEL, testing
6754    register REG.  LABEL is the operand number of the label; REG is the
6755    operand number of the reg.  OP is the conditional expression.  The mode
6756    of REG says what kind of comparison we made.
6757
6758    DEST is the destination insn (i.e. the label), INSN is the source.
6759
6760    REVERSED is nonzero if we should reverse the sense of the comparison.
6761    ANNUL is nonzero if we should generate an annulling branch.  The
6762    returned template lives in a static buffer reused by every call.  */
6763
6764 const char *
6765 output_v9branch (rtx op, rtx dest, int reg, int label, int reversed,
6766                  int annul, rtx insn)
6767 {
6768   static char string[64];
6769   enum rtx_code code = GET_CODE (op);
6770   enum machine_mode mode = GET_MODE (XEXP (op, 0));
6771   rtx note;
6772   int far;
6773   char *p;
6774
6775   /* branch on register are limited to +-128KB.  If it is too far away,
6776      change
6777
6778      brnz,pt %g1, .LC30
6779
6780      to
6781
6782      brz,pn %g1, .+12
6783       nop
6784      ba,pt %xcc, .LC30
6785
6786      and
6787
6788      brgez,a,pn %o1, .LC29
6789
6790      to
6791
6792      brlz,pt %o1, .+16
6793       nop
6794      ba,pt %xcc, .LC29  */
6795
6796   far = get_attr_length (insn) >= 3;
6797
6798   /* A far branch tests the opposite condition, so FAR toggles REVERSED.  */
6799   if (reversed ^ far)
6800     code = reverse_condition (code);
6801
6802   /* Only 64 bit versions of these instructions exist.  */
6803   gcc_assert (mode == DImode);
6804
6805   /* Start by writing the branch condition.  */
6806
6807   switch (code)
6808     {
6809     case NE:
6810       strcpy (string, "brnz");
6811       break;
6812
6813     case EQ:
6814       strcpy (string, "brz");
6815       break;
6816
6817     case GE:
6818       strcpy (string, "brgez");
6819       break;
6820
6821     case LT:
6822       strcpy (string, "brlz");
6823       break;
6824
6825     case LE:
6826       strcpy (string, "brlez");
6827       break;
6828
6829     case GT:
6830       strcpy (string, "brgz");
6831       break;
6832
6833     default:
6834       gcc_unreachable ();
6835     }
6836
6837   p = strchr (string, '\0');
6838
6839   /* Now add the annulling, reg, label, and nop.  */
6840   if (annul && ! far)
6841     {
6842       strcpy (p, ",a");
6843       p += 2;
6844     }
6845
6846   if (insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
6847     {
6848       strcpy (p,
6849               ((INTVAL (XEXP (note, 0)) >= REG_BR_PROB_BASE / 2) ^ far)
6850               ? ",pt" : ",pn");
6851       p += 3;
6852     }
6853   /* Tab if fewer than 8 characters were written so far, else a space.  */
6854   *p = p < string + 8 ? '\t' : ' ';
6855   p++;
6856   *p++ = '%';
6857   *p++ = '0' + reg;
6858   *p++ = ',';
6859   *p++ = ' ';
6860   if (far)
6861     {
6862       int veryfar = 1, delta;
6863       /* Unless addresses prove otherwise, assume "ba,pt" cannot reach.  */
6864       if (INSN_ADDRESSES_SET_P ())
6865         {
6866           delta = (INSN_ADDRESSES (INSN_UID (dest))
6867                    - INSN_ADDRESSES (INSN_UID (insn)));
6868           /* Leave some instructions for "slop".  */
6869           if (delta >= -260000 && delta < 260000)
6870             veryfar = 0;
6871         }
6872
6873       strcpy (p, ".+12\n\t nop\n\t");
6874       /* Skip the next insn too (.+12 becomes .+16) if requested or
6875          if we know that it will be a nop.  */
6876       if (annul || ! final_sequence)
6877         p[3] = '6';
6878       p += 12;
6879       if (veryfar)
6880         {
6881           strcpy (p, "b\t");
6882           p += 2;
6883         }
6884       else
6885         {
6886           strcpy (p, "ba,pt\t%%xcc, ");
6887           p += 13;
6888         }
6889     }
6890   *p++ = '%';
6891   *p++ = 'l';
6892   *p++ = '0' + label;
6893   *p++ = '%';
6894   *p++ = '#';
6895   *p = '\0';
6896
6897   return string;
6898 }
6899
6900 /* Return 1 if the rtx at *WHERE uses %l[0-7] or %o[0-7], or one of the
6901    %fp-based addresses rejected below.  Such instructions cannot be used in
6902    the delay slot of return insn on v9.  If TEST is 0, also rename, in
6903    place, all %i[0-7] registers to their %o[0-7] counterparts.  */
6904
6905 static int
6906 epilogue_renumber (register rtx *where, int test)
6907 {
6908   register const char *fmt;
6909   register int i;
6910   register enum rtx_code code;
6911
6912   if (*where == 0)
6913     return 0;
6914
6915   code = GET_CODE (*where);
6916
6917   switch (code)
6918     {
6919     case REG:
6920       if (REGNO (*where) >= 8 && REGNO (*where) < 24)      /* oX or lX */
6921         return 1;
6922       if (! test && REGNO (*where) >= 24 && REGNO (*where) < 32)  /* iX */
6923         *where = gen_rtx_REG (GET_MODE (*where), OUTGOING_REGNO (REGNO(*where)));
6924     case SCRATCH:  /* The REG case falls through to here.  */
6925     case CC0:
6926     case PC:
6927     case CONST_INT:
6928     case CONST_DOUBLE:
6929       return 0;
6930
6931       /* Do not replace the frame pointer with the stack pointer because
6932          it can cause the delayed instruction to load below the stack.
6933          This occurs when instructions like:
6934
6935          (set (reg/i:SI 24 %i0)
6936              (mem/f:SI (plus:SI (reg/f:SI 30 %fp)
6937                        (const_int -20 [0xffffffec])) 0))
6938
6939          are in the return delayed slot.  */
6940     case PLUS:
6941       if (GET_CODE (XEXP (*where, 0)) == REG
6942           && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM
6943           && (GET_CODE (XEXP (*where, 1)) != CONST_INT
6944               || INTVAL (XEXP (*where, 1)) < SPARC_STACK_BIAS))
6945         return 1;
6946       break;
6947
6948     case MEM:
6949       if (SPARC_STACK_BIAS
6950           && GET_CODE (XEXP (*where, 0)) == REG
6951           && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM)
6952         return 1;
6953       break;
6954
6955     default:
6956       break;
6957     }
6958   /* Otherwise recurse into every sub-expression and vector element.  */
6959   fmt = GET_RTX_FORMAT (code);
6960
6961   for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
6962     {
6963       if (fmt[i] == 'E')
6964         {
6965           register int j;
6966           for (j = XVECLEN (*where, i) - 1; j >= 0; j--)
6967             if (epilogue_renumber (&(XVECEXP (*where, i, j)), test))
6968               return 1;
6969         }
6970       else if (fmt[i] == 'e'
6971                && epilogue_renumber (&(XEXP (*where, i)), test))
6972         return 1;
6973     }
6974   return 0;
6975 }
6976 \f
6977 /* Allocation orders: leaf and non-leaf functions have different needs.  */
6978
6979 static const int
6980 reg_leaf_alloc_order[] = REG_LEAF_ALLOC_ORDER;
6981
6982 static const int
6983 reg_nonleaf_alloc_order[] = REG_ALLOC_ORDER;
6984
6985 static const int *const reg_alloc_orders[] = {
6986   reg_leaf_alloc_order,  /* [0]: the leaf order.  */
6987   reg_nonleaf_alloc_order};  /* [1]: the non-leaf order.  */
6988
6989 void
6990 order_regs_for_local_alloc (void)
6991 {
6992   static int last_order_nonleaf = 1;
6993
6994   /* Nothing to do while the liveness of hard register 15 is unchanged.  */
6995   if (df_regs_ever_live_p (15) == last_order_nonleaf)
6996     return;
6997   last_order_nonleaf = !last_order_nonleaf;
6998   memcpy ((char *) reg_alloc_order,
6999           (const char *) reg_alloc_orders[last_order_nonleaf],
7000           FIRST_PSEUDO_REGISTER * sizeof (int));
7001 }
7002 \f
7003 /* Return 1 if REG and MEM are legitimate enough to allow the various
7004    mem<-->reg splits to be run.  */
7005
7006 int
7007 sparc_splitdi_legitimate (rtx reg, rtx mem)
7008 {
7009   /* Punt if we are here by mistake.  */
7010   gcc_assert (reload_completed);
7011
7012   /* We must have an offsettable memory reference.  */
7013   if (! offsettable_memref_p (mem))
7014     return 0;
7015
7016   /* If we have legitimate args for ldd/std, we do not want
7017      the split to happen.  */
7018   if ((REGNO (reg) % 2) == 0
7019       && mem_min_alignment (mem, 8))
7020     return 0;
7021
7022   /* Success.  */
7023   return 1;
7024 }
7025
7026 /* Return 1 if x and y are some kind of REG and they refer to
7027    different hard registers.  This test is guaranteed to be
7028    run after reload.  */
7029
7030 int
7031 sparc_absnegfloat_split_legitimate (rtx x, rtx y)
7032 {
7033   if (GET_CODE (x) != REG)
7034     return 0;
7035   if (GET_CODE (y) != REG)
7036     return 0;
7037   if (REGNO (x) == REGNO (y))
7038     return 0;
7039   return 1;
7040 }
7041
7042 /* Return 1 if REGNO (reg1) is even and REGNO (reg1) == REGNO (reg2) - 1.
7043    This makes them candidates for using ldd and std insns.
7044
7045    Note reg1 and reg2 *must* be hard registers.  */
7046
7047 int
7048 registers_ok_for_ldd_peep (rtx reg1, rtx reg2)
7049 {
7050   /* We might have been passed a SUBREG.  */
7051   if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
7052     return 0;
7053
7054   if (REGNO (reg1) % 2 != 0)
7055     return 0;
7056
7057   /* Integer ldd is deprecated in SPARC V9 */
7058   if (TARGET_V9 && REGNO (reg1) < 32)
7059     return 0;
7060
7061   return (REGNO (reg1) == REGNO (reg2) - 1);
7062 }
7063
7064 /* Return 1 if the addresses in mem1 and mem2 are suitable for use in
7065    an ldd or std insn.
7066
7067    This can only happen when addr1 and addr2, the addresses in mem1
7068    and mem2, are consecutive memory locations (addr1 + 4 == addr2).
7069    addr1 must also be aligned on a 64-bit boundary.
7070
7071    Also iff dependent_reg_rtx is not null it should not be used to
7072    compute the address for mem1, i.e. we cannot optimize a sequence
7073    like:
7074         ld [%o0], %o0
7075         ld [%o0 + 4], %o1
7076    to
7077         ldd [%o0], %o0
7078    nor:
7079         ld [%g3 + 4], %g3
7080         ld [%g3], %g2
7081    to
7082         ldd [%g3], %g2
7083
7084    But, note that the transformation from:
7085         ld [%g2 + 4], %g3
7086         ld [%g2], %g2
7087    to
7088         ldd [%g2], %g2
7089    is perfectly fine.  Thus, the peephole2 patterns always pass us
7090    the destination register of the first load, never the second one.
7091
7092    For stores we don't have a similar problem, so dependent_reg_rtx is
7093    NULL_RTX.  */
7094
7095 int
7096 mems_ok_for_ldd_peep (rtx mem1, rtx mem2, rtx dependent_reg_rtx)
7097 {
7098   rtx addr1, addr2;
7099   unsigned int reg1;
7100   HOST_WIDE_INT offset1;
7101
7102   /* The mems cannot be volatile.  */
7103   if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
7104     return 0;
7105
7106   /* MEM1 should be aligned on a 64-bit boundary.  */
7107   if (MEM_ALIGN (mem1) < 64)
7108     return 0;
7109
7110   addr1 = XEXP (mem1, 0);
7111   addr2 = XEXP (mem2, 0);
7112
7113   /* Extract a register number and offset (if used) from the first addr.  */
7114   if (GET_CODE (addr1) == PLUS)
7115     {
7116       /* If not a REG, return zero.  */
7117       if (GET_CODE (XEXP (addr1, 0)) != REG)
7118         return 0;
7119       else
7120         {
7121           reg1 = REGNO (XEXP (addr1, 0));
7122           /* The offset must be constant!  */
7123           if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
7124             return 0;
7125           offset1 = INTVAL (XEXP (addr1, 1));
7126         }
7127     }
7128   else if (GET_CODE (addr1) != REG)
7129     return 0;
7130   else
7131     {
7132       reg1 = REGNO (addr1);
7133       /* This was a simple (mem (reg)) expression.  Offset is 0.  */
7134       offset1 = 0;
7135     }
7136
7137   /* Make sure the second address is a (mem (plus (reg) (const_int).  */
7138   if (GET_CODE (addr2) != PLUS)
7139     return 0;
7140
7141   if (GET_CODE (XEXP (addr2, 0)) != REG
7142       || GET_CODE (XEXP (addr2, 1)) != CONST_INT)
7143     return 0;
7144
7145   if (reg1 != REGNO (XEXP (addr2, 0)))
7146     return 0;
7147
7148   if (dependent_reg_rtx != NULL_RTX && reg1 == REGNO (dependent_reg_rtx))
7149     return 0;
7150
7151   /* The first offset must be evenly divisible by 8 to ensure the
7152      address is 64 bit aligned.  */
7153   if (offset1 % 8 != 0)
7154     return 0;
7155
7156   /* The offset for the second addr must be 4 more than the first addr.  */
7157   if (INTVAL (XEXP (addr2, 1)) != offset1 + 4)
7158     return 0;
7159
7160   /* All the tests passed.  addr1 and addr2 are valid for ldd and std
7161      instructions.  */
7162   return 1;
7163 }
7164
7165 /* Return 1 if reg is a pseudo, or is the first register in
7166    a hard register pair.  This makes it suitable for use in
7167    ldd and std insns.  */
7168
7169 int
7170 register_ok_for_ldd (rtx reg)
7171 {
7172   /* We might have been passed a SUBREG.  */
7173   if (!REG_P (reg))
7174     return 0;
7175
7176   if (REGNO (reg) < FIRST_PSEUDO_REGISTER)
7177     return (REGNO (reg) % 2 == 0);
7178
7179   return 1;
7180 }
7181
7182 /* Return 1 if OP is a memory whose address is known to be
7183    aligned to 8-byte boundary, or a pseudo during reload.
7184    This makes it suitable for use in ldd and std insns.  */
7185
7186 int
7187 memory_ok_for_ldd (rtx op)
7188 {
7189   if (MEM_P (op))
7190     {
7191       /* In 64-bit mode, we assume that the address is word-aligned.  */
7192       if (TARGET_ARCH32 && !mem_min_alignment (op, 8))
7193         return 0;
7194
7195       if ((reload_in_progress || reload_completed)
7196           && !strict_memory_address_p (Pmode, XEXP (op, 0)))
7197         return 0;
7198     }
7199   else if (REG_P (op) && REGNO (op) >= FIRST_PSEUDO_REGISTER)
7200     {
7201       if (!(reload_in_progress && reg_renumber [REGNO (op)] < 0))
7202         return 0;
7203     }
7204   else
7205     return 0;
7206
7207   return 1;
7208 }
7209 \f
7210 /* Print operand X (an rtx) in assembler syntax to file FILE.
7211    CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
7212    For `%' followed by punctuation, CODE is the punctuation and X is null.  */
7213
7214 void
7215 print_operand (FILE *file, rtx x, int code)
7216 {
7217   switch (code)
7218     {
7219     case '#':
7220       /* Output an insn in a delay slot.  */
7221       if (final_sequence)
7222         sparc_indent_opcode = 1;
7223       else
7224         fputs ("\n\t nop", file);
7225       return;
7226     case '*':
7227       /* Output an annul flag if there's nothing for the delay slot and we
7228          are optimizing.  This is always used with '(' below.
7229          Sun OS 4.1.1 dbx can't handle an annulled unconditional branch;
7230          this is a dbx bug.  So, we only do this when optimizing.
7231          On UltraSPARC, a branch in a delay slot causes a pipeline flush.
7232          Always emit a nop in case the next instruction is a branch.  */
7233       if (! final_sequence && (optimize && (int)sparc_cpu < PROCESSOR_V9))
7234         fputs (",a", file);
7235       return;
7236     case '(':
7237       /* Output a 'nop' if there's nothing for the delay slot and we are
7238          not optimizing.  This is always used with '*' above.  */
7239       if (! final_sequence && ! (optimize && (int)sparc_cpu < PROCESSOR_V9))
7240         fputs ("\n\t nop", file);
7241       else if (final_sequence)
7242         sparc_indent_opcode = 1;
7243       return;
7244     case ')':
7245       /* Output the right displacement from the saved PC on function return.
7246          The caller may have placed an "unimp" insn immediately after the call
7247          so we have to account for it.  This insn is used in the 32-bit ABI
7248          when calling a function that returns a non zero-sized structure.  The
7249          64-bit ABI doesn't have it.  Be careful to have this test be the same
7250          as that for the call.  The exception is when sparc_std_struct_return
7251          is enabled, the psABI is followed exactly and the adjustment is made
7252          by the code in sparc_struct_value_rtx.  The call emitted is the same
7253          when sparc_std_struct_return is enabled. */
7254      if (!TARGET_ARCH64
7255          && cfun->returns_struct
7256          && !sparc_std_struct_return
7257          && DECL_SIZE (DECL_RESULT (current_function_decl))
7258          && TREE_CODE (DECL_SIZE (DECL_RESULT (current_function_decl)))
7259              == INTEGER_CST
7260          && !integer_zerop (DECL_SIZE (DECL_RESULT (current_function_decl))))
7261         fputs ("12", file);
7262       else
7263         fputc ('8', file);
7264       return;
7265     case '_':
7266       /* Output the Embedded Medium/Anywhere code model base register.  */
7267       fputs (EMBMEDANY_BASE_REG, file);
7268       return;
7269     case '&':
7270       /* Print some local dynamic TLS name.  */
7271       assemble_name (file, get_some_local_dynamic_name ());
7272       return;
7273
7274     case 'Y':
7275       /* Adjust the operand to take into account a RESTORE operation.  */
7276       if (GET_CODE (x) == CONST_INT)
7277         break;
7278       else if (GET_CODE (x) != REG)
7279         output_operand_lossage ("invalid %%Y operand");
7280       else if (REGNO (x) < 8)
7281         fputs (reg_names[REGNO (x)], file);
7282       else if (REGNO (x) >= 24 && REGNO (x) < 32)
7283         fputs (reg_names[REGNO (x)-16], file);
7284       else
7285         output_operand_lossage ("invalid %%Y operand");
7286       return;
7287     case 'L':
7288       /* Print out the low order register name of a register pair.  */
7289       if (WORDS_BIG_ENDIAN)
7290         fputs (reg_names[REGNO (x)+1], file);
7291       else
7292         fputs (reg_names[REGNO (x)], file);
7293       return;
7294     case 'H':
7295       /* Print out the high order register name of a register pair.  */
7296       if (WORDS_BIG_ENDIAN)
7297         fputs (reg_names[REGNO (x)], file);
7298       else
7299         fputs (reg_names[REGNO (x)+1], file);
7300       return;
7301     case 'R':
7302       /* Print out the second register name of a register pair or quad.
7303          I.e., R (%o0) => %o1.  */
7304       fputs (reg_names[REGNO (x)+1], file);
7305       return;
7306     case 'S':
7307       /* Print out the third register name of a register quad.
7308          I.e., S (%o0) => %o2.  */
7309       fputs (reg_names[REGNO (x)+2], file);
7310       return;
7311     case 'T':
7312       /* Print out the fourth register name of a register quad.
7313          I.e., T (%o0) => %o3.  */
7314       fputs (reg_names[REGNO (x)+3], file);
7315       return;
7316     case 'x':
7317       /* Print a condition code register.  */
7318       if (REGNO (x) == SPARC_ICC_REG)
7319         {
7320           /* We don't handle CC[X]_NOOVmode because they're not supposed
7321              to occur here.  */
7322           if (GET_MODE (x) == CCmode)
7323             fputs ("%icc", file);
7324           else if (GET_MODE (x) == CCXmode)
7325             fputs ("%xcc", file);
7326           else
7327             gcc_unreachable ();
7328         }
7329       else
7330         /* %fccN register */
7331         fputs (reg_names[REGNO (x)], file);
7332       return;
7333     case 'm':
7334       /* Print the operand's address only.  */
7335       output_address (XEXP (x, 0));
7336       return;
7337     case 'r':
7338       /* In this case we need a register.  Use %g0 if the
7339          operand is const0_rtx.  */
7340       if (x == const0_rtx
7341           || (GET_MODE (x) != VOIDmode && x == CONST0_RTX (GET_MODE (x))))
7342         {
7343           fputs ("%g0", file);
7344           return;
7345         }
7346       else
7347         break;
7348
7349     case 'A':
7350       switch (GET_CODE (x))
7351         {
7352         case IOR: fputs ("or", file); break;
7353         case AND: fputs ("and", file); break;
7354         case XOR: fputs ("xor", file); break;
7355         default: output_operand_lossage ("invalid %%A operand");
7356         }
7357       return;
7358
7359     case 'B':
7360       switch (GET_CODE (x))
7361         {
7362         case IOR: fputs ("orn", file); break;
7363         case AND: fputs ("andn", file); break;
7364         case XOR: fputs ("xnor", file); break;
7365         default: output_operand_lossage ("invalid %%B operand");
7366         }
7367       return;
7368
7369       /* These are used by the conditional move instructions.  */
7370     case 'c' :
7371     case 'C':
7372       {
7373         enum rtx_code rc = GET_CODE (x);
7374
7375         if (code == 'c')
7376           {
7377             enum machine_mode mode = GET_MODE (XEXP (x, 0));
7378             if (mode == CCFPmode || mode == CCFPEmode)
7379               rc = reverse_condition_maybe_unordered (GET_CODE (x));
7380             else
7381               rc = reverse_condition (GET_CODE (x));
7382           }
7383         switch (rc)
7384           {
7385           case NE: fputs ("ne", file); break;
7386           case EQ: fputs ("e", file); break;
7387           case GE: fputs ("ge", file); break;
7388           case GT: fputs ("g", file); break;
7389           case LE: fputs ("le", file); break;
7390           case LT: fputs ("l", file); break;
7391           case GEU: fputs ("geu", file); break;
7392           case GTU: fputs ("gu", file); break;
7393           case LEU: fputs ("leu", file); break;
7394           case LTU: fputs ("lu", file); break;
7395           case LTGT: fputs ("lg", file); break;
7396           case UNORDERED: fputs ("u", file); break;
7397           case ORDERED: fputs ("o", file); break;
7398           case UNLT: fputs ("ul", file); break;
7399           case UNLE: fputs ("ule", file); break;
7400           case UNGT: fputs ("ug", file); break;
7401           case UNGE: fputs ("uge", file); break;
7402           case UNEQ: fputs ("ue", file); break;
7403           default: output_operand_lossage (code == 'c'
7404                                            ? "invalid %%c operand"
7405                                            : "invalid %%C operand");
7406           }
7407         return;
7408       }
7409
7410       /* These are used by the movr instruction pattern.  */
7411     case 'd':
7412     case 'D':
7413       {
7414         enum rtx_code rc = (code == 'd'
7415                             ? reverse_condition (GET_CODE (x))
7416                             : GET_CODE (x));
7417         switch (rc)
7418           {
7419           case NE: fputs ("ne", file); break;
7420           case EQ: fputs ("e", file); break;
7421           case GE: fputs ("gez", file); break;
7422           case LT: fputs ("lz", file); break;
7423           case LE: fputs ("lez", file); break;
7424           case GT: fputs ("gz", file); break;
7425           default: output_operand_lossage (code == 'd'
7426                                            ? "invalid %%d operand"
7427                                            : "invalid %%D operand");
7428           }
7429         return;
7430       }
7431
7432     case 'b':
7433       {
7434         /* Print a sign-extended character.  */
7435         int i = trunc_int_for_mode (INTVAL (x), QImode);
7436         fprintf (file, "%d", i);
7437         return;
7438       }
7439
7440     case 'f':
7441       /* Operand must be a MEM; write its address.  */
7442       if (GET_CODE (x) != MEM)
7443         output_operand_lossage ("invalid %%f operand");
7444       output_address (XEXP (x, 0));
7445       return;
7446
7447     case 's':
7448       {
7449         /* Print a sign-extended 32-bit value.  */
7450         HOST_WIDE_INT i;
7451         if (GET_CODE(x) == CONST_INT)
7452           i = INTVAL (x);
7453         else if (GET_CODE(x) == CONST_DOUBLE)
7454           i = CONST_DOUBLE_LOW (x);
7455         else
7456           {
7457             output_operand_lossage ("invalid %%s operand");
7458             return;
7459           }
7460         i = trunc_int_for_mode (i, SImode);
7461         fprintf (file, HOST_WIDE_INT_PRINT_DEC, i);
7462         return;
7463       }
7464
7465     case 0:
7466       /* Do nothing special.  */
7467       break;
7468
7469     default:
7470       /* Undocumented flag.  */
7471       output_operand_lossage ("invalid operand output code");
7472     }
7473
7474   if (GET_CODE (x) == REG)
7475     fputs (reg_names[REGNO (x)], file);
7476   else if (GET_CODE (x) == MEM)
7477     {
7478       fputc ('[', file);
7479         /* Poor Sun assembler doesn't understand absolute addressing.  */
7480       if (CONSTANT_P (XEXP (x, 0)))
7481         fputs ("%g0+", file);
7482       output_address (XEXP (x, 0));
7483       fputc (']', file);
7484     }
7485   else if (GET_CODE (x) == HIGH)
7486     {
7487       fputs ("%hi(", file);
7488       output_addr_const (file, XEXP (x, 0));
7489       fputc (')', file);
7490     }
7491   else if (GET_CODE (x) == LO_SUM)
7492     {
7493       print_operand (file, XEXP (x, 0), 0);
7494       if (TARGET_CM_MEDMID)
7495         fputs ("+%l44(", file);
7496       else
7497         fputs ("+%lo(", file);
7498       output_addr_const (file, XEXP (x, 1));
7499       fputc (')', file);
7500     }
7501   else if (GET_CODE (x) == CONST_DOUBLE
7502            && (GET_MODE (x) == VOIDmode
7503                || GET_MODE_CLASS (GET_MODE (x)) == MODE_INT))
7504     {
7505       if (CONST_DOUBLE_HIGH (x) == 0)
7506         fprintf (file, "%u", (unsigned int) CONST_DOUBLE_LOW (x));
7507       else if (CONST_DOUBLE_HIGH (x) == -1
7508                && CONST_DOUBLE_LOW (x) < 0)
7509         fprintf (file, "%d", (int) CONST_DOUBLE_LOW (x));
7510       else
7511         output_operand_lossage ("long long constant not a valid immediate operand");
7512     }
7513   else if (GET_CODE (x) == CONST_DOUBLE)
7514     output_operand_lossage ("floating point constant not a valid immediate operand");
7515   else { output_addr_const (file, x); }
7516 }
7517 \f
7518 /* Target hook for assembling integer objects.  The sparc version has
7519    special handling for aligned DI-mode objects.  */
7520
7521 static bool
7522 sparc_assemble_integer (rtx x, unsigned int size, int aligned_p)
7523 {
7524   /* ??? We only output .xword's for symbols and only then in environments
7525      where the assembler can handle them.  */
7526   if (aligned_p && size == 8
7527       && (GET_CODE (x) != CONST_INT && GET_CODE (x) != CONST_DOUBLE))
7528     {
7529       if (TARGET_V9)
7530         {
7531           assemble_integer_with_op ("\t.xword\t", x);
7532           return true;
7533         }
7534       else
7535         {
7536           assemble_aligned_integer (4, const0_rtx);
7537           assemble_aligned_integer (4, x);
7538           return true;
7539         }
7540     }
7541   return default_assemble_integer (x, size, aligned_p);
7542 }
7543 \f
7544 /* Return the value of a code used in the .proc pseudo-op that says
7545    what kind of result this function returns.  For non-C types, we pick
7546    the closest C type.  */
7547
/* Fallback widths, in bits, of the standard C types.  Each definition
   only takes effect when the macro has not been defined already.  */

#if !defined (SHORT_TYPE_SIZE)
# define SHORT_TYPE_SIZE (BITS_PER_UNIT * 2)
#endif

#if !defined (INT_TYPE_SIZE)
# define INT_TYPE_SIZE BITS_PER_WORD
#endif

#if !defined (LONG_TYPE_SIZE)
# define LONG_TYPE_SIZE BITS_PER_WORD
#endif

#if !defined (LONG_LONG_TYPE_SIZE)
# define LONG_LONG_TYPE_SIZE (BITS_PER_WORD * 2)
#endif

#if !defined (FLOAT_TYPE_SIZE)
# define FLOAT_TYPE_SIZE BITS_PER_WORD
#endif

#if !defined (DOUBLE_TYPE_SIZE)
# define DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
#endif

#if !defined (LONG_DOUBLE_TYPE_SIZE)
# define LONG_DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
#endif
7575
7576 unsigned long
7577 sparc_type_code (register tree type)
7578 {
7579   register unsigned long qualifiers = 0;
7580   register unsigned shift;
7581
7582   /* Only the first 30 bits of the qualifier are valid.  We must refrain from
7583      setting more, since some assemblers will give an error for this.  Also,
7584      we must be careful to avoid shifts of 32 bits or more to avoid getting
7585      unpredictable results.  */
7586
7587   for (shift = 6; shift < 30; shift += 2, type = TREE_TYPE (type))
7588     {
7589       switch (TREE_CODE (type))
7590         {
7591         case ERROR_MARK:
7592           return qualifiers;
7593
7594         case ARRAY_TYPE:
7595           qualifiers |= (3 << shift);
7596           break;
7597
7598         case FUNCTION_TYPE:
7599         case METHOD_TYPE:
7600           qualifiers |= (2 << shift);
7601           break;
7602
7603         case POINTER_TYPE:
7604         case REFERENCE_TYPE:
7605         case OFFSET_TYPE:
7606           qualifiers |= (1 << shift);
7607           break;
7608
7609         case RECORD_TYPE:
7610           return (qualifiers | 8);
7611
7612         case UNION_TYPE:
7613         case QUAL_UNION_TYPE:
7614           return (qualifiers | 9);
7615
7616         case ENUMERAL_TYPE:
7617           return (qualifiers | 10);
7618
7619         case VOID_TYPE:
7620           return (qualifiers | 16);
7621
7622         case INTEGER_TYPE:
7623           /* If this is a range type, consider it to be the underlying
7624              type.  */
7625           if (TREE_TYPE (type) != 0)
7626             break;
7627
7628           /* Carefully distinguish all the standard types of C,
7629              without messing up if the language is not C.  We do this by
7630              testing TYPE_PRECISION and TYPE_UNSIGNED.  The old code used to
7631              look at both the names and the above fields, but that's redundant.
7632              Any type whose size is between two C types will be considered
7633              to be the wider of the two types.  Also, we do not have a
7634              special code to use for "long long", so anything wider than
7635              long is treated the same.  Note that we can't distinguish
7636              between "int" and "long" in this code if they are the same
7637              size, but that's fine, since neither can the assembler.  */
7638
7639           if (TYPE_PRECISION (type) <= CHAR_TYPE_SIZE)
7640             return (qualifiers | (TYPE_UNSIGNED (type) ? 12 : 2));
7641
7642           else if (TYPE_PRECISION (type) <= SHORT_TYPE_SIZE)
7643             return (qualifiers | (TYPE_UNSIGNED (type) ? 13 : 3));
7644
7645           else if (TYPE_PRECISION (type) <= INT_TYPE_SIZE)
7646             return (qualifiers | (TYPE_UNSIGNED (type) ? 14 : 4));
7647
7648           else
7649             return (qualifiers | (TYPE_UNSIGNED (type) ? 15 : 5));
7650
7651         case REAL_TYPE:
7652           /* If this is a range type, consider it to be the underlying
7653              type.  */
7654           if (TREE_TYPE (type) != 0)
7655             break;
7656
7657           /* Carefully distinguish all the standard types of C,
7658              without messing up if the language is not C.  */
7659
7660           if (TYPE_PRECISION (type) == FLOAT_TYPE_SIZE)
7661             return (qualifiers | 6);
7662
7663           else
7664             return (qualifiers | 7);
7665
7666         case COMPLEX_TYPE:      /* GNU Fortran COMPLEX type.  */
7667           /* ??? We need to distinguish between double and float complex types,
7668              but I don't know how yet because I can't reach this code from
7669              existing front-ends.  */
7670           return (qualifiers | 7);      /* Who knows? */
7671
7672         case VECTOR_TYPE:
7673         case BOOLEAN_TYPE:      /* Boolean truth value type.  */
7674         case LANG_TYPE:         /* ? */
7675           return qualifiers;
7676
7677         default:
7678           gcc_unreachable ();           /* Not a type! */
7679         }
7680     }
7681
7682   return qualifiers;
7683 }
7684 \f
7685 /* Nested function support.  */
7686
7687 /* Emit RTL insns to initialize the variable parts of a trampoline.
7688    FNADDR is an RTX for the address of the function's pure code.
7689    CXT is an RTX for the static chain value for the function.
7690
7691    This takes 16 insns: 2 shifts & 2 ands (to split up addresses), 4 sethi
7692    (to load in opcodes), 4 iors (to merge address and opcodes), and 4 writes
7693    (to store insns).  This is a bit excessive.  Perhaps a different
7694    mechanism would be better here.
7695
7696    Emit enough FLUSH insns to synchronize the data and instruction caches.  */
7697
7698 static void
7699 sparc32_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
7700 {
7701   /* SPARC 32-bit trampoline:
7702
7703         sethi   %hi(fn), %g1
7704         sethi   %hi(static), %g2
7705         jmp     %g1+%lo(fn)
7706         or      %g2, %lo(static), %g2
7707
7708     SETHI i,r  = 00rr rrr1 00ii iiii iiii iiii iiii iiii
7709     JMPL r+i,d = 10dd ddd1 1100 0rrr rr1i iiii iiii iiii
7710    */
7711
7712   emit_move_insn
7713     (adjust_address (m_tramp, SImode, 0),
7714      expand_binop (SImode, ior_optab,
7715                    expand_shift (RSHIFT_EXPR, SImode, fnaddr,
7716                                  size_int (10), 0, 1),
7717                    GEN_INT (trunc_int_for_mode (0x03000000, SImode)),
7718                    NULL_RTX, 1, OPTAB_DIRECT));
7719
7720   emit_move_insn
7721     (adjust_address (m_tramp, SImode, 4),
7722      expand_binop (SImode, ior_optab,
7723                    expand_shift (RSHIFT_EXPR, SImode, cxt,
7724                                  size_int (10), 0, 1),
7725                    GEN_INT (trunc_int_for_mode (0x05000000, SImode)),
7726                    NULL_RTX, 1, OPTAB_DIRECT));
7727
7728   emit_move_insn
7729     (adjust_address (m_tramp, SImode, 8),
7730      expand_binop (SImode, ior_optab,
7731                    expand_and (SImode, fnaddr, GEN_INT (0x3ff), NULL_RTX),
7732                    GEN_INT (trunc_int_for_mode (0x81c06000, SImode)),
7733                    NULL_RTX, 1, OPTAB_DIRECT));
7734
7735   emit_move_insn
7736     (adjust_address (m_tramp, SImode, 12),
7737      expand_binop (SImode, ior_optab,
7738                    expand_and (SImode, cxt, GEN_INT (0x3ff), NULL_RTX),
7739                    GEN_INT (trunc_int_for_mode (0x8410a000, SImode)),
7740                    NULL_RTX, 1, OPTAB_DIRECT));
7741
7742   /* On UltraSPARC a flush flushes an entire cache line.  The trampoline is
7743      aligned on a 16 byte boundary so one flush clears it all.  */
7744   emit_insn (gen_flush (validize_mem (adjust_address (m_tramp, SImode, 0))));
7745   if (sparc_cpu != PROCESSOR_ULTRASPARC
7746       && sparc_cpu != PROCESSOR_ULTRASPARC3
7747       && sparc_cpu != PROCESSOR_NIAGARA
7748       && sparc_cpu != PROCESSOR_NIAGARA2)
7749     emit_insn (gen_flush (validize_mem (adjust_address (m_tramp, SImode, 8))));
7750
7751   /* Call __enable_execute_stack after writing onto the stack to make sure
7752      the stack address is accessible.  */
7753 #ifdef ENABLE_EXECUTE_STACK
7754   emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
7755                      LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
7756 #endif
7757
7758 }
7759
7760 /* The 64-bit version is simpler because it makes more sense to load the
7761    values as "immediate" data out of the trampoline.  It's also easier since
7762    we can read the PC without clobbering a register.  */
7763
7764 static void
7765 sparc64_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
7766 {
7767   /* SPARC 64-bit trampoline:
7768
7769         rd      %pc, %g1
7770         ldx     [%g1+24], %g5
7771         jmp     %g5
7772         ldx     [%g1+16], %g5
7773         +16 bytes data
7774    */
7775
7776   emit_move_insn (adjust_address (m_tramp, SImode, 0),
7777                   GEN_INT (trunc_int_for_mode (0x83414000, SImode)));
7778   emit_move_insn (adjust_address (m_tramp, SImode, 4),
7779                   GEN_INT (trunc_int_for_mode (0xca586018, SImode)));
7780   emit_move_insn (adjust_address (m_tramp, SImode, 8),
7781                   GEN_INT (trunc_int_for_mode (0x81c14000, SImode)));
7782   emit_move_insn (adjust_address (m_tramp, SImode, 12),
7783                   GEN_INT (trunc_int_for_mode (0xca586010, SImode)));
7784   emit_move_insn (adjust_address (m_tramp, DImode, 16), cxt);
7785   emit_move_insn (adjust_address (m_tramp, DImode, 24), fnaddr);
7786   emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 0))));
7787
7788   if (sparc_cpu != PROCESSOR_ULTRASPARC
7789       && sparc_cpu != PROCESSOR_ULTRASPARC3
7790       && sparc_cpu != PROCESSOR_NIAGARA
7791       && sparc_cpu != PROCESSOR_NIAGARA2)
7792     emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 8))));
7793
7794   /* Call __enable_execute_stack after writing onto the stack to make sure
7795      the stack address is accessible.  */
7796 #ifdef ENABLE_EXECUTE_STACK
7797   emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
7798                      LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
7799 #endif
7800 }
7801
7802 /* Worker for TARGET_TRAMPOLINE_INIT.  */
7803
7804 static void
7805 sparc_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
7806 {
7807   rtx fnaddr = force_reg (Pmode, XEXP (DECL_RTL (fndecl), 0));
7808   cxt = force_reg (Pmode, cxt);
7809   if (TARGET_ARCH64)
7810     sparc64_initialize_trampoline (m_tramp, fnaddr, cxt);
7811   else
7812     sparc32_initialize_trampoline (m_tramp, fnaddr, cxt);
7813 }
7814 \f
7815 /* Adjust the cost of a scheduling dependency.  Return the new cost of
7816    a dependency LINK or INSN on DEP_INSN.  COST is the current cost.  */
7817
7818 static int
7819 supersparc_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
7820 {
7821   enum attr_type insn_type;
7822
7823   if (! recog_memoized (insn))
7824     return 0;
7825
7826   insn_type = get_attr_type (insn);
7827
7828   if (REG_NOTE_KIND (link) == 0)
7829     {
7830       /* Data dependency; DEP_INSN writes a register that INSN reads some
7831          cycles later.  */
7832
7833       /* if a load, then the dependence must be on the memory address;
7834          add an extra "cycle".  Note that the cost could be two cycles
7835          if the reg was written late in an instruction group; we ca not tell
7836          here.  */
7837       if (insn_type == TYPE_LOAD || insn_type == TYPE_FPLOAD)
7838         return cost + 3;
7839
7840       /* Get the delay only if the address of the store is the dependence.  */
7841       if (insn_type == TYPE_STORE || insn_type == TYPE_FPSTORE)
7842         {
7843           rtx pat = PATTERN(insn);
7844           rtx dep_pat = PATTERN (dep_insn);
7845
7846           if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
7847             return cost;  /* This should not happen!  */
7848
7849           /* The dependency between the two instructions was on the data that
7850              is being stored.  Assume that this implies that the address of the
7851              store is not dependent.  */
7852           if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
7853             return cost;
7854
7855           return cost + 3;  /* An approximation.  */
7856         }
7857
7858       /* A shift instruction cannot receive its data from an instruction
7859          in the same cycle; add a one cycle penalty.  */
7860       if (insn_type == TYPE_SHIFT)
7861         return cost + 3;   /* Split before cascade into shift.  */
7862     }
7863   else
7864     {
7865       /* Anti- or output- dependency; DEP_INSN reads/writes a register that
7866          INSN writes some cycles later.  */
7867
7868       /* These are only significant for the fpu unit; writing a fp reg before
7869          the fpu has finished with it stalls the processor.  */
7870
7871       /* Reusing an integer register causes no problems.  */
7872       if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
7873         return 0;
7874     }
7875
7876   return cost;
7877 }
7878
7879 static int
7880 hypersparc_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
7881 {
7882   enum attr_type insn_type, dep_type;
7883   rtx pat = PATTERN(insn);
7884   rtx dep_pat = PATTERN (dep_insn);
7885
7886   if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
7887     return cost;
7888
7889   insn_type = get_attr_type (insn);
7890   dep_type = get_attr_type (dep_insn);
7891
7892   switch (REG_NOTE_KIND (link))
7893     {
7894     case 0:
7895       /* Data dependency; DEP_INSN writes a register that INSN reads some
7896          cycles later.  */
7897
7898       switch (insn_type)
7899         {
7900         case TYPE_STORE:
7901         case TYPE_FPSTORE:
7902           /* Get the delay iff the address of the store is the dependence.  */
7903           if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
7904             return cost;
7905
7906           if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
7907             return cost;
7908           return cost + 3;
7909
7910         case TYPE_LOAD:
7911         case TYPE_SLOAD:
7912         case TYPE_FPLOAD:
7913           /* If a load, then the dependence must be on the memory address.  If
7914              the addresses aren't equal, then it might be a false dependency */
7915           if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE)
7916             {
7917               if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET
7918                   || GET_CODE (SET_DEST (dep_pat)) != MEM
7919                   || GET_CODE (SET_SRC (pat)) != MEM
7920                   || ! rtx_equal_p (XEXP (SET_DEST (dep_pat), 0),
7921                                     XEXP (SET_SRC (pat), 0)))
7922                 return cost + 2;
7923
7924               return cost + 8;
7925             }
7926           break;
7927
7928         case TYPE_BRANCH:
7929           /* Compare to branch latency is 0.  There is no benefit from
7930              separating compare and branch.  */
7931           if (dep_type == TYPE_COMPARE)
7932             return 0;
7933           /* Floating point compare to branch latency is less than
7934              compare to conditional move.  */
7935           if (dep_type == TYPE_FPCMP)
7936             return cost - 1;
7937           break;
7938         default:
7939           break;
7940         }
7941         break;
7942
7943     case REG_DEP_ANTI:
7944       /* Anti-dependencies only penalize the fpu unit.  */
7945       if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
7946         return 0;
7947       break;
7948
7949     default:
7950       break;
7951     }
7952
7953   return cost;
7954 }
7955
7956 static int
7957 sparc_adjust_cost(rtx insn, rtx link, rtx dep, int cost)
7958 {
7959   switch (sparc_cpu)
7960     {
7961     case PROCESSOR_SUPERSPARC:
7962       cost = supersparc_adjust_cost (insn, link, dep, cost);
7963       break;
7964     case PROCESSOR_HYPERSPARC:
7965     case PROCESSOR_SPARCLITE86X:
7966       cost = hypersparc_adjust_cost (insn, link, dep, cost);
7967       break;
7968     default:
7969       break;
7970     }
7971   return cost;
7972 }
7973
7974 static void
7975 sparc_sched_init (FILE *dump ATTRIBUTE_UNUSED,
7976                   int sched_verbose ATTRIBUTE_UNUSED,
7977                   int max_ready ATTRIBUTE_UNUSED)
7978 {}
7979
7980 static int
7981 sparc_use_sched_lookahead (void)
7982 {
7983   if (sparc_cpu == PROCESSOR_NIAGARA
7984       || sparc_cpu == PROCESSOR_NIAGARA2)
7985     return 0;
7986   if (sparc_cpu == PROCESSOR_ULTRASPARC
7987       || sparc_cpu == PROCESSOR_ULTRASPARC3)
7988     return 4;
7989   if ((1 << sparc_cpu) &
7990       ((1 << PROCESSOR_SUPERSPARC) | (1 << PROCESSOR_HYPERSPARC) |
7991        (1 << PROCESSOR_SPARCLITE86X)))
7992     return 3;
7993   return 0;
7994 }
7995
7996 static int
7997 sparc_issue_rate (void)
7998 {
7999   switch (sparc_cpu)
8000     {
8001     case PROCESSOR_NIAGARA:
8002     case PROCESSOR_NIAGARA2:
8003     default:
8004       return 1;
8005     case PROCESSOR_V9:
8006       /* Assume V9 processors are capable of at least dual-issue.  */
8007       return 2;
8008     case PROCESSOR_SUPERSPARC:
8009       return 3;
8010     case PROCESSOR_HYPERSPARC:
8011     case PROCESSOR_SPARCLITE86X:
8012       return 2;
8013     case PROCESSOR_ULTRASPARC:
8014     case PROCESSOR_ULTRASPARC3:
8015       return 4;
8016     }
8017 }
8018
8019 static int
8020 set_extends (rtx insn)
8021 {
8022   register rtx pat = PATTERN (insn);
8023
8024   switch (GET_CODE (SET_SRC (pat)))
8025     {
8026       /* Load and some shift instructions zero extend.  */
8027     case MEM:
8028     case ZERO_EXTEND:
8029       /* sethi clears the high bits */
8030     case HIGH:
8031       /* LO_SUM is used with sethi.  sethi cleared the high
8032          bits and the values used with lo_sum are positive */
8033     case LO_SUM:
8034       /* Store flag stores 0 or 1 */
8035     case LT: case LTU:
8036     case GT: case GTU:
8037     case LE: case LEU:
8038     case GE: case GEU:
8039     case EQ:
8040     case NE:
8041       return 1;
8042     case AND:
8043       {
8044         rtx op0 = XEXP (SET_SRC (pat), 0);
8045         rtx op1 = XEXP (SET_SRC (pat), 1);
8046         if (GET_CODE (op1) == CONST_INT)
8047           return INTVAL (op1) >= 0;
8048         if (GET_CODE (op0) != REG)
8049           return 0;
8050         if (sparc_check_64 (op0, insn) == 1)
8051           return 1;
8052         return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
8053       }
8054     case IOR:
8055     case XOR:
8056       {
8057         rtx op0 = XEXP (SET_SRC (pat), 0);
8058         rtx op1 = XEXP (SET_SRC (pat), 1);
8059         if (GET_CODE (op0) != REG || sparc_check_64 (op0, insn) <= 0)
8060           return 0;
8061         if (GET_CODE (op1) == CONST_INT)
8062           return INTVAL (op1) >= 0;
8063         return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
8064       }
8065     case LSHIFTRT:
8066       return GET_MODE (SET_SRC (pat)) == SImode;
8067       /* Positive integers leave the high bits zero.  */
8068     case CONST_DOUBLE:
8069       return ! (CONST_DOUBLE_LOW (SET_SRC (pat)) & 0x80000000);
8070     case CONST_INT:
8071       return ! (INTVAL (SET_SRC (pat)) & 0x80000000);
8072     case ASHIFTRT:
8073     case SIGN_EXTEND:
8074       return - (GET_MODE (SET_SRC (pat)) == SImode);
8075     case REG:
8076       return sparc_check_64 (SET_SRC (pat), insn);
8077     default:
8078       return 0;
8079     }
8080 }
8081
8082 /* We _ought_ to have only one kind per function, but...  */
8083 static GTY(()) rtx sparc_addr_diff_list;
8084 static GTY(()) rtx sparc_addr_list;
8085
8086 void
8087 sparc_defer_case_vector (rtx lab, rtx vec, int diff)
8088 {
8089   vec = gen_rtx_EXPR_LIST (VOIDmode, lab, vec);
8090   if (diff)
8091     sparc_addr_diff_list
8092       = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_diff_list);
8093   else
8094     sparc_addr_list = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_list);
8095 }
8096
8097 static void
8098 sparc_output_addr_vec (rtx vec)
8099 {
8100   rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
8101   int idx, vlen = XVECLEN (body, 0);
8102
8103 #ifdef ASM_OUTPUT_ADDR_VEC_START
8104   ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
8105 #endif
8106
8107 #ifdef ASM_OUTPUT_CASE_LABEL
8108   ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
8109                          NEXT_INSN (lab));
8110 #else
8111   (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
8112 #endif
8113
8114   for (idx = 0; idx < vlen; idx++)
8115     {
8116       ASM_OUTPUT_ADDR_VEC_ELT
8117         (asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
8118     }
8119
8120 #ifdef ASM_OUTPUT_ADDR_VEC_END
8121   ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
8122 #endif
8123 }
8124
8125 static void
8126 sparc_output_addr_diff_vec (rtx vec)
8127 {
8128   rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
8129   rtx base = XEXP (XEXP (body, 0), 0);
8130   int idx, vlen = XVECLEN (body, 1);
8131
8132 #ifdef ASM_OUTPUT_ADDR_VEC_START
8133   ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
8134 #endif
8135
8136 #ifdef ASM_OUTPUT_CASE_LABEL
8137   ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
8138                          NEXT_INSN (lab));
8139 #else
8140   (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
8141 #endif
8142
8143   for (idx = 0; idx < vlen; idx++)
8144     {
8145       ASM_OUTPUT_ADDR_DIFF_ELT
8146         (asm_out_file,
8147          body,
8148          CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
8149          CODE_LABEL_NUMBER (base));
8150     }
8151
8152 #ifdef ASM_OUTPUT_ADDR_VEC_END
8153   ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
8154 #endif
8155 }
8156
8157 static void
8158 sparc_output_deferred_case_vectors (void)
8159 {
8160   rtx t;
8161   int align;
8162
8163   if (sparc_addr_list == NULL_RTX
8164       && sparc_addr_diff_list == NULL_RTX)
8165     return;
8166
8167   /* Align to cache line in the function's code section.  */
8168   switch_to_section (current_function_section ());
8169
8170   align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
8171   if (align > 0)
8172     ASM_OUTPUT_ALIGN (asm_out_file, align);
8173
8174   for (t = sparc_addr_list; t ; t = XEXP (t, 1))
8175     sparc_output_addr_vec (XEXP (t, 0));
8176   for (t = sparc_addr_diff_list; t ; t = XEXP (t, 1))
8177     sparc_output_addr_diff_vec (XEXP (t, 0));
8178
8179   sparc_addr_list = sparc_addr_diff_list = NULL_RTX;
8180 }
8181
8182 /* Return 0 if the high 32 bits of X (the low word of X, if DImode) are
8183    unknown.  Return 1 if the high bits are zero, -1 if the register is
8184    sign extended.  */
8185 int
8186 sparc_check_64 (rtx x, rtx insn)
8187 {
8188   /* If a register is set only once it is safe to ignore insns this
8189      code does not know how to handle.  The loop will either recognize
8190      the single set and return the correct value or fail to recognize
8191      it and return 0.  */
8192   int set_once = 0;
8193   rtx y = x;
8194
8195   gcc_assert (GET_CODE (x) == REG);
8196
8197   if (GET_MODE (x) == DImode)
8198     y = gen_rtx_REG (SImode, REGNO (x) + WORDS_BIG_ENDIAN);
8199
8200   if (flag_expensive_optimizations
8201       && df && DF_REG_DEF_COUNT (REGNO (y)) == 1)
8202     set_once = 1;
8203
8204   if (insn == 0)
8205     {
8206       if (set_once)
8207         insn = get_last_insn_anywhere ();
8208       else
8209         return 0;
8210     }
8211
8212   while ((insn = PREV_INSN (insn)))
8213     {
8214       switch (GET_CODE (insn))
8215         {
8216         case JUMP_INSN:
8217         case NOTE:
8218           break;
8219         case CODE_LABEL:
8220         case CALL_INSN:
8221         default:
8222           if (! set_once)
8223             return 0;
8224           break;
8225         case INSN:
8226           {
8227             rtx pat = PATTERN (insn);
8228             if (GET_CODE (pat) != SET)
8229               return 0;
8230             if (rtx_equal_p (x, SET_DEST (pat)))
8231               return set_extends (insn);
8232             if (y && rtx_equal_p (y, SET_DEST (pat)))
8233               return set_extends (insn);
8234             if (reg_overlap_mentioned_p (SET_DEST (pat), y))
8235               return 0;
8236           }
8237         }
8238     }
8239   return 0;
8240 }
8241
8242 /* Returns assembly code to perform a DImode shift using
8243    a 64-bit global or out register on SPARC-V8+.  */
8244 const char *
8245 output_v8plus_shift (rtx *operands, rtx insn, const char *opcode)
8246 {
8247   static char asm_code[60];
8248
8249   /* The scratch register is only required when the destination
8250      register is not a 64-bit global or out register.  */
8251   if (which_alternative != 2)
8252     operands[3] = operands[0];
8253
8254   /* We can only shift by constants <= 63. */
8255   if (GET_CODE (operands[2]) == CONST_INT)
8256     operands[2] = GEN_INT (INTVAL (operands[2]) & 0x3f);
8257
8258   if (GET_CODE (operands[1]) == CONST_INT)
8259     {
8260       output_asm_insn ("mov\t%1, %3", operands);
8261     }
8262   else
8263     {
8264       output_asm_insn ("sllx\t%H1, 32, %3", operands);
8265       if (sparc_check_64 (operands[1], insn) <= 0)
8266         output_asm_insn ("srl\t%L1, 0, %L1", operands);
8267       output_asm_insn ("or\t%L1, %3, %3", operands);
8268     }
8269
8270   strcpy(asm_code, opcode);
8271
8272   if (which_alternative != 2)
8273     return strcat (asm_code, "\t%0, %2, %L0\n\tsrlx\t%L0, 32, %H0");
8274   else
8275     return strcat (asm_code, "\t%3, %2, %3\n\tsrlx\t%3, 32, %H0\n\tmov\t%3, %L0");
8276 }
8277 \f
8278 /* Output rtl to increment the profiler label LABELNO
8279    for profiling a function entry.  */
8280
8281 void
8282 sparc_profile_hook (int labelno)
8283 {
8284   char buf[32];
8285   rtx lab, fun;
8286
8287   fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_FUNCTION);
8288   if (NO_PROFILE_COUNTERS)
8289     {
8290       emit_library_call (fun, LCT_NORMAL, VOIDmode, 0);
8291     }
8292   else
8293     {
8294       ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
8295       lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
8296       emit_library_call (fun, LCT_NORMAL, VOIDmode, 1, lab, Pmode);
8297     }
8298 }
8299 \f
8300 /* Solaris implementation of TARGET_ASM_NAMED_SECTION.  */
8301
8302 static void
8303 sparc_solaris_elf_asm_named_section (const char *name, unsigned int flags,
8304                                      tree decl ATTRIBUTE_UNUSED)
8305 {
8306   fprintf (asm_out_file, "\t.section\t\"%s\"", name);
8307
8308   if (!(flags & SECTION_DEBUG))
8309     fputs (",#alloc", asm_out_file);
8310   if (flags & SECTION_WRITE)
8311     fputs (",#write", asm_out_file);
8312   if (flags & SECTION_TLS)
8313     fputs (",#tls", asm_out_file);
8314   if (flags & SECTION_CODE)
8315     fputs (",#execinstr", asm_out_file);
8316
8317   /* ??? Handle SECTION_BSS.  */
8318
8319   fputc ('\n', asm_out_file);
8320 }
8321
8322 /* We do not allow indirect calls to be optimized into sibling calls.
8323
8324    We cannot use sibling calls when delayed branches are disabled
8325    because they will likely require the call delay slot to be filled.
8326
8327    Also, on SPARC 32-bit we cannot emit a sibling call when the
8328    current function returns a structure.  This is because the "unimp
8329    after call" convention would cause the callee to return to the
8330    wrong place.  The generic code already disallows cases where the
8331    function being called returns a structure.
8332
8333    It may seem strange how this last case could occur.  Usually there
8334    is code after the call which jumps to epilogue code which dumps the
8335    return value into the struct return area.  That ought to invalidate
8336    the sibling call right?  Well, in the C++ case we can end up passing
8337    the pointer to the struct return area to a constructor (which returns
8338    void) and then nothing else happens.  Such a sibling call would look
8339    valid without the added check here.
8340
8341    VxWorks PIC PLT entries require the global pointer to be initialized
8342    on entry.  We therefore can't emit sibling calls to them.  */
8343 static bool
8344 sparc_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
8345 {
8346   return (decl
8347           && flag_delayed_branch
8348           && (TARGET_ARCH64 || ! cfun->returns_struct)
8349           && !(TARGET_VXWORKS_RTP
8350                && flag_pic
8351                && !targetm.binds_local_p (decl)));
8352 }
8353 \f
8354 /* libfunc renaming.  */
8355 #include "config/gofast.h"
8356
8357 static void
8358 sparc_init_libfuncs (void)
8359 {
8360   if (TARGET_ARCH32)
8361     {
8362       /* Use the subroutines that Sun's library provides for integer
8363          multiply and divide.  The `*' prevents an underscore from
8364          being prepended by the compiler. .umul is a little faster
8365          than .mul.  */
8366       set_optab_libfunc (smul_optab, SImode, "*.umul");
8367       set_optab_libfunc (sdiv_optab, SImode, "*.div");
8368       set_optab_libfunc (udiv_optab, SImode, "*.udiv");
8369       set_optab_libfunc (smod_optab, SImode, "*.rem");
8370       set_optab_libfunc (umod_optab, SImode, "*.urem");
8371
8372       /* TFmode arithmetic.  These names are part of the SPARC 32bit ABI.  */
8373       set_optab_libfunc (add_optab, TFmode, "_Q_add");
8374       set_optab_libfunc (sub_optab, TFmode, "_Q_sub");
8375       set_optab_libfunc (neg_optab, TFmode, "_Q_neg");
8376       set_optab_libfunc (smul_optab, TFmode, "_Q_mul");
8377       set_optab_libfunc (sdiv_optab, TFmode, "_Q_div");
8378
8379       /* We can define the TFmode sqrt optab only if TARGET_FPU.  This
8380          is because with soft-float, the SFmode and DFmode sqrt
8381          instructions will be absent, and the compiler will notice and
8382          try to use the TFmode sqrt instruction for calls to the
8383          builtin function sqrt, but this fails.  */
8384       if (TARGET_FPU)
8385         set_optab_libfunc (sqrt_optab, TFmode, "_Q_sqrt");
8386
8387       set_optab_libfunc (eq_optab, TFmode, "_Q_feq");
8388       set_optab_libfunc (ne_optab, TFmode, "_Q_fne");
8389       set_optab_libfunc (gt_optab, TFmode, "_Q_fgt");
8390       set_optab_libfunc (ge_optab, TFmode, "_Q_fge");
8391       set_optab_libfunc (lt_optab, TFmode, "_Q_flt");
8392       set_optab_libfunc (le_optab, TFmode, "_Q_fle");
8393
8394       set_conv_libfunc (sext_optab,   TFmode, SFmode, "_Q_stoq");
8395       set_conv_libfunc (sext_optab,   TFmode, DFmode, "_Q_dtoq");
8396       set_conv_libfunc (trunc_optab,  SFmode, TFmode, "_Q_qtos");
8397       set_conv_libfunc (trunc_optab,  DFmode, TFmode, "_Q_qtod");
8398
8399       set_conv_libfunc (sfix_optab,   SImode, TFmode, "_Q_qtoi");
8400       set_conv_libfunc (ufix_optab,   SImode, TFmode, "_Q_qtou");
8401       set_conv_libfunc (sfloat_optab, TFmode, SImode, "_Q_itoq");
8402       set_conv_libfunc (ufloat_optab, TFmode, SImode, "_Q_utoq");
8403
8404       if (DITF_CONVERSION_LIBFUNCS)
8405         {
8406           set_conv_libfunc (sfix_optab,   DImode, TFmode, "_Q_qtoll");
8407           set_conv_libfunc (ufix_optab,   DImode, TFmode, "_Q_qtoull");
8408           set_conv_libfunc (sfloat_optab, TFmode, DImode, "_Q_lltoq");
8409           set_conv_libfunc (ufloat_optab, TFmode, DImode, "_Q_ulltoq");
8410         }
8411
8412       if (SUN_CONVERSION_LIBFUNCS)
8413         {
8414           set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftoll");
8415           set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoull");
8416           set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtoll");
8417           set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoull");
8418         }
8419     }
8420   if (TARGET_ARCH64)
8421     {
8422       /* In the SPARC 64bit ABI, SImode multiply and divide functions
8423          do not exist in the library.  Make sure the compiler does not
8424          emit calls to them by accident.  (It should always use the
8425          hardware instructions.)  */
8426       set_optab_libfunc (smul_optab, SImode, 0);
8427       set_optab_libfunc (sdiv_optab, SImode, 0);
8428       set_optab_libfunc (udiv_optab, SImode, 0);
8429       set_optab_libfunc (smod_optab, SImode, 0);
8430       set_optab_libfunc (umod_optab, SImode, 0);
8431
8432       if (SUN_INTEGER_MULTIPLY_64)
8433         {
8434           set_optab_libfunc (smul_optab, DImode, "__mul64");
8435           set_optab_libfunc (sdiv_optab, DImode, "__div64");
8436           set_optab_libfunc (udiv_optab, DImode, "__udiv64");
8437           set_optab_libfunc (smod_optab, DImode, "__rem64");
8438           set_optab_libfunc (umod_optab, DImode, "__urem64");
8439         }
8440
8441       if (SUN_CONVERSION_LIBFUNCS)
8442         {
8443           set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftol");
8444           set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoul");
8445           set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtol");
8446           set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoul");
8447         }
8448     }
8449
8450   gofast_maybe_init_libfuncs ();
8451 }
8452 \f
/* Register NAME as a BUILT_IN_MD builtin function with function type
   TYPE and function code CODE.  */
#define def_builtin(NAME, CODE, TYPE)                                   \
  add_builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD,            \
                        NULL, NULL_TREE)
8456
8457 /* Implement the TARGET_INIT_BUILTINS target hook.
8458    Create builtin functions for special SPARC instructions.  */
8459
8460 static void
8461 sparc_init_builtins (void)
8462 {
8463   if (TARGET_VIS)
8464     sparc_vis_init_builtins ();
8465 }
8466
8467 /* Create builtin functions for VIS 1.0 instructions.  */
8468
8469 static void
8470 sparc_vis_init_builtins (void)
8471 {
8472   tree v4qi = build_vector_type (unsigned_intQI_type_node, 4);
8473   tree v8qi = build_vector_type (unsigned_intQI_type_node, 8);
8474   tree v4hi = build_vector_type (intHI_type_node, 4);
8475   tree v2hi = build_vector_type (intHI_type_node, 2);
8476   tree v2si = build_vector_type (intSI_type_node, 2);
8477
8478   tree v4qi_ftype_v4hi = build_function_type_list (v4qi, v4hi, 0);
8479   tree v8qi_ftype_v2si_v8qi = build_function_type_list (v8qi, v2si, v8qi, 0);
8480   tree v2hi_ftype_v2si = build_function_type_list (v2hi, v2si, 0);
8481   tree v4hi_ftype_v4qi = build_function_type_list (v4hi, v4qi, 0);
8482   tree v8qi_ftype_v4qi_v4qi = build_function_type_list (v8qi, v4qi, v4qi, 0);
8483   tree v4hi_ftype_v4qi_v4hi = build_function_type_list (v4hi, v4qi, v4hi, 0);
8484   tree v4hi_ftype_v4qi_v2hi = build_function_type_list (v4hi, v4qi, v2hi, 0);
8485   tree v2si_ftype_v4qi_v2hi = build_function_type_list (v2si, v4qi, v2hi, 0);
8486   tree v4hi_ftype_v8qi_v4hi = build_function_type_list (v4hi, v8qi, v4hi, 0);
8487   tree v4hi_ftype_v4hi_v4hi = build_function_type_list (v4hi, v4hi, v4hi, 0);
8488   tree v2si_ftype_v2si_v2si = build_function_type_list (v2si, v2si, v2si, 0);
8489   tree v8qi_ftype_v8qi_v8qi = build_function_type_list (v8qi, v8qi, v8qi, 0);
8490   tree di_ftype_v8qi_v8qi_di = build_function_type_list (intDI_type_node,
8491                                                          v8qi, v8qi,
8492                                                          intDI_type_node, 0);
8493   tree di_ftype_di_di = build_function_type_list (intDI_type_node,
8494                                                   intDI_type_node,
8495                                                   intDI_type_node, 0);
8496   tree ptr_ftype_ptr_si = build_function_type_list (ptr_type_node,
8497                                                     ptr_type_node,
8498                                                     intSI_type_node, 0);
8499   tree ptr_ftype_ptr_di = build_function_type_list (ptr_type_node,
8500                                                     ptr_type_node,
8501                                                     intDI_type_node, 0);
8502
8503   /* Packing and expanding vectors.  */
8504   def_builtin ("__builtin_vis_fpack16", CODE_FOR_fpack16_vis, v4qi_ftype_v4hi);
8505   def_builtin ("__builtin_vis_fpack32", CODE_FOR_fpack32_vis,
8506                v8qi_ftype_v2si_v8qi);
8507   def_builtin ("__builtin_vis_fpackfix", CODE_FOR_fpackfix_vis,
8508                v2hi_ftype_v2si);
8509   def_builtin ("__builtin_vis_fexpand", CODE_FOR_fexpand_vis, v4hi_ftype_v4qi);
8510   def_builtin ("__builtin_vis_fpmerge", CODE_FOR_fpmerge_vis,
8511                v8qi_ftype_v4qi_v4qi);
8512
8513   /* Multiplications.  */
8514   def_builtin ("__builtin_vis_fmul8x16", CODE_FOR_fmul8x16_vis,
8515                v4hi_ftype_v4qi_v4hi);
8516   def_builtin ("__builtin_vis_fmul8x16au", CODE_FOR_fmul8x16au_vis,
8517                v4hi_ftype_v4qi_v2hi);
8518   def_builtin ("__builtin_vis_fmul8x16al", CODE_FOR_fmul8x16al_vis,
8519                v4hi_ftype_v4qi_v2hi);
8520   def_builtin ("__builtin_vis_fmul8sux16", CODE_FOR_fmul8sux16_vis,
8521                v4hi_ftype_v8qi_v4hi);
8522   def_builtin ("__builtin_vis_fmul8ulx16", CODE_FOR_fmul8ulx16_vis,
8523                v4hi_ftype_v8qi_v4hi);
8524   def_builtin ("__builtin_vis_fmuld8sux16", CODE_FOR_fmuld8sux16_vis,
8525                v2si_ftype_v4qi_v2hi);
8526   def_builtin ("__builtin_vis_fmuld8ulx16", CODE_FOR_fmuld8ulx16_vis,
8527                v2si_ftype_v4qi_v2hi);
8528
8529   /* Data aligning.  */
8530   def_builtin ("__builtin_vis_faligndatav4hi", CODE_FOR_faligndatav4hi_vis,
8531                v4hi_ftype_v4hi_v4hi);
8532   def_builtin ("__builtin_vis_faligndatav8qi", CODE_FOR_faligndatav8qi_vis,
8533                v8qi_ftype_v8qi_v8qi);
8534   def_builtin ("__builtin_vis_faligndatav2si", CODE_FOR_faligndatav2si_vis,
8535                v2si_ftype_v2si_v2si);
8536   def_builtin ("__builtin_vis_faligndatadi", CODE_FOR_faligndatadi_vis,
8537                di_ftype_di_di);
8538   if (TARGET_ARCH64)
8539     def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrdi_vis,
8540                  ptr_ftype_ptr_di);
8541   else
8542     def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrsi_vis,
8543                  ptr_ftype_ptr_si);
8544
8545   /* Pixel distance.  */
8546   def_builtin ("__builtin_vis_pdist", CODE_FOR_pdist_vis,
8547                di_ftype_v8qi_v8qi_di);
8548 }
8549
8550 /* Handle TARGET_EXPAND_BUILTIN target hook.
8551    Expand builtin functions for sparc intrinsics.  */
8552
8553 static rtx
8554 sparc_expand_builtin (tree exp, rtx target,
8555                       rtx subtarget ATTRIBUTE_UNUSED,
8556                       enum machine_mode tmode ATTRIBUTE_UNUSED,
8557                       int ignore ATTRIBUTE_UNUSED)
8558 {
8559   tree arg;
8560   call_expr_arg_iterator iter;
8561   tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
8562   unsigned int icode = DECL_FUNCTION_CODE (fndecl);
8563   rtx pat, op[4];
8564   enum machine_mode mode[4];
8565   int arg_count = 0;
8566
8567   mode[0] = insn_data[icode].operand[0].mode;
8568   if (!target
8569       || GET_MODE (target) != mode[0]
8570       || ! (*insn_data[icode].operand[0].predicate) (target, mode[0]))
8571     op[0] = gen_reg_rtx (mode[0]);
8572   else
8573     op[0] = target;
8574
8575   FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
8576     {
8577       arg_count++;
8578       mode[arg_count] = insn_data[icode].operand[arg_count].mode;
8579       op[arg_count] = expand_normal (arg);
8580
8581       if (! (*insn_data[icode].operand[arg_count].predicate) (op[arg_count],
8582                                                               mode[arg_count]))
8583         op[arg_count] = copy_to_mode_reg (mode[arg_count], op[arg_count]);
8584     }
8585
8586   switch (arg_count)
8587     {
8588     case 1:
8589       pat = GEN_FCN (icode) (op[0], op[1]);
8590       break;
8591     case 2:
8592       pat = GEN_FCN (icode) (op[0], op[1], op[2]);
8593       break;
8594     case 3:
8595       pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
8596       break;
8597     default:
8598       gcc_unreachable ();
8599     }
8600
8601   if (!pat)
8602     return NULL_RTX;
8603
8604   emit_insn (pat);
8605
8606   return op[0];
8607 }
8608
/* Multiply E8 by E16, add 128 and divide by 256.
   NOTE(review): C integer division truncates toward zero, so a negative
   product is not rounded the same way as a positive one -- confirm this
   is the intended behaviour.  */
static int
sparc_vis_mul8x16 (int e8, int e16)
{
  const int product = e8 * e16;
  const int biased = product + 128;

  return biased / 256;
}
8614
8615 /* Multiply the vector elements in ELTS0 to the elements in ELTS1 as specified
8616    by FNCODE.  All of the elements in ELTS0 and ELTS1 lists must be integer
8617    constants.  A tree list with the results of the multiplications is returned,
8618    and each element in the list is of INNER_TYPE.  */
8619
8620 static tree
8621 sparc_handle_vis_mul8x16 (int fncode, tree inner_type, tree elts0, tree elts1)
8622 {
8623   tree n_elts = NULL_TREE;
8624   int scale;
8625
8626   switch (fncode)
8627     {
8628     case CODE_FOR_fmul8x16_vis:
8629       for (; elts0 && elts1;
8630            elts0 = TREE_CHAIN (elts0), elts1 = TREE_CHAIN (elts1))
8631         {
8632           int val
8633             = sparc_vis_mul8x16 (TREE_INT_CST_LOW (TREE_VALUE (elts0)),
8634                                  TREE_INT_CST_LOW (TREE_VALUE (elts1)));
8635           n_elts = tree_cons (NULL_TREE,
8636                               build_int_cst (inner_type, val),
8637                               n_elts);
8638         }
8639       break;
8640
8641     case CODE_FOR_fmul8x16au_vis:
8642       scale = TREE_INT_CST_LOW (TREE_VALUE (elts1));
8643
8644       for (; elts0; elts0 = TREE_CHAIN (elts0))
8645         {
8646           int val
8647             = sparc_vis_mul8x16 (TREE_INT_CST_LOW (TREE_VALUE (elts0)),
8648                                  scale);
8649           n_elts = tree_cons (NULL_TREE,
8650                               build_int_cst (inner_type, val),
8651                               n_elts);
8652         }
8653       break;
8654
8655     case CODE_FOR_fmul8x16al_vis:
8656       scale = TREE_INT_CST_LOW (TREE_VALUE (TREE_CHAIN (elts1)));
8657
8658       for (; elts0; elts0 = TREE_CHAIN (elts0))
8659         {
8660           int val
8661             = sparc_vis_mul8x16 (TREE_INT_CST_LOW (TREE_VALUE (elts0)),
8662                                  scale);
8663           n_elts = tree_cons (NULL_TREE,
8664                               build_int_cst (inner_type, val),
8665                               n_elts);
8666         }
8667       break;
8668
8669     default:
8670       gcc_unreachable ();
8671     }
8672
8673   return nreverse (n_elts);
8674
8675 }
8676 /* Handle TARGET_FOLD_BUILTIN target hook.
8677    Fold builtin functions for SPARC intrinsics.  If IGNORE is true the
8678    result of the function call is ignored.  NULL_TREE is returned if the
8679    function could not be folded.  */
8680
8681 static tree
8682 sparc_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED,
8683                     tree *args, bool ignore)
8684 {
8685   tree arg0, arg1, arg2;
8686   tree rtype = TREE_TYPE (TREE_TYPE (fndecl));
8687   enum insn_code icode = (enum insn_code) DECL_FUNCTION_CODE (fndecl);
8688
8689   if (ignore
8690       && icode != CODE_FOR_alignaddrsi_vis
8691       && icode != CODE_FOR_alignaddrdi_vis)
8692     return fold_convert (rtype, integer_zero_node);
8693
8694   switch (icode)
8695     {
8696     case CODE_FOR_fexpand_vis:
8697       arg0 = args[0];
8698       STRIP_NOPS (arg0);
8699
8700       if (TREE_CODE (arg0) == VECTOR_CST)
8701         {
8702           tree inner_type = TREE_TYPE (rtype);
8703           tree elts = TREE_VECTOR_CST_ELTS (arg0);
8704           tree n_elts = NULL_TREE;
8705
8706           for (; elts; elts = TREE_CHAIN (elts))
8707             {
8708               unsigned int val = TREE_INT_CST_LOW (TREE_VALUE (elts)) << 4;
8709               n_elts = tree_cons (NULL_TREE,
8710                                   build_int_cst (inner_type, val),
8711                                   n_elts);
8712             }
8713           return build_vector (rtype, nreverse (n_elts));
8714         }
8715       break;
8716
8717     case CODE_FOR_fmul8x16_vis:
8718     case CODE_FOR_fmul8x16au_vis:
8719     case CODE_FOR_fmul8x16al_vis:
8720       arg0 = args[0];
8721       arg1 = args[1];
8722       STRIP_NOPS (arg0);
8723       STRIP_NOPS (arg1);
8724
8725       if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
8726         {
8727           tree inner_type = TREE_TYPE (rtype);
8728           tree elts0 = TREE_VECTOR_CST_ELTS (arg0);
8729           tree elts1 = TREE_VECTOR_CST_ELTS (arg1);
8730           tree n_elts = sparc_handle_vis_mul8x16 (icode, inner_type, elts0,
8731                                                   elts1);
8732
8733           return build_vector (rtype, n_elts);
8734         }
8735       break;
8736
8737     case CODE_FOR_fpmerge_vis:
8738       arg0 = args[0];
8739       arg1 = args[1];
8740       STRIP_NOPS (arg0);
8741       STRIP_NOPS (arg1);
8742
8743       if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
8744         {
8745           tree elts0 = TREE_VECTOR_CST_ELTS (arg0);
8746           tree elts1 = TREE_VECTOR_CST_ELTS (arg1);
8747           tree n_elts = NULL_TREE;
8748
8749           for (; elts0 && elts1;
8750                elts0 = TREE_CHAIN (elts0), elts1 = TREE_CHAIN (elts1))
8751             {
8752               n_elts = tree_cons (NULL_TREE, TREE_VALUE (elts0), n_elts);
8753               n_elts = tree_cons (NULL_TREE, TREE_VALUE (elts1), n_elts);
8754             }
8755
8756           return build_vector (rtype, nreverse (n_elts));
8757         }
8758       break;
8759
8760     case CODE_FOR_pdist_vis:
8761       arg0 = args[0];
8762       arg1 = args[1];
8763       arg2 = args[2];
8764       STRIP_NOPS (arg0);
8765       STRIP_NOPS (arg1);
8766       STRIP_NOPS (arg2);
8767
8768       if (TREE_CODE (arg0) == VECTOR_CST
8769           && TREE_CODE (arg1) == VECTOR_CST
8770           && TREE_CODE (arg2) == INTEGER_CST)
8771         {
8772           int overflow = 0;
8773           unsigned HOST_WIDE_INT low = TREE_INT_CST_LOW (arg2);
8774           HOST_WIDE_INT high = TREE_INT_CST_HIGH (arg2);
8775           tree elts0 = TREE_VECTOR_CST_ELTS (arg0);
8776           tree elts1 = TREE_VECTOR_CST_ELTS (arg1);
8777
8778           for (; elts0 && elts1;
8779                elts0 = TREE_CHAIN (elts0), elts1 = TREE_CHAIN (elts1))
8780             {
8781               unsigned HOST_WIDE_INT
8782                 low0 = TREE_INT_CST_LOW (TREE_VALUE (elts0)),
8783                 low1 = TREE_INT_CST_LOW (TREE_VALUE (elts1));
8784               HOST_WIDE_INT high0 = TREE_INT_CST_HIGH (TREE_VALUE (elts0));
8785               HOST_WIDE_INT high1 = TREE_INT_CST_HIGH (TREE_VALUE (elts1));
8786
8787               unsigned HOST_WIDE_INT l;
8788               HOST_WIDE_INT h;
8789
8790               overflow |= neg_double (low1, high1, &l, &h);
8791               overflow |= add_double (low0, high0, l, h, &l, &h);
8792               if (h < 0)
8793                 overflow |= neg_double (l, h, &l, &h);
8794
8795               overflow |= add_double (low, high, l, h, &low, &high);
8796             }
8797
8798           gcc_assert (overflow == 0);
8799
8800           return build_int_cst_wide (rtype, low, high);
8801         }
8802
8803     default:
8804       break;
8805     }
8806
8807   return NULL_TREE;
8808 }
8809 \f
8810 /* ??? This duplicates information provided to the compiler by the
8811    ??? scheduler description.  Some day, teach genautomata to output
8812    ??? the latencies and then CSE will just use that.  */
8813
8814 static bool
8815 sparc_rtx_costs (rtx x, int code, int outer_code, int *total,
8816                  bool speed ATTRIBUTE_UNUSED)
8817 {
8818   enum machine_mode mode = GET_MODE (x);
8819   bool float_mode_p = FLOAT_MODE_P (mode);
8820
8821   switch (code)
8822     {
8823     case CONST_INT:
8824       if (INTVAL (x) < 0x1000 && INTVAL (x) >= -0x1000)
8825         {
8826           *total = 0;
8827           return true;
8828         }
8829       /* FALLTHRU */
8830
8831     case HIGH:
8832       *total = 2;
8833       return true;
8834
8835     case CONST:
8836     case LABEL_REF:
8837     case SYMBOL_REF:
8838       *total = 4;
8839       return true;
8840
8841     case CONST_DOUBLE:
8842       if (GET_MODE (x) == VOIDmode
8843           && ((CONST_DOUBLE_HIGH (x) == 0
8844                && CONST_DOUBLE_LOW (x) < 0x1000)
8845               || (CONST_DOUBLE_HIGH (x) == -1
8846                   && CONST_DOUBLE_LOW (x) < 0
8847                   && CONST_DOUBLE_LOW (x) >= -0x1000)))
8848         *total = 0;
8849       else
8850         *total = 8;
8851       return true;
8852
8853     case MEM:
8854       /* If outer-code was a sign or zero extension, a cost
8855          of COSTS_N_INSNS (1) was already added in.  This is
8856          why we are subtracting it back out.  */
8857       if (outer_code == ZERO_EXTEND)
8858         {
8859           *total = sparc_costs->int_zload - COSTS_N_INSNS (1);
8860         }
8861       else if (outer_code == SIGN_EXTEND)
8862         {
8863           *total = sparc_costs->int_sload - COSTS_N_INSNS (1);
8864         }
8865       else if (float_mode_p)
8866         {
8867           *total = sparc_costs->float_load;
8868         }
8869       else
8870         {
8871           *total = sparc_costs->int_load;
8872         }
8873
8874       return true;
8875
8876     case PLUS:
8877     case MINUS:
8878       if (float_mode_p)
8879         *total = sparc_costs->float_plusminus;
8880       else
8881         *total = COSTS_N_INSNS (1);
8882       return false;
8883
8884     case MULT:
8885       if (float_mode_p)
8886         *total = sparc_costs->float_mul;
8887       else if (! TARGET_HARD_MUL)
8888         *total = COSTS_N_INSNS (25);
8889       else
8890         {
8891           int bit_cost;
8892
8893           bit_cost = 0;
8894           if (sparc_costs->int_mul_bit_factor)
8895             {
8896               int nbits;
8897
8898               if (GET_CODE (XEXP (x, 1)) == CONST_INT)
8899                 {
8900                   unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
8901                   for (nbits = 0; value != 0; value &= value - 1)
8902                     nbits++;
8903                 }
8904               else if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
8905                        && GET_MODE (XEXP (x, 1)) == VOIDmode)
8906                 {
8907                   rtx x1 = XEXP (x, 1);
8908                   unsigned HOST_WIDE_INT value1 = CONST_DOUBLE_LOW (x1);
8909                   unsigned HOST_WIDE_INT value2 = CONST_DOUBLE_HIGH (x1);
8910
8911                   for (nbits = 0; value1 != 0; value1 &= value1 - 1)
8912                     nbits++;
8913                   for (; value2 != 0; value2 &= value2 - 1)
8914                     nbits++;
8915                 }
8916               else
8917                 nbits = 7;
8918
8919               if (nbits < 3)
8920                 nbits = 3;
8921               bit_cost = (nbits - 3) / sparc_costs->int_mul_bit_factor;
8922               bit_cost = COSTS_N_INSNS (bit_cost);
8923             }
8924
8925           if (mode == DImode)
8926             *total = sparc_costs->int_mulX + bit_cost;
8927           else
8928             *total = sparc_costs->int_mul + bit_cost;
8929         }
8930       return false;
8931
8932     case ASHIFT:
8933     case ASHIFTRT:
8934     case LSHIFTRT:
8935       *total = COSTS_N_INSNS (1) + sparc_costs->shift_penalty;
8936       return false;
8937
8938     case DIV:
8939     case UDIV:
8940     case MOD:
8941     case UMOD:
8942       if (float_mode_p)
8943         {
8944           if (mode == DFmode)
8945             *total = sparc_costs->float_div_df;
8946           else
8947             *total = sparc_costs->float_div_sf;
8948         }
8949       else
8950         {
8951           if (mode == DImode)
8952             *total = sparc_costs->int_divX;
8953           else
8954             *total = sparc_costs->int_div;
8955         }
8956       return false;
8957
8958     case NEG:
8959       if (! float_mode_p)
8960         {
8961           *total = COSTS_N_INSNS (1);
8962           return false;
8963         }
8964       /* FALLTHRU */
8965
8966     case ABS:
8967     case FLOAT:
8968     case UNSIGNED_FLOAT:
8969     case FIX:
8970     case UNSIGNED_FIX:
8971     case FLOAT_EXTEND:
8972     case FLOAT_TRUNCATE:
8973       *total = sparc_costs->float_move;
8974       return false;
8975
8976     case SQRT:
8977       if (mode == DFmode)
8978         *total = sparc_costs->float_sqrt_df;
8979       else
8980         *total = sparc_costs->float_sqrt_sf;
8981       return false;
8982
8983     case COMPARE:
8984       if (float_mode_p)
8985         *total = sparc_costs->float_cmp;
8986       else
8987         *total = COSTS_N_INSNS (1);
8988       return false;
8989
8990     case IF_THEN_ELSE:
8991       if (float_mode_p)
8992         *total = sparc_costs->float_cmove;
8993       else
8994         *total = sparc_costs->int_cmove;
8995       return false;
8996
8997     case IOR:
8998       /* Handle the NAND vector patterns.  */
8999       if (sparc_vector_mode_supported_p (GET_MODE (x))
9000           && GET_CODE (XEXP (x, 0)) == NOT
9001           && GET_CODE (XEXP (x, 1)) == NOT)
9002         {
9003           *total = COSTS_N_INSNS (1);
9004           return true;
9005         }
9006       else
9007         return false;
9008
9009     default:
9010       return false;
9011     }
9012 }
9013
9014 /* Emit the sequence of insns SEQ while preserving the registers REG and REG2.
9015    This is achieved by means of a manual dynamic stack space allocation in
9016    the current frame.  We make the assumption that SEQ doesn't contain any
9017    function calls, with the possible exception of calls to the PIC helper.  */
9018
9019 static void
9020 emit_and_preserve (rtx seq, rtx reg, rtx reg2)
9021 {
9022   /* We must preserve the lowest 16 words for the register save area.  */
9023   HOST_WIDE_INT offset = 16*UNITS_PER_WORD;
9024   /* We really need only 2 words of fresh stack space.  */
9025   HOST_WIDE_INT size = SPARC_STACK_ALIGN (offset + 2*UNITS_PER_WORD);
9026
9027   rtx slot
9028     = gen_rtx_MEM (word_mode, plus_constant (stack_pointer_rtx,
9029                                              SPARC_STACK_BIAS + offset));
9030
9031   emit_insn (gen_stack_pointer_dec (GEN_INT (size)));
9032   emit_insn (gen_rtx_SET (VOIDmode, slot, reg));
9033   if (reg2)
9034     emit_insn (gen_rtx_SET (VOIDmode,
9035                             adjust_address (slot, word_mode, UNITS_PER_WORD),
9036                             reg2));
9037   emit_insn (seq);
9038   if (reg2)
9039     emit_insn (gen_rtx_SET (VOIDmode,
9040                             reg2,
9041                             adjust_address (slot, word_mode, UNITS_PER_WORD)));
9042   emit_insn (gen_rtx_SET (VOIDmode, reg, slot));
9043   emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
9044 }
9045
9046 /* Output the assembler code for a thunk function.  THUNK_DECL is the
9047    declaration for the thunk function itself, FUNCTION is the decl for
9048    the target function.  DELTA is an immediate constant offset to be
9049    added to THIS.  If VCALL_OFFSET is nonzero, the word at address
9050    (*THIS + VCALL_OFFSET) should be additionally added to THIS.  */
9051
9052 static void
9053 sparc_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
9054                        HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
9055                        tree function)
9056 {
9057   rtx this_rtx, insn, funexp;
9058   unsigned int int_arg_first;
9059
9060   reload_completed = 1;
9061   epilogue_completed = 1;
9062
9063   emit_note (NOTE_INSN_PROLOGUE_END);
9064
9065   if (flag_delayed_branch)
9066     {
9067       /* We will emit a regular sibcall below, so we need to instruct
9068          output_sibcall that we are in a leaf function.  */
9069       sparc_leaf_function_p = current_function_uses_only_leaf_regs = 1;
9070
9071       /* This will cause final.c to invoke leaf_renumber_regs so we
9072          must behave as if we were in a not-yet-leafified function.  */
9073       int_arg_first = SPARC_INCOMING_INT_ARG_FIRST;
9074     }
9075   else
9076     {
9077       /* We will emit the sibcall manually below, so we will need to
9078          manually spill non-leaf registers.  */
9079       sparc_leaf_function_p = current_function_uses_only_leaf_regs = 0;
9080
9081       /* We really are in a leaf function.  */
9082       int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
9083     }
9084
9085   /* Find the "this" pointer.  Normally in %o0, but in ARCH64 if the function
9086      returns a structure, the structure return pointer is there instead.  */
9087   if (TARGET_ARCH64
9088       && aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
9089     this_rtx = gen_rtx_REG (Pmode, int_arg_first + 1);
9090   else
9091     this_rtx = gen_rtx_REG (Pmode, int_arg_first);
9092
9093   /* Add DELTA.  When possible use a plain add, otherwise load it into
9094      a register first.  */
9095   if (delta)
9096     {
9097       rtx delta_rtx = GEN_INT (delta);
9098
9099       if (! SPARC_SIMM13_P (delta))
9100         {
9101           rtx scratch = gen_rtx_REG (Pmode, 1);
9102           emit_move_insn (scratch, delta_rtx);
9103           delta_rtx = scratch;
9104         }
9105
9106       /* THIS_RTX += DELTA.  */
9107       emit_insn (gen_add2_insn (this_rtx, delta_rtx));
9108     }
9109
9110   /* Add the word at address (*THIS_RTX + VCALL_OFFSET).  */
9111   if (vcall_offset)
9112     {
9113       rtx vcall_offset_rtx = GEN_INT (vcall_offset);
9114       rtx scratch = gen_rtx_REG (Pmode, 1);
9115
9116       gcc_assert (vcall_offset < 0);
9117
9118       /* SCRATCH = *THIS_RTX.  */
9119       emit_move_insn (scratch, gen_rtx_MEM (Pmode, this_rtx));
9120
9121       /* Prepare for adding VCALL_OFFSET.  The difficulty is that we
9122          may not have any available scratch register at this point.  */
9123       if (SPARC_SIMM13_P (vcall_offset))
9124         ;
9125       /* This is the case if ARCH64 (unless -ffixed-g5 is passed).  */
9126       else if (! fixed_regs[5]
9127                /* The below sequence is made up of at least 2 insns,
9128                   while the default method may need only one.  */
9129                && vcall_offset < -8192)
9130         {
9131           rtx scratch2 = gen_rtx_REG (Pmode, 5);
9132           emit_move_insn (scratch2, vcall_offset_rtx);
9133           vcall_offset_rtx = scratch2;
9134         }
9135       else
9136         {
9137           rtx increment = GEN_INT (-4096);
9138
9139           /* VCALL_OFFSET is a negative number whose typical range can be
9140              estimated as -32768..0 in 32-bit mode.  In almost all cases
9141              it is therefore cheaper to emit multiple add insns than
9142              spilling and loading the constant into a register (at least
9143              6 insns).  */
9144           while (! SPARC_SIMM13_P (vcall_offset))
9145             {
9146               emit_insn (gen_add2_insn (scratch, increment));
9147               vcall_offset += 4096;
9148             }
9149           vcall_offset_rtx = GEN_INT (vcall_offset); /* cannot be 0 */
9150         }
9151
9152       /* SCRATCH = *(*THIS_RTX + VCALL_OFFSET).  */
9153       emit_move_insn (scratch, gen_rtx_MEM (Pmode,
9154                                             gen_rtx_PLUS (Pmode,
9155                                                           scratch,
9156                                                           vcall_offset_rtx)));
9157
9158       /* THIS_RTX += *(*THIS_RTX + VCALL_OFFSET).  */
9159       emit_insn (gen_add2_insn (this_rtx, scratch));
9160     }
9161
9162   /* Generate a tail call to the target function.  */
9163   if (! TREE_USED (function))
9164     {
9165       assemble_external (function);
9166       TREE_USED (function) = 1;
9167     }
9168   funexp = XEXP (DECL_RTL (function), 0);
9169
9170   if (flag_delayed_branch)
9171     {
9172       funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
9173       insn = emit_call_insn (gen_sibcall (funexp));
9174       SIBLING_CALL_P (insn) = 1;
9175     }
9176   else
9177     {
9178       /* The hoops we have to jump through in order to generate a sibcall
9179          without using delay slots...  */
9180       rtx spill_reg, spill_reg2, seq, scratch = gen_rtx_REG (Pmode, 1);
9181
9182       if (flag_pic)
9183         {
9184           spill_reg = gen_rtx_REG (word_mode, 15);  /* %o7 */
9185           spill_reg2 = gen_rtx_REG (word_mode, PIC_OFFSET_TABLE_REGNUM);
9186           start_sequence ();
9187           /* Delay emitting the PIC helper function because it needs to
9188              change the section and we are emitting assembly code.  */
9189           load_pic_register ();  /* clobbers %o7 */
9190           scratch = sparc_legitimize_pic_address (funexp, scratch);
9191           seq = get_insns ();
9192           end_sequence ();
9193           emit_and_preserve (seq, spill_reg, spill_reg2);
9194         }
9195       else if (TARGET_ARCH32)
9196         {
9197           emit_insn (gen_rtx_SET (VOIDmode,
9198                                   scratch,
9199                                   gen_rtx_HIGH (SImode, funexp)));
9200           emit_insn (gen_rtx_SET (VOIDmode,
9201                                   scratch,
9202                                   gen_rtx_LO_SUM (SImode, scratch, funexp)));
9203         }
9204       else  /* TARGET_ARCH64 */
9205         {
9206           switch (sparc_cmodel)
9207             {
9208             case CM_MEDLOW:
9209             case CM_MEDMID:
9210               /* The destination can serve as a temporary.  */
9211               sparc_emit_set_symbolic_const64 (scratch, funexp, scratch);
9212               break;
9213
9214             case CM_MEDANY:
9215             case CM_EMBMEDANY:
9216               /* The destination cannot serve as a temporary.  */
9217               spill_reg = gen_rtx_REG (DImode, 15);  /* %o7 */
9218               start_sequence ();
9219               sparc_emit_set_symbolic_const64 (scratch, funexp, spill_reg);
9220               seq = get_insns ();
9221               end_sequence ();
9222               emit_and_preserve (seq, spill_reg, 0);
9223               break;
9224
9225             default:
9226               gcc_unreachable ();
9227             }
9228         }
9229
9230       emit_jump_insn (gen_indirect_jump (scratch));
9231     }
9232
9233   emit_barrier ();
9234
9235   /* Run just enough of rest_of_compilation to get the insns emitted.
9236      There's not really enough bulk here to make other passes such as
9237      instruction scheduling worth while.  Note that use_thunk calls
9238      assemble_start_function and assemble_end_function.  */
9239   insn = get_insns ();
9240   insn_locators_alloc ();
9241   shorten_branches (insn);
9242   final_start_function (insn, file, 1);
9243   final (insn, file, 1);
9244   final_end_function ();
9245
9246   reload_completed = 0;
9247   epilogue_completed = 0;
9248 }
9249
9250 /* Return true if sparc_output_mi_thunk would be able to output the
9251    assembler code for the thunk function specified by the arguments
9252    it is passed, and false otherwise.  */
9253 static bool
9254 sparc_can_output_mi_thunk (const_tree thunk_fndecl ATTRIBUTE_UNUSED,
9255                            HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
9256                            HOST_WIDE_INT vcall_offset,
9257                            const_tree function ATTRIBUTE_UNUSED)
9258 {
9259   /* Bound the loop used in the default method above.  */
9260   return (vcall_offset >= -32768 || ! fixed_regs[5]);
9261 }
9262
/* Allocate a fresh 'struct machine_function' with the cleared GC
   allocator and hand it back to the caller.  */

static struct machine_function *
sparc_init_machine_status (void)
{
  struct machine_function *fresh = ggc_alloc_cleared_machine_function ();

  return fresh;
}
9270
9271 /* Locate some local-dynamic symbol still in use by this function
9272    so that we can print its name in local-dynamic base patterns.  */
9273
9274 static const char *
9275 get_some_local_dynamic_name (void)
9276 {
9277   rtx insn;
9278
9279   if (cfun->machine->some_ld_name)
9280     return cfun->machine->some_ld_name;
9281
9282   for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
9283     if (INSN_P (insn)
9284         && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
9285       return cfun->machine->some_ld_name;
9286
9287   gcc_unreachable ();
9288 }
9289
9290 static int
9291 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
9292 {
9293   rtx x = *px;
9294
9295   if (x
9296       && GET_CODE (x) == SYMBOL_REF
9297       && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
9298     {
9299       cfun->machine->some_ld_name = XSTR (x, 0);
9300       return 1;
9301     }
9302
9303   return 0;
9304 }
9305
9306 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
9307    This is called from dwarf2out.c to emit call frame instructions
9308    for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
9309 static void
9310 sparc_dwarf_handle_frame_unspec (const char *label,
9311                                  rtx pattern ATTRIBUTE_UNUSED,
9312                                  int index ATTRIBUTE_UNUSED)
9313 {
9314   gcc_assert (index == UNSPECV_SAVEW);
9315   dwarf2out_window_save (label);
9316 }
9317
9318 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
9319    We need to emit DTP-relative relocations.  */
9320
9321 static void
9322 sparc_output_dwarf_dtprel (FILE *file, int size, rtx x)
9323 {
9324   switch (size)
9325     {
9326     case 4:
9327       fputs ("\t.word\t%r_tls_dtpoff32(", file);
9328       break;
9329     case 8:
9330       fputs ("\t.xword\t%r_tls_dtpoff64(", file);
9331       break;
9332     default:
9333       gcc_unreachable ();
9334     }
9335   output_addr_const (file, x);
9336   fputs (")", file);
9337 }
9338
9339 /* Do whatever processing is required at the end of a file.  */
9340
9341 static void
9342 sparc_file_end (void)
9343 {
9344   /* If need to emit the special PIC helper function, do so now.  */
9345   if (pic_helper_needed)
9346     {
9347       unsigned int regno = REGNO (pic_offset_table_rtx);
9348       const char *pic_name = reg_names[regno];
9349       char name[32];
9350 #ifdef DWARF2_UNWIND_INFO
9351       bool do_cfi;
9352 #endif
9353
9354       get_pc_thunk_name (name, regno);
9355       if (USE_HIDDEN_LINKONCE)
9356         {
9357           tree decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
9358                                   get_identifier (name),
9359                                   build_function_type (void_type_node,
9360                                                        void_list_node));
9361           DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
9362                                            NULL_TREE, void_type_node);
9363           TREE_STATIC (decl) = 1;
9364           make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
9365           DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
9366           DECL_VISIBILITY_SPECIFIED (decl) = 1;
9367           allocate_struct_function (decl, true);
9368           current_function_decl = decl;
9369           init_varasm_status ();
9370           assemble_start_function (decl, name);
9371         }
9372       else
9373         {
9374           const int align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
9375           switch_to_section (text_section);
9376           if (align > 0)
9377             ASM_OUTPUT_ALIGN (asm_out_file, align);
9378           ASM_OUTPUT_LABEL (asm_out_file, name);
9379         }
9380
9381 #ifdef DWARF2_UNWIND_INFO
9382       do_cfi = dwarf2out_do_cfi_asm ();
9383       if (do_cfi)
9384         fprintf (asm_out_file, "\t.cfi_startproc\n");
9385 #endif
9386       if (flag_delayed_branch)
9387         fprintf (asm_out_file, "\tjmp\t%%o7+8\n\t add\t%%o7, %s, %s\n",
9388                  pic_name, pic_name);
9389       else
9390         fprintf (asm_out_file, "\tadd\t%%o7, %s, %s\n\tjmp\t%%o7+8\n\t nop\n",
9391                  pic_name, pic_name);
9392 #ifdef DWARF2_UNWIND_INFO
9393       if (do_cfi)
9394         fprintf (asm_out_file, "\t.cfi_endproc\n");
9395 #endif
9396     }
9397
9398   if (NEED_INDICATE_EXEC_STACK)
9399     file_end_indicate_exec_stack ();
9400 }
9401
9402 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
9403 /* Implement TARGET_MANGLE_TYPE.  */
9404
9405 static const char *
9406 sparc_mangle_type (const_tree type)
9407 {
9408   if (!TARGET_64BIT
9409       && TYPE_MAIN_VARIANT (type) == long_double_type_node
9410       && TARGET_LONG_DOUBLE_128)
9411     return "g";
9412
9413   /* For all other types, use normal C++ mangling.  */
9414   return NULL;
9415 }
9416 #endif
9417
9418 /* Expand code to perform a 8 or 16-bit compare and swap by doing 32-bit
9419    compare and swap on the word containing the byte or half-word.  */
9420
9421 void
9422 sparc_expand_compare_and_swap_12 (rtx result, rtx mem, rtx oldval, rtx newval)
9423 {
9424   rtx addr1 = force_reg (Pmode, XEXP (mem, 0));
9425   rtx addr = gen_reg_rtx (Pmode);
9426   rtx off = gen_reg_rtx (SImode);
9427   rtx oldv = gen_reg_rtx (SImode);
9428   rtx newv = gen_reg_rtx (SImode);
9429   rtx oldvalue = gen_reg_rtx (SImode);
9430   rtx newvalue = gen_reg_rtx (SImode);
9431   rtx res = gen_reg_rtx (SImode);
9432   rtx resv = gen_reg_rtx (SImode);
9433   rtx memsi, val, mask, end_label, loop_label, cc;
9434
9435   emit_insn (gen_rtx_SET (VOIDmode, addr,
9436                           gen_rtx_AND (Pmode, addr1, GEN_INT (-4))));
9437
9438   if (Pmode != SImode)
9439     addr1 = gen_lowpart (SImode, addr1);
9440   emit_insn (gen_rtx_SET (VOIDmode, off,
9441                           gen_rtx_AND (SImode, addr1, GEN_INT (3))));
9442
9443   memsi = gen_rtx_MEM (SImode, addr);
9444   set_mem_alias_set (memsi, ALIAS_SET_MEMORY_BARRIER);
9445   MEM_VOLATILE_P (memsi) = MEM_VOLATILE_P (mem);
9446
9447   val = force_reg (SImode, memsi);
9448
9449   emit_insn (gen_rtx_SET (VOIDmode, off,
9450                           gen_rtx_XOR (SImode, off,
9451                                        GEN_INT (GET_MODE (mem) == QImode
9452                                                 ? 3 : 2))));
9453
9454   emit_insn (gen_rtx_SET (VOIDmode, off,
9455                           gen_rtx_ASHIFT (SImode, off, GEN_INT (3))));
9456
9457   if (GET_MODE (mem) == QImode)
9458     mask = force_reg (SImode, GEN_INT (0xff));
9459   else
9460     mask = force_reg (SImode, GEN_INT (0xffff));
9461
9462   emit_insn (gen_rtx_SET (VOIDmode, mask,
9463                           gen_rtx_ASHIFT (SImode, mask, off)));
9464
9465   emit_insn (gen_rtx_SET (VOIDmode, val,
9466                           gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
9467                                        val)));
9468
9469   oldval = gen_lowpart (SImode, oldval);
9470   emit_insn (gen_rtx_SET (VOIDmode, oldv,
9471                           gen_rtx_ASHIFT (SImode, oldval, off)));
9472
9473   newval = gen_lowpart_common (SImode, newval);
9474   emit_insn (gen_rtx_SET (VOIDmode, newv,
9475                           gen_rtx_ASHIFT (SImode, newval, off)));
9476
9477   emit_insn (gen_rtx_SET (VOIDmode, oldv,
9478                           gen_rtx_AND (SImode, oldv, mask)));
9479
9480   emit_insn (gen_rtx_SET (VOIDmode, newv,
9481                           gen_rtx_AND (SImode, newv, mask)));
9482
9483   end_label = gen_label_rtx ();
9484   loop_label = gen_label_rtx ();
9485   emit_label (loop_label);
9486
9487   emit_insn (gen_rtx_SET (VOIDmode, oldvalue,
9488                           gen_rtx_IOR (SImode, oldv, val)));
9489
9490   emit_insn (gen_rtx_SET (VOIDmode, newvalue,
9491                           gen_rtx_IOR (SImode, newv, val)));
9492
9493   emit_insn (gen_sync_compare_and_swapsi (res, memsi, oldvalue, newvalue));
9494
9495   emit_cmp_and_jump_insns (res, oldvalue, EQ, NULL, SImode, 0, end_label);
9496
9497   emit_insn (gen_rtx_SET (VOIDmode, resv,
9498                           gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
9499                                        res)));
9500
9501   cc = gen_compare_reg_1 (NE, resv, val);
9502   emit_insn (gen_rtx_SET (VOIDmode, val, resv));
9503
9504   /* Use cbranchcc4 to separate the compare and branch!  */
9505   emit_jump_insn (gen_cbranchcc4 (gen_rtx_NE (VOIDmode, cc, const0_rtx),
9506                                   cc, const0_rtx, loop_label));
9507
9508   emit_label (end_label);
9509
9510   emit_insn (gen_rtx_SET (VOIDmode, res,
9511                           gen_rtx_AND (SImode, res, mask)));
9512
9513   emit_insn (gen_rtx_SET (VOIDmode, res,
9514                           gen_rtx_LSHIFTRT (SImode, res, off)));
9515
9516   emit_move_insn (result, gen_lowpart (GET_MODE (result), res));
9517 }
9518
/* Implement TARGET_FRAME_POINTER_REQUIRED.  The frame pointer can be
   dispensed with only in a leaf function that uses only leaf
   registers.  */

bool
sparc_frame_pointer_required (void)
{
  if (! leaf_function_p ())
    return true;

  return ! only_leaf_regs_used ();
}
9526
9527 /* The way this is structured, we can't eliminate SFP in favor of SP
9528    if the frame pointer is required: we want to use the SFP->HFP elimination
9529    in that case.  But the test in update_eliminables doesn't know we are
9530    assuming below that we only do the former elimination.  */
9531
9532 bool
9533 sparc_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
9534 {
9535   return (to == HARD_FRAME_POINTER_REGNUM
9536           || !targetm.frame_pointer_required ());
9537 }
9538
9539 #include "gt-sparc.h"