gcc/config/sparc/sparc.c

   1 /* Subroutines for insn-output.c for SPARC.
   2    Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
   3    1999, 2000, 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
   4    Contributed by Michael Tiemann (tiemann@cygnus.com)
   5    64-bit SPARC-V9 support by Michael Tiemann, Jim Wilson, and Doug Evans,
   6    at Cygnus Support.
   7
   8 This file is part of GCC.
   9
  10 GCC is free software; you can redistribute it and/or modify
  11 it under the terms of the GNU General Public License as published by
  12 the Free Software Foundation; either version 2, or (at your option)
  13 any later version.
  14
  15 GCC is distributed in the hope that it will be useful,
  16 but WITHOUT ANY WARRANTY; without even the implied warranty of
  17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  18 GNU General Public License for more details.
  19
  20 You should have received a copy of the GNU General Public License
  21 along with GCC; see the file COPYING.  If not, write to
  22 the Free Software Foundation, 59 Temple Place - Suite 330,
  23 Boston, MA 02111-1307, USA.  */
  24
  25 #include "config.h"
  26 #include "system.h"
  27 #include "coretypes.h"
  28 #include "tm.h"
  29 #include "tree.h"
  30 #include "rtl.h"
  31 #include "regs.h"
  32 #include "hard-reg-set.h"
  33 #include "real.h"
  34 #include "insn-config.h"
  35 #include "insn-codes.h"
  36 #include "conditions.h"
  37 #include "output.h"
  38 #include "insn-attr.h"
  39 #include "flags.h"
  40 #include "function.h"
  41 #include "expr.h"
  42 #include "optabs.h"
  43 #include "recog.h"
  44 #include "toplev.h"
  45 #include "ggc.h"
  46 #include "tm_p.h"
  47 #include "debug.h"
  48 #include "target.h"
  49 #include "target-def.h"
  50 #include "cfglayout.h"
  51 #include "tree-gimple.h"
  52 #include "langhooks.h"
  53
  54 /* Processor costs */
  55 static const
  56 struct processor_costs cypress_costs = {
  57   COSTS_N_INSNS (2), /* int load */
  58   COSTS_N_INSNS (2), /* int signed load */
  59   COSTS_N_INSNS (2), /* int zeroed load */
  60   COSTS_N_INSNS (2), /* float load */
  61   COSTS_N_INSNS (5), /* fmov, fneg, fabs */
  62   COSTS_N_INSNS (5), /* fadd, fsub */
  63   COSTS_N_INSNS (1), /* fcmp */
  64   COSTS_N_INSNS (1), /* fmov, fmovr */
  65   COSTS_N_INSNS (7), /* fmul */
  66   COSTS_N_INSNS (37), /* fdivs */
  67   COSTS_N_INSNS (37), /* fdivd */
  68   COSTS_N_INSNS (63), /* fsqrts */
  69   COSTS_N_INSNS (63), /* fsqrtd */
  70   COSTS_N_INSNS (1), /* imul */
  71   COSTS_N_INSNS (1), /* imulX */
  72   0, /* imul bit factor */
  73   COSTS_N_INSNS (1), /* idiv */
  74   COSTS_N_INSNS (1), /* idivX */
  75   COSTS_N_INSNS (1), /* movcc/movr */
  76   0, /* shift penalty */
  77 };
  78
  79 static const
  80 struct processor_costs supersparc_costs = {
  81   COSTS_N_INSNS (1), /* int load */
  82   COSTS_N_INSNS (1), /* int signed load */
  83   COSTS_N_INSNS (1), /* int zeroed load */
  84   COSTS_N_INSNS (0), /* float load */
  85   COSTS_N_INSNS (3), /* fmov, fneg, fabs */
  86   COSTS_N_INSNS (3), /* fadd, fsub */
  87   COSTS_N_INSNS (3), /* fcmp */
  88   COSTS_N_INSNS (1), /* fmov, fmovr */
  89   COSTS_N_INSNS (3), /* fmul */
  90   COSTS_N_INSNS (6), /* fdivs */
  91   COSTS_N_INSNS (9), /* fdivd */
  92   COSTS_N_INSNS (12), /* fsqrts */
  93   COSTS_N_INSNS (12), /* fsqrtd */
  94   COSTS_N_INSNS (4), /* imul */
  95   COSTS_N_INSNS (4), /* imulX */
  96   0, /* imul bit factor */
  97   COSTS_N_INSNS (4), /* idiv */
  98   COSTS_N_INSNS (4), /* idivX */
  99   COSTS_N_INSNS (1), /* movcc/movr */
 100   1, /* shift penalty */
 101 };
 102
 103 static const
 104 struct processor_costs hypersparc_costs = {
 105   COSTS_N_INSNS (1), /* int load */
 106   COSTS_N_INSNS (1), /* int signed load */
 107   COSTS_N_INSNS (1), /* int zeroed load */
 108   COSTS_N_INSNS (1), /* float load */
 109   COSTS_N_INSNS (1), /* fmov, fneg, fabs */
 110   COSTS_N_INSNS (1), /* fadd, fsub */
 111   COSTS_N_INSNS (1), /* fcmp */
 112   COSTS_N_INSNS (1), /* fmov, fmovr */
 113   COSTS_N_INSNS (1), /* fmul */
 114   COSTS_N_INSNS (8), /* fdivs */
 115   COSTS_N_INSNS (12), /* fdivd */
 116   COSTS_N_INSNS (17), /* fsqrts */
 117   COSTS_N_INSNS (17), /* fsqrtd */
 118   COSTS_N_INSNS (17), /* imul */
 119   COSTS_N_INSNS (17), /* imulX */
 120   0, /* imul bit factor */
 121   COSTS_N_INSNS (17), /* idiv */
 122   COSTS_N_INSNS (17), /* idivX */
 123   COSTS_N_INSNS (1), /* movcc/movr */
 124   0, /* shift penalty */
 125 };
 126
 127 static const
 128 struct processor_costs sparclet_costs = {
 129   COSTS_N_INSNS (3), /* int load */
 130   COSTS_N_INSNS (3), /* int signed load */
 131   COSTS_N_INSNS (1), /* int zeroed load */
 132   COSTS_N_INSNS (1), /* float load */
 133   COSTS_N_INSNS (1), /* fmov, fneg, fabs */
 134   COSTS_N_INSNS (1), /* fadd, fsub */
 135   COSTS_N_INSNS (1), /* fcmp */
 136   COSTS_N_INSNS (1), /* fmov, fmovr */
 137   COSTS_N_INSNS (1), /* fmul */
 138   COSTS_N_INSNS (1), /* fdivs */
 139   COSTS_N_INSNS (1), /* fdivd */
 140   COSTS_N_INSNS (1), /* fsqrts */
 141   COSTS_N_INSNS (1), /* fsqrtd */
 142   COSTS_N_INSNS (5), /* imul */
 143   COSTS_N_INSNS (5), /* imulX */
 144   0, /* imul bit factor */
 145   COSTS_N_INSNS (5), /* idiv */
 146   COSTS_N_INSNS (5), /* idivX */
 147   COSTS_N_INSNS (1), /* movcc/movr */
 148   0, /* shift penalty */
 149 };
 150
 151 static const
 152 struct processor_costs ultrasparc_costs = {
 153   COSTS_N_INSNS (2), /* int load */
 154   COSTS_N_INSNS (3), /* int signed load */
 155   COSTS_N_INSNS (2), /* int zeroed load */
 156   COSTS_N_INSNS (2), /* float load */
 157   COSTS_N_INSNS (1), /* fmov, fneg, fabs */
 158   COSTS_N_INSNS (4), /* fadd, fsub */
 159   COSTS_N_INSNS (1), /* fcmp */
 160   COSTS_N_INSNS (2), /* fmov, fmovr */
 161   COSTS_N_INSNS (4), /* fmul */
 162   COSTS_N_INSNS (13), /* fdivs */
 163   COSTS_N_INSNS (23), /* fdivd */
 164   COSTS_N_INSNS (13), /* fsqrts */
 165   COSTS_N_INSNS (23), /* fsqrtd */
 166   COSTS_N_INSNS (4), /* imul */
 167   COSTS_N_INSNS (4), /* imulX */
 168   2, /* imul bit factor */
 169   COSTS_N_INSNS (37), /* idiv */
 170   COSTS_N_INSNS (68), /* idivX */
 171   COSTS_N_INSNS (2), /* movcc/movr */
 172   2, /* shift penalty */
 173 };
 174
 175 static const
 176 struct processor_costs ultrasparc3_costs = {
 177   COSTS_N_INSNS (2), /* int load */
 178   COSTS_N_INSNS (3), /* int signed load */
 179   COSTS_N_INSNS (3), /* int zeroed load */
 180   COSTS_N_INSNS (2), /* float load */
 181   COSTS_N_INSNS (3), /* fmov, fneg, fabs */
 182   COSTS_N_INSNS (4), /* fadd, fsub */
 183   COSTS_N_INSNS (5), /* fcmp */
 184   COSTS_N_INSNS (3), /* fmov, fmovr */
 185   COSTS_N_INSNS (4), /* fmul */
 186   COSTS_N_INSNS (17), /* fdivs */
 187   COSTS_N_INSNS (20), /* fdivd */
 188   COSTS_N_INSNS (20), /* fsqrts */
 189   COSTS_N_INSNS (29), /* fsqrtd */
 190   COSTS_N_INSNS (6), /* imul */
 191   COSTS_N_INSNS (6), /* imulX */
 192   0, /* imul bit factor */
 193   COSTS_N_INSNS (40), /* idiv */
 194   COSTS_N_INSNS (71), /* idivX */
 195   COSTS_N_INSNS (2), /* movcc/movr */
 196   0, /* shift penalty */
 197 };
 198
 199 const struct processor_costs *sparc_costs = &cypress_costs;
 200
 201 #ifdef HAVE_AS_RELAX_OPTION
 202 /* If 'as' and 'ld' are relaxing tail call insns into branch always, use
 203    "or %o7,%g0,X; call Y; or X,%g0,%o7" always, so that it can be optimized.
 204    With sethi/jmp, neither 'as' nor 'ld' has an easy way how to find out if
 205    somebody does not branch between the sethi and jmp.  */
 206 #define LEAF_SIBCALL_SLOT_RESERVED_P 1
 207 #else
 208 #define LEAF_SIBCALL_SLOT_RESERVED_P \
 209   ((TARGET_ARCH64 && !TARGET_CM_MEDLOW) || flag_pic)
 210 #endif
 211
 212 /* Global variables for machine-dependent things.  */
 213
 214 /* Size of frame.  Need to know this to emit return insns from leaf procedures.
 215    ACTUAL_FSIZE is set by sparc_compute_frame_size() which is called during the
 216    reload pass.  This is important as the value is later used for scheduling
 217    (to see what can go in a delay slot).
 218    APPARENT_FSIZE is the size of the stack less the register save area and less
 219    the outgoing argument area.  It is used when saving call preserved regs.  */
 220 static HOST_WIDE_INT apparent_fsize;
 221 static HOST_WIDE_INT actual_fsize;
 222
 223 /* Number of live general or floating point registers needed to be
 224    saved (as 4-byte quantities).  */
 225 static int num_gfregs;
 226
 227 /* The alias set for prologue/epilogue register save/restore.  */
 228 static GTY(()) int sparc_sr_alias_set;
 229
 230 /* The alias set for the structure return value.  */
 231 static GTY(()) int struct_value_alias_set;
 232
 233 /* Save the operands last given to a compare for use when we
 234    generate a scc or bcc insn.  */
 235 rtx sparc_compare_op0, sparc_compare_op1;
 236
 237 /* Vector to say how input registers are mapped to output registers.
 238    HARD_FRAME_POINTER_REGNUM cannot be remapped by this function to
 239    eliminate it.  You must use -fomit-frame-pointer to get that.  */
 240 char leaf_reg_remap[] =
 241 { 0, 1, 2, 3, 4, 5, 6, 7,
 242   -1, -1, -1, -1, -1, -1, 14, -1,
 243   -1, -1, -1, -1, -1, -1, -1, -1,
 244   8, 9, 10, 11, 12, 13, -1, 15,
 245
 246   32, 33, 34, 35, 36, 37, 38, 39,
 247   40, 41, 42, 43, 44, 45, 46, 47,
 248   48, 49, 50, 51, 52, 53, 54, 55,
 249   56, 57, 58, 59, 60, 61, 62, 63,
 250   64, 65, 66, 67, 68, 69, 70, 71,
 251   72, 73, 74, 75, 76, 77, 78, 79,
 252   80, 81, 82, 83, 84, 85, 86, 87,
 253   88, 89, 90, 91, 92, 93, 94, 95,
 254   96, 97, 98, 99, 100};
 255
 256 /* Vector, indexed by hard register number, which contains 1
 257    for a register that is allowable in a candidate for leaf
 258    function treatment.  */
 259 char sparc_leaf_regs[] =
 260 { 1, 1, 1, 1, 1, 1, 1, 1,
 261   0, 0, 0, 0, 0, 0, 1, 0,
 262   0, 0, 0, 0, 0, 0, 0, 0,
 263   1, 1, 1, 1, 1, 1, 0, 1,
 264   1, 1, 1, 1, 1, 1, 1, 1,
 265   1, 1, 1, 1, 1, 1, 1, 1,
 266   1, 1, 1, 1, 1, 1, 1, 1,
 267   1, 1, 1, 1, 1, 1, 1, 1,
 268   1, 1, 1, 1, 1, 1, 1, 1,
 269   1, 1, 1, 1, 1, 1, 1, 1,
 270   1, 1, 1, 1, 1, 1, 1, 1,
 271   1, 1, 1, 1, 1, 1, 1, 1,
 272   1, 1, 1, 1, 1};
 273
 274 struct machine_function GTY(())
 275 {
 276   /* Some local-dynamic TLS symbol name.  */
 277   const char *some_ld_name;
 278
 279   /* True if the current function is leaf and uses only leaf regs,
 280      so that the SPARC leaf function optimization can be applied.
 281      Private version of current_function_uses_only_leaf_regs, see
 282      sparc_expand_prologue for the rationale.  */
 283   int leaf_function_p;
 284
 285   /* True if the data calculated by sparc_expand_prologue are valid.  */
 286   bool prologue_data_valid_p;
 287 };
 288
 289 #define sparc_leaf_function_p  cfun->machine->leaf_function_p
 290 #define sparc_prologue_data_valid_p  cfun->machine->prologue_data_valid_p
 291
 292 /* Register we pretend to think the frame pointer is allocated to.
 293    Normally, this is %fp, but if we are in a leaf procedure, this
 294    is %sp+"something".  We record "something" separately as it may
 295    be too big for reg+constant addressing.  */
 296 static rtx frame_base_reg;
 297 static HOST_WIDE_INT frame_base_offset;
 298
 299 /* 1 if the next opcode is to be specially indented.  */
 300 int sparc_indent_opcode = 0;
 301
 302 static bool sparc_handle_option (size_t, const char *, int);
 303 static void sparc_init_modes (void);
 304 static void scan_record_type (tree, int *, int *, int *);
 305 static int function_arg_slotno (const CUMULATIVE_ARGS *, enum machine_mode,
 306                                 tree, int, int, int *, int *);
 307
 308 static int supersparc_adjust_cost (rtx, rtx, rtx, int);
 309 static int hypersparc_adjust_cost (rtx, rtx, rtx, int);
 310
 311 static void sparc_output_addr_vec (rtx);
 312 static void sparc_output_addr_diff_vec (rtx);
 313 static void sparc_output_deferred_case_vectors (void);
 314 static rtx sparc_builtin_saveregs (void);
 315 static int epilogue_renumber (rtx *, int);
 316 static bool sparc_assemble_integer (rtx, unsigned int, int);
 317 static int set_extends (rtx);
 318 static void emit_pic_helper (void);
 319 static void load_pic_register (bool);
 320 static int save_or_restore_regs (int, int, rtx, int, int);
 321 static void emit_save_regs (void);
 322 static void emit_restore_regs (void);
 323 static void sparc_asm_function_prologue (FILE *, HOST_WIDE_INT);
 324 static void sparc_asm_function_epilogue (FILE *, HOST_WIDE_INT);
 325 #ifdef OBJECT_FORMAT_ELF
 326 static void sparc_elf_asm_named_section (const char *, unsigned int, tree);
 327 #endif
 328
 329 static int sparc_adjust_cost (rtx, rtx, rtx, int);
 330 static int sparc_issue_rate (void);
 331 static void sparc_sched_init (FILE *, int, int);
 332 static int sparc_use_sched_lookahead (void);
 333
 334 static void emit_soft_tfmode_libcall (const char *, int, rtx *);
 335 static void emit_soft_tfmode_binop (enum rtx_code, rtx *);
 336 static void emit_soft_tfmode_unop (enum rtx_code, rtx *);
 337 static void emit_soft_tfmode_cvt (enum rtx_code, rtx *);
 338 static void emit_hard_tfmode_operation (enum rtx_code, rtx *);
 339
 340 static bool sparc_function_ok_for_sibcall (tree, tree);
 341 static void sparc_init_libfuncs (void);
 342 static void sparc_init_builtins (void);
 343 static void sparc_vis_init_builtins (void);
 344 static rtx sparc_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
 345 static void sparc_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
 346                                    HOST_WIDE_INT, tree);
 347 static bool sparc_can_output_mi_thunk (tree, HOST_WIDE_INT,
 348                                        HOST_WIDE_INT, tree);
 349 static struct machine_function * sparc_init_machine_status (void);
 350 static bool sparc_cannot_force_const_mem (rtx);
 351 static rtx sparc_tls_get_addr (void);
 352 static rtx sparc_tls_got (void);
 353 static const char *get_some_local_dynamic_name (void);
 354 static int get_some_local_dynamic_name_1 (rtx *, void *);
 355 static bool sparc_rtx_costs (rtx, int, int, int *);
 356 static bool sparc_promote_prototypes (tree);
 357 static rtx sparc_struct_value_rtx (tree, int);
 358 static bool sparc_return_in_memory (tree, tree);
 359 static bool sparc_strict_argument_naming (CUMULATIVE_ARGS *);
 360 static tree sparc_gimplify_va_arg (tree, tree, tree *, tree *);
 361 static bool sparc_vector_mode_supported_p (enum machine_mode);
 362 static bool sparc_pass_by_reference (CUMULATIVE_ARGS *,
 363                                      enum machine_mode, tree, bool);
 364 static int sparc_arg_partial_bytes (CUMULATIVE_ARGS *,
 365                                     enum machine_mode, tree, bool);
 366 static void sparc_dwarf_handle_frame_unspec (const char *, rtx, int);
 367 static void sparc_file_end (void);
 368 #ifdef SUBTARGET_ATTRIBUTE_TABLE
 369 const struct attribute_spec sparc_attribute_table[];
 370 #endif
 371 \f
 372 /* Option handling.  */
 373
 374 /* Code model option as passed by user.  */
 375 const char *sparc_cmodel_string;
 376
 377 /* Parsed value.  */
 378 enum cmodel sparc_cmodel;
 379
 380 char sparc_hard_reg_printed[8];
 381
 382 struct sparc_cpu_select sparc_select[] =
 383 {
 384   /* switch     name,           tune    arch */
 385   { (char *)0,  "default",      1,      1 },
 386   { (char *)0,  "-mcpu=",       1,      1 },
 387   { (char *)0,  "-mtune=",      1,      0 },
 388   { 0, 0, 0, 0 }
 389 };
 390
 391 /* CPU type.  This is set from TARGET_CPU_DEFAULT and -m{cpu,tune}=xxx.  */
 392 enum processor_type sparc_cpu;
 393
 394 /* Whether\fan FPU option was specified.  */
 395 static bool fpu_option_set = false;
 396
 397 /* Initialize the GCC target structure.  */
 398
 399 /* The sparc default is to use .half rather than .short for aligned
 400    HI objects.  Use .word instead of .long on non-ELF systems.  */
 401 #undef TARGET_ASM_ALIGNED_HI_OP
 402 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
 403 #ifndef OBJECT_FORMAT_ELF
 404 #undef TARGET_ASM_ALIGNED_SI_OP
 405 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
 406 #endif
 407
 408 #undef TARGET_ASM_UNALIGNED_HI_OP
 409 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uahalf\t"
 410 #undef TARGET_ASM_UNALIGNED_SI_OP
 411 #define TARGET_ASM_UNALIGNED_SI_OP "\t.uaword\t"
 412 #undef TARGET_ASM_UNALIGNED_DI_OP
 413 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaxword\t"
 414
 415 /* The target hook has to handle DI-mode values.  */
 416 #undef TARGET_ASM_INTEGER
 417 #define TARGET_ASM_INTEGER sparc_assemble_integer
 418
 419 #undef TARGET_ASM_FUNCTION_PROLOGUE
 420 #define TARGET_ASM_FUNCTION_PROLOGUE sparc_asm_function_prologue
 421 #undef TARGET_ASM_FUNCTION_EPILOGUE
 422 #define TARGET_ASM_FUNCTION_EPILOGUE sparc_asm_function_epilogue
 423
 424 #undef TARGET_SCHED_ADJUST_COST
 425 #define TARGET_SCHED_ADJUST_COST sparc_adjust_cost
 426 #undef TARGET_SCHED_ISSUE_RATE
 427 #define TARGET_SCHED_ISSUE_RATE sparc_issue_rate
 428 #undef TARGET_SCHED_INIT
 429 #define TARGET_SCHED_INIT sparc_sched_init
 430 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
 431 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD sparc_use_sched_lookahead
 432
 433 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
 434 #define TARGET_FUNCTION_OK_FOR_SIBCALL sparc_function_ok_for_sibcall
 435
 436 #undef TARGET_INIT_LIBFUNCS
 437 #define TARGET_INIT_LIBFUNCS sparc_init_libfuncs
 438 #undef TARGET_INIT_BUILTINS
 439 #define TARGET_INIT_BUILTINS sparc_init_builtins
 440
 441 #undef TARGET_EXPAND_BUILTIN
 442 #define TARGET_EXPAND_BUILTIN sparc_expand_builtin
 443
 444 #ifdef HAVE_AS_TLS
 445 #undef TARGET_HAVE_TLS
 446 #define TARGET_HAVE_TLS true
 447 #endif
 448 #undef TARGET_CANNOT_FORCE_CONST_MEM
 449 #define TARGET_CANNOT_FORCE_CONST_MEM sparc_cannot_force_const_mem
 450
 451 #undef TARGET_ASM_OUTPUT_MI_THUNK
 452 #define TARGET_ASM_OUTPUT_MI_THUNK sparc_output_mi_thunk
 453 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
 454 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK sparc_can_output_mi_thunk
 455
 456 #undef TARGET_RTX_COSTS
 457 #define TARGET_RTX_COSTS sparc_rtx_costs
 458 #undef TARGET_ADDRESS_COST
 459 #define TARGET_ADDRESS_COST hook_int_rtx_0
 460
 461 /* This is only needed for TARGET_ARCH64, but since PROMOTE_FUNCTION_MODE is a
 462    no-op for TARGET_ARCH32 this is ok.  Otherwise we'd need to add a runtime
 463    test for this value.  */
 464 #undef TARGET_PROMOTE_FUNCTION_ARGS
 465 #define TARGET_PROMOTE_FUNCTION_ARGS hook_bool_tree_true
 466
 467 /* This is only needed for TARGET_ARCH64, but since PROMOTE_FUNCTION_MODE is a
 468    no-op for TARGET_ARCH32 this is ok.  Otherwise we'd need to add a runtime
 469    test for this value.  */
 470 #undef TARGET_PROMOTE_FUNCTION_RETURN
 471 #define TARGET_PROMOTE_FUNCTION_RETURN hook_bool_tree_true
 472
 473 #undef TARGET_PROMOTE_PROTOTYPES
 474 #define TARGET_PROMOTE_PROTOTYPES sparc_promote_prototypes
 475
 476 #undef TARGET_STRUCT_VALUE_RTX
 477 #define TARGET_STRUCT_VALUE_RTX sparc_struct_value_rtx
 478 #undef TARGET_RETURN_IN_MEMORY
 479 #define TARGET_RETURN_IN_MEMORY sparc_return_in_memory
 480 #undef TARGET_MUST_PASS_IN_STACK
 481 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
 482 #undef TARGET_PASS_BY_REFERENCE
 483 #define TARGET_PASS_BY_REFERENCE sparc_pass_by_reference
 484 #undef TARGET_ARG_PARTIAL_BYTES
 485 #define TARGET_ARG_PARTIAL_BYTES sparc_arg_partial_bytes
 486
 487 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
 488 #define TARGET_EXPAND_BUILTIN_SAVEREGS sparc_builtin_saveregs
 489 #undef TARGET_STRICT_ARGUMENT_NAMING
 490 #define TARGET_STRICT_ARGUMENT_NAMING sparc_strict_argument_naming
 491
 492 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
 493 #define TARGET_GIMPLIFY_VA_ARG_EXPR sparc_gimplify_va_arg
 494
 495 #undef TARGET_VECTOR_MODE_SUPPORTED_P
 496 #define TARGET_VECTOR_MODE_SUPPORTED_P sparc_vector_mode_supported_p
 497
 498 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
 499 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC sparc_dwarf_handle_frame_unspec
 500
 501 #ifdef SUBTARGET_INSERT_ATTRIBUTES
 502 #undef TARGET_INSERT_ATTRIBUTES
 503 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
 504 #endif
 505
 506 #ifdef SUBTARGET_ATTRIBUTE_TABLE
 507 #undef TARGET_ATTRIBUTE_TABLE
 508 #define TARGET_ATTRIBUTE_TABLE sparc_attribute_table
 509 #endif
 510
 511 #undef TARGET_RELAXED_ORDERING
 512 #define TARGET_RELAXED_ORDERING SPARC_RELAXED_ORDERING
 513
 514 #undef TARGET_DEFAULT_TARGET_FLAGS
 515 #define TARGET_DEFAULT_TARGET_FLAGS TARGET_DEFAULT
 516 #undef TARGET_HANDLE_OPTION
 517 #define TARGET_HANDLE_OPTION sparc_handle_option
 518
 519 #undef TARGET_ASM_FILE_END
 520 #define TARGET_ASM_FILE_END sparc_file_end
 521
 522 struct gcc_target targetm = TARGET_INITIALIZER;
 523
 524 /* Implement TARGET_HANDLE_OPTION.  */
 525
 526 static bool
 527 sparc_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED)
 528 {
 529   switch (code)
 530     {
 531     case OPT_mfpu:
 532     case OPT_mhard_float:
 533     case OPT_msoft_float:
 534       fpu_option_set = true;
 535       break;
 536
 537     case OPT_mcpu_:
 538       sparc_select[1].string = arg;
 539       break;
 540
 541     case OPT_mtune_:
 542       sparc_select[2].string = arg;
 543       break;
 544
 545     case OPT_mcmodel_:
 546       sparc_cmodel_string = arg;
 547       break;
 548     }
 549
 550   return true;
 551 }
 552
 553 /* Validate and override various options, and do some machine dependent
 554    initialization.  */
 555
 556 void
 557 sparc_override_options (void)
 558 {
 559   static struct code_model {
 560     const char *const name;
 561     const int value;
 562   } const cmodels[] = {
 563     { "32", CM_32 },
 564     { "medlow", CM_MEDLOW },
 565     { "medmid", CM_MEDMID },
 566     { "medany", CM_MEDANY },
 567     { "embmedany", CM_EMBMEDANY },
 568     { 0, 0 }
 569   };
 570   const struct code_model *cmodel;
 571   /* Map TARGET_CPU_DEFAULT to value for -m{arch,tune}=.  */
 572   static struct cpu_default {
 573     const int cpu;
 574     const char *const name;
 575   } const cpu_default[] = {
 576     /* There must be one entry here for each TARGET_CPU value.  */
 577     { TARGET_CPU_sparc, "cypress" },
 578     { TARGET_CPU_sparclet, "tsc701" },
 579     { TARGET_CPU_sparclite, "f930" },
 580     { TARGET_CPU_v8, "v8" },
 581     { TARGET_CPU_hypersparc, "hypersparc" },
 582     { TARGET_CPU_sparclite86x, "sparclite86x" },
 583     { TARGET_CPU_supersparc, "supersparc" },
 584     { TARGET_CPU_v9, "v9" },
 585     { TARGET_CPU_ultrasparc, "ultrasparc" },
 586     { TARGET_CPU_ultrasparc3, "ultrasparc3" },
 587     { 0, 0 }
 588   };
 589   const struct cpu_default *def;
 590   /* Table of values for -m{cpu,tune}=.  */
 591   static struct cpu_table {
 592     const char *const name;
 593     const enum processor_type processor;
 594     const int disable;
 595     const int enable;
 596   } const cpu_table[] = {
 597     { "v7",         PROCESSOR_V7, MASK_ISA, 0 },
 598     { "cypress",    PROCESSOR_CYPRESS, MASK_ISA, 0 },
 599     { "v8",         PROCESSOR_V8, MASK_ISA, MASK_V8 },
 600     /* TI TMS390Z55 supersparc */
 601     { "supersparc", PROCESSOR_SUPERSPARC, MASK_ISA, MASK_V8 },
 602     { "sparclite",  PROCESSOR_SPARCLITE, MASK_ISA, MASK_SPARCLITE },
 603     /* The Fujitsu MB86930 is the original sparclite chip, with no fpu.
 604        The Fujitsu MB86934 is the recent sparclite chip, with an fpu.  */
 605     { "f930",       PROCESSOR_F930, MASK_ISA|MASK_FPU, MASK_SPARCLITE },
 606     { "f934",       PROCESSOR_F934, MASK_ISA, MASK_SPARCLITE|MASK_FPU },
 607     { "hypersparc", PROCESSOR_HYPERSPARC, MASK_ISA, MASK_V8|MASK_FPU },
 608     { "sparclite86x",  PROCESSOR_SPARCLITE86X, MASK_ISA|MASK_FPU,
 609       MASK_SPARCLITE },
 610     { "sparclet",   PROCESSOR_SPARCLET, MASK_ISA, MASK_SPARCLET },
 611     /* TEMIC sparclet */
 612     { "tsc701",     PROCESSOR_TSC701, MASK_ISA, MASK_SPARCLET },
 613     { "v9",         PROCESSOR_V9, MASK_ISA, MASK_V9 },
 614     /* TI ultrasparc I, II, IIi */
 615     { "ultrasparc", PROCESSOR_ULTRASPARC, MASK_ISA, MASK_V9
 616     /* Although insns using %y are deprecated, it is a clear win on current
 617        ultrasparcs.  */
 618                                                     |MASK_DEPRECATED_V8_INSNS},
 619     /* TI ultrasparc III */
 620     /* ??? Check if %y issue still holds true in ultra3.  */
 621     { "ultrasparc3", PROCESSOR_ULTRASPARC3, MASK_ISA, MASK_V9|MASK_DEPRECATED_V8_INSNS},
 622     { 0, 0, 0, 0 }
 623   };
 624   const struct cpu_table *cpu;
 625   const struct sparc_cpu_select *sel;
 626   int fpu;
 627
 628 #ifndef SPARC_BI_ARCH
 629   /* Check for unsupported architecture size.  */
 630   if (! TARGET_64BIT != DEFAULT_ARCH32_P)
 631     error ("%s is not supported by this configuration",
 632            DEFAULT_ARCH32_P ? "-m64" : "-m32");
 633 #endif
 634
 635   /* We force all 64bit archs to use 128 bit long double */
 636   if (TARGET_64BIT && ! TARGET_LONG_DOUBLE_128)
 637     {
 638       error ("-mlong-double-64 not allowed with -m64");
 639       target_flags |= MASK_LONG_DOUBLE_128;
 640     }
 641
 642   /* Code model selection.  */
 643   sparc_cmodel = SPARC_DEFAULT_CMODEL;
 644
 645 #ifdef SPARC_BI_ARCH
 646   if (TARGET_ARCH32)
 647     sparc_cmodel = CM_32;
 648 #endif
 649
 650   if (sparc_cmodel_string != NULL)
 651     {
 652       if (TARGET_ARCH64)
 653         {
 654           for (cmodel = &cmodels[0]; cmodel->name; cmodel++)
 655             if (strcmp (sparc_cmodel_string, cmodel->name) == 0)
 656               break;
 657           if (cmodel->name == NULL)
 658             error ("bad value (%s) for -mcmodel= switch", sparc_cmodel_string);
 659           else
 660             sparc_cmodel = cmodel->value;
 661         }
 662       else
 663         error ("-mcmodel= is not supported on 32 bit systems");
 664     }
 665
 666   fpu = TARGET_FPU; /* save current -mfpu status */
 667
 668   /* Set the default CPU.  */
 669   for (def = &cpu_default[0]; def->name; ++def)
 670     if (def->cpu == TARGET_CPU_DEFAULT)
 671       break;
 672   gcc_assert (def->name);
 673   sparc_select[0].string = def->name;
 674
 675   for (sel = &sparc_select[0]; sel->name; ++sel)
 676     {
 677       if (sel->string)
 678         {
 679           for (cpu = &cpu_table[0]; cpu->name; ++cpu)
 680             if (! strcmp (sel->string, cpu->name))
 681               {
 682                 if (sel->set_tune_p)
 683                   sparc_cpu = cpu->processor;
 684
 685                 if (sel->set_arch_p)
 686                   {
 687                     target_flags &= ~cpu->disable;
 688                     target_flags |= cpu->enable;
 689                   }
 690                 break;
 691               }
 692
 693           if (! cpu->name)
 694             error ("bad value (%s) for %s switch", sel->string, sel->name);
 695         }
 696     }
 697
 698   /* If -mfpu or -mno-fpu was explicitly used, don't override with
 699      the processor default.  */
 700   if (fpu_option_set)
 701     target_flags = (target_flags & ~MASK_FPU) | fpu;
 702
 703   /* Don't allow -mvis if FPU is disabled.  */
 704   if (! TARGET_FPU)
 705     target_flags &= ~MASK_VIS;
 706
 707   /* -mvis assumes UltraSPARC+, so we are sure v9 instructions
 708      are available.
 709      -m64 also implies v9.  */
 710   if (TARGET_VIS || TARGET_ARCH64)
 711     {
 712       target_flags |= MASK_V9;
 713       target_flags &= ~(MASK_V8 | MASK_SPARCLET | MASK_SPARCLITE);
 714     }
 715
 716   /* Use the deprecated v8 insns for sparc64 in 32 bit mode.  */
 717   if (TARGET_V9 && TARGET_ARCH32)
 718     target_flags |= MASK_DEPRECATED_V8_INSNS;
 719
 720   /* V8PLUS requires V9, makes no sense in 64 bit mode.  */
 721   if (! TARGET_V9 || TARGET_ARCH64)
 722     target_flags &= ~MASK_V8PLUS;
 723
 724   /* Don't use stack biasing in 32 bit mode.  */
 725   if (TARGET_ARCH32)
 726     target_flags &= ~MASK_STACK_BIAS;
 727
 728   /* Supply a default value for align_functions.  */
 729   if (align_functions == 0
 730       && (sparc_cpu == PROCESSOR_ULTRASPARC
 731           || sparc_cpu == PROCESSOR_ULTRASPARC3))
 732     align_functions = 32;
 733
 734   /* Validate PCC_STRUCT_RETURN.  */
 735   if (flag_pcc_struct_return == DEFAULT_PCC_STRUCT_RETURN)
 736     flag_pcc_struct_return = (TARGET_ARCH64 ? 0 : 1);
 737
 738   /* Only use .uaxword when compiling for a 64-bit target.  */
 739   if (!TARGET_ARCH64)
 740     targetm.asm_out.unaligned_op.di = NULL;
 741
 742   /* Do various machine dependent initializations.  */
 743   sparc_init_modes ();
 744
 745   /* Acquire unique alias sets for our private stuff.  */
 746   sparc_sr_alias_set = new_alias_set ();
 747   struct_value_alias_set = new_alias_set ();
 748
 749   /* Set up function hooks.  */
 750   init_machine_status = sparc_init_machine_status;
 751
 752   switch (sparc_cpu)
 753     {
 754     case PROCESSOR_V7:
 755     case PROCESSOR_CYPRESS:
 756       sparc_costs = &cypress_costs;
 757       break;
 758     case PROCESSOR_V8:
 759     case PROCESSOR_SPARCLITE:
 760     case PROCESSOR_SUPERSPARC:
 761       sparc_costs = &supersparc_costs;
 762       break;
 763     case PROCESSOR_F930:
 764     case PROCESSOR_F934:
 765     case PROCESSOR_HYPERSPARC:
 766     case PROCESSOR_SPARCLITE86X:
 767       sparc_costs = &hypersparc_costs;
 768       break;
 769     case PROCESSOR_SPARCLET:
 770     case PROCESSOR_TSC701:
 771       sparc_costs = &sparclet_costs;
 772       break;
 773     case PROCESSOR_V9:
 774     case PROCESSOR_ULTRASPARC:
 775       sparc_costs = &ultrasparc_costs;
 776       break;
 777     case PROCESSOR_ULTRASPARC3:
 778       sparc_costs = &ultrasparc3_costs;
 779       break;
 780     };
 781 }
 782 \f
 783 #ifdef SUBTARGET_ATTRIBUTE_TABLE
 784 /* Table of valid machine attributes.  */
 785 const struct attribute_spec sparc_attribute_table[] =
 786 {
 787   /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
 788   SUBTARGET_ATTRIBUTE_TABLE,
 789   { NULL,        0, 0, false, false, false, NULL }
 790 };
 791 #endif
 792 \f
 793 /* Miscellaneous utilities.  */
 794
 795 /* Nonzero if CODE, a comparison, is suitable for use in v9 conditional move
 796    or branch on register contents instructions.  */
 797
 798 int
 799 v9_regcmp_p (enum rtx_code code)
 800 {
 801   return (code == EQ || code == NE || code == GE || code == LT
 802           || code == LE || code == GT);
 803 }
 804
 805 \f
 806 /* Operand constraints.  */
 807
 808 /* Nonzero if OP is a floating point constant which can
 809    be loaded into an integer register using a single
 810    sethi instruction.  */
 811
 812 int
 813 fp_sethi_p (rtx op)
 814 {
 815   if (GET_CODE (op) == CONST_DOUBLE)
 816     {
 817       REAL_VALUE_TYPE r;
 818       long i;
 819
 820       REAL_VALUE_FROM_CONST_DOUBLE (r, op);
 821       REAL_VALUE_TO_TARGET_SINGLE (r, i);
 822       return !SPARC_SIMM13_P (i) && SPARC_SETHI_P (i);
 823     }
 824
 825   return 0;
 826 }
 827
 828 /* Nonzero if OP is a floating point constant which can
 829    be loaded into an integer register using a single
 830    mov instruction.  */
 831
 832 int
 833 fp_mov_p (rtx op)
 834 {
 835   if (GET_CODE (op) == CONST_DOUBLE)
 836     {
 837       REAL_VALUE_TYPE r;
 838       long i;
 839
 840       REAL_VALUE_FROM_CONST_DOUBLE (r, op);
 841       REAL_VALUE_TO_TARGET_SINGLE (r, i);
 842       return SPARC_SIMM13_P (i);
 843     }
 844
 845   return 0;
 846 }
 847
 848 /* Nonzero if OP is a floating point constant which can
 849    be loaded into an integer register using a high/losum
 850    instruction sequence.  */
 851
 852 int
 853 fp_high_losum_p (rtx op)
 854 {
 855   /* The constraints calling this should only be in
 856      SFmode move insns, so any constant which cannot
 857      be moved using a single insn will do.  */
 858   if (GET_CODE (op) == CONST_DOUBLE)
 859     {
 860       REAL_VALUE_TYPE r;
 861       long i;
 862
 863       REAL_VALUE_FROM_CONST_DOUBLE (r, op);
 864       REAL_VALUE_TO_TARGET_SINGLE (r, i);
 865       return !SPARC_SIMM13_P (i) && !SPARC_SETHI_P (i);
 866     }
 867
 868   return 0;
 869 }
 870
 871 /* If OP is a SYMBOL_REF of a thread-local symbol, return its TLS mode,
 872    otherwise return 0.  */
 873
 874 int
 875 tls_symbolic_operand (rtx op)
 876 {
 877   if (GET_CODE (op) != SYMBOL_REF)
 878     return 0;
 879   return SYMBOL_REF_TLS_MODEL (op);
 880 }
 881 \f
 882 /* We know it can't be done in one insn when we get here,
 883    the movsi expander guarantees this.  */
 884 void
 885 sparc_emit_set_const32 (rtx op0, rtx op1)
 886 {
 887   enum machine_mode mode = GET_MODE (op0);
 888   rtx temp;
 889
 890   if (reload_in_progress || reload_completed)
 891     temp = op0;
 892   else
 893     temp = gen_reg_rtx (mode);
 894
 895   if (GET_CODE (op1) == CONST_INT)
 896     {
 897       gcc_assert (!small_int_operand (op1, mode)
 898                   && !const_high_operand (op1, mode));
 899
 900       /* Emit them as real moves instead of a HIGH/LO_SUM,
 901          this way CSE can see everything and reuse intermediate
 902          values if it wants.  */
 903       emit_insn (gen_rtx_SET (VOIDmode, temp,
 904                               GEN_INT (INTVAL (op1)
 905                                 & ~(HOST_WIDE_INT)0x3ff)));
 906
 907       emit_insn (gen_rtx_SET (VOIDmode,
 908                               op0,
 909                               gen_rtx_IOR (mode, temp,
 910                                            GEN_INT (INTVAL (op1) & 0x3ff))));
 911     }
 912   else
 913     {
 914       /* A symbol, emit in the traditional way.  */
 915       emit_insn (gen_rtx_SET (VOIDmode, temp,
 916                               gen_rtx_HIGH (mode, op1)));
 917       emit_insn (gen_rtx_SET (VOIDmode,
 918                               op0, gen_rtx_LO_SUM (mode, temp, op1)));
 919     }
 920 }
 921
 922 \f
 923 /* Load OP1, a symbolic 64-bit constant, into OP0, a DImode register.
 924    If TEMP is nonzero, we are forbidden to use any other scratch
 925    registers.  Otherwise, we are allowed to generate them as needed.
 926
 927    Note that TEMP may have TImode if the code model is TARGET_CM_MEDANY
 928    or TARGET_CM_EMBMEDANY (see the reload_indi and reload_outdi patterns).  */
 929 void
 930 sparc_emit_set_symbolic_const64 (rtx op0, rtx op1, rtx temp)
 931 {
 932   rtx temp1, temp2, temp3, temp4, temp5;
 933   rtx ti_temp = 0;
 934
 935   if (temp && GET_MODE (temp) == TImode)
 936     {
 937       ti_temp = temp;
 938       temp = gen_rtx_REG (DImode, REGNO (temp));
 939     }
 940
 941   /* SPARC-V9 code-model support.  */
 942   switch (sparc_cmodel)
 943     {
 944     case CM_MEDLOW:
 945       /* The range spanned by all instructions in the object is less
 946          than 2^31 bytes (2GB) and the distance from any instruction
 947          to the location of the label _GLOBAL_OFFSET_TABLE_ is less
 948          than 2^31 bytes (2GB).
 949
 950          The executable must be in the low 4TB of the virtual address
 951          space.
 952
 953          sethi  %hi(symbol), %temp1
 954          or     %temp1, %lo(symbol), %reg  */
 955       if (temp)
 956         temp1 = temp;  /* op0 is allowed.  */
 957       else
 958         temp1 = gen_reg_rtx (DImode);
 959
 960       emit_insn (gen_rtx_SET (VOIDmode, temp1, gen_rtx_HIGH (DImode, op1)));
 961       emit_insn (gen_rtx_SET (VOIDmode, op0, gen_rtx_LO_SUM (DImode, temp1, op1)));
 962       break;
 963
 964     case CM_MEDMID:
 965       /* The range spanned by all instructions in the object is less
 966          than 2^31 bytes (2GB) and the distance from any instruction
 967          to the location of the label _GLOBAL_OFFSET_TABLE_ is less
 968          than 2^31 bytes (2GB).
 969
 970          The executable must be in the low 16TB of the virtual address
 971          space.
 972
 973          sethi  %h44(symbol), %temp1
 974          or     %temp1, %m44(symbol), %temp2
 975          sllx   %temp2, 12, %temp3
 976          or     %temp3, %l44(symbol), %reg  */
 977       if (temp)
 978         {
 979           temp1 = op0;
 980           temp2 = op0;
 981           temp3 = temp;  /* op0 is allowed.  */
 982         }
 983       else
 984         {
 985           temp1 = gen_reg_rtx (DImode);
 986           temp2 = gen_reg_rtx (DImode);
 987           temp3 = gen_reg_rtx (DImode);
 988         }
 989
 990       emit_insn (gen_seth44 (temp1, op1));
 991       emit_insn (gen_setm44 (temp2, temp1, op1));
 992       emit_insn (gen_rtx_SET (VOIDmode, temp3,
 993                               gen_rtx_ASHIFT (DImode, temp2, GEN_INT (12))));
 994       emit_insn (gen_setl44 (op0, temp3, op1));
 995       break;
 996
 997     case CM_MEDANY:
 998       /* The range spanned by all instructions in the object is less
 999          than 2^31 bytes (2GB) and the distance from any instruction
1000          to the location of the label _GLOBAL_OFFSET_TABLE_ is less
1001          than 2^31 bytes (2GB).
1002
1003          The executable can be placed anywhere in the virtual address
1004          space.
1005
1006          sethi  %hh(symbol), %temp1
1007          sethi  %lm(symbol), %temp2
1008          or     %temp1, %hm(symbol), %temp3
1009          sllx   %temp3, 32, %temp4
1010          or     %temp4, %temp2, %temp5
1011          or     %temp5, %lo(symbol), %reg  */
1012       if (temp)
1013         {
1014           /* It is possible that one of the registers we got for operands[2]
1015              might coincide with that of operands[0] (which is why we made
1016              it TImode).  Pick the other one to use as our scratch.  */
1017           if (rtx_equal_p (temp, op0))
1018             {
1019               gcc_assert (ti_temp);
1020               temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
1021             }
1022           temp1 = op0;
1023           temp2 = temp;  /* op0 is _not_ allowed, see above.  */
1024           temp3 = op0;
1025           temp4 = op0;
1026           temp5 = op0;
1027         }
1028       else
1029         {
1030           temp1 = gen_reg_rtx (DImode);
1031           temp2 = gen_reg_rtx (DImode);
1032           temp3 = gen_reg_rtx (DImode);
1033           temp4 = gen_reg_rtx (DImode);
1034           temp5 = gen_reg_rtx (DImode);
1035         }
1036
1037       emit_insn (gen_sethh (temp1, op1));
1038       emit_insn (gen_setlm (temp2, op1));
1039       emit_insn (gen_sethm (temp3, temp1, op1));
1040       emit_insn (gen_rtx_SET (VOIDmode, temp4,
1041                               gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
1042       emit_insn (gen_rtx_SET (VOIDmode, temp5,
1043                               gen_rtx_PLUS (DImode, temp4, temp2)));
1044       emit_insn (gen_setlo (op0, temp5, op1));
1045       break;
1046
1047     case CM_EMBMEDANY:
1048       /* Old old old backwards compatibility kruft here.
1049          Essentially it is MEDLOW with a fixed 64-bit
1050          virtual base added to all data segment addresses.
1051          Text-segment stuff is computed like MEDANY, we can't
1052          reuse the code above because the relocation knobs
1053          look different.
1054
1055          Data segment:  sethi   %hi(symbol), %temp1
1056                         add     %temp1, EMBMEDANY_BASE_REG, %temp2
1057                         or      %temp2, %lo(symbol), %reg  */
1058       if (data_segment_operand (op1, GET_MODE (op1)))
1059         {
1060           if (temp)
1061             {
1062               temp1 = temp;  /* op0 is allowed.  */
1063               temp2 = op0;
1064             }
1065           else
1066             {
1067               temp1 = gen_reg_rtx (DImode);
1068               temp2 = gen_reg_rtx (DImode);
1069             }
1070
1071           emit_insn (gen_embmedany_sethi (temp1, op1));
1072           emit_insn (gen_embmedany_brsum (temp2, temp1));
1073           emit_insn (gen_embmedany_losum (op0, temp2, op1));
1074         }
1075
1076       /* Text segment:  sethi   %uhi(symbol), %temp1
1077                         sethi   %hi(symbol), %temp2
1078                         or      %temp1, %ulo(symbol), %temp3
1079                         sllx    %temp3, 32, %temp4
1080                         or      %temp4, %temp2, %temp5
1081                         or      %temp5, %lo(symbol), %reg  */
1082       else
1083         {
1084           if (temp)
1085             {
1086               /* It is possible that one of the registers we got for operands[2]
1087                  might coincide with that of operands[0] (which is why we made
1088                  it TImode).  Pick the other one to use as our scratch.  */
1089               if (rtx_equal_p (temp, op0))
1090                 {
1091                   gcc_assert (ti_temp);
1092                   temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
1093                 }
1094               temp1 = op0;
1095               temp2 = temp;  /* op0 is _not_ allowed, see above.  */
1096               temp3 = op0;
1097               temp4 = op0;
1098               temp5 = op0;
1099             }
1100           else
1101             {
1102               temp1 = gen_reg_rtx (DImode);
1103               temp2 = gen_reg_rtx (DImode);
1104               temp3 = gen_reg_rtx (DImode);
1105               temp4 = gen_reg_rtx (DImode);
1106               temp5 = gen_reg_rtx (DImode);
1107             }
1108
1109           emit_insn (gen_embmedany_textuhi (temp1, op1));
1110           emit_insn (gen_embmedany_texthi  (temp2, op1));
1111           emit_insn (gen_embmedany_textulo (temp3, temp1, op1));
1112           emit_insn (gen_rtx_SET (VOIDmode, temp4,
1113                                   gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
1114           emit_insn (gen_rtx_SET (VOIDmode, temp5,
1115                                   gen_rtx_PLUS (DImode, temp4, temp2)));
1116           emit_insn (gen_embmedany_textlo  (op0, temp5, op1));
1117         }
1118       break;
1119
1120     default:
1121       gcc_unreachable ();
1122     }
1123 }
1124
1125 #if HOST_BITS_PER_WIDE_INT == 32
1126 void
1127 sparc_emit_set_const64 (rtx op0 ATTRIBUTE_UNUSED, rtx op1 ATTRIBUTE_UNUSED)
1128 {
1129   gcc_unreachable ();
1130 }
1131 #else
1132 /* These avoid problems when cross compiling.  If we do not
1133    go through all this hair then the optimizer will see
1134    invalid REG_EQUAL notes or in some cases none at all.  */
1135 static rtx gen_safe_HIGH64 (rtx, HOST_WIDE_INT);
1136 static rtx gen_safe_SET64 (rtx, HOST_WIDE_INT);
1137 static rtx gen_safe_OR64 (rtx, HOST_WIDE_INT);
1138 static rtx gen_safe_XOR64 (rtx, HOST_WIDE_INT);
1139
1140 /* The optimizer is not to assume anything about exactly
1141    which bits are set for a HIGH, they are unspecified.
1142    Unfortunately this leads to many missed optimizations
1143    during CSE.  We mask out the non-HIGH bits, and matches
1144    a plain movdi, to alleviate this problem.  */
1145 static rtx
1146 gen_safe_HIGH64 (rtx dest, HOST_WIDE_INT val)
1147 {
1148   return gen_rtx_SET (VOIDmode, dest, GEN_INT (val & ~(HOST_WIDE_INT)0x3ff));
1149 }
1150
1151 static rtx
1152 gen_safe_SET64 (rtx dest, HOST_WIDE_INT val)
1153 {
1154   return gen_rtx_SET (VOIDmode, dest, GEN_INT (val));
1155 }
1156
1157 static rtx
1158 gen_safe_OR64 (rtx src, HOST_WIDE_INT val)
1159 {
1160   return gen_rtx_IOR (DImode, src, GEN_INT (val));
1161 }
1162
1163 static rtx
1164 gen_safe_XOR64 (rtx src, HOST_WIDE_INT val)
1165 {
1166   return gen_rtx_XOR (DImode, src, GEN_INT (val));
1167 }
1168
1169 /* Worker routines for 64-bit constant formation on arch64.
1170    One of the key things to be doing in these emissions is
1171    to create as many temp REGs as possible.  This makes it
1172    possible for half-built constants to be used later when
1173    such values are similar to something required later on.
1174    Without doing this, the optimizer cannot see such
1175    opportunities.  */
1176
1177 static void sparc_emit_set_const64_quick1 (rtx, rtx,
1178                                            unsigned HOST_WIDE_INT, int);
1179
1180 static void
1181 sparc_emit_set_const64_quick1 (rtx op0, rtx temp,
1182                                unsigned HOST_WIDE_INT low_bits, int is_neg)
1183 {
1184   unsigned HOST_WIDE_INT high_bits;
1185
1186   if (is_neg)
1187     high_bits = (~low_bits) & 0xffffffff;
1188   else
1189     high_bits = low_bits;
1190
1191   emit_insn (gen_safe_HIGH64 (temp, high_bits));
1192   if (!is_neg)
1193     {
1194       emit_insn (gen_rtx_SET (VOIDmode, op0,
1195                               gen_safe_OR64 (temp, (high_bits & 0x3ff))));
1196     }
1197   else
1198     {
1199       /* If we are XOR'ing with -1, then we should emit a one's complement
1200          instead.  This way the combiner will notice logical operations
1201          such as ANDN later on and substitute.  */
1202       if ((low_bits & 0x3ff) == 0x3ff)
1203         {
1204           emit_insn (gen_rtx_SET (VOIDmode, op0,
1205                                   gen_rtx_NOT (DImode, temp)));
1206         }
1207       else
1208         {
1209           emit_insn (gen_rtx_SET (VOIDmode, op0,
1210                                   gen_safe_XOR64 (temp,
1211                                                   (-(HOST_WIDE_INT)0x400
1212                                                    | (low_bits & 0x3ff)))));
1213         }
1214     }
1215 }
1216
1217 static void sparc_emit_set_const64_quick2 (rtx, rtx, unsigned HOST_WIDE_INT,
1218                                            unsigned HOST_WIDE_INT, int);
1219
1220 static void
1221 sparc_emit_set_const64_quick2 (rtx op0, rtx temp,
1222                                unsigned HOST_WIDE_INT high_bits,
1223                                unsigned HOST_WIDE_INT low_immediate,
1224                                int shift_count)
1225 {
1226   rtx temp2 = op0;
1227
1228   if ((high_bits & 0xfffffc00) != 0)
1229     {
1230       emit_insn (gen_safe_HIGH64 (temp, high_bits));
1231       if ((high_bits & ~0xfffffc00) != 0)
1232         emit_insn (gen_rtx_SET (VOIDmode, op0,
1233                                 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
1234       else
1235         temp2 = temp;
1236     }
1237   else
1238     {
1239       emit_insn (gen_safe_SET64 (temp, high_bits));
1240       temp2 = temp;
1241     }
1242
1243   /* Now shift it up into place.  */
1244   emit_insn (gen_rtx_SET (VOIDmode, op0,
1245                           gen_rtx_ASHIFT (DImode, temp2,
1246                                           GEN_INT (shift_count))));
1247
1248   /* If there is a low immediate part piece, finish up by
1249      putting that in as well.  */
1250   if (low_immediate != 0)
1251     emit_insn (gen_rtx_SET (VOIDmode, op0,
1252                             gen_safe_OR64 (op0, low_immediate)));
1253 }
1254
1255 static void sparc_emit_set_const64_longway (rtx, rtx, unsigned HOST_WIDE_INT,
1256                                             unsigned HOST_WIDE_INT);
1257
1258 /* Full 64-bit constant decomposition.  Even though this is the
1259    'worst' case, we still optimize a few things away.  */
1260 static void
1261 sparc_emit_set_const64_longway (rtx op0, rtx temp,
1262                                 unsigned HOST_WIDE_INT high_bits,
1263                                 unsigned HOST_WIDE_INT low_bits)
1264 {
1265   rtx sub_temp;
1266
1267   if (reload_in_progress || reload_completed)
1268     sub_temp = op0;
1269   else
1270     sub_temp = gen_reg_rtx (DImode);
1271
1272   if ((high_bits & 0xfffffc00) != 0)
1273     {
1274       emit_insn (gen_safe_HIGH64 (temp, high_bits));
1275       if ((high_bits & ~0xfffffc00) != 0)
1276         emit_insn (gen_rtx_SET (VOIDmode,
1277                                 sub_temp,
1278                                 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
1279       else
1280         sub_temp = temp;
1281     }
1282   else
1283     {
1284       emit_insn (gen_safe_SET64 (temp, high_bits));
1285       sub_temp = temp;
1286     }
1287
1288   if (!reload_in_progress && !reload_completed)
1289     {
1290       rtx temp2 = gen_reg_rtx (DImode);
1291       rtx temp3 = gen_reg_rtx (DImode);
1292       rtx temp4 = gen_reg_rtx (DImode);
1293
1294       emit_insn (gen_rtx_SET (VOIDmode, temp4,
1295                               gen_rtx_ASHIFT (DImode, sub_temp,
1296                                               GEN_INT (32))));
1297
1298       emit_insn (gen_safe_HIGH64 (temp2, low_bits));
1299       if ((low_bits & ~0xfffffc00) != 0)
1300         {
1301           emit_insn (gen_rtx_SET (VOIDmode, temp3,
1302                                   gen_safe_OR64 (temp2, (low_bits & 0x3ff))));
1303           emit_insn (gen_rtx_SET (VOIDmode, op0,
1304                                   gen_rtx_PLUS (DImode, temp4, temp3)));
1305         }
1306       else
1307         {
1308           emit_insn (gen_rtx_SET (VOIDmode, op0,
1309                                   gen_rtx_PLUS (DImode, temp4, temp2)));
1310         }
1311     }
1312   else
1313     {
1314       rtx low1 = GEN_INT ((low_bits >> (32 - 12))          & 0xfff);
1315       rtx low2 = GEN_INT ((low_bits >> (32 - 12 - 12))     & 0xfff);
1316       rtx low3 = GEN_INT ((low_bits >> (32 - 12 - 12 - 8)) & 0x0ff);
1317       int to_shift = 12;
1318
1319       /* We are in the middle of reload, so this is really
1320          painful.  However we do still make an attempt to
1321          avoid emitting truly stupid code.  */
1322       if (low1 != const0_rtx)
1323         {
1324           emit_insn (gen_rtx_SET (VOIDmode, op0,
1325                                   gen_rtx_ASHIFT (DImode, sub_temp,
1326                                                   GEN_INT (to_shift))));
1327           emit_insn (gen_rtx_SET (VOIDmode, op0,
1328                                   gen_rtx_IOR (DImode, op0, low1)));
1329           sub_temp = op0;
1330           to_shift = 12;
1331         }
1332       else
1333         {
1334           to_shift += 12;
1335         }
1336       if (low2 != const0_rtx)
1337         {
1338           emit_insn (gen_rtx_SET (VOIDmode, op0,
1339                                   gen_rtx_ASHIFT (DImode, sub_temp,
1340                                                   GEN_INT (to_shift))));
1341           emit_insn (gen_rtx_SET (VOIDmode, op0,
1342                                   gen_rtx_IOR (DImode, op0, low2)));
1343           sub_temp = op0;
1344           to_shift = 8;
1345         }
1346       else
1347         {
1348           to_shift += 8;
1349         }
1350       emit_insn (gen_rtx_SET (VOIDmode, op0,
1351                               gen_rtx_ASHIFT (DImode, sub_temp,
1352                                               GEN_INT (to_shift))));
1353       if (low3 != const0_rtx)
1354         emit_insn (gen_rtx_SET (VOIDmode, op0,
1355                                 gen_rtx_IOR (DImode, op0, low3)));
1356       /* phew...  */
1357     }
1358 }
1359
1360 /* Analyze a 64-bit constant for certain properties.  */
1361 static void analyze_64bit_constant (unsigned HOST_WIDE_INT,
1362                                     unsigned HOST_WIDE_INT,
1363                                     int *, int *, int *);
1364
1365 static void
1366 analyze_64bit_constant (unsigned HOST_WIDE_INT high_bits,
1367                         unsigned HOST_WIDE_INT low_bits,
1368                         int *hbsp, int *lbsp, int *abbasp)
1369 {
1370   int lowest_bit_set, highest_bit_set, all_bits_between_are_set;
1371   int i;
1372
1373   lowest_bit_set = highest_bit_set = -1;
1374   i = 0;
1375   do
1376     {
1377       if ((lowest_bit_set == -1)
1378           && ((low_bits >> i) & 1))
1379         lowest_bit_set = i;
1380       if ((highest_bit_set == -1)
1381           && ((high_bits >> (32 - i - 1)) & 1))
1382         highest_bit_set = (64 - i - 1);
1383     }
1384   while (++i < 32
1385          && ((highest_bit_set == -1)
1386              || (lowest_bit_set == -1)));
1387   if (i == 32)
1388     {
1389       i = 0;
1390       do
1391         {
1392           if ((lowest_bit_set == -1)
1393               && ((high_bits >> i) & 1))
1394             lowest_bit_set = i + 32;
1395           if ((highest_bit_set == -1)
1396               && ((low_bits >> (32 - i - 1)) & 1))
1397             highest_bit_set = 32 - i - 1;
1398         }
1399       while (++i < 32
1400              && ((highest_bit_set == -1)
1401                  || (lowest_bit_set == -1)));
1402     }
1403   /* If there are no bits set this should have gone out
1404      as one instruction!  */
1405   gcc_assert (lowest_bit_set != -1 && highest_bit_set != -1);
1406   all_bits_between_are_set = 1;
1407   for (i = lowest_bit_set; i <= highest_bit_set; i++)
1408     {
1409       if (i < 32)
1410         {
1411           if ((low_bits & (1 << i)) != 0)
1412             continue;
1413         }
1414       else
1415         {
1416           if ((high_bits & (1 << (i - 32))) != 0)
1417             continue;
1418         }
1419       all_bits_between_are_set = 0;
1420       break;
1421     }
1422   *hbsp = highest_bit_set;
1423   *lbsp = lowest_bit_set;
1424   *abbasp = all_bits_between_are_set;
1425 }
1426
1427 static int const64_is_2insns (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT);
1428
1429 static int
1430 const64_is_2insns (unsigned HOST_WIDE_INT high_bits,
1431                    unsigned HOST_WIDE_INT low_bits)
1432 {
1433   int highest_bit_set, lowest_bit_set, all_bits_between_are_set;
1434
1435   if (high_bits == 0
1436       || high_bits == 0xffffffff)
1437     return 1;
1438
1439   analyze_64bit_constant (high_bits, low_bits,
1440                           &highest_bit_set, &lowest_bit_set,
1441                           &all_bits_between_are_set);
1442
1443   if ((highest_bit_set == 63
1444        || lowest_bit_set == 0)
1445       && all_bits_between_are_set != 0)
1446     return 1;
1447
1448   if ((highest_bit_set - lowest_bit_set) < 21)
1449     return 1;
1450
1451   return 0;
1452 }
1453
1454 static unsigned HOST_WIDE_INT create_simple_focus_bits (unsigned HOST_WIDE_INT,
1455                                                         unsigned HOST_WIDE_INT,
1456                                                         int, int);
1457
1458 static unsigned HOST_WIDE_INT
1459 create_simple_focus_bits (unsigned HOST_WIDE_INT high_bits,
1460                           unsigned HOST_WIDE_INT low_bits,
1461                           int lowest_bit_set, int shift)
1462 {
1463   HOST_WIDE_INT hi, lo;
1464
1465   if (lowest_bit_set < 32)
1466     {
1467       lo = (low_bits >> lowest_bit_set) << shift;
1468       hi = ((high_bits << (32 - lowest_bit_set)) << shift);
1469     }
1470   else
1471     {
1472       lo = 0;
1473       hi = ((high_bits >> (lowest_bit_set - 32)) << shift);
1474     }
1475   gcc_assert (! (hi & lo));
1476   return (hi | lo);
1477 }
1478
1479 /* Here we are sure to be arch64 and this is an integer constant
1480    being loaded into a register.  Emit the most efficient
1481    insn sequence possible.  Detection of all the 1-insn cases
1482    has been done already.  */
1483 void
1484 sparc_emit_set_const64 (rtx op0, rtx op1)
1485 {
1486   unsigned HOST_WIDE_INT high_bits, low_bits;
1487   int lowest_bit_set, highest_bit_set;
1488   int all_bits_between_are_set;
1489   rtx temp = 0;
1490
1491   /* Sanity check that we know what we are working with.  */
1492   gcc_assert (TARGET_ARCH64);
1493
1494   if (GET_CODE (op0) != SUBREG)
1495     {
1496       gcc_assert (GET_CODE (op0) == REG
1497                   && (REGNO (op0) < SPARC_FIRST_FP_REG
1498                       || REGNO (op0) > SPARC_LAST_V9_FP_REG));
1499     }
1500
1501   if (reload_in_progress || reload_completed)
1502     temp = op0;
1503
1504   if (GET_CODE (op1) != CONST_INT)
1505     {
1506       sparc_emit_set_symbolic_const64 (op0, op1, temp);
1507       return;
1508     }
1509
1510   if (! temp)
1511     temp = gen_reg_rtx (DImode);
1512
1513   high_bits = ((INTVAL (op1) >> 32) & 0xffffffff);
1514   low_bits = (INTVAL (op1) & 0xffffffff);
1515
1516   /* low_bits   bits 0  --> 31
1517      high_bits  bits 32 --> 63  */
1518
1519   analyze_64bit_constant (high_bits, low_bits,
1520                           &highest_bit_set, &lowest_bit_set,
1521                           &all_bits_between_are_set);
1522
1523   /* First try for a 2-insn sequence.  */
1524
1525   /* These situations are preferred because the optimizer can
1526    * do more things with them:
1527    * 1) mov     -1, %reg
1528    *    sllx    %reg, shift, %reg
1529    * 2) mov     -1, %reg
1530    *    srlx    %reg, shift, %reg
1531    * 3) mov     some_small_const, %reg
1532    *    sllx    %reg, shift, %reg
1533    */
1534   if (((highest_bit_set == 63
1535         || lowest_bit_set == 0)
1536        && all_bits_between_are_set != 0)
1537       || ((highest_bit_set - lowest_bit_set) < 12))
1538     {
1539       HOST_WIDE_INT the_const = -1;
1540       int shift = lowest_bit_set;
1541
1542       if ((highest_bit_set != 63
1543            && lowest_bit_set != 0)
1544           || all_bits_between_are_set == 0)
1545         {
1546           the_const =
1547             create_simple_focus_bits (high_bits, low_bits,
1548                                       lowest_bit_set, 0);
1549         }
1550       else if (lowest_bit_set == 0)
1551         shift = -(63 - highest_bit_set);
1552
1553       gcc_assert (SPARC_SIMM13_P (the_const));
1554       gcc_assert (shift != 0);
1555
1556       emit_insn (gen_safe_SET64 (temp, the_const));
1557       if (shift > 0)
1558         emit_insn (gen_rtx_SET (VOIDmode,
1559                                 op0,
1560                                 gen_rtx_ASHIFT (DImode,
1561                                                 temp,
1562                                                 GEN_INT (shift))));
1563       else if (shift < 0)
1564         emit_insn (gen_rtx_SET (VOIDmode,
1565                                 op0,
1566                                 gen_rtx_LSHIFTRT (DImode,
1567                                                   temp,
1568                                                   GEN_INT (-shift))));
1569       return;
1570     }
1571
1572   /* Now a range of 22 or less bits set somewhere.
1573    * 1) sethi   %hi(focus_bits), %reg
1574    *    sllx    %reg, shift, %reg
1575    * 2) sethi   %hi(focus_bits), %reg
1576    *    srlx    %reg, shift, %reg
1577    */
1578   if ((highest_bit_set - lowest_bit_set) < 21)
1579     {
1580       unsigned HOST_WIDE_INT focus_bits =
1581         create_simple_focus_bits (high_bits, low_bits,
1582                                   lowest_bit_set, 10);
1583
1584       gcc_assert (SPARC_SETHI_P (focus_bits));
1585       gcc_assert (lowest_bit_set != 10);
1586
1587       emit_insn (gen_safe_HIGH64 (temp, focus_bits));
1588
1589       /* If lowest_bit_set == 10 then a sethi alone could have done it.  */
1590       if (lowest_bit_set < 10)
1591         emit_insn (gen_rtx_SET (VOIDmode,
1592                                 op0,
1593                                 gen_rtx_LSHIFTRT (DImode, temp,
1594                                                   GEN_INT (10 - lowest_bit_set))));
1595       else if (lowest_bit_set > 10)
1596         emit_insn (gen_rtx_SET (VOIDmode,
1597                                 op0,
1598                                 gen_rtx_ASHIFT (DImode, temp,
1599                                                 GEN_INT (lowest_bit_set - 10))));
1600       return;
1601     }
1602
1603   /* 1) sethi   %hi(low_bits), %reg
1604    *    or      %reg, %lo(low_bits), %reg
1605    * 2) sethi   %hi(~low_bits), %reg
1606    *    xor     %reg, %lo(-0x400 | (low_bits & 0x3ff)), %reg
1607    */
1608   if (high_bits == 0
1609       || high_bits == 0xffffffff)
1610     {
1611       sparc_emit_set_const64_quick1 (op0, temp, low_bits,
1612                                      (high_bits == 0xffffffff));
1613       return;
1614     }
1615
1616   /* Now, try 3-insn sequences.  */
1617
1618   /* 1) sethi   %hi(high_bits), %reg
1619    *    or      %reg, %lo(high_bits), %reg
1620    *    sllx    %reg, 32, %reg
1621    */
1622   if (low_bits == 0)
1623     {
1624       sparc_emit_set_const64_quick2 (op0, temp, high_bits, 0, 32);
1625       return;
1626     }
1627
1628   /* We may be able to do something quick
1629      when the constant is negated, so try that.  */
1630   if (const64_is_2insns ((~high_bits) & 0xffffffff,
1631                          (~low_bits) & 0xfffffc00))
1632     {
1633       /* NOTE: The trailing bits get XOR'd so we need the
1634          non-negated bits, not the negated ones.  */
1635       unsigned HOST_WIDE_INT trailing_bits = low_bits & 0x3ff;
1636
1637       if ((((~high_bits) & 0xffffffff) == 0
1638            && ((~low_bits) & 0x80000000) == 0)
1639           || (((~high_bits) & 0xffffffff) == 0xffffffff
1640               && ((~low_bits) & 0x80000000) != 0))
1641         {
1642           unsigned HOST_WIDE_INT fast_int = (~low_bits & 0xffffffff);
1643
1644           if ((SPARC_SETHI_P (fast_int)
1645                && (~high_bits & 0xffffffff) == 0)
1646               || SPARC_SIMM13_P (fast_int))
1647             emit_insn (gen_safe_SET64 (temp, fast_int));
1648           else
1649             sparc_emit_set_const64 (temp, GEN_INT (fast_int));
1650         }
1651       else
1652         {
1653           rtx negated_const;
1654           negated_const = GEN_INT (((~low_bits) & 0xfffffc00) |
1655                                    (((HOST_WIDE_INT)((~high_bits) & 0xffffffff))<<32));
1656           sparc_emit_set_const64 (temp, negated_const);
1657         }
1658
1659       /* If we are XOR'ing with -1, then we should emit a one's complement
1660          instead.  This way the combiner will notice logical operations
1661          such as ANDN later on and substitute.  */
1662       if (trailing_bits == 0x3ff)
1663         {
1664           emit_insn (gen_rtx_SET (VOIDmode, op0,
1665                                   gen_rtx_NOT (DImode, temp)));
1666         }
1667       else
1668         {
1669           emit_insn (gen_rtx_SET (VOIDmode,
1670                                   op0,
1671                                   gen_safe_XOR64 (temp,
1672                                                   (-0x400 | trailing_bits))));
1673         }
1674       return;
1675     }
1676
1677   /* 1) sethi   %hi(xxx), %reg
1678    *    or      %reg, %lo(xxx), %reg
1679    *    sllx    %reg, yyy, %reg
1680    *
1681    * ??? This is just a generalized version of the low_bits==0
1682    * thing above, FIXME...
1683    */
1684   if ((highest_bit_set - lowest_bit_set) < 32)
1685     {
1686       unsigned HOST_WIDE_INT focus_bits =
1687         create_simple_focus_bits (high_bits, low_bits,
1688                                   lowest_bit_set, 0);
1689
1690       /* We can't get here in this state.  */
1691       gcc_assert (highest_bit_set >= 32 && lowest_bit_set < 32);
1692
1693       /* So what we know is that the set bits straddle the
1694          middle of the 64-bit word.  */
1695       sparc_emit_set_const64_quick2 (op0, temp,
1696                                      focus_bits, 0,
1697                                      lowest_bit_set);
1698       return;
1699     }
1700
1701   /* 1) sethi   %hi(high_bits), %reg
1702    *    or      %reg, %lo(high_bits), %reg
1703    *    sllx    %reg, 32, %reg
1704    *    or      %reg, low_bits, %reg
1705    */
1706   if (SPARC_SIMM13_P(low_bits)
1707       && ((int)low_bits > 0))
1708     {
1709       sparc_emit_set_const64_quick2 (op0, temp, high_bits, low_bits, 32);
1710       return;
1711     }
1712
1713   /* The easiest way when all else fails, is full decomposition.  */
1714 #if 0
1715   printf ("sparc_emit_set_const64: Hard constant [%08lx%08lx] neg[%08lx%08lx]\n",
1716           high_bits, low_bits, ~high_bits, ~low_bits);
1717 #endif
1718   sparc_emit_set_const64_longway (op0, temp, high_bits, low_bits);
1719 }
1720 #endif /* HOST_BITS_PER_WIDE_INT == 32 */
1721
1722 /* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE,
1723    return the mode to be used for the comparison.  For floating-point,
1724    CCFP[E]mode is used.  CC_NOOVmode should be used when the first operand
1725    is a PLUS, MINUS, NEG, or ASHIFT.  CCmode should be used when no special
1726    processing is needed.  */
1727
1728 enum machine_mode
1729 select_cc_mode (enum rtx_code op, rtx x, rtx y ATTRIBUTE_UNUSED)
1730 {
1731   if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
1732     {
1733       switch (op)
1734         {
1735         case EQ:
1736         case NE:
1737         case UNORDERED:
1738         case ORDERED:
1739         case UNLT:
1740         case UNLE:
1741         case UNGT:
1742         case UNGE:
1743         case UNEQ:
1744         case LTGT:
1745           return CCFPmode;
1746
1747         case LT:
1748         case LE:
1749         case GT:
1750         case GE:
1751           return CCFPEmode;
1752
1753         default:
1754           gcc_unreachable ();
1755         }
1756     }
1757   else if (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
1758            || GET_CODE (x) == NEG || GET_CODE (x) == ASHIFT)
1759     {
1760       if (TARGET_ARCH64 && GET_MODE (x) == DImode)
1761         return CCX_NOOVmode;
1762       else
1763         return CC_NOOVmode;
1764     }
1765   else
1766     {
1767       if (TARGET_ARCH64 && GET_MODE (x) == DImode)
1768         return CCXmode;
1769       else
1770         return CCmode;
1771     }
1772 }
1773
1774 /* X and Y are two things to compare using CODE.  Emit the compare insn and
1775    return the rtx for the cc reg in the proper mode.  */
1776
1777 rtx
1778 gen_compare_reg (enum rtx_code code, rtx x, rtx y)
1779 {
1780   enum machine_mode mode = SELECT_CC_MODE (code, x, y);
1781   rtx cc_reg;
1782
1783   /* ??? We don't have movcc patterns so we cannot generate pseudo regs for the
1784      fcc regs (cse can't tell they're really call clobbered regs and will
1785      remove a duplicate comparison even if there is an intervening function
1786      call - it will then try to reload the cc reg via an int reg which is why
1787      we need the movcc patterns).  It is possible to provide the movcc
1788      patterns by using the ldxfsr/stxfsr v9 insns.  I tried it: you need two
1789      registers (say %g1,%g5) and it takes about 6 insns.  A better fix would be
1790      to tell cse that CCFPE mode registers (even pseudos) are call
1791      clobbered.  */
1792
1793   /* ??? This is an experiment.  Rather than making changes to cse which may
1794      or may not be easy/clean, we do our own cse.  This is possible because
1795      we will generate hard registers.  Cse knows they're call clobbered (it
1796      doesn't know the same thing about pseudos). If we guess wrong, no big
1797      deal, but if we win, great!  */
1798
1799   if (TARGET_V9 && GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
1800 #if 1 /* experiment */
1801     {
1802       int reg;
1803       /* We cycle through the registers to ensure they're all exercised.  */
1804       static int next_fcc_reg = 0;
1805       /* Previous x,y for each fcc reg.  */
1806       static rtx prev_args[4][2];
1807
1808       /* Scan prev_args for x,y.  */
1809       for (reg = 0; reg < 4; reg++)
1810         if (prev_args[reg][0] == x && prev_args[reg][1] == y)
1811           break;
1812       if (reg == 4)
1813         {
1814           reg = next_fcc_reg;
1815           prev_args[reg][0] = x;
1816           prev_args[reg][1] = y;
1817           next_fcc_reg = (next_fcc_reg + 1) & 3;
1818         }
1819       cc_reg = gen_rtx_REG (mode, reg + SPARC_FIRST_V9_FCC_REG);
1820     }
1821 #else
1822     cc_reg = gen_reg_rtx (mode);
1823 #endif /* ! experiment */
1824   else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
1825     cc_reg = gen_rtx_REG (mode, SPARC_FCC_REG);
1826   else
1827     cc_reg = gen_rtx_REG (mode, SPARC_ICC_REG);
1828
1829   emit_insn (gen_rtx_SET (VOIDmode, cc_reg,
1830                           gen_rtx_COMPARE (mode, x, y)));
1831
1832   return cc_reg;
1833 }
1834
1835 /* This function is used for v9 only.
1836    CODE is the code for an Scc's comparison.
1837    OPERANDS[0] is the target of the Scc insn.
1838    OPERANDS[1] is the value we compare against const0_rtx (which hasn't
1839    been generated yet).
1840
1841    This function is needed to turn
1842
1843            (set (reg:SI 110)
1844                (gt (reg:CCX 100 %icc)
1845                    (const_int 0)))
1846    into
1847            (set (reg:SI 110)
1848                (gt:DI (reg:CCX 100 %icc)
1849                    (const_int 0)))
1850
1851    IE: The instruction recognizer needs to see the mode of the comparison to
1852    find the right instruction. We could use "gt:DI" right in the
1853    define_expand, but leaving it out allows us to handle DI, SI, etc.
1854
1855    We refer to the global sparc compare operands sparc_compare_op0 and
1856    sparc_compare_op1.  */
1857
1858 int
1859 gen_v9_scc (enum rtx_code compare_code, register rtx *operands)
1860 {
1861   rtx temp, op0, op1;
1862
1863   if (! TARGET_ARCH64
1864       && (GET_MODE (sparc_compare_op0) == DImode
1865           || GET_MODE (operands[0]) == DImode))
1866     return 0;
1867
1868   op0 = sparc_compare_op0;
1869   op1 = sparc_compare_op1;
1870
1871   /* Try to use the movrCC insns.  */
1872   if (TARGET_ARCH64
1873       && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
1874       && op1 == const0_rtx
1875       && v9_regcmp_p (compare_code))
1876     {
1877       /* Special case for op0 != 0.  This can be done with one instruction if
1878          operands[0] == sparc_compare_op0.  */
1879
1880       if (compare_code == NE
1881           && GET_MODE (operands[0]) == DImode
1882           && rtx_equal_p (op0, operands[0]))
1883         {
1884           emit_insn (gen_rtx_SET (VOIDmode, operands[0],
1885                               gen_rtx_IF_THEN_ELSE (DImode,
1886                                        gen_rtx_fmt_ee (compare_code, DImode,
1887                                                        op0, const0_rtx),
1888                                        const1_rtx,
1889                                        operands[0])));
1890           return 1;
1891         }
1892
1893       if (reg_overlap_mentioned_p (operands[0], op0))
1894         {
1895           /* Handle the case where operands[0] == sparc_compare_op0.
1896              We "early clobber" the result.  */
1897           op0 = gen_reg_rtx (GET_MODE (sparc_compare_op0));
1898           emit_move_insn (op0, sparc_compare_op0);
1899         }
1900
1901       emit_insn (gen_rtx_SET (VOIDmode, operands[0], const0_rtx));
1902       if (GET_MODE (op0) != DImode)
1903         {
1904           temp = gen_reg_rtx (DImode);
1905           convert_move (temp, op0, 0);
1906         }
1907       else
1908         temp = op0;
1909       emit_insn (gen_rtx_SET (VOIDmode, operands[0],
1910                           gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
1911                                    gen_rtx_fmt_ee (compare_code, DImode,
1912                                                    temp, const0_rtx),
1913                                    const1_rtx,
1914                                    operands[0])));
1915       return 1;
1916     }
1917   else
1918     {
1919       operands[1] = gen_compare_reg (compare_code, op0, op1);
1920
1921       switch (GET_MODE (operands[1]))
1922         {
1923           case CCmode :
1924           case CCXmode :
1925           case CCFPEmode :
1926           case CCFPmode :
1927             break;
1928           default :
1929             gcc_unreachable ();
1930         }
1931       emit_insn (gen_rtx_SET (VOIDmode, operands[0], const0_rtx));
1932       emit_insn (gen_rtx_SET (VOIDmode, operands[0],
1933                           gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
1934                                    gen_rtx_fmt_ee (compare_code,
1935                                                    GET_MODE (operands[1]),
1936                                                    operands[1], const0_rtx),
1937                                     const1_rtx, operands[0])));
1938       return 1;
1939     }
1940 }
1941
1942 /* Emit a conditional jump insn for the v9 architecture using comparison code
1943    CODE and jump target LABEL.
1944    This function exists to take advantage of the v9 brxx insns.  */
1945
1946 void
1947 emit_v9_brxx_insn (enum rtx_code code, rtx op0, rtx label)
1948 {
1949   emit_jump_insn (gen_rtx_SET (VOIDmode,
1950                            pc_rtx,
1951                            gen_rtx_IF_THEN_ELSE (VOIDmode,
1952                                     gen_rtx_fmt_ee (code, GET_MODE (op0),
1953                                                     op0, const0_rtx),
1954                                     gen_rtx_LABEL_REF (VOIDmode, label),
1955                                     pc_rtx)));
1956 }
1957
1958 /* Generate a DFmode part of a hard TFmode register.
1959    REG is the TFmode hard register, LOW is 1 for the
1960    low 64bit of the register and 0 otherwise.
1961  */
1962 rtx
1963 gen_df_reg (rtx reg, int low)
1964 {
1965   int regno = REGNO (reg);
1966
1967   if ((WORDS_BIG_ENDIAN == 0) ^ (low != 0))
1968     regno += (TARGET_ARCH64 && regno < 32) ? 1 : 2;
1969   return gen_rtx_REG (DFmode, regno);
1970 }
1971 \f
1972 /* Generate a call to FUNC with OPERANDS.  Operand 0 is the return value.
1973    Unlike normal calls, TFmode operands are passed by reference.  It is
1974    assumed that no more than 3 operands are required.  */
1975
1976 static void
1977 emit_soft_tfmode_libcall (const char *func_name, int nargs, rtx *operands)
1978 {
1979   rtx ret_slot = NULL, arg[3], func_sym;
1980   int i;
1981
1982   /* We only expect to be called for conversions, unary, and binary ops.  */
1983   gcc_assert (nargs == 2 || nargs == 3);
1984
1985   for (i = 0; i < nargs; ++i)
1986     {
1987       rtx this_arg = operands[i];
1988       rtx this_slot;
1989
1990       /* TFmode arguments and return values are passed by reference.  */
1991       if (GET_MODE (this_arg) == TFmode)
1992         {
1993           int force_stack_temp;
1994
1995           force_stack_temp = 0;
1996           if (TARGET_BUGGY_QP_LIB && i == 0)
1997             force_stack_temp = 1;
1998
1999           if (GET_CODE (this_arg) == MEM
2000               && ! force_stack_temp)
2001             this_arg = XEXP (this_arg, 0);
2002           else if (CONSTANT_P (this_arg)
2003                    && ! force_stack_temp)
2004             {
2005               this_slot = force_const_mem (TFmode, this_arg);
2006               this_arg = XEXP (this_slot, 0);
2007             }
2008           else
2009             {
2010               this_slot = assign_stack_temp (TFmode, GET_MODE_SIZE (TFmode), 0);
2011
2012               /* Operand 0 is the return value.  We'll copy it out later.  */
2013               if (i > 0)
2014                 emit_move_insn (this_slot, this_arg);
2015               else
2016                 ret_slot = this_slot;
2017
2018               this_arg = XEXP (this_slot, 0);
2019             }
2020         }
2021
2022       arg[i] = this_arg;
2023     }
2024
2025   func_sym = gen_rtx_SYMBOL_REF (Pmode, func_name);
2026
2027   if (GET_MODE (operands[0]) == TFmode)
2028     {
2029       if (nargs == 2)
2030         emit_library_call (func_sym, LCT_NORMAL, VOIDmode, 2,
2031                            arg[0], GET_MODE (arg[0]),
2032                            arg[1], GET_MODE (arg[1]));
2033       else
2034         emit_library_call (func_sym, LCT_NORMAL, VOIDmode, 3,
2035                            arg[0], GET_MODE (arg[0]),
2036                            arg[1], GET_MODE (arg[1]),
2037                            arg[2], GET_MODE (arg[2]));
2038
2039       if (ret_slot)
2040         emit_move_insn (operands[0], ret_slot);
2041     }
2042   else
2043     {
2044       rtx ret;
2045
2046       gcc_assert (nargs == 2);
2047
2048       ret = emit_library_call_value (func_sym, operands[0], LCT_NORMAL,
2049                                      GET_MODE (operands[0]), 1,
2050                                      arg[1], GET_MODE (arg[1]));
2051
2052       if (ret != operands[0])
2053         emit_move_insn (operands[0], ret);
2054     }
2055 }
2056
2057 /* Expand soft-float TFmode calls to sparc abi routines.  */
2058
2059 static void
2060 emit_soft_tfmode_binop (enum rtx_code code, rtx *operands)
2061 {
2062   const char *func;
2063
2064   switch (code)
2065     {
2066     case PLUS:
2067       func = "_Qp_add";
2068       break;
2069     case MINUS:
2070       func = "_Qp_sub";
2071       break;
2072     case MULT:
2073       func = "_Qp_mul";
2074       break;
2075     case DIV:
2076       func = "_Qp_div";
2077       break;
2078     default:
2079       gcc_unreachable ();
2080     }
2081
2082   emit_soft_tfmode_libcall (func, 3, operands);
2083 }
2084
2085 static void
2086 emit_soft_tfmode_unop (enum rtx_code code, rtx *operands)
2087 {
2088   const char *func;
2089
2090   gcc_assert (code == SQRT);
2091   func = "_Qp_sqrt";
2092
2093   emit_soft_tfmode_libcall (func, 2, operands);
2094 }
2095
2096 static void
2097 emit_soft_tfmode_cvt (enum rtx_code code, rtx *operands)
2098 {
2099   const char *func;
2100
2101   switch (code)
2102     {
2103     case FLOAT_EXTEND:
2104       switch (GET_MODE (operands[1]))
2105         {
2106         case SFmode:
2107           func = "_Qp_stoq";
2108           break;
2109         case DFmode:
2110           func = "_Qp_dtoq";
2111           break;
2112         default:
2113           gcc_unreachable ();
2114         }
2115       break;
2116
2117     case FLOAT_TRUNCATE:
2118       switch (GET_MODE (operands[0]))
2119         {
2120         case SFmode:
2121           func = "_Qp_qtos";
2122           break;
2123         case DFmode:
2124           func = "_Qp_qtod";
2125           break;
2126         default:
2127           gcc_unreachable ();
2128         }
2129       break;
2130
2131     case FLOAT:
2132       switch (GET_MODE (operands[1]))
2133         {
2134         case SImode:
2135           func = "_Qp_itoq";
2136           break;
2137         case DImode:
2138           func = "_Qp_xtoq";
2139           break;
2140         default:
2141           gcc_unreachable ();
2142         }
2143       break;
2144
2145     case UNSIGNED_FLOAT:
2146       switch (GET_MODE (operands[1]))
2147         {
2148         case SImode:
2149           func = "_Qp_uitoq";
2150           break;
2151         case DImode:
2152           func = "_Qp_uxtoq";
2153           break;
2154         default:
2155           gcc_unreachable ();
2156         }
2157       break;
2158
2159     case FIX:
2160       switch (GET_MODE (operands[0]))
2161         {
2162         case SImode:
2163           func = "_Qp_qtoi";
2164           break;
2165         case DImode:
2166           func = "_Qp_qtox";
2167           break;
2168         default:
2169           gcc_unreachable ();
2170         }
2171       break;
2172
2173     case UNSIGNED_FIX:
2174       switch (GET_MODE (operands[0]))
2175         {
2176         case SImode:
2177           func = "_Qp_qtoui";
2178           break;
2179         case DImode:
2180           func = "_Qp_qtoux";
2181           break;
2182         default:
2183           gcc_unreachable ();
2184         }
2185       break;
2186
2187     default:
2188       gcc_unreachable ();
2189     }
2190
2191   emit_soft_tfmode_libcall (func, 2, operands);
2192 }
2193
2194 /* Expand a hard-float tfmode operation.  All arguments must be in
2195    registers.  */
2196
2197 static void
2198 emit_hard_tfmode_operation (enum rtx_code code, rtx *operands)
2199 {
2200   rtx op, dest;
2201
2202   if (GET_RTX_CLASS (code) == RTX_UNARY)
2203     {
2204       operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
2205       op = gen_rtx_fmt_e (code, GET_MODE (operands[0]), operands[1]);
2206     }
2207   else
2208     {
2209       operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
2210       operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
2211       op = gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
2212                            operands[1], operands[2]);
2213     }
2214
2215   if (register_operand (operands[0], VOIDmode))
2216     dest = operands[0];
2217   else
2218     dest = gen_reg_rtx (GET_MODE (operands[0]));
2219
2220   emit_insn (gen_rtx_SET (VOIDmode, dest, op));
2221
2222   if (dest != operands[0])
2223     emit_move_insn (operands[0], dest);
2224 }
2225
2226 void
2227 emit_tfmode_binop (enum rtx_code code, rtx *operands)
2228 {
2229   if (TARGET_HARD_QUAD)
2230     emit_hard_tfmode_operation (code, operands);
2231   else
2232     emit_soft_tfmode_binop (code, operands);
2233 }
2234
2235 void
2236 emit_tfmode_unop (enum rtx_code code, rtx *operands)
2237 {
2238   if (TARGET_HARD_QUAD)
2239     emit_hard_tfmode_operation (code, operands);
2240   else
2241     emit_soft_tfmode_unop (code, operands);
2242 }
2243
2244 void
2245 emit_tfmode_cvt (enum rtx_code code, rtx *operands)
2246 {
2247   if (TARGET_HARD_QUAD)
2248     emit_hard_tfmode_operation (code, operands);
2249   else
2250     emit_soft_tfmode_cvt (code, operands);
2251 }
2252 \f
2253 /* Return nonzero if a branch/jump/call instruction will be emitting
2254    nop into its delay slot.  */
2255
2256 int
2257 empty_delay_slot (rtx insn)
2258 {
2259   rtx seq;
2260
2261   /* If no previous instruction (should not happen), return true.  */
2262   if (PREV_INSN (insn) == NULL)
2263     return 1;
2264
2265   seq = NEXT_INSN (PREV_INSN (insn));
2266   if (GET_CODE (PATTERN (seq)) == SEQUENCE)
2267     return 0;
2268
2269   return 1;
2270 }
2271
2272 /* Return nonzero if TRIAL can go into the call delay slot.  */
2273
2274 int
2275 tls_call_delay (rtx trial)
2276 {
2277   rtx pat, unspec;
2278
2279   /* Binutils allows
2280      call __tls_get_addr, %tgd_call (foo)
2281       add %l7, %o0, %o0, %tgd_add (foo)
2282      while Sun as/ld does not.  */
2283   if (TARGET_GNU_TLS || !TARGET_TLS)
2284     return 1;
2285
2286   pat = PATTERN (trial);
2287   if (GET_CODE (pat) != SET || GET_CODE (SET_DEST (pat)) != PLUS)
2288     return 1;
2289
2290   unspec = XEXP (SET_DEST (pat), 1);
2291   if (GET_CODE (unspec) != UNSPEC
2292       || (XINT (unspec, 1) != UNSPEC_TLSGD
2293           && XINT (unspec, 1) != UNSPEC_TLSLDM))
2294     return 1;
2295
2296   return 0;
2297 }
2298
2299 /* Return nonzero if TRIAL, an insn, can be combined with a 'restore'
2300    instruction.  RETURN_P is true if the v9 variant 'return' is to be
2301    considered in the test too.
2302
2303    TRIAL must be a SET whose destination is a REG appropriate for the
2304    'restore' instruction or, if RETURN_P is true, for the 'return'
2305    instruction.  */
2306
2307 static int
2308 eligible_for_restore_insn (rtx trial, bool return_p)
2309 {
2310   rtx pat = PATTERN (trial);
2311   rtx src = SET_SRC (pat);
2312
2313   /* The 'restore src,%g0,dest' pattern for word mode and below.  */
2314   if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
2315       && arith_operand (src, GET_MODE (src)))
2316     {
2317       if (TARGET_ARCH64)
2318         return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
2319       else
2320         return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (SImode);
2321     }
2322
2323   /* The 'restore src,%g0,dest' pattern for double-word mode.  */
2324   else if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
2325            && arith_double_operand (src, GET_MODE (src)))
2326     return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
2327
2328   /* The 'restore src,%g0,dest' pattern for float if no FPU.  */
2329   else if (! TARGET_FPU && register_operand (src, SFmode))
2330     return 1;
2331
2332   /* The 'restore src,%g0,dest' pattern for double if no FPU.  */
2333   else if (! TARGET_FPU && TARGET_ARCH64 && register_operand (src, DFmode))
2334     return 1;
2335
2336   /* If we have the 'return' instruction, anything that does not use
2337      local or output registers and can go into a delay slot wins.  */
2338   else if (return_p && TARGET_V9 && ! epilogue_renumber (&pat, 1)
2339            && (get_attr_in_uncond_branch_delay (trial)
2340                == IN_UNCOND_BRANCH_DELAY_TRUE))
2341     return 1;
2342
2343   /* The 'restore src1,src2,dest' pattern for SImode.  */
2344   else if (GET_CODE (src) == PLUS
2345            && register_operand (XEXP (src, 0), SImode)
2346            && arith_operand (XEXP (src, 1), SImode))
2347     return 1;
2348
2349   /* The 'restore src1,src2,dest' pattern for DImode.  */
2350   else if (GET_CODE (src) == PLUS
2351            && register_operand (XEXP (src, 0), DImode)
2352            && arith_double_operand (XEXP (src, 1), DImode))
2353     return 1;
2354
2355   /* The 'restore src1,%lo(src2),dest' pattern.  */
2356   else if (GET_CODE (src) == LO_SUM
2357            && ! TARGET_CM_MEDMID
2358            && ((register_operand (XEXP (src, 0), SImode)
2359                 && immediate_operand (XEXP (src, 1), SImode))
2360                || (TARGET_ARCH64
2361                    && register_operand (XEXP (src, 0), DImode)
2362                    && immediate_operand (XEXP (src, 1), DImode))))
2363     return 1;
2364
2365   /* The 'restore src,src,dest' pattern.  */
2366   else if (GET_CODE (src) == ASHIFT
2367            && (register_operand (XEXP (src, 0), SImode)
2368                || register_operand (XEXP (src, 0), DImode))
2369            && XEXP (src, 1) == const1_rtx)
2370     return 1;
2371
2372   return 0;
2373 }
2374
2375 /* Return nonzero if TRIAL can go into the function return's
2376    delay slot.  */
2377
2378 int
2379 eligible_for_return_delay (rtx trial)
2380 {
2381   rtx pat;
2382
2383   if (GET_CODE (trial) != INSN || GET_CODE (PATTERN (trial)) != SET)
2384     return 0;
2385
2386   if (get_attr_length (trial) != 1)
2387     return 0;
2388
2389   /* If there are any call-saved registers, we should scan TRIAL if it
2390      does not reference them.  For now just make it easy.  */
2391   if (num_gfregs)
2392     return 0;
2393
2394   /* If the function uses __builtin_eh_return, the eh_return machinery
2395      occupies the delay slot.  */
2396   if (current_function_calls_eh_return)
2397     return 0;
2398
2399   /* In the case of a true leaf function, anything can go into the slot.  */
2400   if (sparc_leaf_function_p)
2401     return get_attr_in_uncond_branch_delay (trial)
2402            == IN_UNCOND_BRANCH_DELAY_TRUE;
2403
2404   pat = PATTERN (trial);
2405
2406   /* Otherwise, only operations which can be done in tandem with
2407      a `restore' or `return' insn can go into the delay slot.  */
2408   if (GET_CODE (SET_DEST (pat)) != REG
2409       || (REGNO (SET_DEST (pat)) >= 8 && REGNO (SET_DEST (pat)) < 24))
2410     return 0;
2411
2412   /* If this instruction sets up floating point register and we have a return
2413      instruction, it can probably go in.  But restore will not work
2414      with FP_REGS.  */
2415   if (REGNO (SET_DEST (pat)) >= 32)
2416     return (TARGET_V9
2417             && ! epilogue_renumber (&pat, 1)
2418             && (get_attr_in_uncond_branch_delay (trial)
2419                 == IN_UNCOND_BRANCH_DELAY_TRUE));
2420
2421   return eligible_for_restore_insn (trial, true);
2422 }
2423
2424 /* Return nonzero if TRIAL can go into the sibling call's
2425    delay slot.  */
2426
2427 int
2428 eligible_for_sibcall_delay (rtx trial)
2429 {
2430   rtx pat;
2431
2432   if (GET_CODE (trial) != INSN || GET_CODE (PATTERN (trial)) != SET)
2433     return 0;
2434
2435   if (get_attr_length (trial) != 1)
2436     return 0;
2437
2438   pat = PATTERN (trial);
2439
2440   if (sparc_leaf_function_p)
2441     {
2442       /* If the tail call is done using the call instruction,
2443          we have to restore %o7 in the delay slot.  */
2444       if (LEAF_SIBCALL_SLOT_RESERVED_P)
2445         return 0;
2446
2447       /* %g1 is used to build the function address */
2448       if (reg_mentioned_p (gen_rtx_REG (Pmode, 1), pat))
2449         return 0;
2450
2451       return 1;
2452     }
2453
2454   /* Otherwise, only operations which can be done in tandem with
2455      a `restore' insn can go into the delay slot.  */
2456   if (GET_CODE (SET_DEST (pat)) != REG
2457       || (REGNO (SET_DEST (pat)) >= 8 && REGNO (SET_DEST (pat)) < 24)
2458       || REGNO (SET_DEST (pat)) >= 32)
2459     return 0;
2460
2461   /* If it mentions %o7, it can't go in, because sibcall will clobber it
2462      in most cases.  */
2463   if (reg_mentioned_p (gen_rtx_REG (Pmode, 15), pat))
2464     return 0;
2465
2466   return eligible_for_restore_insn (trial, false);
2467 }
2468
2469 int
2470 short_branch (int uid1, int uid2)
2471 {
2472   int delta = INSN_ADDRESSES (uid1) - INSN_ADDRESSES (uid2);
2473
2474   /* Leave a few words of "slop".  */
2475   if (delta >= -1023 && delta <= 1022)
2476     return 1;
2477
2478   return 0;
2479 }
2480
2481 /* Return nonzero if REG is not used after INSN.
2482    We assume REG is a reload reg, and therefore does
2483    not live past labels or calls or jumps.  */
2484 int
2485 reg_unused_after (rtx reg, rtx insn)
2486 {
2487   enum rtx_code code, prev_code = UNKNOWN;
2488
2489   while ((insn = NEXT_INSN (insn)))
2490     {
2491       if (prev_code == CALL_INSN && call_used_regs[REGNO (reg)])
2492         return 1;
2493
2494       code = GET_CODE (insn);
2495       if (GET_CODE (insn) == CODE_LABEL)
2496         return 1;
2497
2498       if (INSN_P (insn))
2499         {
2500           rtx set = single_set (insn);
2501           int in_src = set && reg_overlap_mentioned_p (reg, SET_SRC (set));
2502           if (set && in_src)
2503             return 0;
2504           if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
2505             return 1;
2506           if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
2507             return 0;
2508         }
2509       prev_code = code;
2510     }
2511   return 1;
2512 }
2513 \f
2514 /* Determine if it's legal to put X into the constant pool.  This
2515    is not possible if X contains the address of a symbol that is
2516    not constant (TLS) or not known at final link time (PIC).  */
2517
2518 static bool
2519 sparc_cannot_force_const_mem (rtx x)
2520 {
2521   switch (GET_CODE (x))
2522     {
2523     case CONST_INT:
2524     case CONST_DOUBLE:
2525     case CONST_VECTOR:
2526       /* Accept all non-symbolic constants.  */
2527       return false;
2528
2529     case LABEL_REF:
2530       /* Labels are OK iff we are non-PIC.  */
2531       return flag_pic != 0;
2532
2533     case SYMBOL_REF:
2534       /* 'Naked' TLS symbol references are never OK,
2535          non-TLS symbols are OK iff we are non-PIC.  */
2536       if (SYMBOL_REF_TLS_MODEL (x))
2537         return true;
2538       else
2539         return flag_pic != 0;
2540
2541     case CONST:
2542       return sparc_cannot_force_const_mem (XEXP (x, 0));
2543     case PLUS:
2544     case MINUS:
2545       return sparc_cannot_force_const_mem (XEXP (x, 0))
2546          || sparc_cannot_force_const_mem (XEXP (x, 1));
2547     case UNSPEC:
2548       return true;
2549     default:
2550       gcc_unreachable ();
2551     }
2552 }
2553 \f
2554 /* PIC support.  */
2555 static GTY(()) char pic_helper_symbol_name[256];
2556 static GTY(()) rtx pic_helper_symbol;
2557 static GTY(()) bool pic_helper_emitted_p = false;
2558 static GTY(()) rtx global_offset_table;
2559
2560 /* Ensure that we are not using patterns that are not OK with PIC.  */
2561
2562 int
2563 check_pic (int i)
2564 {
2565   switch (flag_pic)
2566     {
2567     case 1:
2568       gcc_assert (GET_CODE (recog_data.operand[i]) != SYMBOL_REF
2569                   && (GET_CODE (recog_data.operand[i]) != CONST
2570                   || (GET_CODE (XEXP (recog_data.operand[i], 0)) == MINUS
2571                       && (XEXP (XEXP (recog_data.operand[i], 0), 0)
2572                           == global_offset_table)
2573                       && (GET_CODE (XEXP (XEXP (recog_data.operand[i], 0), 1))
2574                           == CONST))));
2575     case 2:
2576     default:
2577       return 1;
2578     }
2579 }
2580
2581 /* Return true if X is an address which needs a temporary register when
2582    reloaded while generating PIC code.  */
2583
2584 int
2585 pic_address_needs_scratch (rtx x)
2586 {
2587   /* An address which is a symbolic plus a non SMALL_INT needs a temp reg.  */
2588   if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS
2589       && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
2590       && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
2591       && ! SMALL_INT (XEXP (XEXP (x, 0), 1)))
2592     return 1;
2593
2594   return 0;
2595 }
2596
2597 /* Determine if a given RTX is a valid constant.  We already know this
2598    satisfies CONSTANT_P.  */
2599
2600 bool
2601 legitimate_constant_p (rtx x)
2602 {
2603   rtx inner;
2604
2605   switch (GET_CODE (x))
2606     {
2607     case SYMBOL_REF:
2608       /* TLS symbols are not constant.  */
2609       if (SYMBOL_REF_TLS_MODEL (x))
2610         return false;
2611       break;
2612
2613     case CONST:
2614       inner = XEXP (x, 0);
2615
2616       /* Offsets of TLS symbols are never valid.
2617          Discourage CSE from creating them.  */
2618       if (GET_CODE (inner) == PLUS
2619           && tls_symbolic_operand (XEXP (inner, 0)))
2620         return false;
2621       break;
2622
2623     case CONST_DOUBLE:
2624       if (GET_MODE (x) == VOIDmode)
2625         return true;
2626
2627       /* Floating point constants are generally not ok.
2628          The only exception is 0.0 in VIS.  */
2629       if (TARGET_VIS
2630           && (GET_MODE (x) == SFmode
2631               || GET_MODE (x) == DFmode
2632               || GET_MODE (x) == TFmode)
2633           && const_zero_operand (x, GET_MODE (x)))
2634         return true;
2635
2636       return false;
2637
2638     default:
2639       break;
2640     }
2641
2642   return true;
2643 }
2644
2645 /* Determine if a given RTX is a valid constant address.  */
2646
2647 bool
2648 constant_address_p (rtx x)
2649 {
2650   switch (GET_CODE (x))
2651     {
2652     case LABEL_REF:
2653     case CONST_INT:
2654     case HIGH:
2655       return true;
2656
2657     case CONST:
2658       if (flag_pic && pic_address_needs_scratch (x))
2659         return false;
2660       return legitimate_constant_p (x);
2661
2662     case SYMBOL_REF:
2663       return !flag_pic && legitimate_constant_p (x);
2664
2665     default:
2666       return false;
2667     }
2668 }
2669
2670 /* Nonzero if the constant value X is a legitimate general operand
2671    when generating PIC code.  It is given that flag_pic is on and
2672    that X satisfies CONSTANT_P or is a CONST_DOUBLE.  */
2673
2674 bool
2675 legitimate_pic_operand_p (rtx x)
2676 {
2677   if (pic_address_needs_scratch (x))
2678     return false;
2679   if (tls_symbolic_operand (x)
2680       || (GET_CODE (x) == CONST
2681           && GET_CODE (XEXP (x, 0)) == PLUS
2682           && tls_symbolic_operand (XEXP (XEXP (x, 0), 0))))
2683     return false;
2684   return true;
2685 }
2686
2687 /* Return nonzero if ADDR is a valid memory address.
2688    STRICT specifies whether strict register checking applies.  */
2689
2690 int
2691 legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
2692 {
2693   rtx rs1 = NULL, rs2 = NULL, imm1 = NULL;
2694
2695   if (REG_P (addr) || GET_CODE (addr) == SUBREG)
2696     rs1 = addr;
2697   else if (GET_CODE (addr) == PLUS)
2698     {
2699       rs1 = XEXP (addr, 0);
2700       rs2 = XEXP (addr, 1);
2701
2702       /* Canonicalize.  REG comes first, if there are no regs,
2703          LO_SUM comes first.  */
2704       if (!REG_P (rs1)
2705           && GET_CODE (rs1) != SUBREG
2706           && (REG_P (rs2)
2707               || GET_CODE (rs2) == SUBREG
2708               || (GET_CODE (rs2) == LO_SUM && GET_CODE (rs1) != LO_SUM)))
2709         {
2710           rs1 = XEXP (addr, 1);
2711           rs2 = XEXP (addr, 0);
2712         }
2713
2714       if ((flag_pic == 1
2715            && rs1 == pic_offset_table_rtx
2716            && !REG_P (rs2)
2717            && GET_CODE (rs2) != SUBREG
2718            && GET_CODE (rs2) != LO_SUM
2719            && GET_CODE (rs2) != MEM
2720            && !tls_symbolic_operand (rs2)
2721            && (! symbolic_operand (rs2, VOIDmode) || mode == Pmode)
2722            && (GET_CODE (rs2) != CONST_INT || SMALL_INT (rs2)))
2723           || ((REG_P (rs1)
2724                || GET_CODE (rs1) == SUBREG)
2725               && RTX_OK_FOR_OFFSET_P (rs2)))
2726         {
2727           imm1 = rs2;
2728           rs2 = NULL;
2729         }
2730       else if ((REG_P (rs1) || GET_CODE (rs1) == SUBREG)
2731                && (REG_P (rs2) || GET_CODE (rs2) == SUBREG))
2732         {
2733           /* We prohibit REG + REG for TFmode when there are no quad move insns
2734              and we consequently need to split.  We do this because REG+REG
2735              is not an offsettable address.  If we get the situation in reload
2736              where source and destination of a movtf pattern are both MEMs with
2737              REG+REG address, then only one of them gets converted to an
2738              offsettable address.  */
2739           if (mode == TFmode
2740               && ! (TARGET_FPU && TARGET_ARCH64 && TARGET_HARD_QUAD))
2741             return 0;
2742
2743           /* We prohibit REG + REG on ARCH32 if not optimizing for
2744              DFmode/DImode because then mem_min_alignment is likely to be zero
2745              after reload and the  forced split would lack a matching splitter
2746              pattern.  */
2747           if (TARGET_ARCH32 && !optimize
2748               && (mode == DFmode || mode == DImode))
2749             return 0;
2750         }
2751       else if (USE_AS_OFFSETABLE_LO10
2752                && GET_CODE (rs1) == LO_SUM
2753                && TARGET_ARCH64
2754                && ! TARGET_CM_MEDMID
2755                && RTX_OK_FOR_OLO10_P (rs2))
2756         {
2757           rs2 = NULL;
2758           imm1 = XEXP (rs1, 1);
2759           rs1 = XEXP (rs1, 0);
2760           if (! CONSTANT_P (imm1) || tls_symbolic_operand (rs1))
2761             return 0;
2762         }
2763     }
2764   else if (GET_CODE (addr) == LO_SUM)
2765     {
2766       rs1 = XEXP (addr, 0);
2767       imm1 = XEXP (addr, 1);
2768
2769       if (! CONSTANT_P (imm1) || tls_symbolic_operand (rs1))
2770         return 0;
2771
2772       /* We can't allow TFmode in 32-bit mode, because an offset greater
2773          than the alignment (8) may cause the LO_SUM to overflow.  */
2774       if (mode == TFmode && TARGET_ARCH32)
2775         return 0;
2776     }
2777   else if (GET_CODE (addr) == CONST_INT && SMALL_INT (addr))
2778     return 1;
2779   else
2780     return 0;
2781
2782   if (GET_CODE (rs1) == SUBREG)
2783     rs1 = SUBREG_REG (rs1);
2784   if (!REG_P (rs1))
2785     return 0;
2786
2787   if (rs2)
2788     {
2789       if (GET_CODE (rs2) == SUBREG)
2790         rs2 = SUBREG_REG (rs2);
2791       if (!REG_P (rs2))
2792         return 0;
2793     }
2794
2795   if (strict)
2796     {
2797       if (!REGNO_OK_FOR_BASE_P (REGNO (rs1))
2798           || (rs2 && !REGNO_OK_FOR_BASE_P (REGNO (rs2))))
2799         return 0;
2800     }
2801   else
2802     {
2803       if ((REGNO (rs1) >= 32
2804            && REGNO (rs1) != FRAME_POINTER_REGNUM
2805            && REGNO (rs1) < FIRST_PSEUDO_REGISTER)
2806           || (rs2
2807               && (REGNO (rs2) >= 32
2808                   && REGNO (rs2) != FRAME_POINTER_REGNUM
2809                   && REGNO (rs2) < FIRST_PSEUDO_REGISTER)))
2810         return 0;
2811     }
2812   return 1;
2813 }
2814
2815 /* Construct the SYMBOL_REF for the tls_get_offset function.  */
2816
2817 static GTY(()) rtx sparc_tls_symbol;
2818 static rtx
2819 sparc_tls_get_addr (void)
2820 {
2821   if (!sparc_tls_symbol)
2822     sparc_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_addr");
2823
2824   return sparc_tls_symbol;
2825 }
2826
2827 static rtx
2828 sparc_tls_got (void)
2829 {
2830   rtx temp;
2831   if (flag_pic)
2832     {
2833       current_function_uses_pic_offset_table = 1;
2834       return pic_offset_table_rtx;
2835     }
2836
2837   if (!global_offset_table)
2838     global_offset_table = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
2839   temp = gen_reg_rtx (Pmode);
2840   emit_move_insn (temp, global_offset_table);
2841   return temp;
2842 }
2843
2844
2845 /* ADDR contains a thread-local SYMBOL_REF.  Generate code to compute
2846    this (thread-local) address.  */
2847
2848 rtx
2849 legitimize_tls_address (rtx addr)
2850 {
2851   rtx temp1, temp2, temp3, ret, o0, got, insn;
2852
2853   gcc_assert (! no_new_pseudos);
2854
2855   if (GET_CODE (addr) == SYMBOL_REF)
2856     switch (SYMBOL_REF_TLS_MODEL (addr))
2857       {
2858       case TLS_MODEL_GLOBAL_DYNAMIC:
2859         start_sequence ();
2860         temp1 = gen_reg_rtx (SImode);
2861         temp2 = gen_reg_rtx (SImode);
2862         ret = gen_reg_rtx (Pmode);
2863         o0 = gen_rtx_REG (Pmode, 8);
2864         got = sparc_tls_got ();
2865         emit_insn (gen_tgd_hi22 (temp1, addr));
2866         emit_insn (gen_tgd_lo10 (temp2, temp1, addr));
2867         if (TARGET_ARCH32)
2868           {
2869             emit_insn (gen_tgd_add32 (o0, got, temp2, addr));
2870             insn = emit_call_insn (gen_tgd_call32 (o0, sparc_tls_get_addr (),
2871                                                    addr, const1_rtx));
2872           }
2873         else
2874           {
2875             emit_insn (gen_tgd_add64 (o0, got, temp2, addr));
2876             insn = emit_call_insn (gen_tgd_call64 (o0, sparc_tls_get_addr (),
2877                                                    addr, const1_rtx));
2878           }
2879         CALL_INSN_FUNCTION_USAGE (insn)
2880           = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, o0),
2881                                CALL_INSN_FUNCTION_USAGE (insn));
2882         insn = get_insns ();
2883         end_sequence ();
2884         emit_libcall_block (insn, ret, o0, addr);
2885         break;
2886
2887       case TLS_MODEL_LOCAL_DYNAMIC:
2888         start_sequence ();
2889         temp1 = gen_reg_rtx (SImode);
2890         temp2 = gen_reg_rtx (SImode);
2891         temp3 = gen_reg_rtx (Pmode);
2892         ret = gen_reg_rtx (Pmode);
2893         o0 = gen_rtx_REG (Pmode, 8);
2894         got = sparc_tls_got ();
2895         emit_insn (gen_tldm_hi22 (temp1));
2896         emit_insn (gen_tldm_lo10 (temp2, temp1));
2897         if (TARGET_ARCH32)
2898           {
2899             emit_insn (gen_tldm_add32 (o0, got, temp2));
2900             insn = emit_call_insn (gen_tldm_call32 (o0, sparc_tls_get_addr (),
2901                                                     const1_rtx));
2902           }
2903         else
2904           {
2905             emit_insn (gen_tldm_add64 (o0, got, temp2));
2906             insn = emit_call_insn (gen_tldm_call64 (o0, sparc_tls_get_addr (),
2907                                                     const1_rtx));
2908           }
2909         CALL_INSN_FUNCTION_USAGE (insn)
2910           = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, o0),
2911                                CALL_INSN_FUNCTION_USAGE (insn));
2912         insn = get_insns ();
2913         end_sequence ();
2914         emit_libcall_block (insn, temp3, o0,
2915                             gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
2916                                             UNSPEC_TLSLD_BASE));
2917         temp1 = gen_reg_rtx (SImode);
2918         temp2 = gen_reg_rtx (SImode);
2919         emit_insn (gen_tldo_hix22 (temp1, addr));
2920         emit_insn (gen_tldo_lox10 (temp2, temp1, addr));
2921         if (TARGET_ARCH32)
2922           emit_insn (gen_tldo_add32 (ret, temp3, temp2, addr));
2923         else
2924           emit_insn (gen_tldo_add64 (ret, temp3, temp2, addr));
2925         break;
2926
2927       case TLS_MODEL_INITIAL_EXEC:
2928         temp1 = gen_reg_rtx (SImode);
2929         temp2 = gen_reg_rtx (SImode);
2930         temp3 = gen_reg_rtx (Pmode);
2931         got = sparc_tls_got ();
2932         emit_insn (gen_tie_hi22 (temp1, addr));
2933         emit_insn (gen_tie_lo10 (temp2, temp1, addr));
2934         if (TARGET_ARCH32)
2935           emit_insn (gen_tie_ld32 (temp3, got, temp2, addr));
2936         else
2937           emit_insn (gen_tie_ld64 (temp3, got, temp2, addr));
2938         if (TARGET_SUN_TLS)
2939           {
2940             ret = gen_reg_rtx (Pmode);
2941             if (TARGET_ARCH32)
2942               emit_insn (gen_tie_add32 (ret, gen_rtx_REG (Pmode, 7),
2943                                         temp3, addr));
2944             else
2945               emit_insn (gen_tie_add64 (ret, gen_rtx_REG (Pmode, 7),
2946                                         temp3, addr));
2947           }
2948         else
2949           ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp3);
2950         break;
2951
2952       case TLS_MODEL_LOCAL_EXEC:
2953         temp1 = gen_reg_rtx (Pmode);
2954         temp2 = gen_reg_rtx (Pmode);
2955         if (TARGET_ARCH32)
2956           {
2957             emit_insn (gen_tle_hix22_sp32 (temp1, addr));
2958             emit_insn (gen_tle_lox10_sp32 (temp2, temp1, addr));
2959           }
2960         else
2961           {
2962             emit_insn (gen_tle_hix22_sp64 (temp1, addr));
2963             emit_insn (gen_tle_lox10_sp64 (temp2, temp1, addr));
2964           }
2965         ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp2);
2966         break;
2967
2968       default:
2969         gcc_unreachable ();
2970       }
2971
2972   else
2973     gcc_unreachable ();  /* for now ... */
2974
2975   return ret;
2976 }
2977
2978
2979 /* Legitimize PIC addresses.  If the address is already position-independent,
2980    we return ORIG.  Newly generated position-independent addresses go into a
2981    reg.  This is REG if nonzero, otherwise we allocate register(s) as
2982    necessary.  */
2983
2984 rtx
2985 legitimize_pic_address (rtx orig, enum machine_mode mode ATTRIBUTE_UNUSED,
2986                         rtx reg)
2987 {
2988   if (GET_CODE (orig) == SYMBOL_REF)
2989     {
2990       rtx pic_ref, address;
2991       rtx insn;
2992
2993       if (reg == 0)
2994         {
2995           gcc_assert (! reload_in_progress && ! reload_completed);
2996           reg = gen_reg_rtx (Pmode);
2997         }
2998
2999       if (flag_pic == 2)
3000         {
3001           /* If not during reload, allocate another temp reg here for loading
3002              in the address, so that these instructions can be optimized
3003              properly.  */
3004           rtx temp_reg = ((reload_in_progress || reload_completed)
3005                           ? reg : gen_reg_rtx (Pmode));
3006
3007           /* Must put the SYMBOL_REF inside an UNSPEC here so that cse
3008              won't get confused into thinking that these two instructions
3009              are loading in the true address of the symbol.  If in the
3010              future a PIC rtx exists, that should be used instead.  */
3011           if (Pmode == SImode)
3012             {
3013               emit_insn (gen_movsi_high_pic (temp_reg, orig));
3014               emit_insn (gen_movsi_lo_sum_pic (temp_reg, temp_reg, orig));
3015             }
3016           else
3017             {
3018               emit_insn (gen_movdi_high_pic (temp_reg, orig));
3019               emit_insn (gen_movdi_lo_sum_pic (temp_reg, temp_reg, orig));
3020             }
3021           address = temp_reg;
3022         }
3023       else
3024         address = orig;
3025
3026       pic_ref = gen_const_mem (Pmode,
3027                                gen_rtx_PLUS (Pmode,
3028                                              pic_offset_table_rtx, address));
3029       current_function_uses_pic_offset_table = 1;
3030       insn = emit_move_insn (reg, pic_ref);
3031       /* Put a REG_EQUAL note on this insn, so that it can be optimized
3032          by loop.  */
3033       REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_EQUAL, orig,
3034                                   REG_NOTES (insn));
3035       return reg;
3036     }
3037   else if (GET_CODE (orig) == CONST)
3038     {
3039       rtx base, offset;
3040
3041       if (GET_CODE (XEXP (orig, 0)) == PLUS
3042           && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
3043         return orig;
3044
3045       if (reg == 0)
3046         {
3047           gcc_assert (! reload_in_progress && ! reload_completed);
3048           reg = gen_reg_rtx (Pmode);
3049         }
3050
3051       gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
3052       base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
3053       offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
3054                                        base == reg ? 0 : reg);
3055
3056       if (GET_CODE (offset) == CONST_INT)
3057         {
3058           if (SMALL_INT (offset))
3059             return plus_constant (base, INTVAL (offset));
3060           else if (! reload_in_progress && ! reload_completed)
3061             offset = force_reg (Pmode, offset);
3062           else
3063             /* If we reach here, then something is seriously wrong.  */
3064             gcc_unreachable ();
3065         }
3066       return gen_rtx_PLUS (Pmode, base, offset);
3067     }
3068   else if (GET_CODE (orig) == LABEL_REF)
3069     /* ??? Why do we do this?  */
3070     /* Now movsi_pic_label_ref uses it, but we ought to be checking that
3071        the register is live instead, in case it is eliminated.  */
3072     current_function_uses_pic_offset_table = 1;
3073
3074   return orig;
3075 }
3076
3077 /* Try machine-dependent ways of modifying an illegitimate address X
3078    to be legitimate.  If we find one, return the new, valid address.
3079
3080    OLDX is the address as it was before break_out_memory_refs was called.
3081    In some cases it is useful to look at this to decide what needs to be done.
3082
3083    MODE is the mode of the operand pointed to by X.  */
3084
3085 rtx
3086 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
3087 {
3088   rtx orig_x = x;
3089
3090   if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT)
3091     x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
3092                       force_operand (XEXP (x, 0), NULL_RTX));
3093   if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == MULT)
3094     x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
3095                       force_operand (XEXP (x, 1), NULL_RTX));
3096   if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS)
3097     x = gen_rtx_PLUS (Pmode, force_operand (XEXP (x, 0), NULL_RTX),
3098                       XEXP (x, 1));
3099   if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == PLUS)
3100     x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
3101                       force_operand (XEXP (x, 1), NULL_RTX));
3102
3103   if (x != orig_x && legitimate_address_p (mode, x, FALSE))
3104     return x;
3105
3106   if (tls_symbolic_operand (x))
3107     x = legitimize_tls_address (x);
3108   else if (flag_pic)
3109     x = legitimize_pic_address (x, mode, 0);
3110   else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 1)))
3111     x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
3112                       copy_to_mode_reg (Pmode, XEXP (x, 1)));
3113   else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 0)))
3114     x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
3115                       copy_to_mode_reg (Pmode, XEXP (x, 0)));
3116   else if (GET_CODE (x) == SYMBOL_REF
3117            || GET_CODE (x) == CONST
3118            || GET_CODE (x) == LABEL_REF)
3119     x = copy_to_suggested_reg (x, NULL_RTX, Pmode);
3120   return x;
3121 }
3122
3123 /* Emit the special PIC helper function.  */
3124
3125 static void
3126 emit_pic_helper (void)
3127 {
3128   const char *pic_name = reg_names[REGNO (pic_offset_table_rtx)];
3129   int align;
3130
3131   text_section ();
3132
3133   align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
3134   if (align > 0)
3135     ASM_OUTPUT_ALIGN (asm_out_file, align);
3136   ASM_OUTPUT_LABEL (asm_out_file, pic_helper_symbol_name);
3137   if (flag_delayed_branch)
3138     fprintf (asm_out_file, "\tjmp\t%%o7+8\n\t add\t%%o7, %s, %s\n",
3139             pic_name, pic_name);
3140   else
3141     fprintf (asm_out_file, "\tadd\t%%o7, %s, %s\n\tjmp\t%%o7+8\n\t nop\n",
3142             pic_name, pic_name);
3143
3144   pic_helper_emitted_p = true;
3145 }
3146
3147 /* Emit code to load the PIC register.  */
3148
3149 static void
3150 load_pic_register (bool delay_pic_helper)
3151 {
3152   int orig_flag_pic = flag_pic;
3153
3154   /* If we haven't initialized the special PIC symbols, do so now.  */
3155   if (!pic_helper_symbol_name[0])
3156     {
3157       ASM_GENERATE_INTERNAL_LABEL (pic_helper_symbol_name, "LADDPC", 0);
3158       pic_helper_symbol = gen_rtx_SYMBOL_REF (Pmode, pic_helper_symbol_name);
3159       global_offset_table = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
3160     }
3161
3162   /* If we haven't emitted the special PIC helper function, do so now unless
3163      we are requested to delay it.  */
3164   if (!delay_pic_helper && !pic_helper_emitted_p)
3165     emit_pic_helper ();
3166
3167   flag_pic = 0;
3168   if (TARGET_ARCH64)
3169     emit_insn (gen_load_pcrel_symdi (pic_offset_table_rtx, global_offset_table,
3170                                      pic_helper_symbol));
3171   else
3172     emit_insn (gen_load_pcrel_symsi (pic_offset_table_rtx, global_offset_table,
3173                                      pic_helper_symbol));
3174   flag_pic = orig_flag_pic;
3175
3176   /* Need to emit this whether or not we obey regdecls,
3177      since setjmp/longjmp can cause life info to screw up.
3178      ??? In the case where we don't obey regdecls, this is not sufficient
3179      since we may not fall out the bottom.  */
3180   emit_insn (gen_rtx_USE (VOIDmode, pic_offset_table_rtx));
3181 }
3182 \f
3183 /* Return 1 if RTX is a MEM which is known to be aligned to at
3184    least a DESIRED byte boundary.  */
3185
3186 int
3187 mem_min_alignment (rtx mem, int desired)
3188 {
3189   rtx addr, base, offset;
3190
3191   /* If it's not a MEM we can't accept it.  */
3192   if (GET_CODE (mem) != MEM)
3193     return 0;
3194
3195   /* Obviously...  */
3196   if (!TARGET_UNALIGNED_DOUBLES
3197       && MEM_ALIGN (mem) / BITS_PER_UNIT >= (unsigned)desired)
3198     return 1;
3199
3200   /* ??? The rest of the function predates MEM_ALIGN so
3201      there is probably a bit of redundancy.  */
3202   addr = XEXP (mem, 0);
3203   base = offset = NULL_RTX;
3204   if (GET_CODE (addr) == PLUS)
3205     {
3206       if (GET_CODE (XEXP (addr, 0)) == REG)
3207         {
3208           base = XEXP (addr, 0);
3209
3210           /* What we are saying here is that if the base
3211              REG is aligned properly, the compiler will make
3212              sure any REG based index upon it will be so
3213              as well.  */
3214           if (GET_CODE (XEXP (addr, 1)) == CONST_INT)
3215             offset = XEXP (addr, 1);
3216           else
3217             offset = const0_rtx;
3218         }
3219     }
3220   else if (GET_CODE (addr) == REG)
3221     {
3222       base = addr;
3223       offset = const0_rtx;
3224     }
3225
3226   if (base != NULL_RTX)
3227     {
3228       int regno = REGNO (base);
3229
3230       if (regno != HARD_FRAME_POINTER_REGNUM && regno != STACK_POINTER_REGNUM)
3231         {
3232           /* Check if the compiler has recorded some information
3233              about the alignment of the base REG.  If reload has
3234              completed, we already matched with proper alignments.
3235              If not running global_alloc, reload might give us
3236              unaligned pointer to local stack though.  */
3237           if (((cfun != 0
3238                 && REGNO_POINTER_ALIGN (regno) >= desired * BITS_PER_UNIT)
3239                || (optimize && reload_completed))
3240               && (INTVAL (offset) & (desired - 1)) == 0)
3241             return 1;
3242         }
3243       else
3244         {
3245           if (((INTVAL (offset) - SPARC_STACK_BIAS) & (desired - 1)) == 0)
3246             return 1;
3247         }
3248     }
3249   else if (! TARGET_UNALIGNED_DOUBLES
3250            || CONSTANT_P (addr)
3251            || GET_CODE (addr) == LO_SUM)
3252     {
3253       /* Anything else we know is properly aligned unless TARGET_UNALIGNED_DOUBLES
3254          is true, in which case we can only assume that an access is aligned if
3255          it is to a constant address, or the address involves a LO_SUM.  */
3256       return 1;
3257     }
3258
3259   /* An obviously unaligned address.  */
3260   return 0;
3261 }
3262
3263 \f
3264 /* Vectors to keep interesting information about registers where it can easily
3265    be got.  We used to use the actual mode value as the bit number, but there
3266    are more than 32 modes now.  Instead we use two tables: one indexed by
3267    hard register number, and one indexed by mode.  */
3268
3269 /* The purpose of sparc_mode_class is to shrink the range of modes so that
3270    they all fit (as bit numbers) in a 32 bit word (again).  Each real mode is
3271    mapped into one sparc_mode_class mode.  */
3272
3273 enum sparc_mode_class {
3274   S_MODE, D_MODE, T_MODE, O_MODE,
3275   SF_MODE, DF_MODE, TF_MODE, OF_MODE,
3276   CC_MODE, CCFP_MODE
3277 };
3278
3279 /* Modes for single-word and smaller quantities.  */
3280 #define S_MODES ((1 << (int) S_MODE) | (1 << (int) SF_MODE))
3281
3282 /* Modes for double-word and smaller quantities.  */
3283 #define D_MODES (S_MODES | (1 << (int) D_MODE) | (1 << DF_MODE))
3284
3285 /* Modes for quad-word and smaller quantities.  */
3286 #define T_MODES (D_MODES | (1 << (int) T_MODE) | (1 << (int) TF_MODE))
3287
3288 /* Modes for 8-word and smaller quantities.  */
3289 #define O_MODES (T_MODES | (1 << (int) O_MODE) | (1 << (int) OF_MODE))
3290
3291 /* Modes for single-float quantities.  We must allow any single word or
3292    smaller quantity.  This is because the fix/float conversion instructions
3293    take integer inputs/outputs from the float registers.  */
3294 #define SF_MODES (S_MODES)
3295
3296 /* Modes for double-float and smaller quantities.  */
3297 #define DF_MODES (S_MODES | D_MODES)
3298
3299 /* Modes for double-float only quantities.  */
3300 #define DF_MODES_NO_S ((1 << (int) D_MODE) | (1 << (int) DF_MODE))
3301
3302 /* Modes for quad-float only quantities.  */
3303 #define TF_ONLY_MODES (1 << (int) TF_MODE)
3304
3305 /* Modes for quad-float and smaller quantities.  */
3306 #define TF_MODES (DF_MODES | TF_ONLY_MODES)
3307
3308 /* Modes for quad-float and double-float quantities.  */
3309 #define TF_MODES_NO_S (DF_MODES_NO_S | TF_ONLY_MODES)
3310
3311 /* Modes for quad-float pair only quantities.  */
3312 #define OF_ONLY_MODES (1 << (int) OF_MODE)
3313
3314 /* Modes for quad-float pairs and smaller quantities.  */
3315 #define OF_MODES (TF_MODES | OF_ONLY_MODES)
3316
3317 #define OF_MODES_NO_S (TF_MODES_NO_S | OF_ONLY_MODES)
3318
3319 /* Modes for condition codes.  */
3320 #define CC_MODES (1 << (int) CC_MODE)
3321 #define CCFP_MODES (1 << (int) CCFP_MODE)
3322
3323 /* Value is 1 if register/mode pair is acceptable on sparc.
3324    The funny mixture of D and T modes is because integer operations
3325    do not specially operate on tetra quantities, so non-quad-aligned
3326    registers can hold quadword quantities (except %o4 and %i4 because
3327    they cross fixed registers).  */
3328
3329 /* This points to either the 32 bit or the 64 bit version.  */
3330 const int *hard_regno_mode_classes;
3331
3332 static const int hard_32bit_mode_classes[] = {
3333   S_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
3334   T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
3335   T_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
3336   T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
3337
3338   OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
3339   OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
3340   OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
3341   OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
3342
3343   /* FP regs f32 to f63.  Only the even numbered registers actually exist,
3344      and none can hold SFmode/SImode values.  */
3345   OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
3346   OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
3347   OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
3348   OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
3349
3350   /* %fcc[0123] */
3351   CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
3352
3353   /* %icc */
3354   CC_MODES
3355 };
3356
3357 static const int hard_64bit_mode_classes[] = {
3358   D_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
3359   O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
3360   T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
3361   O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
3362
3363   OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
3364   OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
3365   OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
3366   OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
3367
3368   /* FP regs f32 to f63.  Only the even numbered registers actually exist,
3369      and none can hold SFmode/SImode values.  */
3370   OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
3371   OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
3372   OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
3373   OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
3374
3375   /* %fcc[0123] */
3376   CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
3377
3378   /* %icc */
3379   CC_MODES
3380 };
3381
3382 int sparc_mode_class [NUM_MACHINE_MODES];
3383
3384 enum reg_class sparc_regno_reg_class[FIRST_PSEUDO_REGISTER];
3385
3386 static void
3387 sparc_init_modes (void)
3388 {
3389   int i;
3390
3391   for (i = 0; i < NUM_MACHINE_MODES; i++)
3392     {
3393       switch (GET_MODE_CLASS (i))
3394         {
3395         case MODE_INT:
3396         case MODE_PARTIAL_INT:
3397         case MODE_COMPLEX_INT:
3398           if (GET_MODE_SIZE (i) <= 4)
3399             sparc_mode_class[i] = 1 << (int) S_MODE;
3400           else if (GET_MODE_SIZE (i) == 8)
3401             sparc_mode_class[i] = 1 << (int) D_MODE;
3402           else if (GET_MODE_SIZE (i) == 16)
3403             sparc_mode_class[i] = 1 << (int) T_MODE;
3404           else if (GET_MODE_SIZE (i) == 32)
3405             sparc_mode_class[i] = 1 << (int) O_MODE;
3406           else
3407             sparc_mode_class[i] = 0;
3408           break;
3409         case MODE_VECTOR_INT:
3410           if (GET_MODE_SIZE (i) <= 4)
3411             sparc_mode_class[i] = 1 << (int)SF_MODE;
3412           else if (GET_MODE_SIZE (i) == 8)
3413             sparc_mode_class[i] = 1 << (int)DF_MODE;
3414           break;
3415         case MODE_FLOAT:
3416         case MODE_COMPLEX_FLOAT:
3417           if (GET_MODE_SIZE (i) <= 4)
3418             sparc_mode_class[i] = 1 << (int) SF_MODE;
3419           else if (GET_MODE_SIZE (i) == 8)
3420             sparc_mode_class[i] = 1 << (int) DF_MODE;
3421           else if (GET_MODE_SIZE (i) == 16)
3422             sparc_mode_class[i] = 1 << (int) TF_MODE;
3423           else if (GET_MODE_SIZE (i) == 32)
3424             sparc_mode_class[i] = 1 << (int) OF_MODE;
3425           else
3426             sparc_mode_class[i] = 0;
3427           break;
3428         case MODE_CC:
3429           if (i == (int) CCFPmode || i == (int) CCFPEmode)
3430             sparc_mode_class[i] = 1 << (int) CCFP_MODE;
3431           else
3432             sparc_mode_class[i] = 1 << (int) CC_MODE;
3433           break;
3434         default:
3435           sparc_mode_class[i] = 0;
3436           break;
3437         }
3438     }
3439
3440   if (TARGET_ARCH64)
3441     hard_regno_mode_classes = hard_64bit_mode_classes;
3442   else
3443     hard_regno_mode_classes = hard_32bit_mode_classes;
3444
3445   /* Initialize the array used by REGNO_REG_CLASS.  */
3446   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3447     {
3448       if (i < 16 && TARGET_V8PLUS)
3449         sparc_regno_reg_class[i] = I64_REGS;
3450       else if (i < 32 || i == FRAME_POINTER_REGNUM)
3451         sparc_regno_reg_class[i] = GENERAL_REGS;
3452       else if (i < 64)
3453         sparc_regno_reg_class[i] = FP_REGS;
3454       else if (i < 96)
3455         sparc_regno_reg_class[i] = EXTRA_FP_REGS;
3456       else if (i < 100)
3457         sparc_regno_reg_class[i] = FPCC_REGS;
3458       else
3459         sparc_regno_reg_class[i] = NO_REGS;
3460     }
3461 }
3462 \f
3463 /* Compute the frame size required by the function.  This function is called
3464    during the reload pass and also by sparc_expand_prologue.  */
3465
3466 HOST_WIDE_INT
3467 sparc_compute_frame_size (HOST_WIDE_INT size, int leaf_function_p)
3468 {
3469   int outgoing_args_size = (current_function_outgoing_args_size
3470                             + REG_PARM_STACK_SPACE (current_function_decl));
3471   int n_regs = 0;  /* N_REGS is the number of 4-byte regs saved thus far.  */
3472   int i;
3473
3474   if (TARGET_ARCH64)
3475     {
3476       for (i = 0; i < 8; i++)
3477         if (regs_ever_live[i] && ! call_used_regs[i])
3478           n_regs += 2;
3479     }
3480   else
3481     {
3482       for (i = 0; i < 8; i += 2)
3483         if ((regs_ever_live[i] && ! call_used_regs[i])
3484             || (regs_ever_live[i+1] && ! call_used_regs[i+1]))
3485           n_regs += 2;
3486     }
3487
3488   for (i = 32; i < (TARGET_V9 ? 96 : 64); i += 2)
3489     if ((regs_ever_live[i] && ! call_used_regs[i])
3490         || (regs_ever_live[i+1] && ! call_used_regs[i+1]))
3491       n_regs += 2;
3492
3493   /* Set up values for use in prologue and epilogue.  */
3494   num_gfregs = n_regs;
3495
3496   if (leaf_function_p
3497       && n_regs == 0
3498       && size == 0
3499       && current_function_outgoing_args_size == 0)
3500     actual_fsize = apparent_fsize = 0;
3501   else
3502     {
3503       /* We subtract STARTING_FRAME_OFFSET, remember it's negative.  */
3504       apparent_fsize = (size - STARTING_FRAME_OFFSET + 7) & -8;
3505       apparent_fsize += n_regs * 4;
3506       actual_fsize = apparent_fsize + ((outgoing_args_size + 7) & -8);
3507     }
3508
3509   /* Make sure nothing can clobber our register windows.
3510      If a SAVE must be done, or there is a stack-local variable,
3511      the register window area must be allocated.
3512      ??? For v8 we apparently need an additional 8 bytes of reserved space.  */
3513   if (! leaf_function_p || size > 0)
3514     actual_fsize += (16 * UNITS_PER_WORD) + (TARGET_ARCH64 ? 0 : 8);
3515
3516   return SPARC_STACK_ALIGN (actual_fsize);
3517 }
3518
3519 /* Output any necessary .register pseudo-ops.  */
3520
3521 void
3522 sparc_output_scratch_registers (FILE *file ATTRIBUTE_UNUSED)
3523 {
3524 #ifdef HAVE_AS_REGISTER_PSEUDO_OP
3525   int i;
3526
3527   if (TARGET_ARCH32)
3528     return;
3529
3530   /* Check if %g[2367] were used without
3531      .register being printed for them already.  */
3532   for (i = 2; i < 8; i++)
3533     {
3534       if (regs_ever_live [i]
3535           && ! sparc_hard_reg_printed [i])
3536         {
3537           sparc_hard_reg_printed [i] = 1;
3538           fprintf (file, "\t.register\t%%g%d, #scratch\n", i);
3539         }
3540       if (i == 3) i = 5;
3541     }
3542 #endif
3543 }
3544
3545 /* Save/restore call-saved registers from LOW to HIGH at BASE+OFFSET
3546    as needed.  LOW should be double-word aligned for 32-bit registers.
3547    Return the new OFFSET.  */
3548
3549 #define SORR_SAVE    0
3550 #define SORR_RESTORE 1
3551
3552 static int
3553 save_or_restore_regs (int low, int high, rtx base, int offset, int action)
3554 {
3555   rtx mem, insn;
3556   int i;
3557
3558   if (TARGET_ARCH64 && high <= 32)
3559     {
3560       for (i = low; i < high; i++)
3561         {
3562           if (regs_ever_live[i] && ! call_used_regs[i])
3563             {
3564               mem = gen_rtx_MEM (DImode, plus_constant (base, offset));
3565               set_mem_alias_set (mem, sparc_sr_alias_set);
3566               if (action == SORR_SAVE)
3567                 {
3568                   insn = emit_move_insn (mem, gen_rtx_REG (DImode, i));
3569                   RTX_FRAME_RELATED_P (insn) = 1;
3570                 }
3571               else  /* action == SORR_RESTORE */
3572                 emit_move_insn (gen_rtx_REG (DImode, i), mem);
3573               offset += 8;
3574             }
3575         }
3576     }
3577   else
3578     {
3579       for (i = low; i < high; i += 2)
3580         {
3581           bool reg0 = regs_ever_live[i] && ! call_used_regs[i];
3582           bool reg1 = regs_ever_live[i+1] && ! call_used_regs[i+1];
3583           enum machine_mode mode;
3584           int regno;
3585
3586           if (reg0 && reg1)
3587             {
3588               mode = i < 32 ? DImode : DFmode;
3589               regno = i;
3590             }
3591           else if (reg0)
3592             {
3593               mode = i < 32 ? SImode : SFmode;
3594               regno = i;
3595             }
3596           else if (reg1)
3597             {
3598               mode = i < 32 ? SImode : SFmode;
3599               regno = i + 1;
3600               offset += 4;
3601             }
3602           else
3603             continue;
3604
3605           mem = gen_rtx_MEM (mode, plus_constant (base, offset));
3606           set_mem_alias_set (mem, sparc_sr_alias_set);
3607           if (action == SORR_SAVE)
3608             {
3609               insn = emit_move_insn (mem, gen_rtx_REG (mode, regno));
3610               RTX_FRAME_RELATED_P (insn) = 1;
3611             }
3612           else  /* action == SORR_RESTORE */
3613             emit_move_insn (gen_rtx_REG (mode, regno), mem);
3614
3615           /* Always preserve double-word alignment.  */
3616           offset = (offset + 7) & -8;
3617         }
3618     }
3619
3620   return offset;
3621 }
3622
3623 /* Emit code to save call-saved registers.  */
3624
3625 static void
3626 emit_save_regs (void)
3627 {
3628   HOST_WIDE_INT offset;
3629   rtx base;
3630
3631   offset = frame_base_offset - apparent_fsize;
3632
3633   if (offset < -4096 || offset + num_gfregs * 4 > 4096)
3634     {
3635       /* ??? This might be optimized a little as %g1 might already have a
3636          value close enough that a single add insn will do.  */
3637       /* ??? Although, all of this is probably only a temporary fix
3638          because if %g1 can hold a function result, then
3639          sparc_expand_epilogue will lose (the result will be
3640          clobbered).  */
3641       base = gen_rtx_REG (Pmode, 1);
3642       emit_move_insn (base, GEN_INT (offset));
3643       emit_insn (gen_rtx_SET (VOIDmode,
3644                               base,
3645                               gen_rtx_PLUS (Pmode, frame_base_reg, base)));
3646       offset = 0;
3647     }
3648   else
3649     base = frame_base_reg;
3650
3651   offset = save_or_restore_regs (0, 8, base, offset, SORR_SAVE);
3652   save_or_restore_regs (32, TARGET_V9 ? 96 : 64, base, offset, SORR_SAVE);
3653 }
3654
3655 /* Emit code to restore call-saved registers.  */
3656
3657 static void
3658 emit_restore_regs (void)
3659 {
3660   HOST_WIDE_INT offset;
3661   rtx base;
3662
3663   offset = frame_base_offset - apparent_fsize;
3664
3665   if (offset < -4096 || offset + num_gfregs * 4 > 4096 - 8 /*double*/)
3666     {
3667       base = gen_rtx_REG (Pmode, 1);
3668       emit_move_insn (base, GEN_INT (offset));
3669       emit_insn (gen_rtx_SET (VOIDmode,
3670                               base,
3671                               gen_rtx_PLUS (Pmode, frame_base_reg, base)));
3672       offset = 0;
3673     }
3674   else
3675     base = frame_base_reg;
3676
3677   offset = save_or_restore_regs (0, 8, base, offset, SORR_RESTORE);
3678   save_or_restore_regs (32, TARGET_V9 ? 96 : 64, base, offset, SORR_RESTORE);
3679 }
3680
3681 /* Generate a save_register_window insn.  */
3682
3683 static rtx
3684 gen_save_register_window (rtx increment)
3685 {
3686   if (TARGET_ARCH64)
3687     return gen_save_register_windowdi (increment);
3688   else
3689     return gen_save_register_windowsi (increment);
3690 }
3691
3692 /* Generate an increment for the stack pointer.  */
3693
3694 static rtx
3695 gen_stack_pointer_inc (rtx increment)
3696 {
3697   if (TARGET_ARCH64)
3698     return gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx, increment);
3699   else
3700     return gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, increment);
3701 }
3702
3703 /* Generate a decrement for the stack pointer.  */
3704
3705 static rtx
3706 gen_stack_pointer_dec (rtx decrement)
3707 {
3708   if (TARGET_ARCH64)
3709     return gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx, decrement);
3710   else
3711     return gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx, decrement);
3712 }
3713
3714 /* Expand the function prologue.  The prologue is responsible for reserving
3715    storage for the frame, saving the call-saved registers and loading the
3716    PIC register if needed.  */
3717
3718 void
3719 sparc_expand_prologue (void)
3720 {
3721   rtx insn;
3722   int i;
3723
3724   /* Compute a snapshot of current_function_uses_only_leaf_regs.  Relying
3725      on the final value of the flag means deferring the prologue/epilogue
3726      expansion until just before the second scheduling pass, which is too
3727      late to emit multiple epilogues or return insns.
3728
3729      Of course we are making the assumption that the value of the flag
3730      will not change between now and its final value.  Of the three parts
3731      of the formula, only the last one can reasonably vary.  Let's take a
3732      closer look, after assuming that the first two ones are set to true
3733      (otherwise the last value is effectively silenced).
3734
3735      If only_leaf_regs_used returns false, the global predicate will also
3736      be false so the actual frame size calculated below will be positive.
3737      As a consequence, the save_register_window insn will be emitted in
3738      the instruction stream; now this insn explicitly references %fp
3739      which is not a leaf register so only_leaf_regs_used will always
3740      return false subsequently.
3741
3742      If only_leaf_regs_used returns true, we hope that the subsequent
3743      optimization passes won't cause non-leaf registers to pop up.  For
3744      example, the regrename pass has special provisions to not rename to
3745      non-leaf registers in a leaf function.  */
3746   sparc_leaf_function_p
3747     = optimize > 0 && leaf_function_p () && only_leaf_regs_used ();
3748
3749   /* Need to use actual_fsize, since we are also allocating
3750      space for our callee (and our own register save area).  */
3751   actual_fsize
3752     = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);
3753
3754   /* Advertise that the data calculated just above are now valid.  */
3755   sparc_prologue_data_valid_p = true;
3756
3757   if (sparc_leaf_function_p)
3758     {
3759       frame_base_reg = stack_pointer_rtx;
3760       frame_base_offset = actual_fsize + SPARC_STACK_BIAS;
3761     }
3762   else
3763     {
3764       frame_base_reg = hard_frame_pointer_rtx;
3765       frame_base_offset = SPARC_STACK_BIAS;
3766     }
3767
3768   if (actual_fsize == 0)
3769     /* do nothing.  */ ;
3770   else if (sparc_leaf_function_p)
3771     {
3772       if (actual_fsize <= 4096)
3773         insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-actual_fsize)));
3774       else if (actual_fsize <= 8192)
3775         {
3776           insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
3777           /* %sp is still the CFA register.  */
3778           RTX_FRAME_RELATED_P (insn) = 1;
3779           insn
3780             = emit_insn (gen_stack_pointer_inc (GEN_INT (4096-actual_fsize)));
3781         }
3782       else
3783         {
3784           rtx reg = gen_rtx_REG (Pmode, 1);
3785           emit_move_insn (reg, GEN_INT (-actual_fsize));
3786           insn = emit_insn (gen_stack_pointer_inc (reg));
3787           REG_NOTES (insn) =
3788             gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3789                                PATTERN (gen_stack_pointer_inc (GEN_INT (-actual_fsize))),
3790                                REG_NOTES (insn));
3791         }
3792
3793       RTX_FRAME_RELATED_P (insn) = 1;
3794     }
3795   else
3796     {
3797       if (actual_fsize <= 4096)
3798         insn = emit_insn (gen_save_register_window (GEN_INT (-actual_fsize)));
3799       else if (actual_fsize <= 8192)
3800         {
3801           insn = emit_insn (gen_save_register_window (GEN_INT (-4096)));
3802           /* %sp is not the CFA register anymore.  */
3803           emit_insn (gen_stack_pointer_inc (GEN_INT (4096-actual_fsize)));
3804         }
3805       else
3806         {
3807           rtx reg = gen_rtx_REG (Pmode, 1);
3808           emit_move_insn (reg, GEN_INT (-actual_fsize));
3809           insn = emit_insn (gen_save_register_window (reg));
3810         }
3811
3812       RTX_FRAME_RELATED_P (insn) = 1;
3813       for (i=0; i < XVECLEN (PATTERN (insn), 0); i++)
3814         RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, i)) = 1;
3815     }
3816
3817   /* Call-saved registers are saved just above the outgoing argument area.  */
3818   if (num_gfregs)
3819     emit_save_regs ();
3820
3821   /* Load the PIC register if needed.  */
3822   if (flag_pic && current_function_uses_pic_offset_table)
3823     load_pic_register (false);
3824 }
3825
3826 /* This function generates the assembly code for function entry, which boils
3827    down to emitting the necessary .register directives.
3828
3829    ??? Historical cruft: "On SPARC, move-double insns between fpu and cpu need
3830    an 8-byte block of memory.  If any fpu reg is used in the function, we
3831    allocate such a block here, at the bottom of the frame, just in case it's
3832    needed."  Could this explain the -8 in emit_restore_regs?  */
3833
3834 static void
3835 sparc_asm_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
3836 {
3837   /* Check that the assumption we made in sparc_expand_prologue is valid.  */
3838   gcc_assert (sparc_leaf_function_p == current_function_uses_only_leaf_regs);
3839
3840   sparc_output_scratch_registers (file);
3841 }
3842
3843 /* Expand the function epilogue, either normal or part of a sibcall.
3844    We emit all the instructions except the return or the call.  */
3845
3846 void
3847 sparc_expand_epilogue (void)
3848 {
3849   if (num_gfregs)
3850     emit_restore_regs ();
3851
3852   if (actual_fsize == 0)
3853     /* do nothing.  */ ;
3854   else if (sparc_leaf_function_p)
3855     {
3856       if (actual_fsize <= 4096)
3857         emit_insn (gen_stack_pointer_dec (GEN_INT (- actual_fsize)));
3858       else if (actual_fsize <= 8192)
3859         {
3860           emit_insn (gen_stack_pointer_dec (GEN_INT (-4096)));
3861           emit_insn (gen_stack_pointer_dec (GEN_INT (4096 - actual_fsize)));
3862         }
3863       else
3864         {
3865           rtx reg = gen_rtx_REG (Pmode, 1);
3866           emit_move_insn (reg, GEN_INT (-actual_fsize));
3867           emit_insn (gen_stack_pointer_dec (reg));
3868         }
3869     }
3870 }
3871
3872 /* Return true if it is appropriate to emit `return' instructions in the
3873    body of a function.  */
3874
3875 bool
3876 sparc_can_use_return_insn_p (void)
3877 {
3878   return sparc_prologue_data_valid_p
3879          && (actual_fsize == 0 || !sparc_leaf_function_p);
3880 }
3881
3882 /* This function generates the assembly code for function exit.  */
3883
3884 static void
3885 sparc_asm_function_epilogue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
3886 {
3887   /* If code does not drop into the epilogue, we have to still output
3888      a dummy nop for the sake of sane backtraces.  Otherwise, if the
3889      last two instructions of a function were "call foo; dslot;" this
3890      can make the return PC of foo (i.e. address of call instruction
3891      plus 8) point to the first instruction in the next function.  */
3892
3893   rtx insn, last_real_insn;
3894
3895   insn = get_last_insn ();
3896
3897   last_real_insn = prev_real_insn (insn);
3898   if (last_real_insn
3899       && GET_CODE (last_real_insn) == INSN
3900       && GET_CODE (PATTERN (last_real_insn)) == SEQUENCE)
3901     last_real_insn = XVECEXP (PATTERN (last_real_insn), 0, 0);
3902
3903   if (last_real_insn && GET_CODE (last_real_insn) == CALL_INSN)
3904     fputs("\tnop\n", file);
3905
3906   sparc_output_deferred_case_vectors ();
3907 }
3908
3909 /* Output a 'restore' instruction.  */
3910
3911 static void
3912 output_restore (rtx pat)
3913 {
3914   rtx operands[3];
3915
3916   if (! pat)
3917     {
3918       fputs ("\t restore\n", asm_out_file);
3919       return;
3920     }
3921
3922   gcc_assert (GET_CODE (pat) == SET);
3923
3924   operands[0] = SET_DEST (pat);
3925   pat = SET_SRC (pat);
3926
3927   switch (GET_CODE (pat))
3928     {
3929       case PLUS:
3930         operands[1] = XEXP (pat, 0);
3931         operands[2] = XEXP (pat, 1);
3932         output_asm_insn (" restore %r1, %2, %Y0", operands);
3933         break;
3934       case LO_SUM:
3935         operands[1] = XEXP (pat, 0);
3936         operands[2] = XEXP (pat, 1);
3937         output_asm_insn (" restore %r1, %%lo(%a2), %Y0", operands);
3938         break;
3939       case ASHIFT:
3940         operands[1] = XEXP (pat, 0);
3941         gcc_assert (XEXP (pat, 1) == const1_rtx);
3942         output_asm_insn (" restore %r1, %r1, %Y0", operands);
3943         break;
3944       default:
3945         operands[1] = pat;
3946         output_asm_insn (" restore %%g0, %1, %Y0", operands);
3947         break;
3948     }
3949 }
3950
3951 /* Output a return.  */
3952
3953 const char *
3954 output_return (rtx insn)
3955 {
3956   if (sparc_leaf_function_p)
3957     {
3958       /* This is a leaf function so we don't have to bother restoring the
3959          register window, which frees us from dealing with the convoluted
3960          semantics of restore/return.  We simply output the jump to the
3961          return address and the insn in the delay slot (if any).  */
3962
3963       gcc_assert (! current_function_calls_eh_return);
3964
3965       return "jmp\t%%o7+%)%#";
3966     }
3967   else
3968     {
3969       /* This is a regular function so we have to restore the register window.
3970          We may have a pending insn for the delay slot, which will be either
3971          combined with the 'restore' instruction or put in the delay slot of
3972          the 'return' instruction.  */
3973
3974       if (current_function_calls_eh_return)
3975         {
3976           /* If the function uses __builtin_eh_return, the eh_return
3977              machinery occupies the delay slot.  */
3978           gcc_assert (! final_sequence);
3979
3980           if (! flag_delayed_branch)
3981             fputs ("\tadd\t%fp, %g1, %fp\n", asm_out_file);
3982
3983           if (TARGET_V9)
3984             fputs ("\treturn\t%i7+8\n", asm_out_file);
3985           else
3986             fputs ("\trestore\n\tjmp\t%o7+8\n", asm_out_file);
3987
3988           if (flag_delayed_branch)
3989             fputs ("\t add\t%sp, %g1, %sp\n", asm_out_file);
3990           else
3991             fputs ("\t nop\n", asm_out_file);
3992         }
3993       else if (final_sequence)
3994         {
3995           rtx delay, pat;
3996
3997           delay = NEXT_INSN (insn);
3998           gcc_assert (delay);
3999
4000           pat = PATTERN (delay);
4001
4002           if (TARGET_V9 && ! epilogue_renumber (&pat, 1))
4003             {
4004               epilogue_renumber (&pat, 0);
4005               return "return\t%%i7+%)%#";
4006             }
4007           else
4008             {
4009               output_asm_insn ("jmp\t%%i7+%)", NULL);
4010               output_restore (pat);
4011               PATTERN (delay) = gen_blockage ();
4012               INSN_CODE (delay) = -1;
4013             }
4014         }
4015       else
4016         {
4017           /* The delay slot is empty.  */
4018           if (TARGET_V9)
4019             return "return\t%%i7+%)\n\t nop";
4020           else if (flag_delayed_branch)
4021             return "jmp\t%%i7+%)\n\t restore";
4022           else
4023             return "restore\n\tjmp\t%%o7+%)\n\t nop";
4024         }
4025     }
4026
4027   return "";
4028 }
4029
4030 /* Output a sibling call.  */
4031
4032 const char *
4033 output_sibcall (rtx insn, rtx call_operand)
4034 {
4035   rtx operands[1];
4036
4037   gcc_assert (flag_delayed_branch);
4038
4039   operands[0] = call_operand;
4040
4041   if (sparc_leaf_function_p)
4042     {
4043       /* This is a leaf function so we don't have to bother restoring the
4044          register window.  We simply output the jump to the function and
4045          the insn in the delay slot (if any).  */
4046
4047       gcc_assert (!(LEAF_SIBCALL_SLOT_RESERVED_P && final_sequence));
4048
4049       if (final_sequence)
4050         output_asm_insn ("sethi\t%%hi(%a0), %%g1\n\tjmp\t%%g1 + %%lo(%a0)%#",
4051                          operands);
4052       else
4053         /* Use or with rs2 %%g0 instead of mov, so that as/ld can optimize
4054            it into branch if possible.  */
4055         output_asm_insn ("or\t%%o7, %%g0, %%g1\n\tcall\t%a0, 0\n\t or\t%%g1, %%g0, %%o7",
4056                          operands);
4057     }
4058   else
4059     {
4060       /* This is a regular function so we have to restore the register window.
4061          We may have a pending insn for the delay slot, which will be combined
4062          with the 'restore' instruction.  */
4063
4064       output_asm_insn ("call\t%a0, 0", operands);
4065
4066       if (final_sequence)
4067         {
4068           rtx delay = NEXT_INSN (insn);
4069           gcc_assert (delay);
4070
4071           output_restore (PATTERN (delay));
4072
4073           PATTERN (delay) = gen_blockage ();
4074           INSN_CODE (delay) = -1;
4075         }
4076       else
4077         output_restore (NULL_RTX);
4078     }
4079
4080   return "";
4081 }
4082 \f
4083 /* Functions for handling argument passing.
4084
4085    For 32-bit, the first 6 args are normally in registers and the rest are
4086    pushed.  Any arg that starts within the first 6 words is at least
4087    partially passed in a register unless its data type forbids.
4088
4089    For 64-bit, the argument registers are laid out as an array of 16 elements
4090    and arguments are added sequentially.  The first 6 int args and up to the
4091    first 16 fp args (depending on size) are passed in regs.
4092
4093    Slot    Stack   Integral   Float   Float in structure   Double   Long Double
4094    ----    -----   --------   -----   ------------------   ------   -----------
4095     15   [SP+248]              %f31       %f30,%f31         %d30
4096     14   [SP+240]              %f29       %f28,%f29         %d28       %q28
4097     13   [SP+232]              %f27       %f26,%f27         %d26
4098     12   [SP+224]              %f25       %f24,%f25         %d24       %q24
4099     11   [SP+216]              %f23       %f22,%f23         %d22
4100     10   [SP+208]              %f21       %f20,%f21         %d20       %q20
4101      9   [SP+200]              %f19       %f18,%f19         %d18
4102      8   [SP+192]              %f17       %f16,%f17         %d16       %q16
4103      7   [SP+184]              %f15       %f14,%f15         %d14
4104      6   [SP+176]              %f13       %f12,%f13         %d12       %q12
4105      5   [SP+168]     %o5      %f11       %f10,%f11         %d10
4106      4   [SP+160]     %o4       %f9        %f8,%f9           %d8        %q8
4107      3   [SP+152]     %o3       %f7        %f6,%f7           %d6
4108      2   [SP+144]     %o2       %f5        %f4,%f5           %d4        %q4
4109      1   [SP+136]     %o1       %f3        %f2,%f3           %d2
4110      0   [SP+128]     %o0       %f1        %f0,%f1           %d0        %q0
4111
4112    Here SP = %sp if -mno-stack-bias or %sp+stack_bias otherwise.
4113
4114    Integral arguments are always passed as 64-bit quantities appropriately
4115    extended.
4116
4117    Passing of floating point values is handled as follows.
4118    If a prototype is in scope:
4119      If the value is in a named argument (i.e. not a stdarg function or a
4120      value not part of the `...') then the value is passed in the appropriate
4121      fp reg.
4122      If the value is part of the `...' and is passed in one of the first 6
4123      slots then the value is passed in the appropriate int reg.
4124      If the value is part of the `...' and is not passed in one of the first 6
4125      slots then the value is passed in memory.
4126    If a prototype is not in scope:
4127      If the value is one of the first 6 arguments the value is passed in the
4128      appropriate integer reg and the appropriate fp reg.
4129      If the value is not one of the first 6 arguments the value is passed in
4130      the appropriate fp reg and in memory.
4131
4132
4133    Summary of the calling conventions implemented by GCC on SPARC:
4134
4135    32-bit ABI:
4136                                 size      argument     return value
4137
4138       small integer              <4       int. reg.      int. reg.
4139       word                        4       int. reg.      int. reg.
4140       double word                 8       int. reg.      int. reg.
4141
4142       _Complex small integer     <8       int. reg.      int. reg.
4143       _Complex word               8       int. reg.      int. reg.
4144       _Complex double word       16        memory        int. reg.
4145
4146       vector integer            <=8       int. reg.       FP reg.
4147       vector integer             >8        memory         memory
4148
4149       float                       4       int. reg.       FP reg.
4150       double                      8       int. reg.       FP reg.
4151       long double                16        memory         memory
4152
4153       _Complex float              8        memory         FP reg.
4154       _Complex double            16        memory         FP reg.
4155       _Complex long double       32        memory         FP reg.
4156
4157       vector float              any        memory         memory
4158
4159       aggregate                 any        memory         memory
4160
4161
4162
4163     64-bit ABI:
4164                                 size      argument     return value
4165
4166       small integer              <8       int. reg.      int. reg.
4167       word                        8       int. reg.      int. reg.
4168       double word                16       int. reg.      int. reg.
4169
4170       _Complex small integer    <16       int. reg.      int. reg.
4171       _Complex word              16       int. reg.      int. reg.
4172       _Complex double word       32        memory        int. reg.
4173
4174       vector integer           <=16        FP reg.        FP reg.
4175       vector integer       16<s<=32        memory         FP reg.
4176       vector integer            >32        memory         memory
4177
4178       float                       4        FP reg.        FP reg.
4179       double                      8        FP reg.        FP reg.
4180       long double                16        FP reg.        FP reg.
4181
4182       _Complex float              8        FP reg.        FP reg.
4183       _Complex double            16        FP reg.        FP reg.
4184       _Complex long double       32        memory         FP reg.
4185
4186       vector float             <=16        FP reg.        FP reg.
4187       vector float         16<s<=32        memory         FP reg.
4188       vector float              >32        memory         memory
4189
4190       aggregate                <=16         reg.           reg.
4191       aggregate            16<s<=32        memory          reg.
4192       aggregate                 >32        memory         memory
4193
4194
4195
4196 Note #1: complex floating-point types follow the extended SPARC ABIs as
4197 implemented by the Sun compiler.
4198
4199 Note #2: integral vector types follow the scalar floating-point types
4200 conventions to match what is implemented by the Sun VIS SDK.
4201
4202 Note #3: floating-point vector types follow the aggregate types
4203 conventions.  */
4204
4205
4206 /* Maximum number of int regs for args.  */
4207 #define SPARC_INT_ARG_MAX 6
4208 /* Maximum number of fp regs for args.  */
4209 #define SPARC_FP_ARG_MAX 16
4210
4211 #define ROUND_ADVANCE(SIZE) (((SIZE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
4212
4213 /* Handle the INIT_CUMULATIVE_ARGS macro.
4214    Initialize a variable CUM of type CUMULATIVE_ARGS
4215    for a call to a function whose data type is FNTYPE.
4216    For a library call, FNTYPE is 0.  */
4217
4218 void
4219 init_cumulative_args (struct sparc_args *cum, tree fntype,
4220                       rtx libname ATTRIBUTE_UNUSED,
4221                       tree fndecl ATTRIBUTE_UNUSED)
4222 {
4223   cum->words = 0;
4224   cum->prototype_p = fntype && TYPE_ARG_TYPES (fntype);
4225   cum->libcall_p = fntype == 0;
4226 }
4227
4228 /* Handle the TARGET_PROMOTE_PROTOTYPES target hook.
4229    When a prototype says `char' or `short', really pass an `int'.  */
4230
4231 static bool
4232 sparc_promote_prototypes (tree fntype ATTRIBUTE_UNUSED)
4233 {
4234   return TARGET_ARCH32 ? true : false;
4235 }
4236
4237 /* Handle the TARGET_STRICT_ARGUMENT_NAMING target hook.  */
4238
4239 static bool
4240 sparc_strict_argument_naming (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED)
4241 {
4242   return TARGET_ARCH64 ? true : false;
4243 }
4244
4245 /* Scan the record type TYPE and return the following predicates:
4246     - INTREGS_P: the record contains at least one field or sub-field
4247       that is eligible for promotion in integer registers.
4248     - FP_REGS_P: the record contains at least one field or sub-field
4249       that is eligible for promotion in floating-point registers.
4250     - PACKED_P: the record contains at least one field that is packed.
4251
4252    Sub-fields are not taken into account for the PACKED_P predicate.  */
4253
4254 static void
4255 scan_record_type (tree type, int *intregs_p, int *fpregs_p, int *packed_p)
4256 {
4257   tree field;
4258
4259   for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4260     {
4261       if (TREE_CODE (field) == FIELD_DECL)
4262         {
4263           if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
4264             scan_record_type (TREE_TYPE (field), intregs_p, fpregs_p, 0);
4265           else if ((FLOAT_TYPE_P (TREE_TYPE (field))
4266                    || TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
4267                   && TARGET_FPU)
4268             *fpregs_p = 1;
4269           else
4270             *intregs_p = 1;
4271
4272           if (packed_p && DECL_PACKED (field))
4273             *packed_p = 1;
4274         }
4275     }
4276 }
4277
4278 /* Compute the slot number to pass an argument in.
4279    Return the slot number or -1 if passing on the stack.
4280
4281    CUM is a variable of type CUMULATIVE_ARGS which gives info about
4282     the preceding args and about the function being called.
4283    MODE is the argument's machine mode.
4284    TYPE is the data type of the argument (as a tree).
4285     This is null for libcalls where that information may
4286     not be available.
4287    NAMED is nonzero if this argument is a named parameter
4288     (otherwise it is an extra parameter matching an ellipsis).
4289    INCOMING_P is zero for FUNCTION_ARG, nonzero for FUNCTION_INCOMING_ARG.
4290    *PREGNO records the register number to use if scalar type.
4291    *PPADDING records the amount of padding needed in words.  */
4292
4293 static int
4294 function_arg_slotno (const struct sparc_args *cum, enum machine_mode mode,
4295                      tree type, int named, int incoming_p,
4296                      int *pregno, int *ppadding)
4297 {
4298   int regbase = (incoming_p
4299                  ? SPARC_INCOMING_INT_ARG_FIRST
4300                  : SPARC_OUTGOING_INT_ARG_FIRST);
4301   int slotno = cum->words;
4302   enum mode_class mclass;
4303   int regno;
4304
4305   *ppadding = 0;
4306
4307   if (type && TREE_ADDRESSABLE (type))
4308     return -1;
4309
4310   if (TARGET_ARCH32
4311       && mode == BLKmode
4312       && type
4313       && TYPE_ALIGN (type) % PARM_BOUNDARY != 0)
4314     return -1;
4315
4316   /* For SPARC64, objects requiring 16-byte alignment get it.  */
4317   if (TARGET_ARCH64
4318       && GET_MODE_ALIGNMENT (mode) >= 2 * BITS_PER_WORD
4319       && (slotno & 1) != 0)
4320     slotno++, *ppadding = 1;
4321
4322   mclass = GET_MODE_CLASS (mode);
4323   if (type && TREE_CODE (type) == VECTOR_TYPE)
4324     {
4325       /* Vector types deserve special treatment because they are
4326          polymorphic wrt their mode, depending upon whether VIS
4327          instructions are enabled.  */
4328       if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
4329         {
4330           /* The SPARC port defines no floating-point vector modes.  */
4331           gcc_assert (mode == BLKmode);
4332         }
4333       else
4334         {
4335           /* Integral vector types should either have a vector
4336              mode or an integral mode, because we are guaranteed
4337              by pass_by_reference that their size is not greater
4338              than 16 bytes and TImode is 16-byte wide.  */
4339           gcc_assert (mode != BLKmode);
4340
4341           /* Vector integers are handled like floats according to
4342              the Sun VIS SDK.  */
4343           mclass = MODE_FLOAT;
4344         }
4345     }
4346
4347   switch (mclass)
4348     {
4349     case MODE_FLOAT:
4350     case MODE_COMPLEX_FLOAT:
4351       if (TARGET_ARCH64 && TARGET_FPU && named)
4352         {
4353           if (slotno >= SPARC_FP_ARG_MAX)
4354             return -1;
4355           regno = SPARC_FP_ARG_FIRST + slotno * 2;
4356           /* Arguments filling only one single FP register are
4357              right-justified in the outer double FP register.  */
4358           if (GET_MODE_SIZE (mode) <= 4)
4359             regno++;
4360           break;
4361         }
4362       /* fallthrough */
4363
4364     case MODE_INT:
4365     case MODE_COMPLEX_INT:
4366       if (slotno >= SPARC_INT_ARG_MAX)
4367         return -1;
4368       regno = regbase + slotno;
4369       break;
4370
4371     case MODE_RANDOM:
4372       if (mode == VOIDmode)
4373         /* MODE is VOIDmode when generating the actual call.  */
4374         return -1;
4375
4376       gcc_assert (mode == BLKmode);
4377
4378       /* For SPARC64, objects requiring 16-byte alignment get it.  */
4379       if (TARGET_ARCH64
4380           && type
4381           && TYPE_ALIGN (type) >= 2 * BITS_PER_WORD
4382           && (slotno & 1) != 0)
4383         slotno++, *ppadding = 1;
4384
4385       if (TARGET_ARCH32 || !type || (TREE_CODE (type) == UNION_TYPE))
4386         {
4387           if (slotno >= SPARC_INT_ARG_MAX)
4388             return -1;
4389           regno = regbase + slotno;
4390         }
4391       else  /* TARGET_ARCH64 && type */
4392         {
4393           int intregs_p = 0, fpregs_p = 0, packed_p = 0;
4394
4395           /* First see what kinds of registers we would need.  */
4396           if (TREE_CODE (type) == VECTOR_TYPE)
4397             fpregs_p = 1;
4398           else
4399             scan_record_type (type, &intregs_p, &fpregs_p, &packed_p);
4400
4401           /* The ABI obviously doesn't specify how packed structures
4402              are passed.  These are defined to be passed in int regs
4403              if possible, otherwise memory.  */
4404           if (packed_p || !named)
4405             fpregs_p = 0, intregs_p = 1;
4406
4407           /* If all arg slots are filled, then must pass on stack.  */
4408           if (fpregs_p && slotno >= SPARC_FP_ARG_MAX)
4409             return -1;
4410
4411           /* If there are only int args and all int arg slots are filled,
4412              then must pass on stack.  */
4413           if (!fpregs_p && intregs_p && slotno >= SPARC_INT_ARG_MAX)
4414             return -1;
4415
4416           /* Note that even if all int arg slots are filled, fp members may
4417              still be passed in regs if such regs are available.
4418              *PREGNO isn't set because there may be more than one, it's up
4419              to the caller to compute them.  */
4420           return slotno;
4421         }
4422       break;
4423
4424     default :
4425       gcc_unreachable ();
4426     }
4427
4428   *pregno = regno;
4429   return slotno;
4430 }
4431
4432 /* Handle recursive register counting for structure field layout.  */
4433
4434 struct function_arg_record_value_parms
4435 {
4436   rtx ret;              /* return expression being built.  */
4437   int slotno;           /* slot number of the argument.  */
4438   int named;            /* whether the argument is named.  */
4439   int regbase;          /* regno of the base register.  */
4440   int stack;            /* 1 if part of the argument is on the stack.  */
4441   int intoffset;        /* offset of the first pending integer field.  */
4442   unsigned int nregs;   /* number of words passed in registers.  */
4443 };
4444
4445 static void function_arg_record_value_3
4446  (HOST_WIDE_INT, struct function_arg_record_value_parms *);
4447 static void function_arg_record_value_2
4448  (tree, HOST_WIDE_INT, struct function_arg_record_value_parms *, bool);
4449 static void function_arg_record_value_1
4450  (tree, HOST_WIDE_INT, struct function_arg_record_value_parms *, bool);
4451 static rtx function_arg_record_value (tree, enum machine_mode, int, int, int);
4452 static rtx function_arg_union_value (int, enum machine_mode, int, int);
4453
4454 /* A subroutine of function_arg_record_value.  Traverse the structure
4455    recursively and determine how many registers will be required.  */
4456
4457 static void
4458 function_arg_record_value_1 (tree type, HOST_WIDE_INT startbitpos,
4459                              struct function_arg_record_value_parms *parms,
4460                              bool packed_p)
4461 {
4462   tree field;
4463
4464   /* We need to compute how many registers are needed so we can
4465      allocate the PARALLEL but before we can do that we need to know
4466      whether there are any packed fields.  The ABI obviously doesn't
4467      specify how structures are passed in this case, so they are
4468      defined to be passed in int regs if possible, otherwise memory,
4469      regardless of whether there are fp values present.  */
4470
4471   if (! packed_p)
4472     for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4473       {
4474         if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
4475           {
4476             packed_p = true;
4477             break;
4478           }
4479       }
4480
4481   /* Compute how many registers we need.  */
4482   for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4483     {
4484       if (TREE_CODE (field) == FIELD_DECL)
4485         {
4486           HOST_WIDE_INT bitpos = startbitpos;
4487
4488           if (DECL_SIZE (field) != 0)
4489             {
4490               if (integer_zerop (DECL_SIZE (field)))
4491                 continue;
4492
4493               if (host_integerp (bit_position (field), 1))
4494                 bitpos += int_bit_position (field);
4495             }
4496
4497           /* ??? FIXME: else assume zero offset.  */
4498
4499           if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
4500             function_arg_record_value_1 (TREE_TYPE (field),
4501                                          bitpos,
4502                                          parms,
4503                                          packed_p);
4504           else if ((FLOAT_TYPE_P (TREE_TYPE (field))
4505                     || TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
4506                    && TARGET_FPU
4507                    && parms->named
4508                    && ! packed_p)
4509             {
4510               if (parms->intoffset != -1)
4511                 {
4512                   unsigned int startbit, endbit;
4513                   int intslots, this_slotno;
4514
4515                   startbit = parms->intoffset & -BITS_PER_WORD;
4516                   endbit   = (bitpos + BITS_PER_WORD - 1) & -BITS_PER_WORD;
4517
4518                   intslots = (endbit - startbit) / BITS_PER_WORD;
4519                   this_slotno = parms->slotno + parms->intoffset
4520                     / BITS_PER_WORD;
4521
4522                   if (intslots > 0 && intslots > SPARC_INT_ARG_MAX - this_slotno)
4523                     {
4524                       intslots = MAX (0, SPARC_INT_ARG_MAX - this_slotno);
4525                       /* We need to pass this field on the stack.  */
4526                       parms->stack = 1;
4527                     }
4528
4529                   parms->nregs += intslots;
4530                   parms->intoffset = -1;
4531                 }
4532
4533               /* There's no need to check this_slotno < SPARC_FP_ARG MAX.
4534                  If it wasn't true we wouldn't be here.  */
4535               if (TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE
4536                   && DECL_MODE (field) == BLKmode)
4537                 parms->nregs += TYPE_VECTOR_SUBPARTS (TREE_TYPE (field));
4538               else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE)
4539                 parms->nregs += 2;
4540               else
4541                 parms->nregs += 1;
4542             }
4543           else
4544             {
4545               if (parms->intoffset == -1)
4546                 parms->intoffset = bitpos;
4547             }
4548         }
4549     }
4550 }
4551
4552 /* A subroutine of function_arg_record_value.  Assign the bits of the
4553    structure between parms->intoffset and bitpos to integer registers.  */
4554
4555 static void
4556 function_arg_record_value_3 (HOST_WIDE_INT bitpos,
4557                              struct function_arg_record_value_parms *parms)
4558 {
4559   enum machine_mode mode;
4560   unsigned int regno;
4561   unsigned int startbit, endbit;
4562   int this_slotno, intslots, intoffset;
4563   rtx reg;
4564
4565   if (parms->intoffset == -1)
4566     return;
4567
4568   intoffset = parms->intoffset;
4569   parms->intoffset = -1;
4570
4571   startbit = intoffset & -BITS_PER_WORD;
4572   endbit = (bitpos + BITS_PER_WORD - 1) & -BITS_PER_WORD;
4573   intslots = (endbit - startbit) / BITS_PER_WORD;
4574   this_slotno = parms->slotno + intoffset / BITS_PER_WORD;
4575
4576   intslots = MIN (intslots, SPARC_INT_ARG_MAX - this_slotno);
4577   if (intslots <= 0)
4578     return;
4579
4580   /* If this is the trailing part of a word, only load that much into
4581      the register.  Otherwise load the whole register.  Note that in
4582      the latter case we may pick up unwanted bits.  It's not a problem
4583      at the moment but may wish to revisit.  */
4584
4585   if (intoffset % BITS_PER_WORD != 0)
4586     mode = smallest_mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
4587                                    MODE_INT);
4588   else
4589     mode = word_mode;
4590
4591   intoffset /= BITS_PER_UNIT;
4592   do
4593     {
4594       regno = parms->regbase + this_slotno;
4595       reg = gen_rtx_REG (mode, regno);
4596       XVECEXP (parms->ret, 0, parms->stack + parms->nregs)
4597         = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
4598
4599       this_slotno += 1;
4600       intoffset = (intoffset | (UNITS_PER_WORD-1)) + 1;
4601       mode = word_mode;
4602       parms->nregs += 1;
4603       intslots -= 1;
4604     }
4605   while (intslots > 0);
4606 }
4607
4608 /* A subroutine of function_arg_record_value.  Traverse the structure
4609    recursively and assign bits to floating point registers.  Track which
4610    bits in between need integer registers; invoke function_arg_record_value_3
4611    to make that happen.  */
4612
4613 static void
4614 function_arg_record_value_2 (tree type, HOST_WIDE_INT startbitpos,
4615                              struct function_arg_record_value_parms *parms,
4616                              bool packed_p)
4617 {
4618   tree field;
4619
4620   if (! packed_p)
4621     for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4622       {
4623         if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
4624           {
4625             packed_p = true;
4626             break;
4627           }
4628       }
4629
4630   for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4631     {
4632       if (TREE_CODE (field) == FIELD_DECL)
4633         {
4634           HOST_WIDE_INT bitpos = startbitpos;
4635
4636           if (DECL_SIZE (field) != 0)
4637             {
4638               if (integer_zerop (DECL_SIZE (field)))
4639                 continue;
4640
4641               if (host_integerp (bit_position (field), 1))
4642                 bitpos += int_bit_position (field);
4643             }
4644
4645           /* ??? FIXME: else assume zero offset.  */
4646
4647           if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
4648             function_arg_record_value_2 (TREE_TYPE (field),
4649                                          bitpos,
4650                                          parms,
4651                                          packed_p);
4652           else if ((FLOAT_TYPE_P (TREE_TYPE (field))
4653                     || TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
4654                    && TARGET_FPU
4655                    && parms->named
4656                    && ! packed_p)
4657             {
4658               int this_slotno = parms->slotno + bitpos / BITS_PER_WORD;
4659               int regno, nregs, pos;
4660               enum machine_mode mode = DECL_MODE (field);
4661               rtx reg;
4662
4663               function_arg_record_value_3 (bitpos, parms);
4664
4665               if (TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE
4666                   && mode == BLKmode)
4667                 {
4668                   mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
4669                   nregs = TYPE_VECTOR_SUBPARTS (TREE_TYPE (field));
4670                 }
4671               else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE)
4672                 {
4673                   mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
4674                   nregs = 2;
4675                 }
4676               else
4677                 nregs = 1;
4678
4679               regno = SPARC_FP_ARG_FIRST + this_slotno * 2;
4680               if (GET_MODE_SIZE (mode) <= 4 && (bitpos & 32) != 0)
4681                 regno++;
4682               reg = gen_rtx_REG (mode, regno);
4683               pos = bitpos / BITS_PER_UNIT;
4684               XVECEXP (parms->ret, 0, parms->stack + parms->nregs)
4685                 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos));
4686               parms->nregs += 1;
4687               while (--nregs > 0)
4688                 {
4689                   regno += GET_MODE_SIZE (mode) / 4;
4690                   reg = gen_rtx_REG (mode, regno);
4691                   pos += GET_MODE_SIZE (mode);
4692                   XVECEXP (parms->ret, 0, parms->stack + parms->nregs)
4693                     = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos));
4694                   parms->nregs += 1;
4695                 }
4696             }
4697           else
4698             {
4699               if (parms->intoffset == -1)
4700                 parms->intoffset = bitpos;
4701             }
4702         }
4703     }
4704 }
4705
4706 /* Used by function_arg and function_value to implement the complex
4707    conventions of the 64-bit ABI for passing and returning structures.
4708    Return an expression valid as a return value for the two macros
4709    FUNCTION_ARG and FUNCTION_VALUE.
4710
4711    TYPE is the data type of the argument (as a tree).
4712     This is null for libcalls where that information may
4713     not be available.
4714    MODE is the argument's machine mode.
4715    SLOTNO is the index number of the argument's slot in the parameter array.
4716    NAMED is nonzero if this argument is a named parameter
4717     (otherwise it is an extra parameter matching an ellipsis).
4718    REGBASE is the regno of the base register for the parameter array.  */
4719
4720 static rtx
4721 function_arg_record_value (tree type, enum machine_mode mode,
4722                            int slotno, int named, int regbase)
4723 {
4724   HOST_WIDE_INT typesize = int_size_in_bytes (type);
4725   struct function_arg_record_value_parms parms;
4726   unsigned int nregs;
4727
4728   parms.ret = NULL_RTX;
4729   parms.slotno = slotno;
4730   parms.named = named;
4731   parms.regbase = regbase;
4732   parms.stack = 0;
4733
4734   /* Compute how many registers we need.  */
4735   parms.nregs = 0;
4736   parms.intoffset = 0;
4737   function_arg_record_value_1 (type, 0, &parms, false);
4738
4739   /* Take into account pending integer fields.  */
4740   if (parms.intoffset != -1)
4741     {
4742       unsigned int startbit, endbit;
4743       int intslots, this_slotno;
4744
4745       startbit = parms.intoffset & -BITS_PER_WORD;
4746       endbit = (typesize*BITS_PER_UNIT + BITS_PER_WORD - 1) & -BITS_PER_WORD;
4747       intslots = (endbit - startbit) / BITS_PER_WORD;
4748       this_slotno = slotno + parms.intoffset / BITS_PER_WORD;
4749
4750       if (intslots > 0 && intslots > SPARC_INT_ARG_MAX - this_slotno)
4751         {
4752           intslots = MAX (0, SPARC_INT_ARG_MAX - this_slotno);
4753           /* We need to pass this field on the stack.  */
4754           parms.stack = 1;
4755         }
4756
4757       parms.nregs += intslots;
4758     }
4759   nregs = parms.nregs;
4760
4761   /* Allocate the vector and handle some annoying special cases.  */
4762   if (nregs == 0)
4763     {
4764       /* ??? Empty structure has no value?  Duh?  */
4765       if (typesize <= 0)
4766         {
4767           /* Though there's nothing really to store, return a word register
4768              anyway so the rest of gcc doesn't go nuts.  Returning a PARALLEL
4769              leads to breakage due to the fact that there are zero bytes to
4770              load.  */
4771           return gen_rtx_REG (mode, regbase);
4772         }
4773       else
4774         {
4775           /* ??? C++ has structures with no fields, and yet a size.  Give up
4776              for now and pass everything back in integer registers.  */
4777           nregs = (typesize + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4778         }
4779       if (nregs + slotno > SPARC_INT_ARG_MAX)
4780         nregs = SPARC_INT_ARG_MAX - slotno;
4781     }
4782   gcc_assert (nregs != 0);
4783
4784   parms.ret = gen_rtx_PARALLEL (mode, rtvec_alloc (parms.stack + nregs));
4785
4786   /* If at least one field must be passed on the stack, generate
4787      (parallel [(expr_list (nil) ...) ...]) so that all fields will
4788      also be passed on the stack.  We can't do much better because the
4789      semantics of TARGET_ARG_PARTIAL_BYTES doesn't handle the case
4790      of structures for which the fields passed exclusively in registers
4791      are not at the beginning of the structure.  */
4792   if (parms.stack)
4793     XVECEXP (parms.ret, 0, 0)
4794       = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
4795
4796   /* Fill in the entries.  */
4797   parms.nregs = 0;
4798   parms.intoffset = 0;
4799   function_arg_record_value_2 (type, 0, &parms, false);
4800   function_arg_record_value_3 (typesize * BITS_PER_UNIT, &parms);
4801
4802   gcc_assert (parms.nregs == nregs);
4803
4804   return parms.ret;
4805 }
4806
4807 /* Used by function_arg and function_value to implement the conventions
4808    of the 64-bit ABI for passing and returning unions.
4809    Return an expression valid as a return value for the two macros
4810    FUNCTION_ARG and FUNCTION_VALUE.
4811
4812    SIZE is the size in bytes of the union.
4813    MODE is the argument's machine mode.
4814    REGNO is the hard register the union will be passed in.  */
4815
4816 static rtx
4817 function_arg_union_value (int size, enum machine_mode mode, int slotno,
4818                           int regno)
4819 {
4820   int nwords = ROUND_ADVANCE (size), i;
4821   rtx regs;
4822
4823   /* See comment in previous function for empty structures.  */
4824   if (nwords == 0)
4825     return gen_rtx_REG (mode, regno);
4826
4827   if (slotno == SPARC_INT_ARG_MAX - 1)
4828     nwords = 1;
4829
4830   regs = gen_rtx_PARALLEL (mode, rtvec_alloc (nwords));
4831
4832   for (i = 0; i < nwords; i++)
4833     {
4834       /* Unions are passed left-justified.  */
4835       XVECEXP (regs, 0, i)
4836         = gen_rtx_EXPR_LIST (VOIDmode,
4837                              gen_rtx_REG (word_mode, regno),
4838                              GEN_INT (UNITS_PER_WORD * i));
4839       regno++;
4840     }
4841
4842   return regs;
4843 }
4844
4845 /* Used by function_arg and function_value to implement the conventions
4846    for passing and returning large (BLKmode) vectors.
4847    Return an expression valid as a return value for the two macros
4848    FUNCTION_ARG and FUNCTION_VALUE.
4849
4850    SIZE is the size in bytes of the vector.
4851    BASE_MODE is the argument's base machine mode.
4852    REGNO is the FP hard register the vector will be passed in.  */
4853
4854 static rtx
4855 function_arg_vector_value (int size, enum machine_mode base_mode, int regno)
4856 {
4857   unsigned short base_mode_size = GET_MODE_SIZE (base_mode);
4858   int nregs = size / base_mode_size, i;
4859   rtx regs;
4860
4861   regs = gen_rtx_PARALLEL (BLKmode, rtvec_alloc (nregs));
4862
4863   for (i = 0; i < nregs; i++)
4864     {
4865       XVECEXP (regs, 0, i)
4866         = gen_rtx_EXPR_LIST (VOIDmode,
4867                              gen_rtx_REG (base_mode, regno),
4868                              GEN_INT (base_mode_size * i));
4869       regno += base_mode_size / 4;
4870     }
4871
4872   return regs;
4873 }
4874
4875 /* Handle the FUNCTION_ARG macro.
4876    Determine where to put an argument to a function.
4877    Value is zero to push the argument on the stack,
4878    or a hard register in which to store the argument.
4879
4880    CUM is a variable of type CUMULATIVE_ARGS which gives info about
4881     the preceding args and about the function being called.
4882    MODE is the argument's machine mode.
4883    TYPE is the data type of the argument (as a tree).
4884     This is null for libcalls where that information may
4885     not be available.
4886    NAMED is nonzero if this argument is a named parameter
4887     (otherwise it is an extra parameter matching an ellipsis).
4888    INCOMING_P is zero for FUNCTION_ARG, nonzero for FUNCTION_INCOMING_ARG.  */
4889
4890 rtx
4891 function_arg (const struct sparc_args *cum, enum machine_mode mode,
4892               tree type, int named, int incoming_p)
4893 {
4894   int regbase = (incoming_p
4895                  ? SPARC_INCOMING_INT_ARG_FIRST
4896                  : SPARC_OUTGOING_INT_ARG_FIRST);
4897   int slotno, regno, padding;
4898   enum mode_class mclass = GET_MODE_CLASS (mode);
4899   rtx reg;
4900
4901   slotno = function_arg_slotno (cum, mode, type, named, incoming_p,
4902                                 &regno, &padding);
4903
4904   if (slotno == -1)
4905     return 0;
4906
4907   if (TARGET_ARCH32)
4908     {
4909       reg = gen_rtx_REG (mode, regno);
4910       return reg;
4911     }
4912
4913   if (type && TREE_CODE (type) == RECORD_TYPE)
4914     {
4915       /* Structures up to 16 bytes in size are passed in arg slots on the
4916          stack and are promoted to registers where possible.  */
4917
4918       gcc_assert (int_size_in_bytes (type) <= 16);
4919
4920       return function_arg_record_value (type, mode, slotno, named, regbase);
4921     }
4922   else if (type && TREE_CODE (type) == UNION_TYPE)
4923     {
4924       HOST_WIDE_INT size = int_size_in_bytes (type);
4925
4926       gcc_assert (size <= 16);
4927
4928       return function_arg_union_value (size, mode, slotno, regno);
4929     }
4930   else if (type && TREE_CODE (type) == VECTOR_TYPE)
4931     {
4932       /* Vector types deserve special treatment because they are
4933          polymorphic wrt their mode, depending upon whether VIS
4934          instructions are enabled.  */
4935       HOST_WIDE_INT size = int_size_in_bytes (type);
4936
4937       gcc_assert (size <= 16);
4938
4939       if (mode == BLKmode)
4940         return function_arg_vector_value (size,
4941                                           TYPE_MODE (TREE_TYPE (type)),
4942                                           SPARC_FP_ARG_FIRST + 2*slotno);
4943       else
4944         mclass = MODE_FLOAT;
4945     }
4946
4947   /* v9 fp args in reg slots beyond the int reg slots get passed in regs
4948      but also have the slot allocated for them.
4949      If no prototype is in scope fp values in register slots get passed
4950      in two places, either fp regs and int regs or fp regs and memory.  */
4951   if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
4952       && SPARC_FP_REG_P (regno))
4953     {
4954       reg = gen_rtx_REG (mode, regno);
4955       if (cum->prototype_p || cum->libcall_p)
4956         {
4957           /* "* 2" because fp reg numbers are recorded in 4 byte
4958              quantities.  */
4959 #if 0
4960           /* ??? This will cause the value to be passed in the fp reg and
4961              in the stack.  When a prototype exists we want to pass the
4962              value in the reg but reserve space on the stack.  That's an
4963              optimization, and is deferred [for a bit].  */
4964           if ((regno - SPARC_FP_ARG_FIRST) >= SPARC_INT_ARG_MAX * 2)
4965             return gen_rtx_PARALLEL (mode,
4966                             gen_rtvec (2,
4967                                        gen_rtx_EXPR_LIST (VOIDmode,
4968                                                 NULL_RTX, const0_rtx),
4969                                        gen_rtx_EXPR_LIST (VOIDmode,
4970                                                 reg, const0_rtx)));
4971           else
4972 #else
4973           /* ??? It seems that passing back a register even when past
4974              the area declared by REG_PARM_STACK_SPACE will allocate
4975              space appropriately, and will not copy the data onto the
4976              stack, exactly as we desire.
4977
4978              This is due to locate_and_pad_parm being called in
4979              expand_call whenever reg_parm_stack_space > 0, which
4980              while beneficial to our example here, would seem to be
4981              in error from what had been intended.  Ho hum...  -- r~ */
4982 #endif
4983             return reg;
4984         }
4985       else
4986         {
4987           rtx v0, v1;
4988
4989           if ((regno - SPARC_FP_ARG_FIRST) < SPARC_INT_ARG_MAX * 2)
4990             {
4991               int intreg;
4992
4993               /* On incoming, we don't need to know that the value
4994                  is passed in %f0 and %i0, and it confuses other parts
4995                  causing needless spillage even on the simplest cases.  */
4996               if (incoming_p)
4997                 return reg;
4998
4999               intreg = (SPARC_OUTGOING_INT_ARG_FIRST
5000                         + (regno - SPARC_FP_ARG_FIRST) / 2);
5001
5002               v0 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
5003               v1 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (mode, intreg),
5004                                       const0_rtx);
5005               return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
5006             }
5007           else
5008             {
5009               v0 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
5010               v1 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
5011               return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
5012             }
5013         }
5014     }
5015   else
5016     {
5017       /* Scalar or complex int.  */
5018       reg = gen_rtx_REG (mode, regno);
5019     }
5020
5021   return reg;
5022 }
5023
5024 /* For an arg passed partly in registers and partly in memory,
5025    this is the number of bytes of registers used.
5026    For args passed entirely in registers or entirely in memory, zero.
5027
5028    Any arg that starts in the first 6 regs but won't entirely fit in them
5029    needs partial registers on v8.  On v9, structures with integer
5030    values in arg slots 5,6 will be passed in %o5 and SP+176, and complex fp
5031    values that begin in the last fp reg [where "last fp reg" varies with the
5032    mode] will be split between that reg and memory.  */
5033
5034 static int
5035 sparc_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5036                          tree type, bool named)
5037 {
5038   int slotno, regno, padding;
5039
5040   /* We pass 0 for incoming_p here, it doesn't matter.  */
5041   slotno = function_arg_slotno (cum, mode, type, named, 0, &regno, &padding);
5042
5043   if (slotno == -1)
5044     return 0;
5045
5046   if (TARGET_ARCH32)
5047     {
5048       if ((slotno + (mode == BLKmode
5049                      ? ROUND_ADVANCE (int_size_in_bytes (type))
5050                      : ROUND_ADVANCE (GET_MODE_SIZE (mode))))
5051           > SPARC_INT_ARG_MAX)
5052         return (SPARC_INT_ARG_MAX - slotno) * UNITS_PER_WORD;
5053     }
5054   else
5055     {
5056       /* We are guaranteed by pass_by_reference that the size of the
5057          argument is not greater than 16 bytes, so we only need to return
5058          one word if the argument is partially passed in registers.  */
5059
5060       if (type && AGGREGATE_TYPE_P (type))
5061         {
5062           int size = int_size_in_bytes (type);
5063
5064           if (size > UNITS_PER_WORD
5065               && slotno == SPARC_INT_ARG_MAX - 1)
5066             return UNITS_PER_WORD;
5067         }
5068       else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_INT
5069                || (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
5070                    && ! (TARGET_FPU && named)))
5071         {
5072           /* The complex types are passed as packed types.  */
5073           if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
5074               && slotno == SPARC_INT_ARG_MAX - 1)
5075             return UNITS_PER_WORD;
5076         }
5077       else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5078         {
5079           if ((slotno + GET_MODE_SIZE (mode) / UNITS_PER_WORD)
5080               > SPARC_FP_ARG_MAX)
5081             return UNITS_PER_WORD;
5082         }
5083     }
5084
5085   return 0;
5086 }
5087
5088 /* Handle the TARGET_PASS_BY_REFERENCE target hook.
5089    Specify whether to pass the argument by reference.  */
5090
5091 static bool
5092 sparc_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
5093                          enum machine_mode mode, tree type,
5094                          bool named ATTRIBUTE_UNUSED)
5095 {
5096   if (TARGET_ARCH32)
5097     {
5098     /* Original SPARC 32-bit ABI says that structures and unions,
5099        and quad-precision floats are passed by reference.  For Pascal,
5100        also pass arrays by reference.  All other base types are passed
5101        in registers.
5102
5103        Extended ABI (as implemented by the Sun compiler) says that all
5104        complex floats are passed by reference.  Pass complex integers
5105        in registers up to 8 bytes.  More generally, enforce the 2-word
5106        cap for passing arguments in registers.
5107
5108        Vector ABI (as implemented by the Sun VIS SDK) says that vector
5109        integers are passed like floats of the same size, that is in
5110        registers up to 8 bytes.  Pass all vector floats by reference
5111        like structure and unions.  */
5112       return ((type && (AGGREGATE_TYPE_P (type) || VECTOR_FLOAT_TYPE_P (type)))
5113               || mode == SCmode
5114               /* Catch CDImode, TFmode, DCmode and TCmode.  */
5115               || GET_MODE_SIZE (mode) > 8
5116               || (type
5117                   && TREE_CODE (type) == VECTOR_TYPE
5118                   && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
5119     }
5120   else
5121     {
5122     /* Original SPARC 64-bit ABI says that structures and unions
5123        smaller than 16 bytes are passed in registers, as well as
5124        all other base types.  For Pascal, pass arrays by reference.
5125
5126        Extended ABI (as implemented by the Sun compiler) says that
5127        complex floats are passed in registers up to 16 bytes.  Pass
5128        all complex integers in registers up to 16 bytes.  More generally,
5129        enforce the 2-word cap for passing arguments in registers.
5130
5131        Vector ABI (as implemented by the Sun VIS SDK) says that vector
5132        integers are passed like floats of the same size, that is in
5133        registers (up to 16 bytes).  Pass all vector floats like structure
5134        and unions.  */
5135       return ((type && TREE_CODE (type) == ARRAY_TYPE)
5136               || (type
5137                   && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == VECTOR_TYPE)
5138                   && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 16)
5139               /* Catch CTImode and TCmode.  */
5140               || GET_MODE_SIZE (mode) > 16);
5141     }
5142 }
5143
5144 /* Handle the FUNCTION_ARG_ADVANCE macro.
5145    Update the data in CUM to advance over an argument
5146    of mode MODE and data type TYPE.
5147    TYPE is null for libcalls where that information may not be available.  */
5148
5149 void
5150 function_arg_advance (struct sparc_args *cum, enum machine_mode mode,
5151                       tree type, int named)
5152 {
5153   int slotno, regno, padding;
5154
5155   /* We pass 0 for incoming_p here, it doesn't matter.  */
5156   slotno = function_arg_slotno (cum, mode, type, named, 0, &regno, &padding);
5157
5158   /* If register required leading padding, add it.  */
5159   if (slotno != -1)
5160     cum->words += padding;
5161
5162   if (TARGET_ARCH32)
5163     {
5164       cum->words += (mode != BLKmode
5165                      ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
5166                      : ROUND_ADVANCE (int_size_in_bytes (type)));
5167     }
5168   else
5169     {
5170       if (type && AGGREGATE_TYPE_P (type))
5171         {
5172           int size = int_size_in_bytes (type);
5173
5174           if (size <= 8)
5175             ++cum->words;
5176           else if (size <= 16)
5177             cum->words += 2;
5178           else /* passed by reference */
5179             ++cum->words;
5180         }
5181       else
5182         {
5183           cum->words += (mode != BLKmode
5184                          ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
5185                          : ROUND_ADVANCE (int_size_in_bytes (type)));
5186         }
5187     }
5188 }
5189
5190 /* Handle the FUNCTION_ARG_PADDING macro.
5191    For the 64 bit ABI structs are always stored left shifted in their
5192    argument slot.  */
5193
5194 enum direction
5195 function_arg_padding (enum machine_mode mode, tree type)
5196 {
5197   if (TARGET_ARCH64 && type != 0 && AGGREGATE_TYPE_P (type))
5198     return upward;
5199
5200   /* Fall back to the default.  */
5201   return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
5202 }
5203
5204 /* Handle the TARGET_RETURN_IN_MEMORY target hook.
5205    Specify whether to return the return value in memory.  */
5206
5207 static bool
5208 sparc_return_in_memory (tree type, tree fntype ATTRIBUTE_UNUSED)
5209 {
5210   if (TARGET_ARCH32)
5211     /* Original SPARC 32-bit ABI says that structures and unions,
5212        and quad-precision floats are returned in memory.  All other
5213        base types are returned in registers.
5214
5215        Extended ABI (as implemented by the Sun compiler) says that
5216        all complex floats are returned in registers (8 FP registers
5217        at most for '_Complex long double').  Return all complex integers
5218        in registers (4 at most for '_Complex long long').
5219
5220        Vector ABI (as implemented by the Sun VIS SDK) says that vector
5221        integers are returned like floats of the same size, that is in
5222        registers up to 8 bytes and in memory otherwise.  Return all
5223        vector floats in memory like structure and unions; note that
5224        they always have BLKmode like the latter.  */
5225     return (TYPE_MODE (type) == BLKmode
5226             || TYPE_MODE (type) == TFmode
5227             || (TREE_CODE (type) == VECTOR_TYPE
5228                 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
5229   else
5230     /* Original SPARC 64-bit ABI says that structures and unions
5231        smaller than 32 bytes are returned in registers, as well as
5232        all other base types.
5233
5234        Extended ABI (as implemented by the Sun compiler) says that all
5235        complex floats are returned in registers (8 FP registers at most
5236        for '_Complex long double').  Return all complex integers in
5237        registers (4 at most for '_Complex TItype').
5238
5239        Vector ABI (as implemented by the Sun VIS SDK) says that vector
5240        integers are returned like floats of the same size, that is in
5241        registers.  Return all vector floats like structure and unions;
5242        note that they always have BLKmode like the latter.  */
5243     return ((TYPE_MODE (type) == BLKmode
5244              && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 32));
5245 }
5246
5247 /* Handle the TARGET_STRUCT_VALUE target hook.
5248    Return where to find the structure return value address.  */
5249
5250 static rtx
5251 sparc_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED, int incoming)
5252 {
5253   if (TARGET_ARCH64)
5254     return 0;
5255   else
5256     {
5257       rtx mem;
5258
5259       if (incoming)
5260         mem = gen_rtx_MEM (Pmode, plus_constant (frame_pointer_rtx,
5261                                                  STRUCT_VALUE_OFFSET));
5262       else
5263         mem = gen_rtx_MEM (Pmode, plus_constant (stack_pointer_rtx,
5264                                                  STRUCT_VALUE_OFFSET));
5265
5266       set_mem_alias_set (mem, struct_value_alias_set);
5267       return mem;
5268     }
5269 }
5270
5271 /* Handle FUNCTION_VALUE, FUNCTION_OUTGOING_VALUE, and LIBCALL_VALUE macros.
5272    For v9, function return values are subject to the same rules as arguments,
5273    except that up to 32 bytes may be returned in registers.  */
5274
5275 rtx
5276 function_value (tree type, enum machine_mode mode, int incoming_p)
5277 {
5278   /* Beware that the two values are swapped here wrt function_arg.  */
5279   int regbase = (incoming_p
5280                  ? SPARC_OUTGOING_INT_ARG_FIRST
5281                  : SPARC_INCOMING_INT_ARG_FIRST);
5282   enum mode_class mclass = GET_MODE_CLASS (mode);
5283   int regno;
5284
5285   if (type && TREE_CODE (type) == VECTOR_TYPE)
5286     {
5287       /* Vector types deserve special treatment because they are
5288          polymorphic wrt their mode, depending upon whether VIS
5289          instructions are enabled.  */
5290       HOST_WIDE_INT size = int_size_in_bytes (type);
5291
5292       gcc_assert ((TARGET_ARCH32 && size <= 8)
5293                   || (TARGET_ARCH64 && size <= 32));
5294
5295       if (mode == BLKmode)
5296         return function_arg_vector_value (size,
5297                                           TYPE_MODE (TREE_TYPE (type)),
5298                                           SPARC_FP_ARG_FIRST);
5299       else
5300         mclass = MODE_FLOAT;
5301     }
5302   else if (type && TARGET_ARCH64)
5303     {
5304       if (TREE_CODE (type) == RECORD_TYPE)
5305         {
5306           /* Structures up to 32 bytes in size are passed in registers,
5307              promoted to fp registers where possible.  */
5308
5309           gcc_assert (int_size_in_bytes (type) <= 32);
5310
5311           return function_arg_record_value (type, mode, 0, 1, regbase);
5312         }
5313       else if (TREE_CODE (type) == UNION_TYPE)
5314         {
5315           HOST_WIDE_INT size = int_size_in_bytes (type);
5316
5317           gcc_assert (size <= 32);
5318
5319           return function_arg_union_value (size, mode, 0, regbase);
5320         }
5321       else if (AGGREGATE_TYPE_P (type))
5322         {
5323           /* All other aggregate types are passed in an integer register
5324              in a mode corresponding to the size of the type.  */
5325           HOST_WIDE_INT bytes = int_size_in_bytes (type);
5326
5327           gcc_assert (bytes <= 32);
5328
5329           mode = mode_for_size (bytes * BITS_PER_UNIT, MODE_INT, 0);
5330
5331           /* ??? We probably should have made the same ABI change in
5332              3.4.0 as the one we made for unions.   The latter was
5333              required by the SCD though, while the former is not
5334              specified, so we favored compatibility and efficiency.
5335
5336              Now we're stuck for aggregates larger than 16 bytes,
5337              because OImode vanished in the meantime.  Let's not
5338              try to be unduly clever, and simply follow the ABI
5339              for unions in that case.  */
5340           if (mode == BLKmode)
5341             return function_arg_union_value (bytes, mode, 0, regbase);
5342           else
5343             mclass = MODE_INT;
5344         }
5345       else if (mclass == MODE_INT
5346                && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5347         mode = word_mode;
5348     }
5349
5350   if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
5351       && TARGET_FPU)
5352     regno = SPARC_FP_ARG_FIRST;
5353   else
5354     regno = regbase;
5355
5356   return gen_rtx_REG (mode, regno);
5357 }
5358
5359 /* Do what is necessary for `va_start'.  We look at the current function
5360    to determine if stdarg or varargs is used and return the address of
5361    the first unnamed parameter.  */
5362
5363 static rtx
5364 sparc_builtin_saveregs (void)
5365 {
5366   int first_reg = current_function_args_info.words;
5367   rtx address;
5368   int regno;
5369
5370   for (regno = first_reg; regno < SPARC_INT_ARG_MAX; regno++)
5371     emit_move_insn (gen_rtx_MEM (word_mode,
5372                                  gen_rtx_PLUS (Pmode,
5373                                                frame_pointer_rtx,
5374                                                GEN_INT (FIRST_PARM_OFFSET (0)
5375                                                         + (UNITS_PER_WORD
5376                                                            * regno)))),
5377                     gen_rtx_REG (word_mode,
5378                                  SPARC_INCOMING_INT_ARG_FIRST + regno));
5379
5380   address = gen_rtx_PLUS (Pmode,
5381                           frame_pointer_rtx,
5382                           GEN_INT (FIRST_PARM_OFFSET (0)
5383                                    + UNITS_PER_WORD * first_reg));
5384
5385   return address;
5386 }
5387
5388 /* Implement `va_start' for stdarg.  */
5389
5390 void
5391 sparc_va_start (tree valist, rtx nextarg)
5392 {
5393   nextarg = expand_builtin_saveregs ();
5394   std_expand_builtin_va_start (valist, nextarg);
5395 }
5396
5397 /* Implement `va_arg' for stdarg.  */
5398
5399 static tree
5400 sparc_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
5401 {
5402   HOST_WIDE_INT size, rsize, align;
5403   tree addr, incr;
5404   bool indirect;
5405   tree ptrtype = build_pointer_type (type);
5406
5407   if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
5408     {
5409       indirect = true;
5410       size = rsize = UNITS_PER_WORD;
5411       align = 0;
5412     }
5413   else
5414     {
5415       indirect = false;
5416       size = int_size_in_bytes (type);
5417       rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
5418       align = 0;
5419
5420       if (TARGET_ARCH64)
5421         {
5422           /* For SPARC64, objects requiring 16-byte alignment get it.  */
5423           if (TYPE_ALIGN (type) >= 2 * (unsigned) BITS_PER_WORD)
5424             align = 2 * UNITS_PER_WORD;
5425
5426           /* SPARC-V9 ABI states that structures up to 16 bytes in size
5427              are left-justified in their slots.  */
5428           if (AGGREGATE_TYPE_P (type))
5429             {
5430               if (size == 0)
5431                 size = rsize = UNITS_PER_WORD;
5432               else
5433                 size = rsize;
5434             }
5435         }
5436     }
5437
5438   incr = valist;
5439   if (align)
5440     {
5441       incr = fold (build2 (PLUS_EXPR, ptr_type_node, incr,
5442                            ssize_int (align - 1)));
5443       incr = fold (build2 (BIT_AND_EXPR, ptr_type_node, incr,
5444                            ssize_int (-align)));
5445     }
5446
5447   gimplify_expr (&incr, pre_p, post_p, is_gimple_val, fb_rvalue);
5448   addr = incr;
5449
5450   if (BYTES_BIG_ENDIAN && size < rsize)
5451     addr = fold (build2 (PLUS_EXPR, ptr_type_node, incr,
5452                          ssize_int (rsize - size)));
5453
5454   if (indirect)
5455     {
5456       addr = fold_convert (build_pointer_type (ptrtype), addr);
5457       addr = build_va_arg_indirect_ref (addr);
5458     }
5459   /* If the address isn't aligned properly for the type,
5460      we may need to copy to a temporary.
5461      FIXME: This is inefficient.  Usually we can do this
5462      in registers.  */
5463   else if (align == 0
5464            && TYPE_ALIGN (type) > BITS_PER_WORD)
5465     {
5466       tree tmp = create_tmp_var (type, "va_arg_tmp");
5467       tree dest_addr = build_fold_addr_expr (tmp);
5468
5469       tree copy = build_function_call_expr
5470         (implicit_built_in_decls[BUILT_IN_MEMCPY],
5471          tree_cons (NULL_TREE, dest_addr,
5472                     tree_cons (NULL_TREE, addr,
5473                                tree_cons (NULL_TREE, size_int (rsize),
5474                                           NULL_TREE))));
5475
5476       gimplify_and_add (copy, pre_p);
5477       addr = dest_addr;
5478     }
5479   else
5480     addr = fold_convert (ptrtype, addr);
5481
5482   incr = fold (build2 (PLUS_EXPR, ptr_type_node, incr, ssize_int (rsize)));
5483   incr = build2 (MODIFY_EXPR, ptr_type_node, valist, incr);
5484   gimplify_and_add (incr, post_p);
5485
5486   return build_va_arg_indirect_ref (addr);
5487 }
5488 \f
5489 /* Implement the TARGET_VECTOR_MODE_SUPPORTED_P target hook.
5490    Specify whether the vector mode is supported by the hardware.  */
5491
5492 static bool
5493 sparc_vector_mode_supported_p (enum machine_mode mode)
5494 {
5495   return TARGET_VIS && VECTOR_MODE_P (mode) ? true : false;
5496 }
5497 \f
5498 /* Return the string to output an unconditional branch to LABEL, which is
5499    the operand number of the label.
5500
5501    DEST is the destination insn (i.e. the label), INSN is the source.  */
5502
5503 const char *
5504 output_ubranch (rtx dest, int label, rtx insn)
5505 {
5506   static char string[64];
5507   bool v9_form = false;
5508   char *p;
5509
5510   if (TARGET_V9 && INSN_ADDRESSES_SET_P ())
5511     {
5512       int delta = (INSN_ADDRESSES (INSN_UID (dest))
5513                    - INSN_ADDRESSES (INSN_UID (insn)));
5514       /* Leave some instructions for "slop".  */
5515       if (delta >= -260000 && delta < 260000)
5516         v9_form = true;
5517     }
5518
5519   if (v9_form)
5520     strcpy (string, "ba%*,pt\t%%xcc, ");
5521   else
5522     strcpy (string, "b%*\t");
5523
5524   p = strchr (string, '\0');
5525   *p++ = '%';
5526   *p++ = 'l';
5527   *p++ = '0' + label;
5528   *p++ = '%';
5529   *p++ = '(';
5530   *p = '\0';
5531
5532   return string;
5533 }
5534
5535 /* Return the string to output a conditional branch to LABEL, which is
5536    the operand number of the label.  OP is the conditional expression.
5537    XEXP (OP, 0) is assumed to be a condition code register (integer or
5538    floating point) and its mode specifies what kind of comparison we made.
5539
5540    DEST is the destination insn (i.e. the label), INSN is the source.
5541
5542    REVERSED is nonzero if we should reverse the sense of the comparison.
5543
5544    ANNUL is nonzero if we should generate an annulling branch.  */
5545
5546 const char *
5547 output_cbranch (rtx op, rtx dest, int label, int reversed, int annul,
5548                 rtx insn)
5549 {
5550   static char string[64];
5551   enum rtx_code code = GET_CODE (op);
5552   rtx cc_reg = XEXP (op, 0);
5553   enum machine_mode mode = GET_MODE (cc_reg);
5554   const char *labelno, *branch;
5555   int spaces = 8, far;
5556   char *p;
5557
5558   /* v9 branches are limited to +-1MB.  If it is too far away,
5559      change
5560
5561      bne,pt %xcc, .LC30
5562
5563      to
5564
5565      be,pn %xcc, .+12
5566       nop
5567      ba .LC30
5568
5569      and
5570
5571      fbne,a,pn %fcc2, .LC29
5572
5573      to
5574
5575      fbe,pt %fcc2, .+16
5576       nop
5577      ba .LC29  */
5578
5579   far = TARGET_V9 && (get_attr_length (insn) >= 3);
5580   if (reversed ^ far)
5581     {
5582       /* Reversal of FP compares takes care -- an ordered compare
5583          becomes an unordered compare and vice versa.  */
5584       if (mode == CCFPmode || mode == CCFPEmode)
5585         code = reverse_condition_maybe_unordered (code);
5586       else
5587         code = reverse_condition (code);
5588     }
5589
5590   /* Start by writing the branch condition.  */
5591   if (mode == CCFPmode || mode == CCFPEmode)
5592     {
5593       switch (code)
5594         {
5595         case NE:
5596           branch = "fbne";
5597           break;
5598         case EQ:
5599           branch = "fbe";
5600           break;
5601         case GE:
5602           branch = "fbge";
5603           break;
5604         case GT:
5605           branch = "fbg";
5606           break;
5607         case LE:
5608           branch = "fble";
5609           break;
5610         case LT:
5611           branch = "fbl";
5612           break;
5613         case UNORDERED:
5614           branch = "fbu";
5615           break;
5616         case ORDERED:
5617           branch = "fbo";
5618           break;
5619         case UNGT:
5620           branch = "fbug";
5621           break;
5622         case UNLT:
5623           branch = "fbul";
5624           break;
5625         case UNEQ:
5626           branch = "fbue";
5627           break;
5628         case UNGE:
5629           branch = "fbuge";
5630           break;
5631         case UNLE:
5632           branch = "fbule";
5633           break;
5634         case LTGT:
5635           branch = "fblg";
5636           break;
5637
5638         default:
5639           gcc_unreachable ();
5640         }
5641
5642       /* ??? !v9: FP branches cannot be preceded by another floating point
5643          insn.  Because there is currently no concept of pre-delay slots,
5644          we can fix this only by always emitting a nop before a floating
5645          point branch.  */
5646
5647       string[0] = '\0';
5648       if (! TARGET_V9)
5649         strcpy (string, "nop\n\t");
5650       strcat (string, branch);
5651     }
5652   else
5653     {
5654       switch (code)
5655         {
5656         case NE:
5657           branch = "bne";
5658           break;
5659         case EQ:
5660           branch = "be";
5661           break;
5662         case GE:
5663           if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
5664             branch = "bpos";
5665           else
5666             branch = "bge";
5667           break;
5668         case GT:
5669           branch = "bg";
5670           break;
5671         case LE:
5672           branch = "ble";
5673           break;
5674         case LT:
5675           if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
5676             branch = "bneg";
5677           else
5678             branch = "bl";
5679           break;
5680         case GEU:
5681           branch = "bgeu";
5682           break;
5683         case GTU:
5684           branch = "bgu";
5685           break;
5686         case LEU:
5687           branch = "bleu";
5688           break;
5689         case LTU:
5690           branch = "blu";
5691           break;
5692
5693         default:
5694           gcc_unreachable ();
5695         }
5696       strcpy (string, branch);
5697     }
5698   spaces -= strlen (branch);
5699   p = strchr (string, '\0');
5700
5701   /* Now add the annulling, the label, and a possible noop.  */
5702   if (annul && ! far)
5703     {
5704       strcpy (p, ",a");
5705       p += 2;
5706       spaces -= 2;
5707     }
5708
5709   if (TARGET_V9)
5710     {
5711       rtx note;
5712       int v8 = 0;
5713
5714       if (! far && insn && INSN_ADDRESSES_SET_P ())
5715         {
5716           int delta = (INSN_ADDRESSES (INSN_UID (dest))
5717                        - INSN_ADDRESSES (INSN_UID (insn)));
5718           /* Leave some instructions for "slop".  */
5719           if (delta < -260000 || delta >= 260000)
5720             v8 = 1;
5721         }
5722
5723       if (mode == CCFPmode || mode == CCFPEmode)
5724         {
5725           static char v9_fcc_labelno[] = "%%fccX, ";
5726           /* Set the char indicating the number of the fcc reg to use.  */
5727           v9_fcc_labelno[5] = REGNO (cc_reg) - SPARC_FIRST_V9_FCC_REG + '0';
5728           labelno = v9_fcc_labelno;
5729           if (v8)
5730             {
5731               gcc_assert (REGNO (cc_reg) == SPARC_FCC_REG);
5732               labelno = "";
5733             }
5734         }
5735       else if (mode == CCXmode || mode == CCX_NOOVmode)
5736         {
5737           labelno = "%%xcc, ";
5738           gcc_assert (! v8);
5739         }
5740       else
5741         {
5742           labelno = "%%icc, ";
5743           if (v8)
5744             labelno = "";
5745         }
5746
5747       if (*labelno && insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
5748         {
5749           strcpy (p,
5750                   ((INTVAL (XEXP (note, 0)) >= REG_BR_PROB_BASE / 2) ^ far)
5751                   ? ",pt" : ",pn");
5752           p += 3;
5753           spaces -= 3;
5754         }
5755     }
5756   else
5757     labelno = "";
5758
5759   if (spaces > 0)
5760     *p++ = '\t';
5761   else
5762     *p++ = ' ';
5763   strcpy (p, labelno);
5764   p = strchr (p, '\0');
5765   if (far)
5766     {
5767       strcpy (p, ".+12\n\t nop\n\tb\t");
5768       /* Skip the next insn if requested or
5769          if we know that it will be a nop.  */
5770       if (annul || ! final_sequence)
5771         p[3] = '6';
5772       p += 14;
5773     }
5774   *p++ = '%';
5775   *p++ = 'l';
5776   *p++ = label + '0';
5777   *p++ = '%';
5778   *p++ = '#';
5779   *p = '\0';
5780
5781   return string;
5782 }
5783
5784 /* Emit a library call comparison between floating point X and Y.
5785    COMPARISON is the rtl operator to compare with (EQ, NE, GT, etc.).
5786    TARGET_ARCH64 uses _Qp_* functions, which use pointers to TFmode
5787    values as arguments instead of the TFmode registers themselves,
5788    that's why we cannot call emit_float_lib_cmp.  */
5789 void
5790 sparc_emit_float_lib_cmp (rtx x, rtx y, enum rtx_code comparison)
5791 {
5792   const char *qpfunc;
5793   rtx slot0, slot1, result, tem, tem2;
5794   enum machine_mode mode;
5795
5796   switch (comparison)
5797     {
5798     case EQ:
5799       qpfunc = (TARGET_ARCH64) ? "_Qp_feq" : "_Q_feq";
5800       break;
5801
5802     case NE:
5803       qpfunc = (TARGET_ARCH64) ? "_Qp_fne" : "_Q_fne";
5804       break;
5805
5806     case GT:
5807       qpfunc = (TARGET_ARCH64) ? "_Qp_fgt" : "_Q_fgt";
5808       break;
5809
5810     case GE:
5811       qpfunc = (TARGET_ARCH64) ? "_Qp_fge" : "_Q_fge";
5812       break;
5813
5814     case LT:
5815       qpfunc = (TARGET_ARCH64) ? "_Qp_flt" : "_Q_flt";
5816       break;
5817
5818     case LE:
5819       qpfunc = (TARGET_ARCH64) ? "_Qp_fle" : "_Q_fle";
5820       break;
5821
5822     case ORDERED:
5823     case UNORDERED:
5824     case UNGT:
5825     case UNLT:
5826     case UNEQ:
5827     case UNGE:
5828     case UNLE:
5829     case LTGT:
5830       qpfunc = (TARGET_ARCH64) ? "_Qp_cmp" : "_Q_cmp";
5831       break;
5832
5833     default:
5834       gcc_unreachable ();
5835     }
5836
5837   if (TARGET_ARCH64)
5838     {
5839       if (GET_CODE (x) != MEM)
5840         {
5841           slot0 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode), 0);
5842           emit_insn (gen_rtx_SET (VOIDmode, slot0, x));
5843         }
5844       else
5845         slot0 = x;
5846
5847       if (GET_CODE (y) != MEM)
5848         {
5849           slot1 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode), 0);
5850           emit_insn (gen_rtx_SET (VOIDmode, slot1, y));
5851         }
5852       else
5853         slot1 = y;
5854
5855       emit_library_call (gen_rtx_SYMBOL_REF (Pmode, qpfunc), LCT_NORMAL,
5856                          DImode, 2,
5857                          XEXP (slot0, 0), Pmode,
5858                          XEXP (slot1, 0), Pmode);
5859
5860       mode = DImode;
5861     }
5862   else
5863     {
5864       emit_library_call (gen_rtx_SYMBOL_REF (Pmode, qpfunc), LCT_NORMAL,
5865                          SImode, 2,
5866                          x, TFmode, y, TFmode);
5867
5868       mode = SImode;
5869     }
5870
5871
5872   /* Immediately move the result of the libcall into a pseudo
5873      register so reload doesn't clobber the value if it needs
5874      the return register for a spill reg.  */
5875   result = gen_reg_rtx (mode);
5876   emit_move_insn (result, hard_libcall_value (mode));
5877
5878   switch (comparison)
5879     {
5880     default:
5881       emit_cmp_insn (result, const0_rtx, NE, NULL_RTX, mode, 0);
5882       break;
5883     case ORDERED:
5884     case UNORDERED:
5885       emit_cmp_insn (result, GEN_INT(3), comparison == UNORDERED ? EQ : NE,
5886                      NULL_RTX, mode, 0);
5887       break;
5888     case UNGT:
5889     case UNGE:
5890       emit_cmp_insn (result, const1_rtx,
5891                      comparison == UNGT ? GT : NE, NULL_RTX, mode, 0);
5892       break;
5893     case UNLE:
5894       emit_cmp_insn (result, const2_rtx, NE, NULL_RTX, mode, 0);
5895       break;
5896     case UNLT:
5897       tem = gen_reg_rtx (mode);
5898       if (TARGET_ARCH32)
5899         emit_insn (gen_andsi3 (tem, result, const1_rtx));
5900       else
5901         emit_insn (gen_anddi3 (tem, result, const1_rtx));
5902       emit_cmp_insn (tem, const0_rtx, NE, NULL_RTX, mode, 0);
5903       break;
5904     case UNEQ:
5905     case LTGT:
5906       tem = gen_reg_rtx (mode);
5907       if (TARGET_ARCH32)
5908         emit_insn (gen_addsi3 (tem, result, const1_rtx));
5909       else
5910         emit_insn (gen_adddi3 (tem, result, const1_rtx));
5911       tem2 = gen_reg_rtx (mode);
5912       if (TARGET_ARCH32)
5913         emit_insn (gen_andsi3 (tem2, tem, const2_rtx));
5914       else
5915         emit_insn (gen_anddi3 (tem2, tem, const2_rtx));
5916       emit_cmp_insn (tem2, const0_rtx, comparison == UNEQ ? EQ : NE,
5917                      NULL_RTX, mode, 0);
5918       break;
5919     }
5920 }
5921
5922 /* Generate an unsigned DImode to FP conversion.  This is the same code
5923    optabs would emit if we didn't have TFmode patterns.  */
5924
5925 void
5926 sparc_emit_floatunsdi (rtx *operands, enum machine_mode mode)
5927 {
5928   rtx neglab, donelab, i0, i1, f0, in, out;
5929
5930   out = operands[0];
5931   in = force_reg (DImode, operands[1]);
5932   neglab = gen_label_rtx ();
5933   donelab = gen_label_rtx ();
5934   i0 = gen_reg_rtx (DImode);
5935   i1 = gen_reg_rtx (DImode);
5936   f0 = gen_reg_rtx (mode);
5937
5938   emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab);
5939
5940   emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
5941   emit_jump_insn (gen_jump (donelab));
5942   emit_barrier ();
5943
5944   emit_label (neglab);
5945
5946   emit_insn (gen_lshrdi3 (i0, in, const1_rtx));
5947   emit_insn (gen_anddi3 (i1, in, const1_rtx));
5948   emit_insn (gen_iordi3 (i0, i0, i1));
5949   emit_insn (gen_rtx_SET (VOIDmode, f0, gen_rtx_FLOAT (mode, i0)));
5950   emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
5951
5952   emit_label (donelab);
5953 }
5954
5955 /* Generate an FP to unsigned DImode conversion.  This is the same code
5956    optabs would emit if we didn't have TFmode patterns.  */
5957
5958 void
5959 sparc_emit_fixunsdi (rtx *operands, enum machine_mode mode)
5960 {
5961   rtx neglab, donelab, i0, i1, f0, in, out, limit;
5962
5963   out = operands[0];
5964   in = force_reg (mode, operands[1]);
5965   neglab = gen_label_rtx ();
5966   donelab = gen_label_rtx ();
5967   i0 = gen_reg_rtx (DImode);
5968   i1 = gen_reg_rtx (DImode);
5969   limit = gen_reg_rtx (mode);
5970   f0 = gen_reg_rtx (mode);
5971
5972   emit_move_insn (limit,
5973                   CONST_DOUBLE_FROM_REAL_VALUE (
5974                     REAL_VALUE_ATOF ("9223372036854775808.0", mode), mode));
5975   emit_cmp_and_jump_insns (in, limit, GE, NULL_RTX, mode, 0, neglab);
5976
5977   emit_insn (gen_rtx_SET (VOIDmode,
5978                           out,
5979                           gen_rtx_FIX (DImode, gen_rtx_FIX (mode, in))));
5980   emit_jump_insn (gen_jump (donelab));
5981   emit_barrier ();
5982
5983   emit_label (neglab);
5984
5985   emit_insn (gen_rtx_SET (VOIDmode, f0, gen_rtx_MINUS (mode, in, limit)));
5986   emit_insn (gen_rtx_SET (VOIDmode,
5987                           i0,
5988                           gen_rtx_FIX (DImode, gen_rtx_FIX (mode, f0))));
5989   emit_insn (gen_movdi (i1, const1_rtx));
5990   emit_insn (gen_ashldi3 (i1, i1, GEN_INT (63)));
5991   emit_insn (gen_xordi3 (out, i0, i1));
5992
5993   emit_label (donelab);
5994 }
5995
5996 /* Return the string to output a conditional branch to LABEL, testing
5997    register REG.  LABEL is the operand number of the label; REG is the
5998    operand number of the reg.  OP is the conditional expression.  The mode
5999    of REG says what kind of comparison we made.
6000
6001    DEST is the destination insn (i.e. the label), INSN is the source.
6002
6003    REVERSED is nonzero if we should reverse the sense of the comparison.
6004
6005    ANNUL is nonzero if we should generate an annulling branch.  */
6006
6007 const char *
6008 output_v9branch (rtx op, rtx dest, int reg, int label, int reversed,
6009                  int annul, rtx insn)
6010 {
6011   static char string[64];
6012   enum rtx_code code = GET_CODE (op);
6013   enum machine_mode mode = GET_MODE (XEXP (op, 0));
6014   rtx note;
6015   int far;
6016   char *p;
6017
6018   /* branch on register are limited to +-128KB.  If it is too far away,
6019      change
6020
6021      brnz,pt %g1, .LC30
6022
6023      to
6024
6025      brz,pn %g1, .+12
6026       nop
6027      ba,pt %xcc, .LC30
6028
6029      and
6030
6031      brgez,a,pn %o1, .LC29
6032
6033      to
6034
6035      brlz,pt %o1, .+16
6036       nop
6037      ba,pt %xcc, .LC29  */
6038
6039   far = get_attr_length (insn) >= 3;
6040
6041   /* If not floating-point or if EQ or NE, we can just reverse the code.  */
6042   if (reversed ^ far)
6043     code = reverse_condition (code);
6044
6045   /* Only 64 bit versions of these instructions exist.  */
6046   gcc_assert (mode == DImode);
6047
6048   /* Start by writing the branch condition.  */
6049
6050   switch (code)
6051     {
6052     case NE:
6053       strcpy (string, "brnz");
6054       break;
6055
6056     case EQ:
6057       strcpy (string, "brz");
6058       break;
6059
6060     case GE:
6061       strcpy (string, "brgez");
6062       break;
6063
6064     case LT:
6065       strcpy (string, "brlz");
6066       break;
6067
6068     case LE:
6069       strcpy (string, "brlez");
6070       break;
6071
6072     case GT:
6073       strcpy (string, "brgz");
6074       break;
6075
6076     default:
6077       gcc_unreachable ();
6078     }
6079
6080   p = strchr (string, '\0');
6081
6082   /* Now add the annulling, reg, label, and nop.  */
6083   if (annul && ! far)
6084     {
6085       strcpy (p, ",a");
6086       p += 2;
6087     }
6088
6089   if (insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
6090     {
6091       strcpy (p,
6092               ((INTVAL (XEXP (note, 0)) >= REG_BR_PROB_BASE / 2) ^ far)
6093               ? ",pt" : ",pn");
6094       p += 3;
6095     }
6096
6097   *p = p < string + 8 ? '\t' : ' ';
6098   p++;
6099   *p++ = '%';
6100   *p++ = '0' + reg;
6101   *p++ = ',';
6102   *p++ = ' ';
6103   if (far)
6104     {
6105       int veryfar = 1, delta;
6106
6107       if (INSN_ADDRESSES_SET_P ())
6108         {
6109           delta = (INSN_ADDRESSES (INSN_UID (dest))
6110                    - INSN_ADDRESSES (INSN_UID (insn)));
6111           /* Leave some instructions for "slop".  */
6112           if (delta >= -260000 && delta < 260000)
6113             veryfar = 0;
6114         }
6115
6116       strcpy (p, ".+12\n\t nop\n\t");
6117       /* Skip the next insn if requested or
6118          if we know that it will be a nop.  */
6119       if (annul || ! final_sequence)
6120         p[3] = '6';
6121       p += 12;
6122       if (veryfar)
6123         {
6124           strcpy (p, "b\t");
6125           p += 2;
6126         }
6127       else
6128         {
6129           strcpy (p, "ba,pt\t%%xcc, ");
6130           p += 13;
6131         }
6132     }
6133   *p++ = '%';
6134   *p++ = 'l';
6135   *p++ = '0' + label;
6136   *p++ = '%';
6137   *p++ = '#';
6138   *p = '\0';
6139
6140   return string;
6141 }
6142
6143 /* Return 1, if any of the registers of the instruction are %l[0-7] or %o[0-7].
6144    Such instructions cannot be used in the delay slot of return insn on v9.
6145    If TEST is 0, also rename all %i[0-7] registers to their %o[0-7] counterparts.
6146  */
6147
6148 static int
6149 epilogue_renumber (register rtx *where, int test)
6150 {
6151   register const char *fmt;
6152   register int i;
6153   register enum rtx_code code;
6154
6155   if (*where == 0)
6156     return 0;
6157
6158   code = GET_CODE (*where);
6159
6160   switch (code)
6161     {
6162     case REG:
6163       if (REGNO (*where) >= 8 && REGNO (*where) < 24)      /* oX or lX */
6164         return 1;
6165       if (! test && REGNO (*where) >= 24 && REGNO (*where) < 32)
6166         *where = gen_rtx_REG (GET_MODE (*where), OUTGOING_REGNO (REGNO(*where)));
6167     case SCRATCH:
6168     case CC0:
6169     case PC:
6170     case CONST_INT:
6171     case CONST_DOUBLE:
6172       return 0;
6173
6174       /* Do not replace the frame pointer with the stack pointer because
6175          it can cause the delayed instruction to load below the stack.
6176          This occurs when instructions like:
6177
6178          (set (reg/i:SI 24 %i0)
6179              (mem/f:SI (plus:SI (reg/f:SI 30 %fp)
6180                        (const_int -20 [0xffffffec])) 0))
6181
6182          are in the return delayed slot.  */
6183     case PLUS:
6184       if (GET_CODE (XEXP (*where, 0)) == REG
6185           && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM
6186           && (GET_CODE (XEXP (*where, 1)) != CONST_INT
6187               || INTVAL (XEXP (*where, 1)) < SPARC_STACK_BIAS))
6188         return 1;
6189       break;
6190
6191     case MEM:
6192       if (SPARC_STACK_BIAS
6193           && GET_CODE (XEXP (*where, 0)) == REG
6194           && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM)
6195         return 1;
6196       break;
6197
6198     default:
6199       break;
6200     }
6201
6202   fmt = GET_RTX_FORMAT (code);
6203
6204   for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
6205     {
6206       if (fmt[i] == 'E')
6207         {
6208           register int j;
6209           for (j = XVECLEN (*where, i) - 1; j >= 0; j--)
6210             if (epilogue_renumber (&(XVECEXP (*where, i, j)), test))
6211               return 1;
6212         }
6213       else if (fmt[i] == 'e'
6214                && epilogue_renumber (&(XEXP (*where, i)), test))
6215         return 1;
6216     }
6217   return 0;
6218 }
6219 \f
6220 /* Leaf functions and non-leaf functions have different needs.  */
6221
6222 static const int
6223 reg_leaf_alloc_order[] = REG_LEAF_ALLOC_ORDER;
6224
6225 static const int
6226 reg_nonleaf_alloc_order[] = REG_ALLOC_ORDER;
6227
6228 static const int *const reg_alloc_orders[] = {
6229   reg_leaf_alloc_order,
6230   reg_nonleaf_alloc_order};
6231
6232 void
6233 order_regs_for_local_alloc (void)
6234 {
6235   static int last_order_nonleaf = 1;
6236
6237   if (regs_ever_live[15] != last_order_nonleaf)
6238     {
6239       last_order_nonleaf = !last_order_nonleaf;
6240       memcpy ((char *) reg_alloc_order,
6241               (const char *) reg_alloc_orders[last_order_nonleaf],
6242               FIRST_PSEUDO_REGISTER * sizeof (int));
6243     }
6244 }
6245 \f
6246 /* Return 1 if REG and MEM are legitimate enough to allow the various
6247    mem<-->reg splits to be run.  */
6248
6249 int
6250 sparc_splitdi_legitimate (rtx reg, rtx mem)
6251 {
6252   /* Punt if we are here by mistake.  */
6253   gcc_assert (reload_completed);
6254
6255   /* We must have an offsettable memory reference.  */
6256   if (! offsettable_memref_p (mem))
6257     return 0;
6258
6259   /* If we have legitimate args for ldd/std, we do not want
6260      the split to happen.  */
6261   if ((REGNO (reg) % 2) == 0
6262       && mem_min_alignment (mem, 8))
6263     return 0;
6264
6265   /* Success.  */
6266   return 1;
6267 }
6268
6269 /* Return 1 if x and y are some kind of REG and they refer to
6270    different hard registers.  This test is guaranteed to be
6271    run after reload.  */
6272
6273 int
6274 sparc_absnegfloat_split_legitimate (rtx x, rtx y)
6275 {
6276   if (GET_CODE (x) != REG)
6277     return 0;
6278   if (GET_CODE (y) != REG)
6279     return 0;
6280   if (REGNO (x) == REGNO (y))
6281     return 0;
6282   return 1;
6283 }
6284
6285 /* Return 1 if REGNO (reg1) is even and REGNO (reg1) == REGNO (reg2) - 1.
6286    This makes them candidates for using ldd and std insns.
6287
6288    Note reg1 and reg2 *must* be hard registers.  */
6289
6290 int
6291 registers_ok_for_ldd_peep (rtx reg1, rtx reg2)
6292 {
6293   /* We might have been passed a SUBREG.  */
6294   if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
6295     return 0;
6296
6297   if (REGNO (reg1) % 2 != 0)
6298     return 0;
6299
6300   /* Integer ldd is deprecated in SPARC V9 */
6301   if (TARGET_V9 && REGNO (reg1) < 32)
6302     return 0;
6303
6304   return (REGNO (reg1) == REGNO (reg2) - 1);
6305 }
6306
6307 /* Return 1 if the addresses in mem1 and mem2 are suitable for use in
6308    an ldd or std insn.
6309
6310    This can only happen when addr1 and addr2, the addresses in mem1
6311    and mem2, are consecutive memory locations (addr1 + 4 == addr2).
6312    addr1 must also be aligned on a 64-bit boundary.
6313
6314    Also iff dependent_reg_rtx is not null it should not be used to
6315    compute the address for mem1, i.e. we cannot optimize a sequence
6316    like:
6317         ld [%o0], %o0
6318         ld [%o0 + 4], %o1
6319    to
6320         ldd [%o0], %o0
6321    nor:
6322         ld [%g3 + 4], %g3
6323         ld [%g3], %g2
6324    to
6325         ldd [%g3], %g2
6326
6327    But, note that the transformation from:
6328         ld [%g2 + 4], %g3
6329         ld [%g2], %g2
6330    to
6331         ldd [%g2], %g2
6332    is perfectly fine.  Thus, the peephole2 patterns always pass us
6333    the destination register of the first load, never the second one.
6334
6335    For stores we don't have a similar problem, so dependent_reg_rtx is
6336    NULL_RTX.  */
6337
6338 int
6339 mems_ok_for_ldd_peep (rtx mem1, rtx mem2, rtx dependent_reg_rtx)
6340 {
6341   rtx addr1, addr2;
6342   unsigned int reg1;
6343   HOST_WIDE_INT offset1;
6344
6345   /* The mems cannot be volatile.  */
6346   if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
6347     return 0;
6348
6349   /* MEM1 should be aligned on a 64-bit boundary.  */
6350   if (MEM_ALIGN (mem1) < 64)
6351     return 0;
6352
6353   addr1 = XEXP (mem1, 0);
6354   addr2 = XEXP (mem2, 0);
6355
6356   /* Extract a register number and offset (if used) from the first addr.  */
6357   if (GET_CODE (addr1) == PLUS)
6358     {
6359       /* If not a REG, return zero.  */
6360       if (GET_CODE (XEXP (addr1, 0)) != REG)
6361         return 0;
6362       else
6363         {
6364           reg1 = REGNO (XEXP (addr1, 0));
6365           /* The offset must be constant!  */
6366           if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
6367             return 0;
6368           offset1 = INTVAL (XEXP (addr1, 1));
6369         }
6370     }
6371   else if (GET_CODE (addr1) != REG)
6372     return 0;
6373   else
6374     {
6375       reg1 = REGNO (addr1);
6376       /* This was a simple (mem (reg)) expression.  Offset is 0.  */
6377       offset1 = 0;
6378     }
6379
6380   /* Make sure the second address is a (mem (plus (reg) (const_int).  */
6381   if (GET_CODE (addr2) != PLUS)
6382     return 0;
6383
6384   if (GET_CODE (XEXP (addr2, 0)) != REG
6385       || GET_CODE (XEXP (addr2, 1)) != CONST_INT)
6386     return 0;
6387
6388   if (reg1 != REGNO (XEXP (addr2, 0)))
6389     return 0;
6390
6391   if (dependent_reg_rtx != NULL_RTX && reg1 == REGNO (dependent_reg_rtx))
6392     return 0;
6393
6394   /* The first offset must be evenly divisible by 8 to ensure the
6395      address is 64 bit aligned.  */
6396   if (offset1 % 8 != 0)
6397     return 0;
6398
6399   /* The offset for the second addr must be 4 more than the first addr.  */
6400   if (INTVAL (XEXP (addr2, 1)) != offset1 + 4)
6401     return 0;
6402
6403   /* All the tests passed.  addr1 and addr2 are valid for ldd and std
6404      instructions.  */
6405   return 1;
6406 }
6407
6408 /* Return 1 if reg is a pseudo, or is the first register in
6409    a hard register pair.  This makes it a candidate for use in
6410    ldd and std insns.  */
6411
6412 int
6413 register_ok_for_ldd (rtx reg)
6414 {
6415   /* We might have been passed a SUBREG.  */
6416   if (GET_CODE (reg) != REG)
6417     return 0;
6418
6419   if (REGNO (reg) < FIRST_PSEUDO_REGISTER)
6420     return (REGNO (reg) % 2 == 0);
6421   else
6422     return 1;
6423 }
6424 \f
6425 /* Print operand X (an rtx) in assembler syntax to file FILE.
6426    CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
6427    For `%' followed by punctuation, CODE is the punctuation and X is null.  */
6428
6429 void
6430 print_operand (FILE *file, rtx x, int code)
6431 {
6432   switch (code)
6433     {
6434     case '#':
6435       /* Output an insn in a delay slot.  */
6436       if (final_sequence)
6437         sparc_indent_opcode = 1;
6438       else
6439         fputs ("\n\t nop", file);
6440       return;
6441     case '*':
6442       /* Output an annul flag if there's nothing for the delay slot and we
6443          are optimizing.  This is always used with '(' below.
6444          Sun OS 4.1.1 dbx can't handle an annulled unconditional branch;
6445          this is a dbx bug.  So, we only do this when optimizing.
6446          On UltraSPARC, a branch in a delay slot causes a pipeline flush.
6447          Always emit a nop in case the next instruction is a branch.  */
6448       if (! final_sequence && (optimize && (int)sparc_cpu < PROCESSOR_V9))
6449         fputs (",a", file);
6450       return;
6451     case '(':
6452       /* Output a 'nop' if there's nothing for the delay slot and we are
6453          not optimizing.  This is always used with '*' above.  */
6454       if (! final_sequence && ! (optimize && (int)sparc_cpu < PROCESSOR_V9))
6455         fputs ("\n\t nop", file);
6456       else if (final_sequence)
6457         sparc_indent_opcode = 1;
6458       return;
6459     case ')':
6460       /* Output the right displacement from the saved PC on function return.
6461          The caller may have placed an "unimp" insn immediately after the call
6462          so we have to account for it.  This insn is used in the 32-bit ABI
6463          when calling a function that returns a non zero-sized structure. The
6464          64-bit ABI doesn't have it.  Be careful to have this test be the same
6465          as that used on the call.  */
6466      if (! TARGET_ARCH64
6467          && current_function_returns_struct
6468          && (TREE_CODE (DECL_SIZE (DECL_RESULT (current_function_decl)))
6469              == INTEGER_CST)
6470          && ! integer_zerop (DECL_SIZE (DECL_RESULT (current_function_decl))))
6471         fputs ("12", file);
6472       else
6473         fputc ('8', file);
6474       return;
6475     case '_':
6476       /* Output the Embedded Medium/Anywhere code model base register.  */
6477       fputs (EMBMEDANY_BASE_REG, file);
6478       return;
6479     case '&':
6480       /* Print some local dynamic TLS name.  */
6481       assemble_name (file, get_some_local_dynamic_name ());
6482       return;
6483
6484     case 'Y':
6485       /* Adjust the operand to take into account a RESTORE operation.  */
6486       if (GET_CODE (x) == CONST_INT)
6487         break;
6488       else if (GET_CODE (x) != REG)
6489         output_operand_lossage ("invalid %%Y operand");
6490       else if (REGNO (x) < 8)
6491         fputs (reg_names[REGNO (x)], file);
6492       else if (REGNO (x) >= 24 && REGNO (x) < 32)
6493         fputs (reg_names[REGNO (x)-16], file);
6494       else
6495         output_operand_lossage ("invalid %%Y operand");
6496       return;
6497     case 'L':
6498       /* Print out the low order register name of a register pair.  */
6499       if (WORDS_BIG_ENDIAN)
6500         fputs (reg_names[REGNO (x)+1], file);
6501       else
6502         fputs (reg_names[REGNO (x)], file);
6503       return;
6504     case 'H':
6505       /* Print out the high order register name of a register pair.  */
6506       if (WORDS_BIG_ENDIAN)
6507         fputs (reg_names[REGNO (x)], file);
6508       else
6509         fputs (reg_names[REGNO (x)+1], file);
6510       return;
6511     case 'R':
6512       /* Print out the second register name of a register pair or quad.
6513          I.e., R (%o0) => %o1.  */
6514       fputs (reg_names[REGNO (x)+1], file);
6515       return;
6516     case 'S':
6517       /* Print out the third register name of a register quad.
6518          I.e., S (%o0) => %o2.  */
6519       fputs (reg_names[REGNO (x)+2], file);
6520       return;
6521     case 'T':
6522       /* Print out the fourth register name of a register quad.
6523          I.e., T (%o0) => %o3.  */
6524       fputs (reg_names[REGNO (x)+3], file);
6525       return;
6526     case 'x':
6527       /* Print a condition code register.  */
6528       if (REGNO (x) == SPARC_ICC_REG)
6529         {
6530           /* We don't handle CC[X]_NOOVmode because they're not supposed
6531              to occur here.  */
6532           if (GET_MODE (x) == CCmode)
6533             fputs ("%icc", file);
6534           else if (GET_MODE (x) == CCXmode)
6535             fputs ("%xcc", file);
6536           else
6537             gcc_unreachable ();
6538         }
6539       else
6540         /* %fccN register */
6541         fputs (reg_names[REGNO (x)], file);
6542       return;
6543     case 'm':
6544       /* Print the operand's address only.  */
6545       output_address (XEXP (x, 0));
6546       return;
6547     case 'r':
6548       /* In this case we need a register.  Use %g0 if the
6549          operand is const0_rtx.  */
6550       if (x == const0_rtx
6551           || (GET_MODE (x) != VOIDmode && x == CONST0_RTX (GET_MODE (x))))
6552         {
6553           fputs ("%g0", file);
6554           return;
6555         }
6556       else
6557         break;
6558
6559     case 'A':
6560       switch (GET_CODE (x))
6561         {
6562         case IOR: fputs ("or", file); break;
6563         case AND: fputs ("and", file); break;
6564         case XOR: fputs ("xor", file); break;
6565         default: output_operand_lossage ("invalid %%A operand");
6566         }
6567       return;
6568
6569     case 'B':
6570       switch (GET_CODE (x))
6571         {
6572         case IOR: fputs ("orn", file); break;
6573         case AND: fputs ("andn", file); break;
6574         case XOR: fputs ("xnor", file); break;
6575         default: output_operand_lossage ("invalid %%B operand");
6576         }
6577       return;
6578
6579       /* These are used by the conditional move instructions.  */
6580     case 'c' :
6581     case 'C':
6582       {
6583         enum rtx_code rc = GET_CODE (x);
6584
6585         if (code == 'c')
6586           {
6587             enum machine_mode mode = GET_MODE (XEXP (x, 0));
6588             if (mode == CCFPmode || mode == CCFPEmode)
6589               rc = reverse_condition_maybe_unordered (GET_CODE (x));
6590             else
6591               rc = reverse_condition (GET_CODE (x));
6592           }
6593         switch (rc)
6594           {
6595           case NE: fputs ("ne", file); break;
6596           case EQ: fputs ("e", file); break;
6597           case GE: fputs ("ge", file); break;
6598           case GT: fputs ("g", file); break;
6599           case LE: fputs ("le", file); break;
6600           case LT: fputs ("l", file); break;
6601           case GEU: fputs ("geu", file); break;
6602           case GTU: fputs ("gu", file); break;
6603           case LEU: fputs ("leu", file); break;
6604           case LTU: fputs ("lu", file); break;
6605           case LTGT: fputs ("lg", file); break;
6606           case UNORDERED: fputs ("u", file); break;
6607           case ORDERED: fputs ("o", file); break;
6608           case UNLT: fputs ("ul", file); break;
6609           case UNLE: fputs ("ule", file); break;
6610           case UNGT: fputs ("ug", file); break;
6611           case UNGE: fputs ("uge", file); break;
6612           case UNEQ: fputs ("ue", file); break;
6613           default: output_operand_lossage (code == 'c'
6614                                            ? "invalid %%c operand"
6615                                            : "invalid %%C operand");
6616           }
6617         return;
6618       }
6619
6620       /* These are used by the movr instruction pattern.  */
6621     case 'd':
6622     case 'D':
6623       {
6624         enum rtx_code rc = (code == 'd'
6625                             ? reverse_condition (GET_CODE (x))
6626                             : GET_CODE (x));
6627         switch (rc)
6628           {
6629           case NE: fputs ("ne", file); break;
6630           case EQ: fputs ("e", file); break;
6631           case GE: fputs ("gez", file); break;
6632           case LT: fputs ("lz", file); break;
6633           case LE: fputs ("lez", file); break;
6634           case GT: fputs ("gz", file); break;
6635           default: output_operand_lossage (code == 'd'
6636                                            ? "invalid %%d operand"
6637                                            : "invalid %%D operand");
6638           }
6639         return;
6640       }
6641
6642     case 'b':
6643       {
6644         /* Print a sign-extended character.  */
6645         int i = trunc_int_for_mode (INTVAL (x), QImode);
6646         fprintf (file, "%d", i);
6647         return;
6648       }
6649
6650     case 'f':
6651       /* Operand must be a MEM; write its address.  */
6652       if (GET_CODE (x) != MEM)
6653         output_operand_lossage ("invalid %%f operand");
6654       output_address (XEXP (x, 0));
6655       return;
6656
6657     case 's':
6658       {
6659         /* Print a sign-extended 32-bit value.  */
6660         HOST_WIDE_INT i;
6661         if (GET_CODE(x) == CONST_INT)
6662           i = INTVAL (x);
6663         else if (GET_CODE(x) == CONST_DOUBLE)
6664           i = CONST_DOUBLE_LOW (x);
6665         else
6666           {
6667             output_operand_lossage ("invalid %%s operand");
6668             return;
6669           }
6670         i = trunc_int_for_mode (i, SImode);
6671         fprintf (file, HOST_WIDE_INT_PRINT_DEC, i);
6672         return;
6673       }
6674
6675     case 0:
6676       /* Do nothing special.  */
6677       break;
6678
6679     default:
6680       /* Undocumented flag.  */
6681       output_operand_lossage ("invalid operand output code");
6682     }
6683
6684   if (GET_CODE (x) == REG)
6685     fputs (reg_names[REGNO (x)], file);
6686   else if (GET_CODE (x) == MEM)
6687     {
6688       fputc ('[', file);
6689         /* Poor Sun assembler doesn't understand absolute addressing.  */
6690       if (CONSTANT_P (XEXP (x, 0)))
6691         fputs ("%g0+", file);
6692       output_address (XEXP (x, 0));
6693       fputc (']', file);
6694     }
6695   else if (GET_CODE (x) == HIGH)
6696     {
6697       fputs ("%hi(", file);
6698       output_addr_const (file, XEXP (x, 0));
6699       fputc (')', file);
6700     }
6701   else if (GET_CODE (x) == LO_SUM)
6702     {
6703       print_operand (file, XEXP (x, 0), 0);
6704       if (TARGET_CM_MEDMID)
6705         fputs ("+%l44(", file);
6706       else
6707         fputs ("+%lo(", file);
6708       output_addr_const (file, XEXP (x, 1));
6709       fputc (')', file);
6710     }
6711   else if (GET_CODE (x) == CONST_DOUBLE
6712            && (GET_MODE (x) == VOIDmode
6713                || GET_MODE_CLASS (GET_MODE (x)) == MODE_INT))
6714     {
6715       if (CONST_DOUBLE_HIGH (x) == 0)
6716         fprintf (file, "%u", (unsigned int) CONST_DOUBLE_LOW (x));
6717       else if (CONST_DOUBLE_HIGH (x) == -1
6718                && CONST_DOUBLE_LOW (x) < 0)
6719         fprintf (file, "%d", (int) CONST_DOUBLE_LOW (x));
6720       else
6721         output_operand_lossage ("long long constant not a valid immediate operand");
6722     }
6723   else if (GET_CODE (x) == CONST_DOUBLE)
6724     output_operand_lossage ("floating point constant not a valid immediate operand");
6725   else { output_addr_const (file, x); }
6726 }
6727 \f
6728 /* Target hook for assembling integer objects.  The sparc version has
6729    special handling for aligned DI-mode objects.  */
6730
6731 static bool
6732 sparc_assemble_integer (rtx x, unsigned int size, int aligned_p)
6733 {
6734   /* ??? We only output .xword's for symbols and only then in environments
6735      where the assembler can handle them.  */
6736   if (aligned_p && size == 8
6737       && (GET_CODE (x) != CONST_INT && GET_CODE (x) != CONST_DOUBLE))
6738     {
6739       if (TARGET_V9)
6740         {
6741           assemble_integer_with_op ("\t.xword\t", x);
6742           return true;
6743         }
6744       else
6745         {
6746           assemble_aligned_integer (4, const0_rtx);
6747           assemble_aligned_integer (4, x);
6748           return true;
6749         }
6750     }
6751   return default_assemble_integer (x, size, aligned_p);
6752 }
6753 \f
6754 /* Return the value of a code used in the .proc pseudo-op that says
6755    what kind of result this function returns.  For non-C types, we pick
6756    the closest C type.  */
6757
6758 #ifndef SHORT_TYPE_SIZE
6759 #define SHORT_TYPE_SIZE (BITS_PER_UNIT * 2)
6760 #endif
6761
6762 #ifndef INT_TYPE_SIZE
6763 #define INT_TYPE_SIZE BITS_PER_WORD
6764 #endif
6765
6766 #ifndef LONG_TYPE_SIZE
6767 #define LONG_TYPE_SIZE BITS_PER_WORD
6768 #endif
6769
6770 #ifndef LONG_LONG_TYPE_SIZE
6771 #define LONG_LONG_TYPE_SIZE (BITS_PER_WORD * 2)
6772 #endif
6773
6774 #ifndef FLOAT_TYPE_SIZE
6775 #define FLOAT_TYPE_SIZE BITS_PER_WORD
6776 #endif
6777
6778 #ifndef DOUBLE_TYPE_SIZE
6779 #define DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
6780 #endif
6781
6782 #ifndef LONG_DOUBLE_TYPE_SIZE
6783 #define LONG_DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
6784 #endif
6785
6786 unsigned long
6787 sparc_type_code (register tree type)
6788 {
6789   register unsigned long qualifiers = 0;
6790   register unsigned shift;
6791
6792   /* Only the first 30 bits of the qualifier are valid.  We must refrain from
6793      setting more, since some assemblers will give an error for this.  Also,
6794      we must be careful to avoid shifts of 32 bits or more to avoid getting
6795      unpredictable results.  */
6796
6797   for (shift = 6; shift < 30; shift += 2, type = TREE_TYPE (type))
6798     {
6799       switch (TREE_CODE (type))
6800         {
6801         case ERROR_MARK:
6802           return qualifiers;
6803
6804         case ARRAY_TYPE:
6805           qualifiers |= (3 << shift);
6806           break;
6807
6808         case FUNCTION_TYPE:
6809         case METHOD_TYPE:
6810           qualifiers |= (2 << shift);
6811           break;
6812
6813         case POINTER_TYPE:
6814         case REFERENCE_TYPE:
6815         case OFFSET_TYPE:
6816           qualifiers |= (1 << shift);
6817           break;
6818
6819         case RECORD_TYPE:
6820           return (qualifiers | 8);
6821
6822         case UNION_TYPE:
6823         case QUAL_UNION_TYPE:
6824           return (qualifiers | 9);
6825
6826         case ENUMERAL_TYPE:
6827           return (qualifiers | 10);
6828
6829         case VOID_TYPE:
6830           return (qualifiers | 16);
6831
6832         case INTEGER_TYPE:
6833           /* If this is a range type, consider it to be the underlying
6834              type.  */
6835           if (TREE_TYPE (type) != 0)
6836             break;
6837
6838           /* Carefully distinguish all the standard types of C,
6839              without messing up if the language is not C.  We do this by
6840              testing TYPE_PRECISION and TYPE_UNSIGNED.  The old code used to
6841              look at both the names and the above fields, but that's redundant.
6842              Any type whose size is between two C types will be considered
6843              to be the wider of the two types.  Also, we do not have a
6844              special code to use for "long long", so anything wider than
6845              long is treated the same.  Note that we can't distinguish
6846              between "int" and "long" in this code if they are the same
6847              size, but that's fine, since neither can the assembler.  */
6848
6849           if (TYPE_PRECISION (type) <= CHAR_TYPE_SIZE)
6850             return (qualifiers | (TYPE_UNSIGNED (type) ? 12 : 2));
6851
6852           else if (TYPE_PRECISION (type) <= SHORT_TYPE_SIZE)
6853             return (qualifiers | (TYPE_UNSIGNED (type) ? 13 : 3));
6854
6855           else if (TYPE_PRECISION (type) <= INT_TYPE_SIZE)
6856             return (qualifiers | (TYPE_UNSIGNED (type) ? 14 : 4));
6857
6858           else
6859             return (qualifiers | (TYPE_UNSIGNED (type) ? 15 : 5));
6860
6861         case REAL_TYPE:
6862           /* If this is a range type, consider it to be the underlying
6863              type.  */
6864           if (TREE_TYPE (type) != 0)
6865             break;
6866
6867           /* Carefully distinguish all the standard types of C,
6868              without messing up if the language is not C.  */
6869
6870           if (TYPE_PRECISION (type) == FLOAT_TYPE_SIZE)
6871             return (qualifiers | 6);
6872
6873           else
6874             return (qualifiers | 7);
6875
6876         case COMPLEX_TYPE:      /* GNU Fortran COMPLEX type.  */
6877           /* ??? We need to distinguish between double and float complex types,
6878              but I don't know how yet because I can't reach this code from
6879              existing front-ends.  */
6880           return (qualifiers | 7);      /* Who knows? */
6881
6882         case VECTOR_TYPE:
6883         case CHAR_TYPE:         /* GNU Pascal CHAR type.  Not used in C.  */
6884         case BOOLEAN_TYPE:      /* GNU Fortran BOOLEAN type.  */
6885         case LANG_TYPE:         /* ? */
6886           return qualifiers;
6887
6888         default:
6889           gcc_unreachable ();           /* Not a type! */
6890         }
6891     }
6892
6893   return qualifiers;
6894 }
6895 \f
6896 /* Nested function support.  */
6897
6898 /* Emit RTL insns to initialize the variable parts of a trampoline.
6899    FNADDR is an RTX for the address of the function's pure code.
6900    CXT is an RTX for the static chain value for the function.
6901
6902    This takes 16 insns: 2 shifts & 2 ands (to split up addresses), 4 sethi
6903    (to load in opcodes), 4 iors (to merge address and opcodes), and 4 writes
6904    (to store insns).  This is a bit excessive.  Perhaps a different
6905    mechanism would be better here.
6906
6907    Emit enough FLUSH insns to synchronize the data and instruction caches.  */
6908
6909 void
6910 sparc_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
6911 {
6912   /* SPARC 32-bit trampoline:
6913
6914         sethi   %hi(fn), %g1
6915         sethi   %hi(static), %g2
6916         jmp     %g1+%lo(fn)
6917         or      %g2, %lo(static), %g2
6918
6919     SETHI i,r  = 00rr rrr1 00ii iiii iiii iiii iiii iiii
6920     JMPL r+i,d = 10dd ddd1 1100 0rrr rr1i iiii iiii iiii
6921    */
6922
6923   emit_move_insn
6924     (gen_rtx_MEM (SImode, plus_constant (tramp, 0)),
6925      expand_binop (SImode, ior_optab,
6926                    expand_shift (RSHIFT_EXPR, SImode, fnaddr,
6927                                  size_int (10), 0, 1),
6928                    GEN_INT (trunc_int_for_mode (0x03000000, SImode)),
6929                    NULL_RTX, 1, OPTAB_DIRECT));
6930
6931   emit_move_insn
6932     (gen_rtx_MEM (SImode, plus_constant (tramp, 4)),
6933      expand_binop (SImode, ior_optab,
6934                    expand_shift (RSHIFT_EXPR, SImode, cxt,
6935                                  size_int (10), 0, 1),
6936                    GEN_INT (trunc_int_for_mode (0x05000000, SImode)),
6937                    NULL_RTX, 1, OPTAB_DIRECT));
6938
6939   emit_move_insn
6940     (gen_rtx_MEM (SImode, plus_constant (tramp, 8)),
6941      expand_binop (SImode, ior_optab,
6942                    expand_and (SImode, fnaddr, GEN_INT (0x3ff), NULL_RTX),
6943                    GEN_INT (trunc_int_for_mode (0x81c06000, SImode)),
6944                    NULL_RTX, 1, OPTAB_DIRECT));
6945
6946   emit_move_insn
6947     (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
6948      expand_binop (SImode, ior_optab,
6949                    expand_and (SImode, cxt, GEN_INT (0x3ff), NULL_RTX),
6950                    GEN_INT (trunc_int_for_mode (0x8410a000, SImode)),
6951                    NULL_RTX, 1, OPTAB_DIRECT));
6952
6953   /* On UltraSPARC a flush flushes an entire cache line.  The trampoline is
6954      aligned on a 16 byte boundary so one flush clears it all.  */
6955   emit_insn (gen_flush (validize_mem (gen_rtx_MEM (SImode, tramp))));
6956   if (sparc_cpu != PROCESSOR_ULTRASPARC
6957       && sparc_cpu != PROCESSOR_ULTRASPARC3)
6958     emit_insn (gen_flush (validize_mem (gen_rtx_MEM (SImode,
6959                                                      plus_constant (tramp, 8)))));
6960
6961   /* Call __enable_execute_stack after writing onto the stack to make sure
6962      the stack address is accessible.  */
6963 #ifdef ENABLE_EXECUTE_STACK
6964   emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
6965                      LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
6966 #endif
6967
6968 }
6969
6970 /* The 64-bit version is simpler because it makes more sense to load the
6971    values as "immediate" data out of the trampoline.  It's also easier since
6972    we can read the PC without clobbering a register.  */
6973
6974 void
6975 sparc64_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
6976 {
6977   /* SPARC 64-bit trampoline:
6978
6979         rd      %pc, %g1
6980         ldx     [%g1+24], %g5
6981         jmp     %g5
6982         ldx     [%g1+16], %g5
6983         +16 bytes data
6984    */
6985
6986   emit_move_insn (gen_rtx_MEM (SImode, tramp),
6987                   GEN_INT (trunc_int_for_mode (0x83414000, SImode)));
6988   emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)),
6989                   GEN_INT (trunc_int_for_mode (0xca586018, SImode)));
6990   emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)),
6991                   GEN_INT (trunc_int_for_mode (0x81c14000, SImode)));
6992   emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
6993                   GEN_INT (trunc_int_for_mode (0xca586010, SImode)));
6994   emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 16)), cxt);
6995   emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 24)), fnaddr);
6996   emit_insn (gen_flushdi (validize_mem (gen_rtx_MEM (DImode, tramp))));
6997
6998   if (sparc_cpu != PROCESSOR_ULTRASPARC
6999       && sparc_cpu != PROCESSOR_ULTRASPARC3)
7000     emit_insn (gen_flushdi (validize_mem (gen_rtx_MEM (DImode, plus_constant (tramp, 8)))));
7001
7002   /* Call __enable_execute_stack after writing onto the stack to make sure
7003      the stack address is accessible.  */
7004 #ifdef ENABLE_EXECUTE_STACK
7005   emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
7006                      LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
7007 #endif
7008 }
7009 \f
7010 /* Adjust the cost of a scheduling dependency.  Return the new cost of
7011    a dependency LINK or INSN on DEP_INSN.  COST is the current cost.  */
7012
7013 static int
7014 supersparc_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
7015 {
7016   enum attr_type insn_type;
7017
7018   if (! recog_memoized (insn))
7019     return 0;
7020
7021   insn_type = get_attr_type (insn);
7022
7023   if (REG_NOTE_KIND (link) == 0)
7024     {
7025       /* Data dependency; DEP_INSN writes a register that INSN reads some
7026          cycles later.  */
7027
7028       /* if a load, then the dependence must be on the memory address;
7029          add an extra "cycle".  Note that the cost could be two cycles
7030          if the reg was written late in an instruction group; we ca not tell
7031          here.  */
7032       if (insn_type == TYPE_LOAD || insn_type == TYPE_FPLOAD)
7033         return cost + 3;
7034
7035       /* Get the delay only if the address of the store is the dependence.  */
7036       if (insn_type == TYPE_STORE || insn_type == TYPE_FPSTORE)
7037         {
7038           rtx pat = PATTERN(insn);
7039           rtx dep_pat = PATTERN (dep_insn);
7040
7041           if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
7042             return cost;  /* This should not happen!  */
7043
7044           /* The dependency between the two instructions was on the data that
7045              is being stored.  Assume that this implies that the address of the
7046              store is not dependent.  */
7047           if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
7048             return cost;
7049
7050           return cost + 3;  /* An approximation.  */
7051         }
7052
7053       /* A shift instruction cannot receive its data from an instruction
7054          in the same cycle; add a one cycle penalty.  */
7055       if (insn_type == TYPE_SHIFT)
7056         return cost + 3;   /* Split before cascade into shift.  */
7057     }
7058   else
7059     {
7060       /* Anti- or output- dependency; DEP_INSN reads/writes a register that
7061          INSN writes some cycles later.  */
7062
7063       /* These are only significant for the fpu unit; writing a fp reg before
7064          the fpu has finished with it stalls the processor.  */
7065
7066       /* Reusing an integer register causes no problems.  */
7067       if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
7068         return 0;
7069     }
7070
7071   return cost;
7072 }
7073
7074 static int
7075 hypersparc_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
7076 {
7077   enum attr_type insn_type, dep_type;
7078   rtx pat = PATTERN(insn);
7079   rtx dep_pat = PATTERN (dep_insn);
7080
7081   if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
7082     return cost;
7083
7084   insn_type = get_attr_type (insn);
7085   dep_type = get_attr_type (dep_insn);
7086
7087   switch (REG_NOTE_KIND (link))
7088     {
7089     case 0:
7090       /* Data dependency; DEP_INSN writes a register that INSN reads some
7091          cycles later.  */
7092
7093       switch (insn_type)
7094         {
7095         case TYPE_STORE:
7096         case TYPE_FPSTORE:
7097           /* Get the delay iff the address of the store is the dependence.  */
7098           if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
7099             return cost;
7100
7101           if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
7102             return cost;
7103           return cost + 3;
7104
7105         case TYPE_LOAD:
7106         case TYPE_SLOAD:
7107         case TYPE_FPLOAD:
7108           /* If a load, then the dependence must be on the memory address.  If
7109              the addresses aren't equal, then it might be a false dependency */
7110           if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE)
7111             {
7112               if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET
7113                   || GET_CODE (SET_DEST (dep_pat)) != MEM
7114                   || GET_CODE (SET_SRC (pat)) != MEM
7115                   || ! rtx_equal_p (XEXP (SET_DEST (dep_pat), 0),
7116                                     XEXP (SET_SRC (pat), 0)))
7117                 return cost + 2;
7118
7119               return cost + 8;
7120             }
7121           break;
7122
7123         case TYPE_BRANCH:
7124           /* Compare to branch latency is 0.  There is no benefit from
7125              separating compare and branch.  */
7126           if (dep_type == TYPE_COMPARE)
7127             return 0;
7128           /* Floating point compare to branch latency is less than
7129              compare to conditional move.  */
7130           if (dep_type == TYPE_FPCMP)
7131             return cost - 1;
7132           break;
7133         default:
7134           break;
7135         }
7136         break;
7137
7138     case REG_DEP_ANTI:
7139       /* Anti-dependencies only penalize the fpu unit.  */
7140       if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
7141         return 0;
7142       break;
7143
7144     default:
7145       break;
7146     }
7147
7148   return cost;
7149 }
7150
7151 static int
7152 sparc_adjust_cost(rtx insn, rtx link, rtx dep, int cost)
7153 {
7154   switch (sparc_cpu)
7155     {
7156     case PROCESSOR_SUPERSPARC:
7157       cost = supersparc_adjust_cost (insn, link, dep, cost);
7158       break;
7159     case PROCESSOR_HYPERSPARC:
7160     case PROCESSOR_SPARCLITE86X:
7161       cost = hypersparc_adjust_cost (insn, link, dep, cost);
7162       break;
7163     default:
7164       break;
7165     }
7166   return cost;
7167 }
7168
7169 static void
7170 sparc_sched_init (FILE *dump ATTRIBUTE_UNUSED,
7171                   int sched_verbose ATTRIBUTE_UNUSED,
7172                   int max_ready ATTRIBUTE_UNUSED)
7173 {
7174 }
7175
7176 static int
7177 sparc_use_sched_lookahead (void)
7178 {
7179   if (sparc_cpu == PROCESSOR_ULTRASPARC
7180       || sparc_cpu == PROCESSOR_ULTRASPARC3)
7181     return 4;
7182   if ((1 << sparc_cpu) &
7183       ((1 << PROCESSOR_SUPERSPARC) | (1 << PROCESSOR_HYPERSPARC) |
7184        (1 << PROCESSOR_SPARCLITE86X)))
7185     return 3;
7186   return 0;
7187 }
7188
7189 static int
7190 sparc_issue_rate (void)
7191 {
7192   switch (sparc_cpu)
7193     {
7194     default:
7195       return 1;
7196     case PROCESSOR_V9:
7197       /* Assume V9 processors are capable of at least dual-issue.  */
7198       return 2;
7199     case PROCESSOR_SUPERSPARC:
7200       return 3;
7201     case PROCESSOR_HYPERSPARC:
7202     case PROCESSOR_SPARCLITE86X:
7203       return 2;
7204     case PROCESSOR_ULTRASPARC:
7205     case PROCESSOR_ULTRASPARC3:
7206       return 4;
7207     }
7208 }
7209
7210 static int
7211 set_extends (rtx insn)
7212 {
7213   register rtx pat = PATTERN (insn);
7214
7215   switch (GET_CODE (SET_SRC (pat)))
7216     {
7217       /* Load and some shift instructions zero extend.  */
7218     case MEM:
7219     case ZERO_EXTEND:
7220       /* sethi clears the high bits */
7221     case HIGH:
7222       /* LO_SUM is used with sethi.  sethi cleared the high
7223          bits and the values used with lo_sum are positive */
7224     case LO_SUM:
7225       /* Store flag stores 0 or 1 */
7226     case LT: case LTU:
7227     case GT: case GTU:
7228     case LE: case LEU:
7229     case GE: case GEU:
7230     case EQ:
7231     case NE:
7232       return 1;
7233     case AND:
7234       {
7235         rtx op0 = XEXP (SET_SRC (pat), 0);
7236         rtx op1 = XEXP (SET_SRC (pat), 1);
7237         if (GET_CODE (op1) == CONST_INT)
7238           return INTVAL (op1) >= 0;
7239         if (GET_CODE (op0) != REG)
7240           return 0;
7241         if (sparc_check_64 (op0, insn) == 1)
7242           return 1;
7243         return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
7244       }
7245     case IOR:
7246     case XOR:
7247       {
7248         rtx op0 = XEXP (SET_SRC (pat), 0);
7249         rtx op1 = XEXP (SET_SRC (pat), 1);
7250         if (GET_CODE (op0) != REG || sparc_check_64 (op0, insn) <= 0)
7251           return 0;
7252         if (GET_CODE (op1) == CONST_INT)
7253           return INTVAL (op1) >= 0;
7254         return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
7255       }
7256     case LSHIFTRT:
7257       return GET_MODE (SET_SRC (pat)) == SImode;
7258       /* Positive integers leave the high bits zero.  */
7259     case CONST_DOUBLE:
7260       return ! (CONST_DOUBLE_LOW (SET_SRC (pat)) & 0x80000000);
7261     case CONST_INT:
7262       return ! (INTVAL (SET_SRC (pat)) & 0x80000000);
7263     case ASHIFTRT:
7264     case SIGN_EXTEND:
7265       return - (GET_MODE (SET_SRC (pat)) == SImode);
7266     case REG:
7267       return sparc_check_64 (SET_SRC (pat), insn);
7268     default:
7269       return 0;
7270     }
7271 }
7272
7273 /* We _ought_ to have only one kind per function, but...  */
7274 static GTY(()) rtx sparc_addr_diff_list;
7275 static GTY(()) rtx sparc_addr_list;
7276
7277 void
7278 sparc_defer_case_vector (rtx lab, rtx vec, int diff)
7279 {
7280   vec = gen_rtx_EXPR_LIST (VOIDmode, lab, vec);
7281   if (diff)
7282     sparc_addr_diff_list
7283       = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_diff_list);
7284   else
7285     sparc_addr_list = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_list);
7286 }
7287
7288 static void
7289 sparc_output_addr_vec (rtx vec)
7290 {
7291   rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
7292   int idx, vlen = XVECLEN (body, 0);
7293
7294 #ifdef ASM_OUTPUT_ADDR_VEC_START
7295   ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
7296 #endif
7297
7298 #ifdef ASM_OUTPUT_CASE_LABEL
7299   ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
7300                          NEXT_INSN (lab));
7301 #else
7302   (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
7303 #endif
7304
7305   for (idx = 0; idx < vlen; idx++)
7306     {
7307       ASM_OUTPUT_ADDR_VEC_ELT
7308         (asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
7309     }
7310
7311 #ifdef ASM_OUTPUT_ADDR_VEC_END
7312   ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
7313 #endif
7314 }
7315
7316 static void
7317 sparc_output_addr_diff_vec (rtx vec)
7318 {
7319   rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
7320   rtx base = XEXP (XEXP (body, 0), 0);
7321   int idx, vlen = XVECLEN (body, 1);
7322
7323 #ifdef ASM_OUTPUT_ADDR_VEC_START
7324   ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
7325 #endif
7326
7327 #ifdef ASM_OUTPUT_CASE_LABEL
7328   ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
7329                          NEXT_INSN (lab));
7330 #else
7331   (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
7332 #endif
7333
7334   for (idx = 0; idx < vlen; idx++)
7335     {
7336       ASM_OUTPUT_ADDR_DIFF_ELT
7337         (asm_out_file,
7338          body,
7339          CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
7340          CODE_LABEL_NUMBER (base));
7341     }
7342
7343 #ifdef ASM_OUTPUT_ADDR_VEC_END
7344   ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
7345 #endif
7346 }
7347
7348 static void
7349 sparc_output_deferred_case_vectors (void)
7350 {
7351   rtx t;
7352   int align;
7353
7354   if (sparc_addr_list == NULL_RTX
7355       && sparc_addr_diff_list == NULL_RTX)
7356     return;
7357
7358   /* Align to cache line in the function's code section.  */
7359   current_function_section (current_function_decl);
7360
7361   align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
7362   if (align > 0)
7363     ASM_OUTPUT_ALIGN (asm_out_file, align);
7364
7365   for (t = sparc_addr_list; t ; t = XEXP (t, 1))
7366     sparc_output_addr_vec (XEXP (t, 0));
7367   for (t = sparc_addr_diff_list; t ; t = XEXP (t, 1))
7368     sparc_output_addr_diff_vec (XEXP (t, 0));
7369
7370   sparc_addr_list = sparc_addr_diff_list = NULL_RTX;
7371 }
7372
7373 /* Return 0 if the high 32 bits of X (the low word of X, if DImode) are
7374    unknown.  Return 1 if the high bits are zero, -1 if the register is
7375    sign extended.  */
7376 int
7377 sparc_check_64 (rtx x, rtx insn)
7378 {
7379   /* If a register is set only once it is safe to ignore insns this
7380      code does not know how to handle.  The loop will either recognize
7381      the single set and return the correct value or fail to recognize
7382      it and return 0.  */
7383   int set_once = 0;
7384   rtx y = x;
7385
7386   gcc_assert (GET_CODE (x) == REG);
7387
7388   if (GET_MODE (x) == DImode)
7389     y = gen_rtx_REG (SImode, REGNO (x) + WORDS_BIG_ENDIAN);
7390
7391   if (flag_expensive_optimizations
7392       && REG_N_SETS (REGNO (y)) == 1)
7393     set_once = 1;
7394
7395   if (insn == 0)
7396     {
7397       if (set_once)
7398         insn = get_last_insn_anywhere ();
7399       else
7400         return 0;
7401     }
7402
7403   while ((insn = PREV_INSN (insn)))
7404     {
7405       switch (GET_CODE (insn))
7406         {
7407         case JUMP_INSN:
7408         case NOTE:
7409           break;
7410         case CODE_LABEL:
7411         case CALL_INSN:
7412         default:
7413           if (! set_once)
7414             return 0;
7415           break;
7416         case INSN:
7417           {
7418             rtx pat = PATTERN (insn);
7419             if (GET_CODE (pat) != SET)
7420               return 0;
7421             if (rtx_equal_p (x, SET_DEST (pat)))
7422               return set_extends (insn);
7423             if (y && rtx_equal_p (y, SET_DEST (pat)))
7424               return set_extends (insn);
7425             if (reg_overlap_mentioned_p (SET_DEST (pat), y))
7426               return 0;
7427           }
7428         }
7429     }
7430   return 0;
7431 }
7432
7433 /* Returns assembly code to perform a DImode shift using
7434    a 64-bit global or out register on SPARC-V8+.  */
7435 const char *
7436 output_v8plus_shift (rtx *operands, rtx insn, const char *opcode)
7437 {
7438   static char asm_code[60];
7439
7440   /* The scratch register is only required when the destination
7441      register is not a 64-bit global or out register.  */
7442   if (which_alternative != 2)
7443     operands[3] = operands[0];
7444
7445   /* We can only shift by constants <= 63. */
7446   if (GET_CODE (operands[2]) == CONST_INT)
7447     operands[2] = GEN_INT (INTVAL (operands[2]) & 0x3f);
7448
7449   if (GET_CODE (operands[1]) == CONST_INT)
7450     {
7451       output_asm_insn ("mov\t%1, %3", operands);
7452     }
7453   else
7454     {
7455       output_asm_insn ("sllx\t%H1, 32, %3", operands);
7456       if (sparc_check_64 (operands[1], insn) <= 0)
7457         output_asm_insn ("srl\t%L1, 0, %L1", operands);
7458       output_asm_insn ("or\t%L1, %3, %3", operands);
7459     }
7460
7461   strcpy(asm_code, opcode);
7462
7463   if (which_alternative != 2)
7464     return strcat (asm_code, "\t%0, %2, %L0\n\tsrlx\t%L0, 32, %H0");
7465   else
7466     return strcat (asm_code, "\t%3, %2, %3\n\tsrlx\t%3, 32, %H0\n\tmov\t%3, %L0");
7467 }
7468 \f
7469 /* Output rtl to increment the profiler label LABELNO
7470    for profiling a function entry.  */
7471
7472 void
7473 sparc_profile_hook (int labelno)
7474 {
7475   char buf[32];
7476   rtx lab, fun;
7477
7478   ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
7479   lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
7480   fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_FUNCTION);
7481
7482   emit_library_call (fun, LCT_NORMAL, VOIDmode, 1, lab, Pmode);
7483 }
7484 \f
7485 #ifdef OBJECT_FORMAT_ELF
7486 static void
7487 sparc_elf_asm_named_section (const char *name, unsigned int flags,
7488                              tree decl)
7489 {
7490   if (flags & SECTION_MERGE)
7491     {
7492       /* entsize cannot be expressed in this section attributes
7493          encoding style.  */
7494       default_elf_asm_named_section (name, flags, decl);
7495       return;
7496     }
7497
7498   fprintf (asm_out_file, "\t.section\t\"%s\"", name);
7499
7500   if (!(flags & SECTION_DEBUG))
7501     fputs (",#alloc", asm_out_file);
7502   if (flags & SECTION_WRITE)
7503     fputs (",#write", asm_out_file);
7504   if (flags & SECTION_TLS)
7505     fputs (",#tls", asm_out_file);
7506   if (flags & SECTION_CODE)
7507     fputs (",#execinstr", asm_out_file);
7508
7509   /* ??? Handle SECTION_BSS.  */
7510
7511   fputc ('\n', asm_out_file);
7512 }
7513 #endif /* OBJECT_FORMAT_ELF */
7514
7515 /* We do not allow indirect calls to be optimized into sibling calls.
7516
7517    We cannot use sibling calls when delayed branches are disabled
7518    because they will likely require the call delay slot to be filled.
7519
7520    Also, on SPARC 32-bit we cannot emit a sibling call when the
7521    current function returns a structure.  This is because the "unimp
7522    after call" convention would cause the callee to return to the
7523    wrong place.  The generic code already disallows cases where the
7524    function being called returns a structure.
7525
7526    It may seem strange how this last case could occur.  Usually there
7527    is code after the call which jumps to epilogue code which dumps the
7528    return value into the struct return area.  That ought to invalidate
7529    the sibling call right?  Well, in the C++ case we can end up passing
7530    the pointer to the struct return area to a constructor (which returns
7531    void) and then nothing else happens.  Such a sibling call would look
7532    valid without the added check here.  */
7533 static bool
7534 sparc_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
7535 {
7536   return (decl
7537           && flag_delayed_branch
7538           && (TARGET_ARCH64 || ! current_function_returns_struct));
7539 }
7540 \f
7541 /* libfunc renaming.  */
7542 #include "config/gofast.h"
7543
7544 static void
7545 sparc_init_libfuncs (void)
7546 {
7547   if (TARGET_ARCH32)
7548     {
7549       /* Use the subroutines that Sun's library provides for integer
7550          multiply and divide.  The `*' prevents an underscore from
7551          being prepended by the compiler. .umul is a little faster
7552          than .mul.  */
7553       set_optab_libfunc (smul_optab, SImode, "*.umul");
7554       set_optab_libfunc (sdiv_optab, SImode, "*.div");
7555       set_optab_libfunc (udiv_optab, SImode, "*.udiv");
7556       set_optab_libfunc (smod_optab, SImode, "*.rem");
7557       set_optab_libfunc (umod_optab, SImode, "*.urem");
7558
7559       /* TFmode arithmetic.  These names are part of the SPARC 32bit ABI.  */
7560       set_optab_libfunc (add_optab, TFmode, "_Q_add");
7561       set_optab_libfunc (sub_optab, TFmode, "_Q_sub");
7562       set_optab_libfunc (neg_optab, TFmode, "_Q_neg");
7563       set_optab_libfunc (smul_optab, TFmode, "_Q_mul");
7564       set_optab_libfunc (sdiv_optab, TFmode, "_Q_div");
7565
7566       /* We can define the TFmode sqrt optab only if TARGET_FPU.  This
7567          is because with soft-float, the SFmode and DFmode sqrt
7568          instructions will be absent, and the compiler will notice and
7569          try to use the TFmode sqrt instruction for calls to the
7570          builtin function sqrt, but this fails.  */
7571       if (TARGET_FPU)
7572         set_optab_libfunc (sqrt_optab, TFmode, "_Q_sqrt");
7573
7574       set_optab_libfunc (eq_optab, TFmode, "_Q_feq");
7575       set_optab_libfunc (ne_optab, TFmode, "_Q_fne");
7576       set_optab_libfunc (gt_optab, TFmode, "_Q_fgt");
7577       set_optab_libfunc (ge_optab, TFmode, "_Q_fge");
7578       set_optab_libfunc (lt_optab, TFmode, "_Q_flt");
7579       set_optab_libfunc (le_optab, TFmode, "_Q_fle");
7580
7581       set_conv_libfunc (sext_optab,   TFmode, SFmode, "_Q_stoq");
7582       set_conv_libfunc (sext_optab,   TFmode, DFmode, "_Q_dtoq");
7583       set_conv_libfunc (trunc_optab,  SFmode, TFmode, "_Q_qtos");
7584       set_conv_libfunc (trunc_optab,  DFmode, TFmode, "_Q_qtod");
7585
7586       set_conv_libfunc (sfix_optab,   SImode, TFmode, "_Q_qtoi");
7587       set_conv_libfunc (ufix_optab,   SImode, TFmode, "_Q_qtou");
7588       set_conv_libfunc (sfloat_optab, TFmode, SImode, "_Q_itoq");
7589
7590       if (DITF_CONVERSION_LIBFUNCS)
7591         {
7592           set_conv_libfunc (sfix_optab,   DImode, TFmode, "_Q_qtoll");
7593           set_conv_libfunc (ufix_optab,   DImode, TFmode, "_Q_qtoull");
7594           set_conv_libfunc (sfloat_optab, TFmode, DImode, "_Q_lltoq");
7595         }
7596
7597       if (SUN_CONVERSION_LIBFUNCS)
7598         {
7599           set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftoll");
7600           set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoull");
7601           set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtoll");
7602           set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoull");
7603         }
7604     }
7605   if (TARGET_ARCH64)
7606     {
7607       /* In the SPARC 64bit ABI, SImode multiply and divide functions
7608          do not exist in the library.  Make sure the compiler does not
7609          emit calls to them by accident.  (It should always use the
7610          hardware instructions.)  */
7611       set_optab_libfunc (smul_optab, SImode, 0);
7612       set_optab_libfunc (sdiv_optab, SImode, 0);
7613       set_optab_libfunc (udiv_optab, SImode, 0);
7614       set_optab_libfunc (smod_optab, SImode, 0);
7615       set_optab_libfunc (umod_optab, SImode, 0);
7616
7617       if (SUN_INTEGER_MULTIPLY_64)
7618         {
7619           set_optab_libfunc (smul_optab, DImode, "__mul64");
7620           set_optab_libfunc (sdiv_optab, DImode, "__div64");
7621           set_optab_libfunc (udiv_optab, DImode, "__udiv64");
7622           set_optab_libfunc (smod_optab, DImode, "__rem64");
7623           set_optab_libfunc (umod_optab, DImode, "__urem64");
7624         }
7625
7626       if (SUN_CONVERSION_LIBFUNCS)
7627         {
7628           set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftol");
7629           set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoul");
7630           set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtol");
7631           set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoul");
7632         }
7633     }
7634
7635   gofast_maybe_init_libfuncs ();
7636 }
7637 \f
7638 #define def_builtin(NAME, CODE, TYPE) \
7639   lang_hooks.builtin_function((NAME), (TYPE), (CODE), BUILT_IN_MD, NULL, \
7640                               NULL_TREE)
7641
7642 /* Implement the TARGET_INIT_BUILTINS target hook.
7643    Create builtin functions for special SPARC instructions.  */
7644
7645 static void
7646 sparc_init_builtins (void)
7647 {
7648   if (TARGET_VIS)
7649     sparc_vis_init_builtins ();
7650 }
7651
7652 /* Create builtin functions for VIS 1.0 instructions.  */
7653
7654 static void
7655 sparc_vis_init_builtins (void)
7656 {
7657   tree v4qi = build_vector_type (unsigned_intQI_type_node, 4);
7658   tree v8qi = build_vector_type (unsigned_intQI_type_node, 8);
7659   tree v4hi = build_vector_type (intHI_type_node, 4);
7660   tree v2hi = build_vector_type (intHI_type_node, 2);
7661   tree v2si = build_vector_type (intSI_type_node, 2);
7662
7663   tree v4qi_ftype_v4hi = build_function_type_list (v4qi, v4hi, 0);
7664   tree v8qi_ftype_v2si_v8qi = build_function_type_list (v8qi, v2si, v8qi, 0);
7665   tree v2hi_ftype_v2si = build_function_type_list (v2hi, v2si, 0);
7666   tree v4hi_ftype_v4qi = build_function_type_list (v4hi, v4qi, 0);
7667   tree v8qi_ftype_v4qi_v4qi = build_function_type_list (v8qi, v4qi, v4qi, 0);
7668   tree v4hi_ftype_v4qi_v4hi = build_function_type_list (v4hi, v4qi, v4hi, 0);
7669   tree v4hi_ftype_v4qi_v2hi = build_function_type_list (v4hi, v4qi, v2hi, 0);
7670   tree v2si_ftype_v4qi_v2hi = build_function_type_list (v2si, v4qi, v2hi, 0);
7671   tree v4hi_ftype_v8qi_v4hi = build_function_type_list (v4hi, v8qi, v4hi, 0);
7672   tree v4hi_ftype_v4hi_v4hi = build_function_type_list (v4hi, v4hi, v4hi, 0);
7673   tree v2si_ftype_v2si_v2si = build_function_type_list (v2si, v2si, v2si, 0);
7674   tree v8qi_ftype_v8qi_v8qi = build_function_type_list (v8qi, v8qi, v8qi, 0);
7675   tree di_ftype_v8qi_v8qi_di = build_function_type_list (intDI_type_node,
7676                                                          v8qi, v8qi,
7677                                                          intDI_type_node, 0);
7678   tree di_ftype_di_di = build_function_type_list (intDI_type_node,
7679                                                   intDI_type_node,
7680                                                   intDI_type_node, 0);
7681   tree ptr_ftype_ptr_si = build_function_type_list (ptr_type_node,
7682                                                     ptr_type_node,
7683                                                     intSI_type_node, 0);
7684   tree ptr_ftype_ptr_di = build_function_type_list (ptr_type_node,
7685                                                     ptr_type_node,
7686                                                     intDI_type_node, 0);
7687
7688   /* Packing and expanding vectors.  */
7689   def_builtin ("__builtin_vis_fpack16", CODE_FOR_fpack16_vis, v4qi_ftype_v4hi);
7690   def_builtin ("__builtin_vis_fpack32", CODE_FOR_fpack32_vis,
7691                v8qi_ftype_v2si_v8qi);
7692   def_builtin ("__builtin_vis_fpackfix", CODE_FOR_fpackfix_vis,
7693                v2hi_ftype_v2si);
7694   def_builtin ("__builtin_vis_fexpand", CODE_FOR_fexpand_vis, v4hi_ftype_v4qi);
7695   def_builtin ("__builtin_vis_fpmerge", CODE_FOR_fpmerge_vis,
7696                v8qi_ftype_v4qi_v4qi);
7697
7698   /* Multiplications.  */
7699   def_builtin ("__builtin_vis_fmul8x16", CODE_FOR_fmul8x16_vis,
7700                v4hi_ftype_v4qi_v4hi);
7701   def_builtin ("__builtin_vis_fmul8x16au", CODE_FOR_fmul8x16au_vis,
7702                v4hi_ftype_v4qi_v2hi);
7703   def_builtin ("__builtin_vis_fmul8x16al", CODE_FOR_fmul8x16al_vis,
7704                v4hi_ftype_v4qi_v2hi);
7705   def_builtin ("__builtin_vis_fmul8sux16", CODE_FOR_fmul8sux16_vis,
7706                v4hi_ftype_v8qi_v4hi);
7707   def_builtin ("__builtin_vis_fmul8ulx16", CODE_FOR_fmul8ulx16_vis,
7708                v4hi_ftype_v8qi_v4hi);
7709   def_builtin ("__builtin_vis_fmuld8sux16", CODE_FOR_fmuld8sux16_vis,
7710                v2si_ftype_v4qi_v2hi);
7711   def_builtin ("__builtin_vis_fmuld8ulx16", CODE_FOR_fmuld8ulx16_vis,
7712                v2si_ftype_v4qi_v2hi);
7713
7714   /* Data aligning.  */
7715   def_builtin ("__builtin_vis_faligndatav4hi", CODE_FOR_faligndatav4hi_vis,
7716                v4hi_ftype_v4hi_v4hi);
7717   def_builtin ("__builtin_vis_faligndatav8qi", CODE_FOR_faligndatav8qi_vis,
7718                v8qi_ftype_v8qi_v8qi);
7719   def_builtin ("__builtin_vis_faligndatav2si", CODE_FOR_faligndatav2si_vis,
7720                v2si_ftype_v2si_v2si);
7721   def_builtin ("__builtin_vis_faligndatadi", CODE_FOR_faligndatadi_vis,
7722                di_ftype_di_di);
7723   if (TARGET_ARCH64)
7724     def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrdi_vis,
7725                  ptr_ftype_ptr_di);
7726   else
7727     def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrsi_vis,
7728                  ptr_ftype_ptr_si);
7729
7730   /* Pixel distance.  */
7731   def_builtin ("__builtin_vis_pdist", CODE_FOR_pdist_vis,
7732                di_ftype_v8qi_v8qi_di);
7733 }
7734
7735 /* Handle TARGET_EXPAND_BUILTIN target hook.
7736    Expand builtin functions for sparc instrinsics.  */
7737
7738 static rtx
7739 sparc_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
7740                       enum machine_mode tmode, int ignore ATTRIBUTE_UNUSED)
7741 {
7742   tree arglist;
7743   tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
7744   unsigned int icode = DECL_FUNCTION_CODE (fndecl);
7745   rtx pat, op[4];
7746   enum machine_mode mode[4];
7747   int arg_count = 0;
7748
7749   mode[arg_count] = tmode;
7750
7751   if (target == 0
7752       || GET_MODE (target) != tmode
7753       || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
7754     op[arg_count] = gen_reg_rtx (tmode);
7755   else
7756     op[arg_count] = target;
7757
7758   for (arglist = TREE_OPERAND (exp, 1); arglist;
7759        arglist = TREE_CHAIN (arglist))
7760     {
7761       tree arg = TREE_VALUE (arglist);
7762
7763       arg_count++;
7764       mode[arg_count] = insn_data[icode].operand[arg_count].mode;
7765       op[arg_count] = expand_expr (arg, NULL_RTX, VOIDmode, 0);
7766
7767       if (! (*insn_data[icode].operand[arg_count].predicate) (op[arg_count],
7768                                                               mode[arg_count]))
7769         op[arg_count] = copy_to_mode_reg (mode[arg_count], op[arg_count]);
7770     }
7771
7772   switch (arg_count)
7773     {
7774     case 1:
7775       pat = GEN_FCN (icode) (op[0], op[1]);
7776       break;
7777     case 2:
7778       pat = GEN_FCN (icode) (op[0], op[1], op[2]);
7779       break;
7780     case 3:
7781       pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
7782       break;
7783     default:
7784       gcc_unreachable ();
7785     }
7786
7787   if (!pat)
7788     return NULL_RTX;
7789
7790   emit_insn (pat);
7791
7792   return op[0];
7793 }
7794 \f
7795 int
7796 sparc_extra_constraint_check (rtx op, int c, int strict)
7797 {
7798   int reload_ok_mem;
7799
7800   if (TARGET_ARCH64
7801       && (c == 'T' || c == 'U'))
7802     return 0;
7803
7804   switch (c)
7805     {
7806     case 'Q':
7807       return fp_sethi_p (op);
7808
7809     case 'R':
7810       return fp_mov_p (op);
7811
7812     case 'S':
7813       return fp_high_losum_p (op);
7814
7815     case 'U':
7816       if (! strict
7817           || (GET_CODE (op) == REG
7818               && (REGNO (op) < FIRST_PSEUDO_REGISTER
7819                   || reg_renumber[REGNO (op)] >= 0)))
7820         return register_ok_for_ldd (op);
7821
7822       return 0;
7823
7824     case 'W':
7825     case 'T':
7826       break;
7827
7828     case 'Y':
7829       return const_zero_operand (op, GET_MODE (op));
7830
7831     default:
7832       return 0;
7833     }
7834
7835   /* Our memory extra constraints have to emulate the
7836      behavior of 'm' and 'o' in order for reload to work
7837      correctly.  */
7838   if (GET_CODE (op) == MEM)
7839     {
7840       reload_ok_mem = 0;
7841       if ((TARGET_ARCH64 || mem_min_alignment (op, 8))
7842           && (! strict
7843               || strict_memory_address_p (Pmode, XEXP (op, 0))))
7844         reload_ok_mem = 1;
7845     }
7846   else
7847     {
7848       reload_ok_mem = (reload_in_progress
7849                        && GET_CODE (op) == REG
7850                        && REGNO (op) >= FIRST_PSEUDO_REGISTER
7851                        && reg_renumber [REGNO (op)] < 0);
7852     }
7853
7854   return reload_ok_mem;
7855 }
7856
7857 /* ??? This duplicates information provided to the compiler by the
7858    ??? scheduler description.  Some day, teach genautomata to output
7859    ??? the latencies and then CSE will just use that.  */
7860
7861 static bool
7862 sparc_rtx_costs (rtx x, int code, int outer_code, int *total)
7863 {
7864   enum machine_mode mode = GET_MODE (x);
7865   bool float_mode_p = FLOAT_MODE_P (mode);
7866
7867   switch (code)
7868     {
7869     case CONST_INT:
7870       if (INTVAL (x) < 0x1000 && INTVAL (x) >= -0x1000)
7871         {
7872           *total = 0;
7873           return true;
7874         }
7875       /* FALLTHRU */
7876
7877     case HIGH:
7878       *total = 2;
7879       return true;
7880
7881     case CONST:
7882     case LABEL_REF:
7883     case SYMBOL_REF:
7884       *total = 4;
7885       return true;
7886
7887     case CONST_DOUBLE:
7888       if (GET_MODE (x) == VOIDmode
7889           && ((CONST_DOUBLE_HIGH (x) == 0
7890                && CONST_DOUBLE_LOW (x) < 0x1000)
7891               || (CONST_DOUBLE_HIGH (x) == -1
7892                   && CONST_DOUBLE_LOW (x) < 0
7893                   && CONST_DOUBLE_LOW (x) >= -0x1000)))
7894         *total = 0;
7895       else
7896         *total = 8;
7897       return true;
7898
7899     case MEM:
7900       /* If outer-code was a sign or zero extension, a cost
7901          of COSTS_N_INSNS (1) was already added in.  This is
7902          why we are subtracting it back out.  */
7903       if (outer_code == ZERO_EXTEND)
7904         {
7905           *total = sparc_costs->int_zload - COSTS_N_INSNS (1);
7906         }
7907       else if (outer_code == SIGN_EXTEND)
7908         {
7909           *total = sparc_costs->int_sload - COSTS_N_INSNS (1);
7910         }
7911       else if (float_mode_p)
7912         {
7913           *total = sparc_costs->float_load;
7914         }
7915       else
7916         {
7917           *total = sparc_costs->int_load;
7918         }
7919
7920       return true;
7921
7922     case PLUS:
7923     case MINUS:
7924       if (float_mode_p)
7925         *total = sparc_costs->float_plusminus;
7926       else
7927         *total = COSTS_N_INSNS (1);
7928       return false;
7929
7930     case MULT:
7931       if (float_mode_p)
7932         *total = sparc_costs->float_mul;
7933       else if (! TARGET_HARD_MUL)
7934         *total = COSTS_N_INSNS (25);
7935       else
7936         {
7937           int bit_cost;
7938
7939           bit_cost = 0;
7940           if (sparc_costs->int_mul_bit_factor)
7941             {
7942               int nbits;
7943
7944               if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7945                 {
7946                   unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
7947                   for (nbits = 0; value != 0; value &= value - 1)
7948                     nbits++;
7949                 }
7950               else if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
7951                        && GET_MODE (XEXP (x, 1)) == VOIDmode)
7952                 {
7953                   rtx x1 = XEXP (x, 1);
7954                   unsigned HOST_WIDE_INT value1 = CONST_DOUBLE_LOW (x1);
7955                   unsigned HOST_WIDE_INT value2 = CONST_DOUBLE_HIGH (x1);
7956
7957                   for (nbits = 0; value1 != 0; value1 &= value1 - 1)
7958                     nbits++;
7959                   for (; value2 != 0; value2 &= value2 - 1)
7960                     nbits++;
7961                 }
7962               else
7963                 nbits = 7;
7964
7965               if (nbits < 3)
7966                 nbits = 3;
7967               bit_cost = (nbits - 3) / sparc_costs->int_mul_bit_factor;
7968               bit_cost = COSTS_N_INSNS (bit_cost);
7969             }
7970
7971           if (mode == DImode)
7972             *total = sparc_costs->int_mulX + bit_cost;
7973           else
7974             *total = sparc_costs->int_mul + bit_cost;
7975         }
7976       return false;
7977
7978     case ASHIFT:
7979     case ASHIFTRT:
7980     case LSHIFTRT:
7981       *total = COSTS_N_INSNS (1) + sparc_costs->shift_penalty;
7982       return false;
7983
7984     case DIV:
7985     case UDIV:
7986     case MOD:
7987     case UMOD:
7988       if (float_mode_p)
7989         {
7990           if (mode == DFmode)
7991             *total = sparc_costs->float_div_df;
7992           else
7993             *total = sparc_costs->float_div_sf;
7994         }
7995       else
7996         {
7997           if (mode == DImode)
7998             *total = sparc_costs->int_divX;
7999           else
8000             *total = sparc_costs->int_div;
8001         }
8002       return false;
8003
8004     case NEG:
8005       if (! float_mode_p)
8006         {
8007           *total = COSTS_N_INSNS (1);
8008           return false;
8009         }
8010       /* FALLTHRU */
8011
8012     case ABS:
8013     case FLOAT:
8014     case UNSIGNED_FLOAT:
8015     case FIX:
8016     case UNSIGNED_FIX:
8017     case FLOAT_EXTEND:
8018     case FLOAT_TRUNCATE:
8019       *total = sparc_costs->float_move;
8020       return false;
8021
8022     case SQRT:
8023       if (mode == DFmode)
8024         *total = sparc_costs->float_sqrt_df;
8025       else
8026         *total = sparc_costs->float_sqrt_sf;
8027       return false;
8028
8029     case COMPARE:
8030       if (float_mode_p)
8031         *total = sparc_costs->float_cmp;
8032       else
8033         *total = COSTS_N_INSNS (1);
8034       return false;
8035
8036     case IF_THEN_ELSE:
8037       if (float_mode_p)
8038         *total = sparc_costs->float_cmove;
8039       else
8040         *total = sparc_costs->int_cmove;
8041       return false;
8042
8043     case IOR:
8044       /* Handle the NAND vector patterns.  */
8045       if (sparc_vector_mode_supported_p (GET_MODE (x))
8046           && GET_CODE (XEXP (x, 0)) == NOT
8047           && GET_CODE (XEXP (x, 1)) == NOT)
8048         {
8049           *total = COSTS_N_INSNS (1);
8050           return true;
8051         }
8052       else
8053         return false;
8054
8055     default:
8056       return false;
8057     }
8058 }
8059
8060 /* Emit the sequence of insns SEQ while preserving the register REG.  */
8061
8062 static void
8063 emit_and_preserve (rtx seq, rtx reg)
8064 {
8065   rtx slot = gen_rtx_MEM (word_mode,
8066                           plus_constant (stack_pointer_rtx, SPARC_STACK_BIAS));
8067
8068   emit_insn (gen_stack_pointer_dec (GEN_INT (STACK_BOUNDARY/BITS_PER_UNIT)));
8069   emit_insn (gen_rtx_SET (VOIDmode, slot, reg));
8070   emit_insn (seq);
8071   emit_insn (gen_rtx_SET (VOIDmode, reg, slot));
8072   emit_insn (gen_stack_pointer_inc (GEN_INT (STACK_BOUNDARY/BITS_PER_UNIT)));
8073 }
8074
8075 /* Output the assembler code for a thunk function.  THUNK_DECL is the
8076    declaration for the thunk function itself, FUNCTION is the decl for
8077    the target function.  DELTA is an immediate constant offset to be
8078    added to THIS.  If VCALL_OFFSET is nonzero, the word at address
8079    (*THIS + VCALL_OFFSET) should be additionally added to THIS.  */
8080
8081 static void
8082 sparc_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
8083                        HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
8084                        tree function)
8085 {
8086   rtx this, insn, funexp;
8087   unsigned int int_arg_first;
8088
8089   reload_completed = 1;
8090   epilogue_completed = 1;
8091   no_new_pseudos = 1;
8092   reset_block_changes ();
8093
8094   emit_note (NOTE_INSN_PROLOGUE_END);
8095
8096   if (flag_delayed_branch)
8097     {
8098       /* We will emit a regular sibcall below, so we need to instruct
8099          output_sibcall that we are in a leaf function.  */
8100       sparc_leaf_function_p = current_function_uses_only_leaf_regs = 1;
8101
8102       /* This will cause final.c to invoke leaf_renumber_regs so we
8103          must behave as if we were in a not-yet-leafified function.  */
8104       int_arg_first = SPARC_INCOMING_INT_ARG_FIRST;
8105     }
8106   else
8107     {
8108       /* We will emit the sibcall manually below, so we will need to
8109          manually spill non-leaf registers.  */
8110       sparc_leaf_function_p = current_function_uses_only_leaf_regs = 0;
8111
8112       /* We really are in a leaf function.  */
8113       int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
8114     }
8115
8116   /* Find the "this" pointer.  Normally in %o0, but in ARCH64 if the function
8117      returns a structure, the structure return pointer is there instead.  */
8118   if (TARGET_ARCH64 && aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
8119     this = gen_rtx_REG (Pmode, int_arg_first + 1);
8120   else
8121     this = gen_rtx_REG (Pmode, int_arg_first);
8122
8123   /* Add DELTA.  When possible use a plain add, otherwise load it into
8124      a register first.  */
8125   if (delta)
8126     {
8127       rtx delta_rtx = GEN_INT (delta);
8128
8129       if (! SPARC_SIMM13_P (delta))
8130         {
8131           rtx scratch = gen_rtx_REG (Pmode, 1);
8132           emit_move_insn (scratch, delta_rtx);
8133           delta_rtx = scratch;
8134         }
8135
8136       /* THIS += DELTA.  */
8137       emit_insn (gen_add2_insn (this, delta_rtx));
8138     }
8139
8140   /* Add the word at address (*THIS + VCALL_OFFSET).  */
8141   if (vcall_offset)
8142     {
8143       rtx vcall_offset_rtx = GEN_INT (vcall_offset);
8144       rtx scratch = gen_rtx_REG (Pmode, 1);
8145
8146       gcc_assert (vcall_offset < 0);
8147
8148       /* SCRATCH = *THIS.  */
8149       emit_move_insn (scratch, gen_rtx_MEM (Pmode, this));
8150
8151       /* Prepare for adding VCALL_OFFSET.  The difficulty is that we
8152          may not have any available scratch register at this point.  */
8153       if (SPARC_SIMM13_P (vcall_offset))
8154         ;
8155       /* This is the case if ARCH64 (unless -ffixed-g5 is passed).  */
8156       else if (! fixed_regs[5]
8157                /* The below sequence is made up of at least 2 insns,
8158                   while the default method may need only one.  */
8159                && vcall_offset < -8192)
8160         {
8161           rtx scratch2 = gen_rtx_REG (Pmode, 5);
8162           emit_move_insn (scratch2, vcall_offset_rtx);
8163           vcall_offset_rtx = scratch2;
8164         }
8165       else
8166         {
8167           rtx increment = GEN_INT (-4096);
8168
8169           /* VCALL_OFFSET is a negative number whose typical range can be
8170              estimated as -32768..0 in 32-bit mode.  In almost all cases
8171              it is therefore cheaper to emit multiple add insns than
8172              spilling and loading the constant into a register (at least
8173              6 insns).  */
8174           while (! SPARC_SIMM13_P (vcall_offset))
8175             {
8176               emit_insn (gen_add2_insn (scratch, increment));
8177               vcall_offset += 4096;
8178             }
8179           vcall_offset_rtx = GEN_INT (vcall_offset); /* cannot be 0 */
8180         }
8181
8182       /* SCRATCH = *(*THIS + VCALL_OFFSET).  */
8183       emit_move_insn (scratch, gen_rtx_MEM (Pmode,
8184                                             gen_rtx_PLUS (Pmode,
8185                                                           scratch,
8186                                                           vcall_offset_rtx)));
8187
8188       /* THIS += *(*THIS + VCALL_OFFSET).  */
8189       emit_insn (gen_add2_insn (this, scratch));
8190     }
8191
8192   /* Generate a tail call to the target function.  */
8193   if (! TREE_USED (function))
8194     {
8195       assemble_external (function);
8196       TREE_USED (function) = 1;
8197     }
8198   funexp = XEXP (DECL_RTL (function), 0);
8199
8200   if (flag_delayed_branch)
8201     {
8202       funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
8203       insn = emit_call_insn (gen_sibcall (funexp));
8204       SIBLING_CALL_P (insn) = 1;
8205     }
8206   else
8207     {
8208       /* The hoops we have to jump through in order to generate a sibcall
8209          without using delay slots...  */
8210       rtx spill_reg, seq, scratch = gen_rtx_REG (Pmode, 1);
8211
8212       if (flag_pic)
8213         {
8214           spill_reg = gen_rtx_REG (word_mode, 15);  /* %o7 */
8215           start_sequence ();
8216           /* Delay emitting the PIC helper function because it needs to
8217              change the section and we are emitting assembly code.  */
8218           load_pic_register (true);  /* clobbers %o7 */
8219           scratch = legitimize_pic_address (funexp, Pmode, scratch);
8220           seq = get_insns ();
8221           end_sequence ();
8222           emit_and_preserve (seq, spill_reg);
8223         }
8224       else if (TARGET_ARCH32)
8225         {
8226           emit_insn (gen_rtx_SET (VOIDmode,
8227                                   scratch,
8228                                   gen_rtx_HIGH (SImode, funexp)));
8229           emit_insn (gen_rtx_SET (VOIDmode,
8230                                   scratch,
8231                                   gen_rtx_LO_SUM (SImode, scratch, funexp)));
8232         }
8233       else  /* TARGET_ARCH64 */
8234         {
8235           switch (sparc_cmodel)
8236             {
8237             case CM_MEDLOW:
8238             case CM_MEDMID:
8239               /* The destination can serve as a temporary.  */
8240               sparc_emit_set_symbolic_const64 (scratch, funexp, scratch);
8241               break;
8242
8243             case CM_MEDANY:
8244             case CM_EMBMEDANY:
8245               /* The destination cannot serve as a temporary.  */
8246               spill_reg = gen_rtx_REG (DImode, 15);  /* %o7 */
8247               start_sequence ();
8248               sparc_emit_set_symbolic_const64 (scratch, funexp, spill_reg);
8249               seq = get_insns ();
8250               end_sequence ();
8251               emit_and_preserve (seq, spill_reg);
8252               break;
8253
8254             default:
8255               gcc_unreachable ();
8256             }
8257         }
8258
8259       emit_jump_insn (gen_indirect_jump (scratch));
8260     }
8261
8262   emit_barrier ();
8263
8264   /* Run just enough of rest_of_compilation to get the insns emitted.
8265      There's not really enough bulk here to make other passes such as
8266      instruction scheduling worth while.  Note that use_thunk calls
8267      assemble_start_function and assemble_end_function.  */
8268   insn = get_insns ();
8269   insn_locators_initialize ();
8270   shorten_branches (insn);
8271   final_start_function (insn, file, 1);
8272   final (insn, file, 1);
8273   final_end_function ();
8274
8275   reload_completed = 0;
8276   epilogue_completed = 0;
8277   no_new_pseudos = 0;
8278 }
8279
8280 /* Return true if sparc_output_mi_thunk would be able to output the
8281    assembler code for the thunk function specified by the arguments
8282    it is passed, and false otherwise.  */
8283 static bool
8284 sparc_can_output_mi_thunk (tree thunk_fndecl ATTRIBUTE_UNUSED,
8285                            HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
8286                            HOST_WIDE_INT vcall_offset,
8287                            tree function ATTRIBUTE_UNUSED)
8288 {
8289   /* Bound the loop used in the default method above.  */
8290   return (vcall_offset >= -32768 || ! fixed_regs[5]);
8291 }
8292
8293 /* How to allocate a 'struct machine_function'.  */
8294
8295 static struct machine_function *
8296 sparc_init_machine_status (void)
8297 {
8298   return ggc_alloc_cleared (sizeof (struct machine_function));
8299 }
8300
8301 /* Locate some local-dynamic symbol still in use by this function
8302    so that we can print its name in local-dynamic base patterns.  */
8303
8304 static const char *
8305 get_some_local_dynamic_name (void)
8306 {
8307   rtx insn;
8308
8309   if (cfun->machine->some_ld_name)
8310     return cfun->machine->some_ld_name;
8311
8312   for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
8313     if (INSN_P (insn)
8314         && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
8315       return cfun->machine->some_ld_name;
8316
8317   gcc_unreachable ();
8318 }
8319
8320 static int
8321 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
8322 {
8323   rtx x = *px;
8324
8325   if (x
8326       && GET_CODE (x) == SYMBOL_REF
8327       && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
8328     {
8329       cfun->machine->some_ld_name = XSTR (x, 0);
8330       return 1;
8331     }
8332
8333   return 0;
8334 }
8335
8336 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
8337    This is called from dwarf2out.c to emit call frame instructions
8338    for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
8339 static void
8340 sparc_dwarf_handle_frame_unspec (const char *label,
8341                                  rtx pattern ATTRIBUTE_UNUSED,
8342                                  int index ATTRIBUTE_UNUSED)
8343 {
8344   gcc_assert (index == UNSPECV_SAVEW);
8345   dwarf2out_window_save (label);
8346 }
8347
8348 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
8349    We need to emit DTP-relative relocations.  */
8350
8351 void
8352 sparc_output_dwarf_dtprel (FILE *file, int size, rtx x)
8353 {
8354   switch (size)
8355     {
8356     case 4:
8357       fputs ("\t.word\t%r_tls_dtpoff32(", file);
8358       break;
8359     case 8:
8360       fputs ("\t.xword\t%r_tls_dtpoff64(", file);
8361       break;
8362     default:
8363       gcc_unreachable ();
8364     }
8365   output_addr_const (file, x);
8366   fputs (")", file);
8367 }
8368
8369 static
8370 void sparc_file_end (void)
8371 {
8372   /* If we haven't emitted the special PIC helper function, do so now.  */
8373   if (pic_helper_symbol_name[0] && !pic_helper_emitted_p)
8374     emit_pic_helper ();
8375
8376   if (NEED_INDICATE_EXEC_STACK)
8377     file_end_indicate_exec_stack ();
8378 }
8379
8380 #include "gt-sparc.h"