gcc/config/alpha/alpha.c

   1 /* Subroutines used for code generation on the DEC Alpha.
   2    Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
   3    2000 Free Software Foundation, Inc.
   4    Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)
   5
   6 This file is part of GNU CC.
   7
   8 GNU CC is free software; you can redistribute it and/or modify
   9 it under the terms of the GNU General Public License as published by
  10 the Free Software Foundation; either version 2, or (at your option)
  11 any later version.
  12
  13 GNU CC is distributed in the hope that it will be useful,
  14 but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 GNU General Public License for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GNU CC; see the file COPYING.  If not, write to
  20 the Free Software Foundation, 59 Temple Place - Suite 330,
  21 Boston, MA 02111-1307, USA.  */
  22
  23
  24 #include "config.h"
  25 #include "system.h"
  26 #include "rtl.h"
  27 #include "regs.h"
  28 #include "hard-reg-set.h"
  29 #include "real.h"
  30 #include "insn-config.h"
  31 #include "conditions.h"
  32 #include "insn-flags.h"
  33 #include "output.h"
  34 #include "insn-attr.h"
  35 #include "flags.h"
  36 #include "recog.h"
  37 #include "reload.h"
  38 #include "tree.h"
  39 #include "expr.h"
  40 #include "obstack.h"
  41 #include "except.h"
  42 #include "function.h"
  43 #include "toplev.h"
  44 #include "ggc.h"
  45 #include "tm_p.h"
  46
/* External data.  */
extern int rtx_equal_function_value_matters;

/* Specify which cpu to schedule for. */

enum processor_type alpha_cpu;

/* Printable cpu names.  override_options indexes this table with
   alpha_cpu when it warns about an unknown cache latency.  */
static const char * const alpha_cpu_name[] =
{
  "ev4", "ev5", "ev6"
};

/* Specify how accurate floating-point traps need to be.  */

enum alpha_trap_precision alpha_tp;

/* Specify the floating-point rounding mode.  */

enum alpha_fp_rounding_mode alpha_fprm;

/* Specify which things cause traps.  */

enum alpha_fp_trap_mode alpha_fptm;

/* Strings decoded into the above options.  Each one is examined by
   override_options; a null pointer there means "leave the default".  */

const char *alpha_cpu_string;   /* -mcpu= */
const char *alpha_tune_string;  /* -mtune= */
const char *alpha_tp_string;    /* -mtrap-precision=[p|f|i] */
const char *alpha_fprm_string;  /* -mfp-rounding-mode=[n|m|c|d] */
const char *alpha_fptm_string;  /* -mfp-trap-mode=[n|u|su|sui] */
const char *alpha_mlat_string;  /* -mmemory-latency= */

/* Save information from a "cmpxx" operation until the branch or scc is
   emitted.  */

struct alpha_compare alpha_compare;

/* Non-zero if inside of a function, because the Alpha asm can't
   handle .files inside of functions.  */

static int inside_function = FALSE;

/* The number of cycles of latency we should assume on memory reads.
   The initial value is replaced by override_options, which derives it
   from -mmemory-latency.  */

int alpha_memory_latency = 3;

/* Whether the function needs the GP.  */

static int alpha_function_needs_gp;

/* The alias set for prologue/epilogue register save/restore.
   Allocated by override_options via new_alias_set.  */

static int alpha_sr_alias_set;

/* The assembler name of the current function.  */

static const char *alpha_fnname;
 104
/* Declarations of static functions.  PARAMS keeps the prototypes
   usable with compilers that do not accept ANSI prototypes.  */
static void alpha_set_memflags_1
  PARAMS ((rtx, int, int, int));
static rtx alpha_emit_set_const_1
  PARAMS ((rtx, enum machine_mode, HOST_WIDE_INT, int));
static void alpha_expand_unaligned_load_words
  PARAMS ((rtx *out_regs, rtx smem, HOST_WIDE_INT words, HOST_WIDE_INT ofs));
static void alpha_expand_unaligned_store_words
  PARAMS ((rtx *out_regs, rtx smem, HOST_WIDE_INT words, HOST_WIDE_INT ofs));
static void alpha_sa_mask
  PARAMS ((unsigned long *imaskP, unsigned long *fmaskP));
static int alpha_does_function_need_gp
  PARAMS ((void));
static void alpha_init_machine_status
  PARAMS ((struct function *p));
static void alpha_mark_machine_status
  PARAMS ((struct function *p));
static int alpha_ra_ever_killed
  PARAMS ((void));
static rtx set_frame_related_p
  PARAMS ((void));
static const char *alpha_lookup_xfloating_lib_func
  PARAMS ((enum rtx_code));
static int alpha_compute_xfloating_mode_arg
  PARAMS ((enum rtx_code, enum alpha_fp_rounding_mode));
static void alpha_emit_xfloating_libcall
  PARAMS ((const char *, rtx, rtx[], int, rtx));
static rtx alpha_emit_xfloating_compare
  PARAMS ((enum rtx_code, rtx, rtx));

/* Get the number of args of a function in one of two ways.  */
#ifdef OPEN_VMS
#define NUM_ARGS current_function_args_info.num_args
#else
#define NUM_ARGS current_function_args_info
#endif

/* Hard register numbers used by name in this file.
   NOTE(review): presumably $27 is the procedure value register and $26
   the return address register (direct_return speaks of returning
   "via $26") -- confirm against alpha.h.  */
#define REG_PV 27
#define REG_RA 26
 144 \f
 145 /* Parse target option strings. */
 146
 147 void
 148 override_options ()
 149 {
 150   int i;
 151   static struct cpu_table {
 152     const char *name;
 153     enum processor_type processor;
 154     int flags;
 155   } cpu_table[] = {
 156 #define EV5_MASK (MASK_CPU_EV5)
 157 #define EV6_MASK (MASK_CPU_EV6|MASK_BWX|MASK_MAX|MASK_FIX)
 158     { "ev4",    PROCESSOR_EV4, 0 },
 159     { "ev45",   PROCESSOR_EV4, 0 },
 160     { "21064",  PROCESSOR_EV4, 0 },
 161     { "ev5",    PROCESSOR_EV5, EV5_MASK },
 162     { "21164",  PROCESSOR_EV5, EV5_MASK },
 163     { "ev56",   PROCESSOR_EV5, EV5_MASK|MASK_BWX },
 164     { "21164a", PROCESSOR_EV5, EV5_MASK|MASK_BWX },
 165     { "pca56",  PROCESSOR_EV5, EV5_MASK|MASK_BWX|MASK_MAX },
 166     { "21164PC",PROCESSOR_EV5, EV5_MASK|MASK_BWX|MASK_MAX },
 167     { "21164pc",PROCESSOR_EV5, EV5_MASK|MASK_BWX|MASK_MAX },
 168     { "ev6",    PROCESSOR_EV6, EV6_MASK },
 169     { "21264",  PROCESSOR_EV6, EV6_MASK },
 170     { "ev67",   PROCESSOR_EV6, EV6_MASK|MASK_CIX },
 171     { "21264a", PROCESSOR_EV6, EV6_MASK|MASK_CIX },
 172     { 0, 0, 0 }
 173   };
 174
 175   alpha_tp = ALPHA_TP_PROG;
 176   alpha_fprm = ALPHA_FPRM_NORM;
 177   alpha_fptm = ALPHA_FPTM_N;
 178
 179   if (TARGET_IEEE)
 180     {
 181       alpha_tp = ALPHA_TP_INSN;
 182       alpha_fptm = ALPHA_FPTM_SU;
 183     }
 184
 185   if (TARGET_IEEE_WITH_INEXACT)
 186     {
 187       alpha_tp = ALPHA_TP_INSN;
 188       alpha_fptm = ALPHA_FPTM_SUI;
 189     }
 190
 191   if (alpha_tp_string)
 192     {
 193       if (! strcmp (alpha_tp_string, "p"))
 194         alpha_tp = ALPHA_TP_PROG;
 195       else if (! strcmp (alpha_tp_string, "f"))
 196         alpha_tp = ALPHA_TP_FUNC;
 197       else if (! strcmp (alpha_tp_string, "i"))
 198         alpha_tp = ALPHA_TP_INSN;
 199       else
 200         error ("bad value `%s' for -mtrap-precision switch", alpha_tp_string);
 201     }
 202
 203   if (alpha_fprm_string)
 204     {
 205       if (! strcmp (alpha_fprm_string, "n"))
 206         alpha_fprm = ALPHA_FPRM_NORM;
 207       else if (! strcmp (alpha_fprm_string, "m"))
 208         alpha_fprm = ALPHA_FPRM_MINF;
 209       else if (! strcmp (alpha_fprm_string, "c"))
 210         alpha_fprm = ALPHA_FPRM_CHOP;
 211       else if (! strcmp (alpha_fprm_string,"d"))
 212         alpha_fprm = ALPHA_FPRM_DYN;
 213       else
 214         error ("bad value `%s' for -mfp-rounding-mode switch",
 215                alpha_fprm_string);
 216     }
 217
 218   if (alpha_fptm_string)
 219     {
 220       if (strcmp (alpha_fptm_string, "n") == 0)
 221         alpha_fptm = ALPHA_FPTM_N;
 222       else if (strcmp (alpha_fptm_string, "u") == 0)
 223         alpha_fptm = ALPHA_FPTM_U;
 224       else if (strcmp (alpha_fptm_string, "su") == 0)
 225         alpha_fptm = ALPHA_FPTM_SU;
 226       else if (strcmp (alpha_fptm_string, "sui") == 0)
 227         alpha_fptm = ALPHA_FPTM_SUI;
 228       else
 229         error ("bad value `%s' for -mfp-trap-mode switch", alpha_fptm_string);
 230     }
 231
 232   alpha_cpu
 233     = TARGET_CPU_DEFAULT & MASK_CPU_EV6 ? PROCESSOR_EV6
 234       : (TARGET_CPU_DEFAULT & MASK_CPU_EV5 ? PROCESSOR_EV5 : PROCESSOR_EV4);
 235
 236   if (alpha_cpu_string)
 237     {
 238       for (i = 0; cpu_table [i].name; i++)
 239         if (! strcmp (alpha_cpu_string, cpu_table [i].name))
 240           {
 241             alpha_cpu = cpu_table [i].processor;
 242             target_flags &= ~ (MASK_BWX | MASK_MAX | MASK_FIX | MASK_CIX
 243                                | MASK_CPU_EV5 | MASK_CPU_EV6);
 244             target_flags |= cpu_table [i].flags;
 245             break;
 246           }
 247       if (! cpu_table [i].name)
 248         error ("bad value `%s' for -mcpu switch", alpha_cpu_string);
 249     }
 250
 251   if (alpha_tune_string)
 252     {
 253       for (i = 0; cpu_table [i].name; i++)
 254         if (! strcmp (alpha_tune_string, cpu_table [i].name))
 255           {
 256             alpha_cpu = cpu_table [i].processor;
 257             break;
 258           }
 259       if (! cpu_table [i].name)
 260         error ("bad value `%s' for -mcpu switch", alpha_tune_string);
 261     }
 262
 263   /* Do some sanity checks on the above options. */
 264
 265   if ((alpha_fptm == ALPHA_FPTM_SU || alpha_fptm == ALPHA_FPTM_SUI)
 266       && alpha_tp != ALPHA_TP_INSN && ! TARGET_CPU_EV6)
 267     {
 268       warning ("fp software completion requires -mtrap-precision=i");
 269       alpha_tp = ALPHA_TP_INSN;
 270     }
 271
 272   if (TARGET_CPU_EV6)
 273     {
 274       /* Except for EV6 pass 1 (not released), we always have precise
 275          arithmetic traps.  Which means we can do software completion
 276          without minding trap shadows.  */
 277       alpha_tp = ALPHA_TP_PROG;
 278     }
 279
 280   if (TARGET_FLOAT_VAX)
 281     {
 282       if (alpha_fprm == ALPHA_FPRM_MINF || alpha_fprm == ALPHA_FPRM_DYN)
 283         {
 284           warning ("rounding mode not supported for VAX floats");
 285           alpha_fprm = ALPHA_FPRM_NORM;
 286         }
 287       if (alpha_fptm == ALPHA_FPTM_SUI)
 288         {
 289           warning ("trap mode not supported for VAX floats");
 290           alpha_fptm = ALPHA_FPTM_SU;
 291         }
 292     }
 293
 294   {
 295     char *end;
 296     int lat;
 297
 298     if (!alpha_mlat_string)
 299       alpha_mlat_string = "L1";
 300
 301     if (ISDIGIT ((unsigned char)alpha_mlat_string[0])
 302         && (lat = strtol (alpha_mlat_string, &end, 10), *end == '\0'))
 303       ;
 304     else if ((alpha_mlat_string[0] == 'L' || alpha_mlat_string[0] == 'l')
 305              && ISDIGIT ((unsigned char)alpha_mlat_string[1])
 306              && alpha_mlat_string[2] == '\0')
 307       {
 308         static int const cache_latency[][4] =
 309         {
 310           { 3, 30, -1 },        /* ev4 -- Bcache is a guess */
 311           { 2, 12, 38 },        /* ev5 -- Bcache from PC164 LMbench numbers */
 312           { 3, 12, 30 },        /* ev6 -- Bcache from DS20 LMbench. */
 313         };
 314
 315         lat = alpha_mlat_string[1] - '0';
 316         if (lat <= 0 || lat > 3 || cache_latency[alpha_cpu][lat-1] == -1)
 317           {
 318             warning ("L%d cache latency unknown for %s",
 319                      lat, alpha_cpu_name[alpha_cpu]);
 320             lat = 3;
 321           }
 322         else
 323           lat = cache_latency[alpha_cpu][lat-1];
 324       }
 325     else if (! strcmp (alpha_mlat_string, "main"))
 326       {
 327         /* Most current memories have about 370ns latency.  This is
 328            a reasonable guess for a fast cpu.  */
 329         lat = 150;
 330       }
 331     else
 332       {
 333         warning ("bad value `%s' for -mmemory-latency", alpha_mlat_string);
 334         lat = 3;
 335       }
 336
 337     alpha_memory_latency = lat;
 338   }
 339
 340   /* Default the definition of "small data" to 8 bytes.  */
 341   if (!g_switch_set)
 342     g_switch_value = 8;
 343
 344   /* Acquire a unique set number for our register saves and restores.  */
 345   alpha_sr_alias_set = new_alias_set ();
 346
 347   /* Set up function hooks.  */
 348   init_machine_status = alpha_init_machine_status;
 349   mark_machine_status = alpha_mark_machine_status;
 350 }
 351 \f
 352 /* Returns 1 if VALUE is a mask that contains full bytes of zero or ones.  */
 353
 354 int
 355 zap_mask (value)
 356      HOST_WIDE_INT value;
 357 {
 358   int i;
 359
 360   for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR;
 361        i++, value >>= 8)
 362     if ((value & 0xff) != 0 && (value & 0xff) != 0xff)
 363       return 0;
 364
 365   return 1;
 366 }
 367
 368 /* Returns 1 if OP is either the constant zero or a register.  If a
 369    register, it must be in the proper mode unless MODE is VOIDmode.  */
 370
 371 int
 372 reg_or_0_operand (op, mode)
 373       register rtx op;
 374       enum machine_mode mode;
 375 {
 376   return op == const0_rtx || register_operand (op, mode);
 377 }
 378
 379 /* Return 1 if OP is a constant in the range of 0-63 (for a shift) or
 380    any register.  */
 381
 382 int
 383 reg_or_6bit_operand (op, mode)
 384      register rtx op;
 385      enum machine_mode mode;
 386 {
 387   return ((GET_CODE (op) == CONST_INT
 388            && (unsigned HOST_WIDE_INT) INTVAL (op) < 64)
 389           || register_operand (op, mode));
 390 }
 391
 392
 393 /* Return 1 if OP is an 8-bit constant or any register.  */
 394
 395 int
 396 reg_or_8bit_operand (op, mode)
 397      register rtx op;
 398      enum machine_mode mode;
 399 {
 400   return ((GET_CODE (op) == CONST_INT
 401            && (unsigned HOST_WIDE_INT) INTVAL (op) < 0x100)
 402           || register_operand (op, mode));
 403 }
 404
 405 /* Return 1 if OP is an 8-bit constant.  */
 406
 407 int
 408 cint8_operand (op, mode)
 409      register rtx op;
 410      enum machine_mode mode ATTRIBUTE_UNUSED;
 411 {
 412   return ((GET_CODE (op) == CONST_INT
 413            && (unsigned HOST_WIDE_INT) INTVAL (op) < 0x100));
 414 }
 415
 416 /* Return 1 if the operand is a valid second operand to an add insn.  */
 417
 418 int
 419 add_operand (op, mode)
 420      register rtx op;
 421      enum machine_mode mode;
 422 {
 423   if (GET_CODE (op) == CONST_INT)
 424     /* Constraints I, J, O and P are covered by K.  */
 425     return (CONST_OK_FOR_LETTER_P (INTVAL (op), 'K')
 426             || CONST_OK_FOR_LETTER_P (INTVAL (op), 'L'));
 427
 428   return register_operand (op, mode);
 429 }
 430
 431 /* Return 1 if the operand is a valid second operand to a sign-extending
 432    add insn.  */
 433
 434 int
 435 sext_add_operand (op, mode)
 436      register rtx op;
 437      enum machine_mode mode;
 438 {
 439   if (GET_CODE (op) == CONST_INT)
 440     return (CONST_OK_FOR_LETTER_P (INTVAL (op), 'I')
 441             || CONST_OK_FOR_LETTER_P (INTVAL (op), 'O'));
 442
 443   return reg_not_elim_operand (op, mode);
 444 }
 445
 446 /* Return 1 if OP is the constant 4 or 8.  */
 447
 448 int
 449 const48_operand (op, mode)
 450      register rtx op;
 451      enum machine_mode mode ATTRIBUTE_UNUSED;
 452 {
 453   return (GET_CODE (op) == CONST_INT
 454           && (INTVAL (op) == 4 || INTVAL (op) == 8));
 455 }
 456
 457 /* Return 1 if OP is a valid first operand to an AND insn.  */
 458
 459 int
 460 and_operand (op, mode)
 461      register rtx op;
 462      enum machine_mode mode;
 463 {
 464   if (GET_CODE (op) == CONST_DOUBLE && GET_MODE (op) == VOIDmode)
 465     return (zap_mask (CONST_DOUBLE_LOW (op))
 466             && zap_mask (CONST_DOUBLE_HIGH (op)));
 467
 468   if (GET_CODE (op) == CONST_INT)
 469     return ((unsigned HOST_WIDE_INT) INTVAL (op) < 0x100
 470             || (unsigned HOST_WIDE_INT) ~ INTVAL (op) < 0x100
 471             || zap_mask (INTVAL (op)));
 472
 473   return register_operand (op, mode);
 474 }
 475
 476 /* Return 1 if OP is a valid first operand to an IOR or XOR insn.  */
 477
 478 int
 479 or_operand (op, mode)
 480      register rtx op;
 481      enum machine_mode mode;
 482 {
 483   if (GET_CODE (op) == CONST_INT)
 484     return ((unsigned HOST_WIDE_INT) INTVAL (op) < 0x100
 485             || (unsigned HOST_WIDE_INT) ~ INTVAL (op) < 0x100);
 486
 487   return register_operand (op, mode);
 488 }
 489
 490 /* Return 1 if OP is a constant that is the width, in bits, of an integral
 491    mode smaller than DImode.  */
 492
 493 int
 494 mode_width_operand (op, mode)
 495      register rtx op;
 496      enum machine_mode mode ATTRIBUTE_UNUSED;
 497 {
 498   return (GET_CODE (op) == CONST_INT
 499           && (INTVAL (op) == 8 || INTVAL (op) == 16
 500               || INTVAL (op) == 32 || INTVAL (op) == 64));
 501 }
 502
 503 /* Return 1 if OP is a constant that is the width of an integral machine mode
 504    smaller than an integer.  */
 505
 506 int
 507 mode_mask_operand (op, mode)
 508      register rtx op;
 509      enum machine_mode mode ATTRIBUTE_UNUSED;
 510 {
 511 #if HOST_BITS_PER_WIDE_INT == 32
 512   if (GET_CODE (op) == CONST_DOUBLE)
 513     return (CONST_DOUBLE_LOW (op) == -1
 514             && (CONST_DOUBLE_HIGH (op) == -1
 515                 || CONST_DOUBLE_HIGH (op) == 0));
 516 #else
 517   if (GET_CODE (op) == CONST_DOUBLE)
 518     return (CONST_DOUBLE_LOW (op) == -1 && CONST_DOUBLE_HIGH (op) == 0);
 519 #endif
 520
 521   return (GET_CODE (op) == CONST_INT
 522           && (INTVAL (op) == 0xff
 523               || INTVAL (op) == 0xffff
 524               || INTVAL (op) == (HOST_WIDE_INT)0xffffffff
 525 #if HOST_BITS_PER_WIDE_INT == 64
 526               || INTVAL (op) == -1
 527 #endif
 528               ));
 529 }
 530
 531 /* Return 1 if OP is a multiple of 8 less than 64.  */
 532
 533 int
 534 mul8_operand (op, mode)
 535      register rtx op;
 536      enum machine_mode mode ATTRIBUTE_UNUSED;
 537 {
 538   return (GET_CODE (op) == CONST_INT
 539           && (unsigned HOST_WIDE_INT) INTVAL (op) < 64
 540           && (INTVAL (op) & 7) == 0);
 541 }
 542
 543 /* Return 1 if OP is the constant zero in floating-point.  */
 544
 545 int
 546 fp0_operand (op, mode)
 547      register rtx op;
 548      enum machine_mode mode;
 549 {
 550   return (GET_MODE (op) == mode
 551           && GET_MODE_CLASS (mode) == MODE_FLOAT && op == CONST0_RTX (mode));
 552 }
 553
 554 /* Return 1 if OP is the floating-point constant zero or a register.  */
 555
 556 int
 557 reg_or_fp0_operand (op, mode)
 558      register rtx op;
 559      enum machine_mode mode;
 560 {
 561   return fp0_operand (op, mode) || register_operand (op, mode);
 562 }
 563
 564 /* Return 1 if OP is a hard floating-point register.  */
 565
 566 int
 567 hard_fp_register_operand (op, mode)
 568      register rtx op;
 569      enum machine_mode mode;
 570 {
 571   if (mode != VOIDmode && GET_MODE (op) != VOIDmode && mode != GET_MODE (op))
 572     return 0;
 573
 574   if (GET_CODE (op) == SUBREG)
 575     op = SUBREG_REG (op);
 576   return GET_CODE (op) == REG && REGNO_REG_CLASS (REGNO (op)) == FLOAT_REGS;
 577 }
 578
 579 /* Return 1 if OP is a hard general register.  */
 580
 581 int
 582 hard_int_register_operand (op, mode)
 583      register rtx op;
 584      enum machine_mode mode;
 585 {
 586   if (mode != VOIDmode && GET_MODE (op) != VOIDmode && mode != GET_MODE (op))
 587     return 0;
 588
 589   if (GET_CODE (op) == SUBREG)
 590     op = SUBREG_REG (op);
 591   return GET_CODE (op) == REG && REGNO_REG_CLASS (REGNO (op)) == GENERAL_REGS;
 592 }
 593
 594 /* Return 1 if OP is a register or a constant integer.  */
 595
 596
 597 int
 598 reg_or_cint_operand (op, mode)
 599     register rtx op;
 600     enum machine_mode mode;
 601 {
 602      return (GET_CODE (op) == CONST_INT
 603              || register_operand (op, mode));
 604 }
 605
 606 /* Return 1 if OP is something that can be reloaded into a register;
 607    if it is a MEM, it need not be valid.  */
 608
 609 int
 610 some_operand (op, mode)
 611      register rtx op;
 612      enum machine_mode mode;
 613 {
 614   if (mode != VOIDmode && GET_MODE (op) != VOIDmode && mode != GET_MODE (op))
 615     return 0;
 616
 617   switch (GET_CODE (op))
 618     {
 619     case REG:  case MEM:  case CONST_DOUBLE:  case CONST_INT:  case LABEL_REF:
 620     case SYMBOL_REF:  case CONST:
 621       return 1;
 622
 623     case SUBREG:
 624       return some_operand (SUBREG_REG (op), VOIDmode);
 625
 626     default:
 627       break;
 628     }
 629
 630   return 0;
 631 }
 632
 633 /* Likewise, but don't accept constants.  */
 634
 635 int
 636 some_ni_operand (op, mode)
 637      register rtx op;
 638      enum machine_mode mode;
 639 {
 640   if (GET_MODE (op) != mode && mode != VOIDmode)
 641     return 0;
 642
 643   if (GET_CODE (op) == SUBREG)
 644     op = SUBREG_REG (op);
 645
 646   return (GET_CODE (op) == REG || GET_CODE (op) == MEM);
 647 }
 648
 649 /* Return 1 if OP is a valid operand for the source of a move insn.  */
 650
 651 int
 652 input_operand (op, mode)
 653      register rtx op;
 654      enum machine_mode mode;
 655 {
 656   if (mode != VOIDmode && GET_MODE (op) != VOIDmode && mode != GET_MODE (op))
 657     return 0;
 658
 659   if (GET_MODE_CLASS (mode) == MODE_FLOAT && GET_MODE (op) != mode)
 660     return 0;
 661
 662   switch (GET_CODE (op))
 663     {
 664     case LABEL_REF:
 665     case SYMBOL_REF:
 666     case CONST:
 667       /* This handles both the Windows/NT and OSF cases.  */
 668       return mode == ptr_mode || mode == DImode;
 669
 670     case REG:
 671     case ADDRESSOF:
 672       return 1;
 673
 674     case SUBREG:
 675       if (register_operand (op, mode))
 676         return 1;
 677       /* ... fall through ... */
 678     case MEM:
 679       return ((TARGET_BWX || (mode != HImode && mode != QImode))
 680               && general_operand (op, mode));
 681
 682     case CONST_DOUBLE:
 683       return GET_MODE_CLASS (mode) == MODE_FLOAT && op == CONST0_RTX (mode);
 684
 685     case CONST_INT:
 686       return mode == QImode || mode == HImode || add_operand (op, mode);
 687
 688     case CONSTANT_P_RTX:
 689       return 1;
 690
 691     default:
 692       break;
 693     }
 694
 695   return 0;
 696 }
 697
 698 /* Return 1 if OP is a SYMBOL_REF for a function known to be in this
 699    file.  */
 700
 701 int
 702 current_file_function_operand (op, mode)
 703      rtx op;
 704      enum machine_mode mode ATTRIBUTE_UNUSED;
 705 {
 706   return (GET_CODE (op) == SYMBOL_REF
 707           && ! profile_flag && ! profile_block_flag
 708           && (SYMBOL_REF_FLAG (op)
 709               || op == XEXP (DECL_RTL (current_function_decl), 0)));
 710 }
 711
 712 /* Return 1 if OP is a valid operand for the MEM of a CALL insn.  */
 713
 714 int
 715 call_operand (op, mode)
 716      rtx op;
 717      enum machine_mode mode;
 718 {
 719   if (mode != Pmode)
 720     return 0;
 721
 722   return (GET_CODE (op) == SYMBOL_REF
 723           || (GET_CODE (op) == REG
 724               && (TARGET_OPEN_VMS || TARGET_WINDOWS_NT || REGNO (op) == 27)));
 725 }
 726
 727 /* Return 1 if OP is a valid Alpha comparison operator.  Here we know which
 728    comparisons are valid in which insn.  */
 729
 730 int
 731 alpha_comparison_operator (op, mode)
 732      register rtx op;
 733      enum machine_mode mode;
 734 {
 735   enum rtx_code code = GET_CODE (op);
 736
 737   if (mode != GET_MODE (op) && mode != VOIDmode)
 738     return 0;
 739
 740   return (code == EQ || code == LE || code == LT
 741           || code == LEU || code == LTU);
 742 }
 743
 744 /* Return 1 if OP is a valid Alpha comparison operator against zero.
 745    Here we know which comparisons are valid in which insn.  */
 746
 747 int
 748 alpha_zero_comparison_operator (op, mode)
 749      register rtx op;
 750      enum machine_mode mode;
 751 {
 752   enum rtx_code code = GET_CODE (op);
 753
 754   if (mode != GET_MODE (op) && mode != VOIDmode)
 755     return 0;
 756
 757   return (code == EQ || code == NE || code == LE || code == LT
 758           || code == LEU || code == LTU);
 759 }
 760
 761 /* Return 1 if OP is a valid Alpha swapped comparison operator.  */
 762
 763 int
 764 alpha_swapped_comparison_operator (op, mode)
 765      register rtx op;
 766      enum machine_mode mode;
 767 {
 768   enum rtx_code code = GET_CODE (op);
 769
 770   if ((mode != GET_MODE (op) && mode != VOIDmode)
 771       || GET_RTX_CLASS (code) != '<')
 772     return 0;
 773
 774   code = swap_condition (code);
 775   return (code == EQ || code == LE || code == LT
 776           || code == LEU || code == LTU);
 777 }
 778
 779 /* Return 1 if OP is a signed comparison operation.  */
 780
 781 int
 782 signed_comparison_operator (op, mode)
 783      register rtx op;
 784      enum machine_mode mode ATTRIBUTE_UNUSED;
 785 {
 786   enum rtx_code code = GET_CODE (op);
 787
 788   if (mode != GET_MODE (op) && mode != VOIDmode)
 789     return 0;
 790
 791   return (code == EQ || code == NE
 792           || code == LE || code == LT
 793           || code == GE || code == GT);
 794 }
 795
 796 /* Return 1 if OP is a valid Alpha floating point comparison operator.
 797    Here we know which comparisons are valid in which insn.  */
 798
 799 int
 800 alpha_fp_comparison_operator (op, mode)
 801      register rtx op;
 802      enum machine_mode mode;
 803 {
 804   enum rtx_code code = GET_CODE (op);
 805
 806   if (mode != GET_MODE (op) && mode != VOIDmode)
 807     return 0;
 808
 809   return (code == EQ || code == LE || code == LT || code == UNORDERED);
 810 }
 811
 812 /* Return 1 if this is a divide or modulus operator.  */
 813
 814 int
 815 divmod_operator (op, mode)
 816      register rtx op;
 817      enum machine_mode mode ATTRIBUTE_UNUSED;
 818 {
 819   switch (GET_CODE (op))
 820     {
 821     case DIV:  case MOD:  case UDIV:  case UMOD:
 822       return 1;
 823
 824     default:
 825       break;
 826     }
 827
 828   return 0;
 829 }
 830
 831 /* Return 1 if this memory address is a known aligned register plus
 832    a constant.  It must be a valid address.  This means that we can do
 833    this as an aligned reference plus some offset.
 834
 835    Take into account what reload will do.  */
 836
 837 int
 838 aligned_memory_operand (op, mode)
 839      register rtx op;
 840      enum machine_mode mode;
 841 {
 842   rtx base;
 843
 844   if (reload_in_progress)
 845     {
 846       rtx tmp = op;
 847       if (GET_CODE (tmp) == SUBREG)
 848         tmp = SUBREG_REG (tmp);
 849       if (GET_CODE (tmp) == REG
 850           && REGNO (tmp) >= FIRST_PSEUDO_REGISTER)
 851         {
 852           op = reg_equiv_memory_loc[REGNO (tmp)];
 853           if (op == 0)
 854             return 0;
 855         }
 856     }
 857
 858   if (GET_CODE (op) != MEM
 859       || GET_MODE (op) != mode)
 860     return 0;
 861   op = XEXP (op, 0);
 862
 863   /* LEGITIMIZE_RELOAD_ADDRESS creates (plus (plus reg const_hi) const_lo)
 864      sorts of constructs.  Dig for the real base register.  */
 865   if (reload_in_progress
 866       && GET_CODE (op) == PLUS
 867       && GET_CODE (XEXP (op, 0)) == PLUS)
 868     base = XEXP (XEXP (op, 0), 0);
 869   else
 870     {
 871       if (! memory_address_p (mode, op))
 872         return 0;
 873       base = (GET_CODE (op) == PLUS ? XEXP (op, 0) : op);
 874     }
 875
 876   return (GET_CODE (base) == REG && REGNO_POINTER_ALIGN (REGNO (base)) >= 32);
 877 }
 878
 879 /* Similar, but return 1 if OP is a MEM which is not alignable.  */
 880
 881 int
 882 unaligned_memory_operand (op, mode)
 883      register rtx op;
 884      enum machine_mode mode;
 885 {
 886   rtx base;
 887
 888   if (reload_in_progress)
 889     {
 890       rtx tmp = op;
 891       if (GET_CODE (tmp) == SUBREG)
 892         tmp = SUBREG_REG (tmp);
 893       if (GET_CODE (tmp) == REG
 894           && REGNO (tmp) >= FIRST_PSEUDO_REGISTER)
 895         {
 896           op = reg_equiv_memory_loc[REGNO (tmp)];
 897           if (op == 0)
 898             return 0;
 899         }
 900     }
 901
 902   if (GET_CODE (op) != MEM
 903       || GET_MODE (op) != mode)
 904     return 0;
 905   op = XEXP (op, 0);
 906
 907   /* LEGITIMIZE_RELOAD_ADDRESS creates (plus (plus reg const_hi) const_lo)
 908      sorts of constructs.  Dig for the real base register.  */
 909   if (reload_in_progress
 910       && GET_CODE (op) == PLUS
 911       && GET_CODE (XEXP (op, 0)) == PLUS)
 912     base = XEXP (XEXP (op, 0), 0);
 913   else
 914     {
 915       if (! memory_address_p (mode, op))
 916         return 0;
 917       base = (GET_CODE (op) == PLUS ? XEXP (op, 0) : op);
 918     }
 919
 920   return (GET_CODE (base) == REG && REGNO_POINTER_ALIGN (REGNO (base)) < 32);
 921 }
 922
 923 /* Return 1 if OP is either a register or an unaligned memory location.  */
 924
 925 int
 926 reg_or_unaligned_mem_operand (op, mode)
 927      rtx op;
 928      enum machine_mode mode;
 929 {
 930   return register_operand (op, mode) || unaligned_memory_operand (op, mode);
 931 }
 932
 933 /* Return 1 if OP is any memory location.  During reload a pseudo matches.  */
 934
 935 int
 936 any_memory_operand (op, mode)
 937      register rtx op;
 938      enum machine_mode mode ATTRIBUTE_UNUSED;
 939 {
 940   return (GET_CODE (op) == MEM
 941           || (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == REG)
 942           || (reload_in_progress && GET_CODE (op) == REG
 943               && REGNO (op) >= FIRST_PSEUDO_REGISTER)
 944           || (reload_in_progress && GET_CODE (op) == SUBREG
 945               && GET_CODE (SUBREG_REG (op)) == REG
 946               && REGNO (SUBREG_REG (op)) >= FIRST_PSEUDO_REGISTER));
 947 }
 948
 949 /* Returns 1 if OP is not an eliminable register.
 950
 951    This exists to cure a pathological abort in the s8addq (et al) patterns,
 952
 953         long foo () { long t; bar(); return (long) &t * 26107; }
 954
 955    which run afoul of a hack in reload to cure a (presumably) similar
 956    problem with lea-type instructions on other targets.  But there is
 957    one of us and many of them, so work around the problem by selectively
 958    preventing combine from making the optimization.  */
 959
 960 int
 961 reg_not_elim_operand (op, mode)
 962       register rtx op;
 963       enum machine_mode mode;
 964 {
 965   rtx inner = op;
 966   if (GET_CODE (op) == SUBREG)
 967     inner = SUBREG_REG (op);
 968   if (inner == frame_pointer_rtx || inner == arg_pointer_rtx)
 969     return 0;
 970
 971   return register_operand (op, mode);
 972 }
 973
 974 /* Return 1 is OP is a memory location that is not a reference (using
 975    an AND) to an unaligned location.  Take into account what reload
 976    will do.  */
 977
 978 int
 979 normal_memory_operand (op, mode)
 980      register rtx op;
 981      enum machine_mode mode ATTRIBUTE_UNUSED;
 982 {
 983   if (reload_in_progress)
 984     {
 985       rtx tmp = op;
 986       if (GET_CODE (tmp) == SUBREG)
 987         tmp = SUBREG_REG (tmp);
 988       if (GET_CODE (tmp) == REG
 989           && REGNO (tmp) >= FIRST_PSEUDO_REGISTER)
 990         {
 991           op = reg_equiv_memory_loc[REGNO (tmp)];
 992
 993           /* This may not have been assigned an equivalent address if it will
 994              be eliminated.  In that case, it doesn't matter what we do.  */
 995           if (op == 0)
 996             return 1;
 997         }
 998     }
 999
1000   return GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) != AND;
1001 }
1002
1003 /* Accept a register, but not a subreg of any kind.  This allows us to
1004    avoid pathological cases in reload wrt data movement common in
1005    int->fp conversion.  */
1006
1007 int
1008 reg_no_subreg_operand (op, mode)
1009      register rtx op;
1010      enum machine_mode mode;
1011 {
1012   if (GET_CODE (op) == SUBREG)
1013     return 0;
1014   return register_operand (op, mode);
1015 }
1016
1017 /* Recognize a addition operation that includes a constant.  Used to
1018    convince reload to canonize (plus (plus reg c1) c2) during register
1019    elimination.  */
1020
1021 int
1022 addition_operation (op, mode)
1023      register rtx op;
1024      enum machine_mode mode;
1025 {
1026   if (GET_MODE (op) != mode && mode != VOIDmode)
1027     return 0;
1028   if (GET_CODE (op) == PLUS
1029       && register_operand (XEXP (op, 0), mode)
1030       && GET_CODE (XEXP (op, 1)) == CONST_INT
1031       && CONST_OK_FOR_LETTER_P (INTVAL (XEXP (op, 1)), 'K'))
1032     return 1;
1033   return 0;
1034 }
1035
1036 /* Return 1 if this function can directly return via $26.  */
1037
1038 int
1039 direct_return ()
1040 {
1041   return (! TARGET_OPEN_VMS && reload_completed && alpha_sa_size () == 0
1042           && get_frame_size () == 0
1043           && current_function_outgoing_args_size == 0
1044           && current_function_pretend_args_size == 0);
1045 }
1046 \f
1047 /* REF is an alignable memory location.  Place an aligned SImode
1048    reference into *PALIGNED_MEM and the number of bits to shift into
1049    *PBITNUM.  SCRATCH is a free register for use in reloading out
1050    of range stack slots.  */
1051
1052 void
1053 get_aligned_mem (ref, paligned_mem, pbitnum)
1054      rtx ref;
1055      rtx *paligned_mem, *pbitnum;
1056 {
1057   rtx base;
1058   HOST_WIDE_INT offset = 0;
1059
1060   if (GET_CODE (ref) != MEM)
1061     abort ();
1062
1063   if (reload_in_progress
1064       && ! memory_address_p (GET_MODE (ref), XEXP (ref, 0)))
1065     {
1066       base = find_replacement (&XEXP (ref, 0));
1067
1068       if (! memory_address_p (GET_MODE (ref), base))
1069         abort ();
1070     }
1071   else
1072     {
1073       base = XEXP (ref, 0);
1074     }
1075
1076   if (GET_CODE (base) == PLUS)
1077     offset += INTVAL (XEXP (base, 1)), base = XEXP (base, 0);
1078
1079   *paligned_mem = gen_rtx_MEM (SImode, plus_constant (base, offset & ~3));
1080   MEM_COPY_ATTRIBUTES (*paligned_mem, ref);
1081
1082   /* Sadly, we cannot use alias sets here because we may overlap other
1083      data in a different alias set.  */
1084   MEM_ALIAS_SET (*paligned_mem) = 0;
1085
1086   *pbitnum = GEN_INT ((offset & 3) * 8);
1087 }
1088
1089 /* Similar, but just get the address.  Handle the two reload cases.
1090    Add EXTRA_OFFSET to the address we return.  */
1091
1092 rtx
1093 get_unaligned_address (ref, extra_offset)
1094      rtx ref;
1095      int extra_offset;
1096 {
1097   rtx base;
1098   HOST_WIDE_INT offset = 0;
1099
1100   if (GET_CODE (ref) != MEM)
1101     abort ();
1102
1103   if (reload_in_progress
1104       && ! memory_address_p (GET_MODE (ref), XEXP (ref, 0)))
1105     {
1106       base = find_replacement (&XEXP (ref, 0));
1107
1108       if (! memory_address_p (GET_MODE (ref), base))
1109         abort ();
1110     }
1111   else
1112     {
1113       base = XEXP (ref, 0);
1114     }
1115
1116   if (GET_CODE (base) == PLUS)
1117     offset += INTVAL (XEXP (base, 1)), base = XEXP (base, 0);
1118
1119   return plus_constant (base, offset + extra_offset);
1120 }
1121
1122 /* Loading and storing HImode or QImode values to and from memory
1123    usually requires a scratch register.  The exceptions are loading
1124    QImode and HImode from an aligned address to a general register
1125    unless byte instructions are permitted.
1126
1127    We also cannot load an unaligned address or a paradoxical SUBREG
1128    into an FP register.
1129
1130    We also cannot do integral arithmetic into FP regs, as might result
1131    from register elimination into a DImode fp register.  */
1132
1133 enum reg_class
1134 secondary_reload_class (class, mode, x, in)
1135      enum reg_class class;
1136      enum machine_mode mode;
1137      rtx x;
1138      int in;
1139 {
1140   if ((mode == QImode || mode == HImode) && ! TARGET_BWX)
1141     {
1142       if (GET_CODE (x) == MEM
1143           || (GET_CODE (x) == REG && REGNO (x) >= FIRST_PSEUDO_REGISTER)
1144           || (GET_CODE (x) == SUBREG
1145               && (GET_CODE (SUBREG_REG (x)) == MEM
1146                   || (GET_CODE (SUBREG_REG (x)) == REG
1147                       && REGNO (SUBREG_REG (x)) >= FIRST_PSEUDO_REGISTER))))
1148         {
1149           if (!in || !aligned_memory_operand(x, mode))
1150             return GENERAL_REGS;
1151         }
1152     }
1153
1154   if (class == FLOAT_REGS)
1155     {
1156       if (GET_CODE (x) == MEM && GET_CODE (XEXP (x, 0)) == AND)
1157         return GENERAL_REGS;
1158
1159       if (GET_CODE (x) == SUBREG
1160           && (GET_MODE_SIZE (GET_MODE (x))
1161               > GET_MODE_SIZE (GET_MODE (SUBREG_REG (x)))))
1162         return GENERAL_REGS;
1163
1164       if (in && INTEGRAL_MODE_P (mode) && ! general_operand (x, mode))
1165         return GENERAL_REGS;
1166     }
1167
1168   return NO_REGS;
1169 }
1170 \f
1171 /* Subfunction of the following function.  Update the flags of any MEM
1172    found in part of X.  */
1173
1174 static void
1175 alpha_set_memflags_1 (x, in_struct_p, volatile_p, unchanging_p)
1176      rtx x;
1177      int in_struct_p, volatile_p, unchanging_p;
1178 {
1179   int i;
1180
1181   switch (GET_CODE (x))
1182     {
1183     case SEQUENCE:
1184     case PARALLEL:
1185       for (i = XVECLEN (x, 0) - 1; i >= 0; i--)
1186         alpha_set_memflags_1 (XVECEXP (x, 0, i), in_struct_p, volatile_p,
1187                               unchanging_p);
1188       break;
1189
1190     case INSN:
1191       alpha_set_memflags_1 (PATTERN (x), in_struct_p, volatile_p,
1192                             unchanging_p);
1193       break;
1194
1195     case SET:
1196       alpha_set_memflags_1 (SET_DEST (x), in_struct_p, volatile_p,
1197                             unchanging_p);
1198       alpha_set_memflags_1 (SET_SRC (x), in_struct_p, volatile_p,
1199                             unchanging_p);
1200       break;
1201
1202     case MEM:
1203       MEM_IN_STRUCT_P (x) = in_struct_p;
1204       MEM_VOLATILE_P (x) = volatile_p;
1205       RTX_UNCHANGING_P (x) = unchanging_p;
1206       /* Sadly, we cannot use alias sets because the extra aliasing
1207          produced by the AND interferes.  Given that two-byte quantities
1208          are the only thing we would be able to differentiate anyway,
1209          there does not seem to be any point in convoluting the early
1210          out of the alias check.  */
1211       /* MEM_ALIAS_SET (x) = alias_set; */
1212       break;
1213
1214     default:
1215       break;
1216     }
1217 }
1218
1219 /* Given INSN, which is either an INSN or a SEQUENCE generated to
1220    perform a memory operation, look for any MEMs in either a SET_DEST or
1221    a SET_SRC and copy the in-struct, unchanging, and volatile flags from
1222    REF into each of the MEMs found.  If REF is not a MEM, don't do
1223    anything.  */
1224
1225 void
1226 alpha_set_memflags (insn, ref)
1227      rtx insn;
1228      rtx ref;
1229 {
1230   int in_struct_p, volatile_p, unchanging_p;
1231
1232   if (GET_CODE (ref) != MEM)
1233     return;
1234
1235   in_struct_p = MEM_IN_STRUCT_P (ref);
1236   volatile_p = MEM_VOLATILE_P (ref);
1237   unchanging_p = RTX_UNCHANGING_P (ref);
1238
1239   /* This is only called from alpha.md, after having had something
1240      generated from one of the insn patterns.  So if everything is
1241      zero, the pattern is already up-to-date.  */
1242   if (! in_struct_p && ! volatile_p && ! unchanging_p)
1243     return;
1244
1245   alpha_set_memflags_1 (insn, in_struct_p, volatile_p, unchanging_p);
1246 }
1247 \f
1248 /* Try to output insns to set TARGET equal to the constant C if it can be
1249    done in less than N insns.  Do all computations in MODE.  Returns the place
1250    where the output has been placed if it can be done and the insns have been
1251    emitted.  If it would take more than N insns, zero is returned and no
1252    insns and emitted.  */
1253
1254 rtx
1255 alpha_emit_set_const (target, mode, c, n)
1256      rtx target;
1257      enum machine_mode mode;
1258      HOST_WIDE_INT c;
1259      int n;
1260 {
1261   rtx pat;
1262   int i;
1263
1264   /* Try 1 insn, then 2, then up to N. */
1265   for (i = 1; i <= n; i++)
1266     if ((pat = alpha_emit_set_const_1 (target, mode, c, i)) != 0)
1267       return pat;
1268
1269   return 0;
1270 }
1271
1272 /* Internal routine for the above to check for N or below insns.
        Attempts to load the constant C into TARGET, computing in MODE and
        using at most N insns.  The single-set path below allocates a fresh
        pseudo when TARGET is null.  Returns the rtx that holds the value,
        or 0 if none of the strategies tried here fits within N insns.  */
1273
1274 static rtx
1275 alpha_emit_set_const_1 (target, mode, c, n)
1276      rtx target;
1277      enum machine_mode mode;
1278      HOST_WIDE_INT c;
1279      int n;
1280 {
1281   HOST_WIDE_INT new;
1282   int i, bits;
1283   /* Use a pseudo if highly optimizing and still generating RTL.  */
1284   rtx subtarget
1285     = (flag_expensive_optimizations && rtx_equal_function_value_matters
1286        ? 0 : target);
1287   rtx temp;
1288
1289 #if HOST_BITS_PER_WIDE_INT == 64
1290   /* We are only called for SImode and DImode.  If this is SImode, ensure that
1291      we are sign extended to a full word.  This does not make any sense when
1292      cross-compiling on a narrow machine.  */
1293
1294   if (mode == SImode)
1295     c = ((c & 0xffffffff) ^ 0x80000000) - 0x80000000;
1296 #endif
1297
1298   /* If this is a sign-extended 32-bit constant, we can do this in at most
1299      three insns, so do it if we have enough insns left.  We always have
1300      a sign-extended 32-bit constant when compiling on a narrow machine.   */
1301
1302   if (HOST_BITS_PER_WIDE_INT != 64
1303       || c >> 31 == -1 || c >> 31 == 0)
1304     {
       /* LOW is the sign-extended low 16 bits of C.  HIGH is the
          sign-extended next 16 bits of what remains once LOW has been
          subtracted.  EXTRA, when nonzero, is a second high part.  */
1305       HOST_WIDE_INT low = ((c & 0xffff) ^ 0x8000) - 0x8000;
1306       HOST_WIDE_INT tmp1 = c - low;
1307       HOST_WIDE_INT high = (((tmp1 >> 16) & 0xffff) ^ 0x8000) - 0x8000;
1308       HOST_WIDE_INT extra = 0;
1309
1310       /* If HIGH will be interpreted as negative but the constant is
1311          positive, we must adjust it to do two ldha insns.  */
1312
1313       if ((high & 0x8000) != 0 && c >= 0)
1314         {
1315           extra = 0x4000;
1316           tmp1 -= 0x40000000;
1317           high = ((tmp1 >> 16) & 0xffff) - 2 * ((tmp1 >> 16) & 0x8000);
1318         }
1319
       /* A single set suffices when C is just LOW, or when only the
          HIGH part is non-zero.  */
1320       if (c == low || (low == 0 && extra == 0))
1321         {
1322           /* We used to use copy_to_suggested_reg (GEN_INT (c), target, mode)
1323              but that meant that we can't handle INT_MIN on 32-bit machines
1324              (like NT/Alpha), because we recurse indefinitely through
1325              emit_move_insn to gen_movdi.  So instead, since we know exactly
1326              what we want, create it explicitly.  */
1327
1328           if (target == NULL)
1329             target = gen_reg_rtx (mode);
1330           emit_insn (gen_rtx_SET (VOIDmode, target, GEN_INT (c)));
1331           return target;
1332         }
       /* Otherwise two insns are needed, or three when the EXTRA
          adjustment is required.  */
1333       else if (n >= 2 + (extra != 0))
1334         {
1335           temp = copy_to_suggested_reg (GEN_INT (high << 16), subtarget, mode);
1336
1337           if (extra != 0)
1338             temp = expand_binop (mode, add_optab, temp, GEN_INT (extra << 16),
1339                                  subtarget, 0, OPTAB_WIDEN);
1340
1341           return expand_binop (mode, add_optab, temp, GEN_INT (low),
1342                                target, 0, OPTAB_WIDEN);
1343         }
1344     }
1345
1346   /* If we couldn't do it that way, try some other methods.  But if we have
1347      no instructions left, don't bother.  Likewise, if this is SImode and
1348      we can't make pseudos, we can't do anything since the expand_binop
1349      and expand_unop calls will widen and try to make pseudos.  */
1350
1351   if (n == 1
1352       || (mode == SImode && ! rtx_equal_function_value_matters))
1353     return 0;
1354
1355   /* Next, see if we can load a related constant and then shift and possibly
1356      negate it to get the constant we want.  Try this once each increasing
1357      numbers of insns.  */
1358
     /* I is the insn budget handed to the recursive search for the related
        constant; every successful case below then adds one more operation
        to turn that constant into C.  */
1359   for (i = 1; i < n; i++)
1360     {
1361       /* First, see if minus some low bits, we've an easy load of
1362          high bits.  */
1363
1364       new = ((c & 0xffff) ^ 0x8000) - 0x8000;
1365       if (new != 0
1366           && (temp = alpha_emit_set_const (subtarget, mode, c - new, i)) != 0)
1367         return expand_binop (mode, add_optab, temp, GEN_INT (new),
1368                              target, 0, OPTAB_WIDEN);
1369
1370       /* Next try complementing.  */
1371       if ((temp = alpha_emit_set_const (subtarget, mode, ~ c, i)) != 0)
1372         return expand_unop (mode, one_cmpl_optab, temp, target, 0);
1373
1374       /* Next try to form a constant and do a left shift.  We can do this
1375          if some low-order bits are zero; the exact_log2 call below tells
1376          us that information.  The bits we are shifting out could be any
1377          value, but here we'll just try the 0- and sign-extended forms of
1378          the constant.  To try to increase the chance of having the same
1379          constant in more than one insn, start at the highest number of
1380          bits to shift, but try all possibilities in case a ZAPNOT will
1381          be useful.  */
1382
1383       if ((bits = exact_log2 (c & - c)) > 0)
1384         for (; bits > 0; bits--)
1385           if ((temp = (alpha_emit_set_const
1386                        (subtarget, mode, c >> bits, i))) != 0
1387               || ((temp = (alpha_emit_set_const
1388                           (subtarget, mode,
1389                            ((unsigned HOST_WIDE_INT) c) >> bits, i)))
1390                   != 0))
1391             return expand_binop (mode, ashl_optab, temp, GEN_INT (bits),
1392                                  target, 0, OPTAB_WIDEN);
1393
1394       /* Now try high-order zero bits.  Here we try the shifted-in bits as
1395          all zero and all ones.  Be careful to avoid shifting outside the
1396          mode and to avoid shifting outside the host wide int size.  */
1397       /* On narrow hosts, don't shift a 1 into the high bit, since we'll
1398          confuse the recursive call and set all of the high 32 bits.  */
1399
1400       if ((bits = (MIN (HOST_BITS_PER_WIDE_INT, GET_MODE_SIZE (mode) * 8)
1401                    - floor_log2 (c) - 1 - (HOST_BITS_PER_WIDE_INT < 64))) > 0)
1402         for (; bits > 0; bits--)
1403           if ((temp = alpha_emit_set_const (subtarget, mode,
1404                                             c << bits, i)) != 0
1405               || ((temp = (alpha_emit_set_const
1406                            (subtarget, mode,
1407                             ((c << bits) | (((HOST_WIDE_INT) 1 << bits) - 1)),
1408                             i)))
1409                   != 0))
1410             return expand_binop (mode, lshr_optab, temp, GEN_INT (bits),
1411                                  target, 1, OPTAB_WIDEN);
1412
1413       /* Now try high-order 1 bits.  We get that with a sign-extension.
1414          But one bit isn't enough here.  Be careful to avoid shifting outside
1415          the mode and to avoid shifting outside the host wide int size. */
1416
1417       if ((bits = (MIN (HOST_BITS_PER_WIDE_INT, GET_MODE_SIZE (mode) * 8)
1418                    - floor_log2 (~ c) - 2)) > 0)
1419         for (; bits > 0; bits--)
1420           if ((temp = alpha_emit_set_const (subtarget, mode,
1421                                             c << bits, i)) != 0
1422               || ((temp = (alpha_emit_set_const
1423                            (subtarget, mode,
1424                             ((c << bits) | (((HOST_WIDE_INT) 1 << bits) - 1)),
1425                             i)))
1426                   != 0))
1427             return expand_binop (mode, ashr_optab, temp, GEN_INT (bits),
1428                                  target, 0, OPTAB_WIDEN);
1429     }
1430
1431 #if HOST_BITS_PER_WIDE_INT == 64
1432   /* Finally, see if can load a value into the target that is the same as the
1433      constant except that all bytes that are 0 are changed to be 0xff.  If we
1434      can, then we can do a ZAPNOT to obtain the desired constant.  */
1435
1436   new = c;
1437   for (i = 0; i < 64; i += 8)
1438     if ((new & ((HOST_WIDE_INT) 0xff << i)) == 0)
1439       new |= (HOST_WIDE_INT) 0xff << i;
1440
1441   /* We are only called for SImode and DImode.  If this is SImode, ensure that
1442      we are sign extended to a full word.  */
1443
1444   if (mode == SImode)
1445     new = ((new & 0xffffffff) ^ 0x80000000) - 0x80000000;
1446
     /* Leaving aside the SImode adjustment just above, the AND mask
        C | ~NEW is 0xff in every byte where C is non-zero and 0 in every
        byte that the loop above filled in.  The related constant gets
        N - 1 insns, leaving one for the AND.  */
1447   if (new != c && new != -1
1448       && (temp = alpha_emit_set_const (subtarget, mode, new, n - 1)) != 0)
1449     return expand_binop (mode, and_optab, temp, GEN_INT (c | ~ new),
1450                          target, 0, OPTAB_WIDEN);
1451 #endif
1452
1453   return 0;
1454 }
1455
1456 /* Having failed to find a 3 insn sequence in alpha_emit_set_const,
1457    fall back to a straight forward decomposition.  We do this to avoid
1458    exponential run times encountered when looking for longer sequences
1459    with alpha_emit_set_const.  */
1460
1461 rtx
1462 alpha_emit_set_long_const (target, c1, c2)
1463      rtx target;
1464      HOST_WIDE_INT c1, c2;
1465 {
1466   HOST_WIDE_INT d1, d2, d3, d4;
1467
1468   /* Decompose the entire word */
1469 #if HOST_BITS_PER_WIDE_INT >= 64
1470   if (c2 != -(c1 < 0))
1471     abort ();
1472   d1 = ((c1 & 0xffff) ^ 0x8000) - 0x8000;
1473   c1 -= d1;
1474   d2 = ((c1 & 0xffffffff) ^ 0x80000000) - 0x80000000;
1475   c1 = (c1 - d2) >> 32;
1476   d3 = ((c1 & 0xffff) ^ 0x8000) - 0x8000;
1477   c1 -= d3;
1478   d4 = ((c1 & 0xffffffff) ^ 0x80000000) - 0x80000000;
1479   if (c1 != d4)
1480     abort ();
1481 #else
1482   d1 = ((c1 & 0xffff) ^ 0x8000) - 0x8000;
1483   c1 -= d1;
1484   d2 = ((c1 & 0xffffffff) ^ 0x80000000) - 0x80000000;
1485   if (c1 != d2)
1486     abort ();
1487   c2 += (d2 < 0);
1488   d3 = ((c2 & 0xffff) ^ 0x8000) - 0x8000;
1489   c2 -= d3;
1490   d4 = ((c2 & 0xffffffff) ^ 0x80000000) - 0x80000000;
1491   if (c2 != d4)
1492     abort ();
1493 #endif
1494
1495   /* Construct the high word */
1496   if (d4)
1497     {
1498       emit_move_insn (target, GEN_INT (d4));
1499       if (d3)
1500         emit_move_insn (target, gen_rtx_PLUS (DImode, target, GEN_INT (d3)));
1501     }
1502   else
1503     emit_move_insn (target, GEN_INT (d3));
1504
1505   /* Shift it into place */
1506   emit_move_insn (target, gen_rtx_ASHIFT (DImode, target, GEN_INT (32)));
1507
1508   /* Add in the low bits.  */
1509   if (d2)
1510     emit_move_insn (target, gen_rtx_PLUS (DImode, target, GEN_INT (d2)));
1511   if (d1)
1512     emit_move_insn (target, gen_rtx_PLUS (DImode, target, GEN_INT (d1)));
1513
1514   return target;
1515 }
1516
1517 /* Generate an unsigned DImode to FP conversion.  This is the same code
1518    optabs would emit if we didn't have TFmode patterns.
1519
1520    For SFmode, this is the only construction I've found that can pass
1521    gcc.c-torture/execute/ieee/rbug.c.  No scenario that uses DFmode
1522    intermediates will work, because you'll get intermediate rounding
1523    that ruins the end result.  Some of this could be fixed by turning
1524    on round-to-positive-infinity, but that requires diddling the fpsr,
1525    which kills performance.  I tried turning this around and converting
1526    to a negative number, so that I could turn on /m, but either I did
1527    it wrong or there's something else cause I wound up with the exact
1528    same single-bit error.  There is a branch-less form of this same code:
1529
1530         srl     $16,1,$1
1531         and     $16,1,$2
1532         cmplt   $16,0,$3
1533         or      $1,$2,$2
1534         cmovge  $16,$16,$2
1535         itoft   $3,$f10
1536         itoft   $2,$f11
1537         cvtqs   $f11,$f11
1538         adds    $f11,$f11,$f0
1539         fcmoveq $f10,$f11,$f0
1540
1541    I'm not using it because it's the same number of instructions as
1542    this branch-full form, and it has more serialized long latency
1543    instructions on the critical path.
1544
1545    For DFmode, we can avoid rounding errors by breaking up the word
1546    into two pieces, converting them separately, and adding them back:
1547
1548    LC0: .long 0,0x5f800000
1549
1550         itoft   $16,$f11
1551         lda     $2,LC0
1552         cmplt   $16,0,$1
1553         cpyse   $f11,$f31,$f10
1554         cpyse   $f31,$f11,$f11
1555         s4addq  $1,$2,$1
1556         lds     $f12,0($1)
1557         cvtqt   $f10,$f10
1558         cvtqt   $f11,$f11
1559         addt    $f12,$f10,$f0
1560         addt    $f0,$f11,$f0
1561
1562    This doesn't seem to be a clear-cut win over the optabs form.
1563    It probably all depends on the distribution of numbers being
1564    converted -- in the optabs form, all but high-bit-set has a
1565    much lower minimum execution time.  */
1566
1567 void
1568 alpha_emit_floatuns (operands)
1569      rtx operands[2];
1570 {
1571   rtx neglab, donelab, i0, i1, f0, in, out;
1572   enum machine_mode mode;
1573
1574   out = operands[0];
1575   in = force_reg (DImode, operands[1]);
1576   mode = GET_MODE (out);
1577   neglab = gen_label_rtx ();
1578   donelab = gen_label_rtx ();
1579   i0 = gen_reg_rtx (DImode);
1580   i1 = gen_reg_rtx (DImode);
1581   f0 = gen_reg_rtx (mode);
1582
1583   emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0,
1584                            8, neglab);
1585
1586   emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
1587   emit_jump_insn (gen_jump (donelab));
1588   emit_barrier ();
1589
1590   emit_label (neglab);
1591
1592   emit_insn (gen_lshrdi3 (i0, in, const1_rtx));
1593   emit_insn (gen_anddi3 (i1, in, const1_rtx));
1594   emit_insn (gen_iordi3 (i0, i0, i1));
1595   emit_insn (gen_rtx_SET (VOIDmode, f0, gen_rtx_FLOAT (mode, i0)));
1596   emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
1597
1598   emit_label (donelab);
1599 }
1600
1601 /* Generate the comparison for a conditional branch.
        CODE is the condition to branch on.  The operands and the
        floating-point flag are taken from the global alpha_compare, which
        is zeroed before returning.  When CMP_CODE ends up other than NIL, a
        separate compare (or add) insn into a fresh pseudo is emitted first.
        Returns the rtx comparing the value to test against zero with
        BRANCH_CODE, in BRANCH_MODE.  */
1602
1603 rtx
1604 alpha_emit_conditional_branch (code)
1605      enum rtx_code code;
1606 {
1607   enum rtx_code cmp_code, branch_code;
1608   enum machine_mode cmp_mode, branch_mode = VOIDmode;
1609   rtx op0 = alpha_compare.op0, op1 = alpha_compare.op1;
1610   rtx tem;
1611
1612   if (alpha_compare.fp_p && GET_MODE (op0) == TFmode)
1613     {
1614       if (! TARGET_HAS_XFLOATING_LIBS)
1615         abort ();
1616
1617       /* X_floating library comparison functions return
1618            -1  unordered
1619             0  false
1620             1  true
1621          Convert the compare against the raw return value.  */
1622
1623       if (code == UNORDERED || code == ORDERED)
1624         cmp_code = EQ;
1625       else
1626         cmp_code = code;
1627
1628       op0 = alpha_emit_xfloating_compare (cmp_code, op0, op1);
1629       op1 = const0_rtx;
         /* From here on this is an integer comparison of the library
            result against zero.  */
1630       alpha_compare.fp_p = 0;
1631
1632       if (code == UNORDERED)
1633         code = LT;
1634       else if (code == ORDERED)
1635         code = GE;
1636       else
1637         code = GT;
1638     }
1639
1640   /* The general case: fold the comparison code to the types of compares
1641      that we have, choosing the branch as necessary.  */
1642   switch (code)
1643     {
1644     case EQ:  case LE:  case LT:  case LEU:  case LTU:
1645     case UNORDERED:
1646       /* We have these compares: */
1647       cmp_code = code, branch_code = NE;
1648       break;
1649
1650     case NE:
1651     case ORDERED:
1652       /* These must be reversed. */
1653       cmp_code = reverse_condition (code), branch_code = EQ;
1654       break;
1655
1656     case GE:  case GT: case GEU:  case GTU:
1657       /* For FP, we swap them, for INT, we reverse them.  */
1658       if (alpha_compare.fp_p)
1659         {
1660           cmp_code = swap_condition (code);
1661           branch_code = NE;
1662           tem = op0, op0 = op1, op1 = tem;
1663         }
1664       else
1665         {
1666           cmp_code = reverse_condition (code);
1667           branch_code = EQ;
1668         }
1669       break;
1670
1671     default:
1672       abort ();
1673     }
1674
1675   if (alpha_compare.fp_p)
1676     {
1677       cmp_mode = DFmode;
1678       if (flag_fast_math)
1679         {
1680           /* When we are not as concerned about non-finite values, and we
1681              are comparing against zero, we can branch directly.  */
             /* NOTE(review): if the swap above moved a zero into OP1 (that
                is, the original OP0 was zero and CODE was GE or GT), CODE
                here is still the unswapped condition.  Presumably callers
                never pass zero as the first operand -- confirm.  */
1682           if (op1 == CONST0_RTX (DFmode))
1683             cmp_code = NIL, branch_code = code;
1684           else if (op0 == CONST0_RTX (DFmode))
1685             {
1686               /* Undo the swap we probably did just above.  */
1687               tem = op0, op0 = op1, op1 = tem;
1688               branch_code = swap_condition (cmp_code);
1689               cmp_code = NIL;
1690             }
1691         }
1692       else
1693         {
1694           /* ??? We mark the branch mode to be CCmode to prevent the
1695              compare and branch from being combined, since the compare
1696              insn follows IEEE rules that the branch does not.  */
1697           branch_mode = CCmode;
1698         }
1699     }
1700   else
1701     {
1702       cmp_mode = DImode;
1703
1704       /* The following optimizations are only for signed compares.  */
1705       if (code != LEU && code != LTU && code != GEU && code != GTU)
1706         {
1707           /* Whee.  Compare and branch against 0 directly.  */
1708           if (op1 == const0_rtx)
1709             cmp_code = NIL, branch_code = code;
1710
1711           /* We want to use cmpcc/bcc when we can, since there is a zero delay
1712              bypass between logicals and br/cmov on EV5.  But we don't want to
1713              force valid immediate constants into registers needlessly.  */
1714           else if (GET_CODE (op1) == CONST_INT)
1715             {
1716               HOST_WIDE_INT v = INTVAL (op1), n = -v;
1717
                 /* When V is not an 'I' constant but -V satisfies 'K' or
                    'L', add -V to OP0 and branch on the sum against zero
                    with the original CODE.  */
1718               if (! CONST_OK_FOR_LETTER_P (v, 'I')
1719                   && (CONST_OK_FOR_LETTER_P (n, 'K')
1720                       || CONST_OK_FOR_LETTER_P (n, 'L')))
1721                 {
1722                   cmp_code = PLUS, branch_code = code;
1723                   op1 = GEN_INT (n);
1724                 }
1725             }
1726         }
1727
1728       if (!reg_or_0_operand (op0, DImode))
1729         op0 = force_reg (DImode, op0);
1730       if (cmp_code != PLUS && !reg_or_8bit_operand (op1, DImode))
1731         op1 = force_reg (DImode, op1);
1732     }
1733
1734   /* Emit an initial compare instruction, if necessary.  */
1735   tem = op0;
1736   if (cmp_code != NIL)
1737     {
1738       tem = gen_reg_rtx (cmp_mode);
1739       emit_move_insn (tem, gen_rtx_fmt_ee (cmp_code, cmp_mode, op0, op1));
1740     }
1741
1742   /* Zero the operands.  */
1743   memset (&alpha_compare, 0, sizeof (alpha_compare));
1744
1745   /* Return the branch comparison.  */
1746   return gen_rtx_fmt_ee (branch_code, branch_mode, tem, CONST0_RTX (cmp_mode));
1747 }
1748
1749 /* Certain simplifications can be done to make invalid setcc operations
1750    valid.  Return the final comparison, or NULL if we can't work.  */
1751
1752 rtx
1753 alpha_emit_setcc (code)
1754      enum rtx_code code;
1755 {
1756   enum rtx_code cmp_code;
1757   rtx op0 = alpha_compare.op0, op1 = alpha_compare.op1;
1758   int fp_p = alpha_compare.fp_p;
1759   rtx tmp;
1760
1761   /* Zero the operands.  */
1762   memset (&alpha_compare, 0, sizeof (alpha_compare));
1763
1764   if (fp_p && GET_MODE (op0) == TFmode)
1765     {
1766       if (! TARGET_HAS_XFLOATING_LIBS)
1767         abort ();
1768
1769       /* X_floating library comparison functions return
1770            -1  unordered
1771             0  false
1772             1  true
1773          Convert the compare against the raw return value.  */
1774
1775       if (code == UNORDERED || code == ORDERED)
1776         cmp_code = EQ;
1777       else
1778         cmp_code = code;
1779
1780       op0 = alpha_emit_xfloating_compare (cmp_code, op0, op1);
1781       op1 = const0_rtx;
1782       fp_p = 0;
1783
1784       if (code == UNORDERED)
1785         code = LT;
1786       else if (code == ORDERED)
1787         code = GE;
1788       else
1789         code = GT;
1790     }
1791
1792   if (fp_p && !TARGET_FIX)
1793     return NULL_RTX;
1794
1795   /* The general case: fold the comparison code to the types of compares
1796      that we have, choosing the branch as necessary.  */
1797
1798   cmp_code = NIL;
1799   switch (code)
1800     {
1801     case EQ:  case LE:  case LT:  case LEU:  case LTU:
1802     case UNORDERED:
1803       /* We have these compares.  */
1804       if (fp_p)
1805         cmp_code = code, code = NE;
1806       break;
1807
1808     case NE:
1809       if (!fp_p && op1 == const0_rtx)
1810         break;
1811       /* FALLTHRU */
1812
1813     case ORDERED:
1814       cmp_code = reverse_condition (code);
1815       code = EQ;
1816       break;
1817
1818     case GE:  case GT: case GEU:  case GTU:
1819       /* These are normally need swapping, but for integer zero we have
1820          special patterns that recognize swapped operands.  */
1821       if (!fp_p && op1 == const0_rtx)
1822         break;
1823       code = swap_condition (code);
1824       if (fp_p)
1825         cmp_code = code, code = NE;
1826       tmp = op0, op0 = op1, op1 = tmp;
1827       break;
1828
1829     default:
1830       abort ();
1831     }
1832
1833   if (!fp_p)
1834     {
1835       if (!register_operand (op0, DImode))
1836         op0 = force_reg (DImode, op0);
1837       if (!reg_or_8bit_operand (op1, DImode))
1838         op1 = force_reg (DImode, op1);
1839     }
1840
1841   /* Emit an initial compare instruction, if necessary.  */
1842   if (cmp_code != NIL)
1843     {
1844       enum machine_mode mode = fp_p ? DFmode : DImode;
1845
1846       tmp = gen_reg_rtx (mode);
1847       emit_insn (gen_rtx_SET (VOIDmode, tmp,
1848                               gen_rtx_fmt_ee (cmp_code, mode, op0, op1)));
1849
1850       op0 = fp_p ? gen_lowpart (DImode, tmp) : tmp;
1851       op1 = const0_rtx;
1852     }
1853
1854   /* Return the setcc comparison.  */
1855   return gen_rtx_fmt_ee (code, DImode, op0, op1);
1856 }
1857
1858
1859 /* Rewrite a comparison against zero CMP of the form
1860    (CODE (cc0) (const_int 0)) so it can be written validly in
1861    a conditional move (if_then_else CMP ...).
1862    If both of the operands that set cc0 are non-zero we must emit
1863    an insn to perform the compare (it can't be done within
1864    the conditional move). */
1865 rtx
1866 alpha_emit_conditional_move (cmp, mode)
1867      rtx cmp;
1868      enum machine_mode mode;
1869 {
1870   enum rtx_code code = GET_CODE (cmp);
1871   enum rtx_code cmov_code = NE;
1872   rtx op0 = alpha_compare.op0;
1873   rtx op1 = alpha_compare.op1;
1874   int fp_p = alpha_compare.fp_p;
1875   enum machine_mode cmp_mode
1876     = (GET_MODE (op0) == VOIDmode ? DImode : GET_MODE (op0));
1877   enum machine_mode cmp_op_mode = fp_p ? DFmode : DImode;
1878   enum machine_mode cmov_mode = VOIDmode;
1879   int local_fast_math = flag_fast_math;
1880   rtx tem;
1881
1882   /* Zero the operands.  */
1883   memset (&alpha_compare, 0, sizeof (alpha_compare));
1884
1885   if (fp_p != FLOAT_MODE_P (mode))
1886     {
1887       enum rtx_code cmp_code;
1888
1889       if (! TARGET_FIX)
1890         return 0;
1891
1892       /* If we have fp<->int register move instructions, do a cmov by
1893          performing the comparison in fp registers, and move the
1894          zero/non-zero value to integer registers, where we can then
1895          use a normal cmov, or vice-versa.  */
1896
1897       switch (code)
1898         {
1899         case EQ: case LE: case LT: case LEU: case LTU:
1900           /* We have these compares.  */
1901           cmp_code = code, code = NE;
1902           break;
1903
1904         case NE:
1905           /* This must be reversed.  */
1906           cmp_code = EQ, code = EQ;
1907           break;
1908
1909         case GE: case GT: case GEU: case GTU:
1910           /* These must be swapped.  */
1911           cmp_code = swap_condition (code);
1912           code = NE;
1913           tem = op0, op0 = op1, op1 = tem;
1914           break;
1915
1916         default:
1917           abort ();
1918         }
1919
1920       tem = gen_reg_rtx (cmp_op_mode);
1921       emit_insn (gen_rtx_SET (VOIDmode, tem,
1922                               gen_rtx_fmt_ee (cmp_code, cmp_op_mode,
1923                                               op0, op1)));
1924
1925       cmp_mode = cmp_op_mode = fp_p ? DImode : DFmode;
1926       op0 = gen_lowpart (cmp_op_mode, tem);
1927       op1 = CONST0_RTX (cmp_op_mode);
1928       fp_p = !fp_p;
1929       local_fast_math = 1;
1930     }
1931
1932   /* We may be able to use a conditional move directly.
1933      This avoids emitting spurious compares. */
1934   if (signed_comparison_operator (cmp, VOIDmode)
1935       && (!fp_p || local_fast_math)
1936       && (op0 == CONST0_RTX (cmp_mode) || op1 == CONST0_RTX (cmp_mode)))
1937     return gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
1938
1939   /* We can't put the comparison inside the conditional move;
1940      emit a compare instruction and put that inside the
1941      conditional move.  Make sure we emit only comparisons we have;
1942      swap or reverse as necessary.  */
1943
1944   if (no_new_pseudos)
1945     return NULL_RTX;
1946
1947   switch (code)
1948     {
1949     case EQ:  case LE:  case LT:  case LEU:  case LTU:
1950       /* We have these compares: */
1951       break;
1952
1953     case NE:
1954       /* This must be reversed. */
1955       code = reverse_condition (code);
1956       cmov_code = EQ;
1957       break;
1958
1959     case GE:  case GT:  case GEU:  case GTU:
1960       /* These must be swapped.  */
1961       code = swap_condition (code);
1962       tem = op0, op0 = op1, op1 = tem;
1963       break;
1964
1965     default:
1966       abort ();
1967     }
1968
1969   if (!fp_p)
1970     {
1971       if (!reg_or_0_operand (op0, DImode))
1972         op0 = force_reg (DImode, op0);
1973       if (!reg_or_8bit_operand (op1, DImode))
1974         op1 = force_reg (DImode, op1);
1975     }
1976
1977   /* ??? We mark the branch mode to be CCmode to prevent the compare
1978      and cmov from being combined, since the compare insn follows IEEE
1979      rules that the cmov does not.  */
1980   if (fp_p && !local_fast_math)
1981     cmov_mode = CCmode;
1982
1983   tem = gen_reg_rtx (cmp_op_mode);
1984   emit_move_insn (tem, gen_rtx_fmt_ee (code, cmp_op_mode, op0, op1));
1985   return gen_rtx_fmt_ee (cmov_code, cmov_mode, tem, CONST0_RTX (cmp_op_mode));
1986 }
1987
1988 /* Simplify a conditional move of two constants into a setcc with
1989    arithmetic.  This is done with a splitter since combine would
1990    just undo the work if done during code generation.  It also catches
1991    cases we wouldn't have before cse.  */
1992
1993 int
1994 alpha_split_conditional_move (code, dest, cond, t_rtx, f_rtx)
1995      enum rtx_code code;
1996      rtx dest, cond, t_rtx, f_rtx;
1997 {
1998   HOST_WIDE_INT t, f, diff;
1999   enum machine_mode mode;
2000   rtx target, subtarget, tmp;
2001
2002   mode = GET_MODE (dest);
2003   t = INTVAL (t_rtx);
2004   f = INTVAL (f_rtx);
2005   diff = t - f;
2006
2007   if (((code == NE || code == EQ) && diff < 0)
2008       || (code == GE || code == GT))
2009     {
2010       code = reverse_condition (code);
2011       diff = t, t = f, f = diff;
2012       diff = t - f;
2013     }
2014
2015   subtarget = target = dest;
2016   if (mode != DImode)
2017     {
2018       target = gen_lowpart (DImode, dest);
2019       if (! no_new_pseudos)
2020         subtarget = gen_reg_rtx (DImode);
2021       else
2022         subtarget = target;
2023     }
2024
2025   if (f == 0 && exact_log2 (diff) > 0
2026       /* On EV6, we've got enough shifters to make non-arithmatic shifts
2027          viable over a longer latency cmove.  On EV5, the E0 slot is a
2028          scarce resource, and on EV4 shift has the same latency as a cmove. */
2029       && (diff <= 8 || alpha_cpu == PROCESSOR_EV6))
2030     {
2031       tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx);
2032       emit_insn (gen_rtx_SET (VOIDmode, subtarget, tmp));
2033
2034       tmp = gen_rtx_ASHIFT (DImode, subtarget, GEN_INT (exact_log2 (t)));
2035       emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
2036     }
2037   else if (f == 0 && t == -1)
2038     {
2039       tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx);
2040       emit_insn (gen_rtx_SET (VOIDmode, subtarget, tmp));
2041
2042       emit_insn (gen_negdi2 (target, subtarget));
2043     }
2044   else if (diff == 1 || diff == 4 || diff == 8)
2045     {
2046       rtx add_op;
2047
2048       tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx);
2049       emit_insn (gen_rtx_SET (VOIDmode, subtarget, tmp));
2050
2051       if (diff == 1)
2052         emit_insn (gen_adddi3 (target, subtarget, GEN_INT (f)));
2053       else
2054         {
2055           add_op = GEN_INT (f);
2056           if (sext_add_operand (add_op, mode))
2057             {
2058               tmp = gen_rtx_MULT (DImode, subtarget, GEN_INT (diff));
2059               tmp = gen_rtx_PLUS (DImode, tmp, add_op);
2060               emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
2061             }
2062           else
2063             return 0;
2064         }
2065     }
2066   else
2067     return 0;
2068
2069   return 1;
2070 }
2071 \f
2072 /* Look up the function X_floating library function name for the
2073    given operation.  */
2074
2075 static const char *
2076 alpha_lookup_xfloating_lib_func (code)
2077      enum rtx_code code;
2078 {
2079   struct xfloating_op
2080     {
2081       enum rtx_code code;
2082       const char *func;
2083     };
2084
2085   static const struct xfloating_op vms_xfloating_ops[] =
2086     {
2087       { PLUS,           "OTS$ADD_X" },
2088       { MINUS,          "OTS$SUB_X" },
2089       { MULT,           "OTS$MUL_X" },
2090       { DIV,            "OTS$DIV_X" },
2091       { EQ,             "OTS$EQL_X" },
2092       { NE,             "OTS$NEQ_X" },
2093       { LT,             "OTS$LSS_X" },
2094       { LE,             "OTS$LEQ_X" },
2095       { GT,             "OTS$GTR_X" },
2096       { GE,             "OTS$GEQ_X" },
2097       { FIX,            "OTS$CVTXQ" },
2098       { FLOAT,          "OTS$CVTQX" },
2099       { UNSIGNED_FLOAT, "OTS$CVTQUX" },
2100       { FLOAT_EXTEND,   "OTS$CVT_FLOAT_T_X" },
2101       { FLOAT_TRUNCATE, "OTS$CVT_FLOAT_X_T" },
2102     };
2103
2104   static const struct xfloating_op osf_xfloating_ops[] =
2105     {
2106       { PLUS,           "_OtsAddX" },
2107       { MINUS,          "_OtsSubX" },
2108       { MULT,           "_OtsMulX" },
2109       { DIV,            "_OtsDivX" },
2110       { EQ,             "_OtsEqlX" },
2111       { NE,             "_OtsNeqX" },
2112       { LT,             "_OtsLssX" },
2113       { LE,             "_OtsLeqX" },
2114       { GT,             "_OtsGtrX" },
2115       { GE,             "_OtsGeqX" },
2116       { FIX,            "_OtsCvtXQ" },
2117       { FLOAT,          "_OtsCvtQX" },
2118       { UNSIGNED_FLOAT, "_OtsCvtQUX" },
2119       { FLOAT_EXTEND,   "_OtsConvertFloatTX" },
2120       { FLOAT_TRUNCATE, "_OtsConvertFloatXT" },
2121     };
2122
2123   const struct xfloating_op *ops;
2124   const long n = ARRAY_SIZE (osf_xfloating_ops);
2125   long i;
2126
2127   /* How irritating.  Nothing to key off for the table.  Hardcode
2128      knowledge of the G_floating routines.  */
2129   if (TARGET_FLOAT_VAX)
2130     {
2131       if (TARGET_OPEN_VMS)
2132         {
2133           if (code == FLOAT_EXTEND)
2134             return "OTS$CVT_FLOAT_G_X";
2135           if (code == FLOAT_TRUNCATE)
2136             return "OTS$CVT_FLOAT_X_G";
2137         }
2138       else
2139         {
2140           if (code == FLOAT_EXTEND)
2141             return "_OtsConvertFloatGX";
2142           if (code == FLOAT_TRUNCATE)
2143             return "_OtsConvertFloatXG";
2144         }
2145     }
2146
2147   if (TARGET_OPEN_VMS)
2148     ops = vms_xfloating_ops;
2149   else
2150     ops = osf_xfloating_ops;
2151
2152   for (i = 0; i < n; ++i)
2153     if (ops[i].code == code)
2154       return ops[i].func;
2155
2156   abort();
2157 }
2158
2159 /* Most X_floating operations take the rounding mode as an argument.
2160    Compute that here.  */
2161
2162 static int
2163 alpha_compute_xfloating_mode_arg (code, round)
2164      enum rtx_code code;
2165      enum alpha_fp_rounding_mode round;
2166 {
2167   int mode;
2168
2169   switch (round)
2170     {
2171     case ALPHA_FPRM_NORM:
2172       mode = 2;
2173       break;
2174     case ALPHA_FPRM_MINF:
2175       mode = 1;
2176       break;
2177     case ALPHA_FPRM_CHOP:
2178       mode = 0;
2179       break;
2180     case ALPHA_FPRM_DYN:
2181       mode = 4;
2182       break;
2183     default:
2184       abort ();
2185
2186     /* XXX For reference, round to +inf is mode = 3.  */
2187     }
2188
2189   if (code == FLOAT_TRUNCATE && alpha_fptm == ALPHA_FPTM_N)
2190     mode |= 0x10000;
2191
2192   return mode;
2193 }
2194
2195 /* Emit an X_floating library function call.
2196
2197    Note that these functions do not follow normal calling conventions:
2198    TFmode arguments are passed in two integer registers (as opposed to
2199    indirect); TFmode return values appear in R16+R17.
2200
2201    FUNC is the function name to call.
2202    TARGET is where the output belongs.
2203    OPERANDS are the inputs.
2204    NOPERANDS is the count of inputs.
2205    EQUIV is the expression equivalent for the function.
2206 */
2207
2208 static void
2209 alpha_emit_xfloating_libcall (func, target, operands, noperands, equiv)
2210      const char *func;
2211      rtx target;
2212      rtx operands[];
2213      int noperands;
2214      rtx equiv;
2215 {
2216   rtx usage = NULL_RTX, tmp, reg;
2217   int regno = 16, i;
2218
2219   start_sequence ();
2220
2221   for (i = 0; i < noperands; ++i)
2222     {
2223       switch (GET_MODE (operands[i]))
2224         {
2225         case TFmode:
2226           reg = gen_rtx_REG (TFmode, regno);
2227           regno += 2;
2228           break;
2229
2230         case DFmode:
2231           reg = gen_rtx_REG (DFmode, regno + 32);
2232           regno += 1;
2233           break;
2234
2235         case VOIDmode:
2236           if (GET_CODE (operands[i]) != CONST_INT)
2237             abort ();
2238           /* FALLTHRU */
2239         case DImode:
2240           reg = gen_rtx_REG (DImode, regno);
2241           regno += 1;
2242           break;
2243
2244         default:
2245           abort ();
2246         }
2247
2248       emit_move_insn (reg, operands[i]);
2249       usage = alloc_EXPR_LIST (0, gen_rtx_USE (VOIDmode, reg), usage);
2250     }
2251
2252   switch (GET_MODE (target))
2253     {
2254     case TFmode:
2255       reg = gen_rtx_REG (TFmode, 16);
2256       break;
2257     case DFmode:
2258       reg = gen_rtx_REG (DFmode, 32);
2259       break;
2260     case DImode:
2261       reg = gen_rtx_REG (DImode, 0);
2262       break;
2263     default:
2264       abort ();
2265     }
2266
2267   tmp = gen_rtx_MEM (QImode, gen_rtx_SYMBOL_REF (Pmode, (char *) func));
2268   tmp = emit_call_insn (GEN_CALL_VALUE (reg, tmp, const0_rtx,
2269                                         const0_rtx, const0_rtx));
2270   CALL_INSN_FUNCTION_USAGE (tmp) = usage;
2271
2272   tmp = get_insns ();
2273   end_sequence ();
2274
2275   emit_libcall_block (tmp, target, reg, equiv);
2276 }
2277
2278 /* Emit an X_floating library function call for arithmetic (+,-,*,/).  */
2279
2280 void
2281 alpha_emit_xfloating_arith (code, operands)
2282      enum rtx_code code;
2283      rtx operands[];
2284 {
2285   const char *func;
2286   int mode;
2287   rtx out_operands[3];
2288
2289   func = alpha_lookup_xfloating_lib_func (code);
2290   mode = alpha_compute_xfloating_mode_arg (code, alpha_fprm);
2291
2292   out_operands[0] = operands[1];
2293   out_operands[1] = operands[2];
2294   out_operands[2] = GEN_INT (mode);
2295   alpha_emit_xfloating_libcall (func, operands[0], out_operands, 3,
2296                                 gen_rtx_fmt_ee (code, TFmode, operands[1],
2297                                                 operands[2]));
2298 }
2299
2300 /* Emit an X_floating library function call for a comparison.  */
2301
2302 static rtx
2303 alpha_emit_xfloating_compare (code, op0, op1)
2304      enum rtx_code code;
2305      rtx op0, op1;
2306 {
2307   const char *func;
2308   rtx out, operands[2];
2309
2310   func = alpha_lookup_xfloating_lib_func (code);
2311
2312   operands[0] = op0;
2313   operands[1] = op1;
2314   out = gen_reg_rtx (DImode);
2315
2316   /* ??? Strange equiv cause what's actually returned is -1,0,1, not a
2317      proper boolean value.  */
2318   alpha_emit_xfloating_libcall (func, out, operands, 2,
2319                                 gen_rtx_COMPARE (TFmode, op0, op1));
2320
2321   return out;
2322 }
2323
2324 /* Emit an X_floating library function call for a conversion.  */
2325
2326 void
2327 alpha_emit_xfloating_cvt (code, operands)
2328      enum rtx_code code;
2329      rtx operands[];
2330 {
2331   int noperands = 1, mode;
2332   rtx out_operands[2];
2333   const char *func;
2334
2335   func = alpha_lookup_xfloating_lib_func (code);
2336
2337   out_operands[0] = operands[1];
2338
2339   switch (code)
2340     {
2341     case FIX:
2342       mode = alpha_compute_xfloating_mode_arg (code, ALPHA_FPRM_CHOP);
2343       out_operands[1] = GEN_INT (mode);
2344       noperands = 2;
2345       break;
2346     case FLOAT_TRUNCATE:
2347       mode = alpha_compute_xfloating_mode_arg (code, alpha_fprm);
2348       out_operands[1] = GEN_INT (mode);
2349       noperands = 2;
2350       break;
2351     default:
2352       break;
2353     }
2354
2355   alpha_emit_xfloating_libcall (func, operands[0], out_operands, noperands,
2356                                 gen_rtx_fmt_e (code, GET_MODE (operands[0]),
2357                                                operands[1]));
2358 }
2359
2360 void
2361 alpha_split_tfmode_pair (operands)
2362      rtx operands[4];
2363 {
2364   if (GET_CODE (operands[1]) == REG)
2365     {
2366       operands[3] = gen_rtx_REG (DImode, REGNO (operands[1]) + 1);
2367       operands[2] = gen_rtx_REG (DImode, REGNO (operands[1]));
2368     }
2369   else if (GET_CODE (operands[1]) == MEM)
2370     {
2371       operands[3] = change_address (operands[1], DImode,
2372                                     plus_constant (XEXP (operands[1], 0), 8));
2373       operands[2] = change_address (operands[1], DImode, NULL_RTX);
2374     }
2375   else if (operands[1] == CONST0_RTX (TFmode))
2376     operands[2] = operands[3] = const0_rtx;
2377   else
2378     abort ();
2379
2380   if (GET_CODE (operands[0]) == REG)
2381     {
2382       operands[1] = gen_rtx_REG (DImode, REGNO (operands[0]) + 1);
2383       operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
2384     }
2385   else if (GET_CODE (operands[0]) == MEM)
2386     {
2387       operands[1] = change_address (operands[0], DImode,
2388                                     plus_constant (XEXP (operands[0], 0), 8));
2389       operands[0] = change_address (operands[0], DImode, NULL_RTX);
2390     }
2391   else
2392     abort ();
2393 }
2394 \f
2395 /* Use ext[wlq][lh] as the Architecture Handbook describes for extracting
2396    unaligned data:
2397
2398            unsigned:                       signed:
2399    word:   ldq_u  r1,X(r11)                ldq_u  r1,X(r11)
2400            ldq_u  r2,X+1(r11)              ldq_u  r2,X+1(r11)
2401            lda    r3,X(r11)                lda    r3,X+2(r11)
2402            extwl  r1,r3,r1                 extql  r1,r3,r1
2403            extwh  r2,r3,r2                 extqh  r2,r3,r2
2404            or     r1.r2.r1                 or     r1,r2,r1
2405                                            sra    r1,48,r1
2406
2407    long:   ldq_u  r1,X(r11)                ldq_u  r1,X(r11)
2408            ldq_u  r2,X+3(r11)              ldq_u  r2,X+3(r11)
2409            lda    r3,X(r11)                lda    r3,X(r11)
2410            extll  r1,r3,r1                 extll  r1,r3,r1
2411            extlh  r2,r3,r2                 extlh  r2,r3,r2
2412            or     r1.r2.r1                 addl   r1,r2,r1
2413
2414    quad:   ldq_u  r1,X(r11)
2415            ldq_u  r2,X+7(r11)
2416            lda    r3,X(r11)
2417            extql  r1,r3,r1
2418            extqh  r2,r3,r2
2419            or     r1.r2.r1
2420 */
2421
2422 void
2423 alpha_expand_unaligned_load (tgt, mem, size, ofs, sign)
2424      rtx tgt, mem;
2425      HOST_WIDE_INT size, ofs;
2426      int sign;
2427 {
2428   rtx meml, memh, addr, extl, exth, tmp;
2429   enum machine_mode mode;
2430
2431   meml = gen_reg_rtx (DImode);
2432   memh = gen_reg_rtx (DImode);
2433   addr = gen_reg_rtx (DImode);
2434   extl = gen_reg_rtx (DImode);
2435   exth = gen_reg_rtx (DImode);
2436
2437   /* AND addresses cannot be in any alias set, since they may implicitly
2438      alias surrounding code.  Ideally we'd have some alias set that
2439      covered all types except those with alignment 8 or higher.  */
2440
2441   tmp = change_address (mem, DImode,
2442                         gen_rtx_AND (DImode,
2443                                      plus_constant (XEXP (mem, 0), ofs),
2444                                      GEN_INT (-8)));
2445   MEM_ALIAS_SET (tmp) = 0;
2446   emit_move_insn (meml, tmp);
2447
2448   tmp = change_address (mem, DImode,
2449                         gen_rtx_AND (DImode,
2450                                      plus_constant (XEXP (mem, 0),
2451                                                     ofs + size - 1),
2452                                      GEN_INT (-8)));
2453   MEM_ALIAS_SET (tmp) = 0;
2454   emit_move_insn (memh, tmp);
2455
2456   if (sign && size == 2)
2457     {
2458       emit_move_insn (addr, plus_constant (XEXP (mem, 0), ofs+2));
2459
2460       emit_insn (gen_extxl (extl, meml, GEN_INT (64), addr));
2461       emit_insn (gen_extqh (exth, memh, addr));
2462
2463       /* We must use tgt here for the target.  Alpha-vms port fails if we use
2464          addr for the target, because addr is marked as a pointer and combine
2465          knows that pointers are always sign-extended 32 bit values.  */
2466       addr = expand_binop (DImode, ior_optab, extl, exth, tgt, 1, OPTAB_WIDEN);
2467       addr = expand_binop (DImode, ashr_optab, addr, GEN_INT (48),
2468                            addr, 1, OPTAB_WIDEN);
2469     }
2470   else
2471     {
2472       emit_move_insn (addr, plus_constant (XEXP (mem, 0), ofs));
2473       emit_insn (gen_extxl (extl, meml, GEN_INT (size*8), addr));
2474       switch ((int) size)
2475         {
2476         case 2:
2477           emit_insn (gen_extwh (exth, memh, addr));
2478           mode = HImode;
2479           break;
2480
2481         case 4:
2482           emit_insn (gen_extlh (exth, memh, addr));
2483           mode = SImode;
2484           break;
2485
2486         case 8:
2487           emit_insn (gen_extqh (exth, memh, addr));
2488           mode = DImode;
2489           break;
2490
2491         default:
2492           abort();
2493         }
2494
2495       addr = expand_binop (mode, ior_optab, gen_lowpart (mode, extl),
2496                            gen_lowpart (mode, exth), gen_lowpart (mode, tgt),
2497                            sign, OPTAB_WIDEN);
2498     }
2499
2500   if (addr != tgt)
2501     emit_move_insn (tgt, gen_lowpart(GET_MODE (tgt), addr));
2502 }
2503
2504 /* Similarly, use ins and msk instructions to perform unaligned stores.  */
2505
2506 void
2507 alpha_expand_unaligned_store (dst, src, size, ofs)
2508      rtx dst, src;
2509      HOST_WIDE_INT size, ofs;
2510 {
2511   rtx dstl, dsth, addr, insl, insh, meml, memh;
2512
2513   dstl = gen_reg_rtx (DImode);
2514   dsth = gen_reg_rtx (DImode);
2515   insl = gen_reg_rtx (DImode);
2516   insh = gen_reg_rtx (DImode);
2517
2518   /* AND addresses cannot be in any alias set, since they may implicitly
2519      alias surrounding code.  Ideally we'd have some alias set that
2520      covered all types except those with alignment 8 or higher.  */
2521
2522   meml = change_address (dst, DImode,
2523                          gen_rtx_AND (DImode,
2524                                       plus_constant (XEXP (dst, 0), ofs),
2525                                       GEN_INT (-8)));
2526   MEM_ALIAS_SET (meml) = 0;
2527
2528   memh = change_address (dst, DImode,
2529                          gen_rtx_AND (DImode,
2530                                       plus_constant (XEXP (dst, 0),
2531                                                      ofs+size-1),
2532                                       GEN_INT (-8)));
2533   MEM_ALIAS_SET (memh) = 0;
2534
2535   emit_move_insn (dsth, memh);
2536   emit_move_insn (dstl, meml);
2537   addr = copy_addr_to_reg (plus_constant (XEXP (dst, 0), ofs));
2538
2539   if (src != const0_rtx)
2540     {
2541       emit_insn (gen_insxh (insh, gen_lowpart (DImode, src),
2542                             GEN_INT (size*8), addr));
2543
2544       switch ((int) size)
2545         {
2546         case 2:
2547           emit_insn (gen_inswl (insl, gen_lowpart (HImode, src), addr));
2548           break;
2549         case 4:
2550           emit_insn (gen_insll (insl, gen_lowpart (SImode, src), addr));
2551           break;
2552         case 8:
2553           emit_insn (gen_insql (insl, src, addr));
2554           break;
2555         }
2556     }
2557
2558   emit_insn (gen_mskxh (dsth, dsth, GEN_INT (size*8), addr));
2559
2560   switch ((int) size)
2561     {
2562     case 2:
2563       emit_insn (gen_mskxl (dstl, dstl, GEN_INT (0xffff), addr));
2564       break;
2565     case 4:
2566       emit_insn (gen_mskxl (dstl, dstl, GEN_INT (0xffffffff), addr));
2567       break;
2568     case 8:
2569       {
2570 #if HOST_BITS_PER_WIDE_INT == 32
2571         rtx msk = immed_double_const (0xffffffff, 0xffffffff, DImode);
2572 #else
2573         rtx msk = immed_double_const (0xffffffffffffffff, 0, DImode);
2574 #endif
2575         emit_insn (gen_mskxl (dstl, dstl, msk, addr));
2576       }
2577       break;
2578     }
2579
2580   if (src != const0_rtx)
2581     {
2582       dsth = expand_binop (DImode, ior_optab, insh, dsth, dsth, 0, OPTAB_WIDEN);
2583       dstl = expand_binop (DImode, ior_optab, insl, dstl, dstl, 0, OPTAB_WIDEN);
2584     }
2585
2586   /* Must store high before low for degenerate case of aligned.  */
2587   emit_move_insn (memh, dsth);
2588   emit_move_insn (meml, dstl);
2589 }
2590
/* The block move code tries to maximize speed by separating loads and
   stores at the expense of register pressure: we load all of the data
   before we store it back out.  This has two secondary effects worth
   mentioning: it speeds copying to/from both aligned and unaligned
   buffers, and it makes the code significantly easier to write.  */
2596
2597 #define MAX_MOVE_WORDS  8
2598
2599 /* Load an integral number of consecutive unaligned quadwords.  */
2600
2601 static void
2602 alpha_expand_unaligned_load_words (out_regs, smem, words, ofs)
2603      rtx *out_regs;
2604      rtx smem;
2605      HOST_WIDE_INT words, ofs;
2606 {
2607   rtx const im8 = GEN_INT (-8);
2608   rtx const i64 = GEN_INT (64);
2609   rtx ext_tmps[MAX_MOVE_WORDS], data_regs[MAX_MOVE_WORDS+1];
2610   rtx sreg, areg, tmp;
2611   HOST_WIDE_INT i;
2612
2613   /* Generate all the tmp registers we need.  */
2614   for (i = 0; i < words; ++i)
2615     {
2616       data_regs[i] = out_regs[i];
2617       ext_tmps[i] = gen_reg_rtx (DImode);
2618     }
2619   data_regs[words] = gen_reg_rtx (DImode);
2620
2621   if (ofs != 0)
2622     smem = change_address (smem, GET_MODE (smem),
2623                            plus_constant (XEXP (smem, 0), ofs));
2624
2625   /* Load up all of the source data.  */
2626   for (i = 0; i < words; ++i)
2627     {
2628       tmp = change_address (smem, DImode,
2629                             gen_rtx_AND (DImode,
2630                                          plus_constant (XEXP(smem,0), 8*i),
2631                                          im8));
2632       MEM_ALIAS_SET (tmp) = 0;
2633       emit_move_insn (data_regs[i], tmp);
2634     }
2635
2636   tmp = change_address (smem, DImode,
2637                         gen_rtx_AND (DImode,
2638                                      plus_constant (XEXP(smem,0), 8*words - 1),
2639                                      im8));
2640   MEM_ALIAS_SET (tmp) = 0;
2641   emit_move_insn (data_regs[words], tmp);
2642
2643   /* Extract the half-word fragments.  Unfortunately DEC decided to make
2644      extxh with offset zero a noop instead of zeroing the register, so
2645      we must take care of that edge condition ourselves with cmov.  */
2646
2647   sreg = copy_addr_to_reg (XEXP (smem, 0));
2648   areg = expand_binop (DImode, and_optab, sreg, GEN_INT (7), NULL,
2649                        1, OPTAB_WIDEN);
2650   for (i = 0; i < words; ++i)
2651     {
2652       emit_insn (gen_extxl (data_regs[i], data_regs[i], i64, sreg));
2653
2654       emit_insn (gen_extqh (ext_tmps[i], data_regs[i+1], sreg));
2655       emit_insn (gen_rtx_SET (VOIDmode, ext_tmps[i],
2656                               gen_rtx_IF_THEN_ELSE (DImode,
2657                                                     gen_rtx_EQ (DImode, areg,
2658                                                                 const0_rtx),
2659                                                     const0_rtx, ext_tmps[i])));
2660     }
2661
2662   /* Merge the half-words into whole words.  */
2663   for (i = 0; i < words; ++i)
2664     {
2665       out_regs[i] = expand_binop (DImode, ior_optab, data_regs[i],
2666                                   ext_tmps[i], data_regs[i], 1, OPTAB_WIDEN);
2667     }
2668 }
2669
2670 /* Store an integral number of consecutive unaligned quadwords.  DATA_REGS
2671    may be NULL to store zeros.  */
2672
2673 static void
2674 alpha_expand_unaligned_store_words (data_regs, dmem, words, ofs)
2675      rtx *data_regs;
2676      rtx dmem;
2677      HOST_WIDE_INT words, ofs;
2678 {
2679   rtx const im8 = GEN_INT (-8);
2680   rtx const i64 = GEN_INT (64);
2681 #if HOST_BITS_PER_WIDE_INT == 32
2682   rtx const im1 = immed_double_const (0xffffffff, 0xffffffff, DImode);
2683 #else
2684   rtx const im1 = immed_double_const (0xffffffffffffffff, 0, DImode);
2685 #endif
2686   rtx ins_tmps[MAX_MOVE_WORDS];
2687   rtx st_tmp_1, st_tmp_2, dreg;
2688   rtx st_addr_1, st_addr_2;
2689   HOST_WIDE_INT i;
2690
2691   /* Generate all the tmp registers we need.  */
2692   if (data_regs != NULL)
2693     for (i = 0; i < words; ++i)
2694       ins_tmps[i] = gen_reg_rtx(DImode);
2695   st_tmp_1 = gen_reg_rtx(DImode);
2696   st_tmp_2 = gen_reg_rtx(DImode);
2697
2698   if (ofs != 0)
2699     dmem = change_address (dmem, GET_MODE (dmem),
2700                            plus_constant (XEXP (dmem, 0), ofs));
2701
2702
2703   st_addr_2 = change_address (dmem, DImode,
2704                               gen_rtx_AND (DImode,
2705                                            plus_constant (XEXP(dmem,0),
2706                                                           words*8 - 1),
2707                                        im8));
2708   MEM_ALIAS_SET (st_addr_2) = 0;
2709
2710   st_addr_1 = change_address (dmem, DImode,
2711                               gen_rtx_AND (DImode,
2712                                            XEXP (dmem, 0),
2713                                            im8));
2714   MEM_ALIAS_SET (st_addr_1) = 0;
2715
2716   /* Load up the destination end bits.  */
2717   emit_move_insn (st_tmp_2, st_addr_2);
2718   emit_move_insn (st_tmp_1, st_addr_1);
2719
2720   /* Shift the input data into place.  */
2721   dreg = copy_addr_to_reg (XEXP (dmem, 0));
2722   if (data_regs != NULL)
2723     {
2724       for (i = words-1; i >= 0; --i)
2725         {
2726           emit_insn (gen_insxh (ins_tmps[i], data_regs[i], i64, dreg));
2727           emit_insn (gen_insql (data_regs[i], data_regs[i], dreg));
2728         }
2729       for (i = words-1; i > 0; --i)
2730         {
2731           ins_tmps[i-1] = expand_binop (DImode, ior_optab, data_regs[i],
2732                                         ins_tmps[i-1], ins_tmps[i-1], 1,
2733                                         OPTAB_WIDEN);
2734         }
2735     }
2736
2737   /* Split and merge the ends with the destination data.  */
2738   emit_insn (gen_mskxh (st_tmp_2, st_tmp_2, i64, dreg));
2739   emit_insn (gen_mskxl (st_tmp_1, st_tmp_1, im1, dreg));
2740
2741   if (data_regs != NULL)
2742     {
2743       st_tmp_2 = expand_binop (DImode, ior_optab, st_tmp_2, ins_tmps[words-1],
2744                                st_tmp_2, 1, OPTAB_WIDEN);
2745       st_tmp_1 = expand_binop (DImode, ior_optab, st_tmp_1, data_regs[0],
2746                                st_tmp_1, 1, OPTAB_WIDEN);
2747     }
2748
2749   /* Store it all.  */
2750   emit_move_insn (st_addr_2, st_tmp_2);
2751   for (i = words-1; i > 0; --i)
2752     {
2753       rtx tmp = change_address (dmem, DImode,
2754                                 gen_rtx_AND (DImode,
2755                                              plus_constant(XEXP (dmem,0), i*8),
2756                                              im8));
2757       MEM_ALIAS_SET (tmp) = 0;
2758       emit_move_insn (tmp, data_regs ? ins_tmps[i-1] : const0_rtx);
2759     }
2760   emit_move_insn (st_addr_1, st_tmp_1);
2761 }
2762
2763
2764 /* Expand string/block move operations.
2765
2766    operands[0] is the pointer to the destination.
2767    operands[1] is the pointer to the source.
2768    operands[2] is the number of bytes to move.
2769    operands[3] is the alignment.  */
2770
2771 int
2772 alpha_expand_block_move (operands)
2773      rtx operands[];
2774 {
2775   rtx bytes_rtx = operands[2];
2776   rtx align_rtx = operands[3];
2777   HOST_WIDE_INT orig_bytes = INTVAL (bytes_rtx);
2778   unsigned HOST_WIDE_INT bytes = orig_bytes;
2779   unsigned HOST_WIDE_INT src_align = INTVAL (align_rtx) * BITS_PER_UNIT;
2780   unsigned HOST_WIDE_INT dst_align = src_align;
2781   rtx orig_src = operands[1];
2782   rtx orig_dst = operands[0];
2783   rtx data_regs[2 * MAX_MOVE_WORDS + 16];
2784   rtx tmp;
2785   unsigned int i, words, ofs, nregs = 0;
2786
2787   if (orig_bytes <= 0)
2788     return 1;
2789   else if (bytes > MAX_MOVE_WORDS * BITS_PER_UNIT)
2790     return 0;
2791
2792   /* Look for additional alignment information from recorded register info.  */
2793
2794   tmp = XEXP (orig_src, 0);
2795   if (GET_CODE (tmp) == REG)
2796     src_align = MAX (src_align, REGNO_POINTER_ALIGN (REGNO (tmp)));
2797   else if (GET_CODE (tmp) == PLUS
2798            && GET_CODE (XEXP (tmp, 0)) == REG
2799            && GET_CODE (XEXP (tmp, 1)) == CONST_INT)
2800     {
2801       unsigned HOST_WIDE_INT c = INTVAL (XEXP (tmp, 1));
2802       unsigned int a = REGNO_POINTER_ALIGN (REGNO (XEXP (tmp, 0)));
2803
2804       if (a > src_align)
2805         {
2806           if (a >= 64 && c % 8 == 0)
2807             src_align = 64;
2808           else if (a >= 32 && c % 4 == 0)
2809             src_align = 32;
2810           else if (a >= 16 && c % 2 == 0)
2811             src_align = 16;
2812         }
2813     }
2814
2815   tmp = XEXP (orig_dst, 0);
2816   if (GET_CODE (tmp) == REG)
2817     dst_align = MAX (dst_align, REGNO_POINTER_ALIGN (REGNO (tmp)));
2818   else if (GET_CODE (tmp) == PLUS
2819            && GET_CODE (XEXP (tmp, 0)) == REG
2820            && GET_CODE (XEXP (tmp, 1)) == CONST_INT)
2821     {
2822       unsigned HOST_WIDE_INT c = INTVAL (XEXP (tmp, 1));
2823       unsigned int a = REGNO_POINTER_ALIGN (REGNO (XEXP (tmp, 0)));
2824
2825       if (a > dst_align)
2826         {
2827           if (a >= 64 && c % 8 == 0)
2828             dst_align = 64;
2829           else if (a >= 32 && c % 4 == 0)
2830             dst_align = 32;
2831           else if (a >= 16 && c % 2 == 0)
2832             dst_align = 16;
2833         }
2834     }
2835
2836   /* Load the entire block into registers.  */
2837   if (GET_CODE (XEXP (orig_src, 0)) == ADDRESSOF)
2838     {
2839       enum machine_mode mode;
2840
2841       tmp = XEXP (XEXP (orig_src, 0), 0);
2842
2843       /* Don't use the existing register if we're reading more than
2844          is held in the register.  Nor if there is not a mode that
2845          handles the exact size.  */
2846       mode = mode_for_size (bytes * BITS_PER_UNIT, MODE_INT, 1);
2847       if (mode != BLKmode
2848           && GET_MODE_SIZE (GET_MODE (tmp)) >= bytes)
2849         {
2850           if (mode == TImode)
2851             {
2852               data_regs[nregs] = gen_lowpart (DImode, tmp);
2853               data_regs[nregs+1] = gen_highpart (DImode, tmp);
2854               nregs += 2;
2855             }
2856           else
2857             data_regs[nregs++] = gen_lowpart (mode, tmp);
2858
2859           goto src_done;
2860         }
2861
2862       /* No appropriate mode; fall back on memory.  */
2863       orig_src = change_address (orig_src, GET_MODE (orig_src),
2864                                  copy_addr_to_reg (XEXP (orig_src, 0)));
2865     }
2866
2867   ofs = 0;
2868   if (src_align >= 64 && bytes >= 8)
2869     {
2870       words = bytes / 8;
2871
2872       for (i = 0; i < words; ++i)
2873         data_regs[nregs + i] = gen_reg_rtx(DImode);
2874
2875       for (i = 0; i < words; ++i)
2876         emit_move_insn (data_regs[nregs + i],
2877                         change_address (orig_src, DImode,
2878                                         plus_constant (XEXP (orig_src, 0),
2879                                                        ofs + i * 8)));
2880
2881       nregs += words;
2882       bytes -= words * 8;
2883       ofs += words * 8;
2884     }
2885
2886   if (src_align >= 32 && bytes >= 4)
2887     {
2888       words = bytes / 4;
2889
2890       for (i = 0; i < words; ++i)
2891         data_regs[nregs + i] = gen_reg_rtx(SImode);
2892
2893       for (i = 0; i < words; ++i)
2894         emit_move_insn (data_regs[nregs + i],
2895                         change_address (orig_src, SImode,
2896                                         plus_constant (XEXP (orig_src, 0),
2897                                                        ofs + i * 4)));
2898
2899       nregs += words;
2900       bytes -= words * 4;
2901       ofs += words * 4;
2902     }
2903
2904   if (bytes >= 16)
2905     {
2906       words = bytes / 8;
2907
2908       for (i = 0; i < words+1; ++i)
2909         data_regs[nregs + i] = gen_reg_rtx(DImode);
2910
2911       alpha_expand_unaligned_load_words (data_regs + nregs, orig_src,
2912                                          words, ofs);
2913
2914       nregs += words;
2915       bytes -= words * 8;
2916       ofs += words * 8;
2917     }
2918
2919   if (! TARGET_BWX && bytes >= 8)
2920     {
2921       data_regs[nregs++] = tmp = gen_reg_rtx (DImode);
2922       alpha_expand_unaligned_load (tmp, orig_src, 8, ofs, 0);
2923       bytes -= 8;
2924       ofs += 8;
2925     }
2926
2927   if (! TARGET_BWX && bytes >= 4)
2928     {
2929       data_regs[nregs++] = tmp = gen_reg_rtx (SImode);
2930       alpha_expand_unaligned_load (tmp, orig_src, 4, ofs, 0);
2931       bytes -= 4;
2932       ofs += 4;
2933     }
2934
2935   if (bytes >= 2)
2936     {
2937       if (src_align >= 16)
2938         {
2939           do {
2940             data_regs[nregs++] = tmp = gen_reg_rtx (HImode);
2941             emit_move_insn (tmp,
2942                             change_address (orig_src, HImode,
2943                                             plus_constant (XEXP (orig_src, 0),
2944                                                            ofs)));
2945             bytes -= 2;
2946             ofs += 2;
2947           } while (bytes >= 2);
2948         }
2949
2950       else if (! TARGET_BWX)
2951         {
2952           data_regs[nregs++] = tmp = gen_reg_rtx (HImode);
2953           alpha_expand_unaligned_load (tmp, orig_src, 2, ofs, 0);
2954           bytes -= 2;
2955           ofs += 2;
2956         }
2957     }
2958
2959   while (bytes > 0)
2960     {
2961       data_regs[nregs++] = tmp = gen_reg_rtx (QImode);
2962       emit_move_insn (tmp,
2963                       change_address (orig_src, QImode,
2964                                       plus_constant (XEXP (orig_src, 0),
2965                                                      ofs)));
2966       bytes -= 1;
2967       ofs += 1;
2968     }
2969
2970  src_done:
2971
2972   if (nregs > ARRAY_SIZE (data_regs))
2973     abort ();
2974
2975   /* Now save it back out again.  */
2976
2977   i = 0, ofs = 0;
2978
2979   if (GET_CODE (XEXP (orig_dst, 0)) == ADDRESSOF)
2980     {
2981       enum machine_mode mode;
2982       tmp = XEXP (XEXP (orig_dst, 0), 0);
2983
2984       mode = mode_for_size (orig_bytes * BITS_PER_UNIT, MODE_INT, 1);
2985       if (GET_MODE (tmp) == mode)
2986         {
2987           if (nregs == 1)
2988             {
2989               emit_move_insn (tmp, data_regs[0]);
2990               i = 1;
2991               goto dst_done;
2992             }
2993
2994           else if (nregs == 2 && mode == TImode)
2995             {
2996               /* Undo the subregging done above when copying between
2997                  two TImode registers.  */
2998               if (GET_CODE (data_regs[0]) == SUBREG
2999                   && GET_MODE (SUBREG_REG (data_regs[0])) == TImode)
3000                 emit_move_insn (tmp, SUBREG_REG (data_regs[0]));
3001               else
3002                 {
3003                   rtx seq;
3004
3005                   start_sequence ();
3006                   emit_move_insn (gen_lowpart (DImode, tmp), data_regs[0]);
3007                   emit_move_insn (gen_highpart (DImode, tmp), data_regs[1]);
3008                   seq = get_insns ();
3009                   end_sequence ();
3010
3011                   emit_no_conflict_block (seq, tmp, data_regs[0],
3012                                           data_regs[1], NULL_RTX);
3013                 }
3014
3015               i = 2;
3016               goto dst_done;
3017             }
3018         }
3019
3020       /* ??? If nregs > 1, consider reconstructing the word in regs.  */
3021       /* ??? Optimize mode < dst_mode with strict_low_part.  */
3022
3023       /* No appropriate mode; fall back on memory.  We can speed things
3024          up by recognizing extra alignment information.  */
3025       orig_dst = change_address (orig_dst, GET_MODE (orig_dst),
3026                                  copy_addr_to_reg (XEXP (orig_dst, 0)));
3027       dst_align = GET_MODE_SIZE (GET_MODE (tmp));
3028     }
3029
3030   /* Write out the data in whatever chunks reading the source allowed.  */
3031   if (dst_align >= 64)
3032     {
3033       while (i < nregs && GET_MODE (data_regs[i]) == DImode)
3034         {
3035           emit_move_insn (change_address (orig_dst, DImode,
3036                                           plus_constant (XEXP (orig_dst, 0),
3037                                                          ofs)),
3038                           data_regs[i]);
3039           ofs += 8;
3040           i++;
3041         }
3042     }
3043
3044   if (dst_align >= 32)
3045     {
3046       /* If the source has remaining DImode regs, write them out in
3047          two pieces.  */
3048       while (i < nregs && GET_MODE (data_regs[i]) == DImode)
3049         {
3050           tmp = expand_binop (DImode, lshr_optab, data_regs[i], GEN_INT (32),
3051                               NULL_RTX, 1, OPTAB_WIDEN);
3052
3053           emit_move_insn (change_address (orig_dst, SImode,
3054                                           plus_constant (XEXP (orig_dst, 0),
3055                                                          ofs)),
3056                           gen_lowpart (SImode, data_regs[i]));
3057           emit_move_insn (change_address (orig_dst, SImode,
3058                                           plus_constant (XEXP (orig_dst, 0),
3059                                                          ofs + 4)),
3060                           gen_lowpart (SImode, tmp));
3061           ofs += 8;
3062           i++;
3063         }
3064
3065       while (i < nregs && GET_MODE (data_regs[i]) == SImode)
3066         {
3067           emit_move_insn (change_address(orig_dst, SImode,
3068                                          plus_constant (XEXP (orig_dst, 0),
3069                                                         ofs)),
3070                           data_regs[i]);
3071           ofs += 4;
3072           i++;
3073         }
3074     }
3075
3076   if (i < nregs && GET_MODE (data_regs[i]) == DImode)
3077     {
3078       /* Write out a remaining block of words using unaligned methods.  */
3079
3080       for (words = 1; i + words < nregs; words++)
3081         if (GET_MODE (data_regs[i + words]) != DImode)
3082           break;
3083
3084       if (words == 1)
3085         alpha_expand_unaligned_store (orig_dst, data_regs[i], 8, ofs);
3086       else
3087         alpha_expand_unaligned_store_words (data_regs + i, orig_dst,
3088                                             words, ofs);
3089
3090       i += words;
3091       ofs += words * 8;
3092     }
3093
3094   /* Due to the above, this won't be aligned.  */
3095   /* ??? If we have more than one of these, consider constructing full
3096      words in registers and using alpha_expand_unaligned_store_words.  */
3097   while (i < nregs && GET_MODE (data_regs[i]) == SImode)
3098     {
3099       alpha_expand_unaligned_store (orig_dst, data_regs[i], 4, ofs);
3100       ofs += 4;
3101       i++;
3102     }
3103
3104   if (dst_align >= 16)
3105     while (i < nregs && GET_MODE (data_regs[i]) == HImode)
3106       {
3107         emit_move_insn (change_address (orig_dst, HImode,
3108                                         plus_constant (XEXP (orig_dst, 0),
3109                                                        ofs)),
3110                         data_regs[i]);
3111         i++;
3112         ofs += 2;
3113       }
3114   else
3115     while (i < nregs && GET_MODE (data_regs[i]) == HImode)
3116       {
3117         alpha_expand_unaligned_store (orig_dst, data_regs[i], 2, ofs);
3118         i++;
3119         ofs += 2;
3120       }
3121
3122   while (i < nregs && GET_MODE (data_regs[i]) == QImode)
3123     {
3124       emit_move_insn (change_address (orig_dst, QImode,
3125                                       plus_constant (XEXP (orig_dst, 0),
3126                                                      ofs)),
3127                       data_regs[i]);
3128       i++;
3129       ofs += 1;
3130     }
3131
3132  dst_done:
3133
3134   if (i != nregs)
3135     abort ();
3136
3137   return 1;
3138 }
3139
3140 int
3141 alpha_expand_block_clear (operands)
3142      rtx operands[];
3143 {
3144   rtx bytes_rtx = operands[1];
3145   rtx align_rtx = operands[2];
3146   HOST_WIDE_INT orig_bytes = INTVAL (bytes_rtx);
3147   unsigned HOST_WIDE_INT bytes = orig_bytes;
3148   unsigned HOST_WIDE_INT align = INTVAL (align_rtx);
3149   rtx orig_dst = operands[0];
3150   rtx tmp;
3151   unsigned HOST_WIDE_INT i, words, ofs = 0;
3152
3153   if (orig_bytes <= 0)
3154     return 1;
3155   if (bytes > MAX_MOVE_WORDS*8)
3156     return 0;
3157
3158   /* Look for stricter alignment.  */
3159   tmp = XEXP (orig_dst, 0);
3160   if (GET_CODE (tmp) == REG)
3161     align = MAX (align, REGNO_POINTER_ALIGN (REGNO (tmp)));
3162   else if (GET_CODE (tmp) == PLUS
3163            && GET_CODE (XEXP (tmp, 0)) == REG
3164            && GET_CODE (XEXP (tmp, 1)) == CONST_INT)
3165     {
3166       unsigned HOST_WIDE_INT c = INTVAL (XEXP (tmp, 1));
3167       unsigned int a = REGNO_POINTER_ALIGN (REGNO (XEXP (tmp, 0)));
3168
3169       if (a > align)
3170         {
3171           if (a >= 64 && c % 8 == 0)
3172             align = 64;
3173           else if (a >= 32 && c % 4 == 0)
3174             align = 32;
3175           else if (a >= 16 && c % 2 == 0)
3176             align = 16;
3177         }
3178     }
3179
3180   else if (GET_CODE (tmp) == ADDRESSOF)
3181     {
3182       enum machine_mode mode;
3183
3184       mode = mode_for_size (bytes * BITS_PER_UNIT, MODE_INT, 1);
3185       if (GET_MODE (XEXP (tmp, 0)) == mode)
3186         {
3187           emit_move_insn (XEXP (tmp, 0), const0_rtx);
3188           return 1;
3189         }
3190
3191       /* No appropriate mode; fall back on memory.  */
3192       orig_dst = change_address (orig_dst, GET_MODE (orig_dst),
3193                                  copy_addr_to_reg (tmp));
3194       align = GET_MODE_SIZE (GET_MODE (XEXP (tmp, 0)));
3195     }
3196
3197   /* Handle a block of contiguous words first.  */
3198
3199   if (align >= 64 && bytes >= 8)
3200     {
3201       words = bytes / 8;
3202
3203       for (i = 0; i < words; ++i)
3204         emit_move_insn (change_address(orig_dst, DImode,
3205                                        plus_constant (XEXP (orig_dst, 0),
3206                                                       ofs + i * 8)),
3207                           const0_rtx);
3208
3209       bytes -= words * 8;
3210       ofs += words * 8;
3211     }
3212
3213   if (align >= 16 && bytes >= 4)
3214     {
3215       words = bytes / 4;
3216
3217       for (i = 0; i < words; ++i)
3218         emit_move_insn (change_address (orig_dst, SImode,
3219                                         plus_constant (XEXP (orig_dst, 0),
3220                                                        ofs + i * 4)),
3221                         const0_rtx);
3222
3223       bytes -= words * 4;
3224       ofs += words * 4;
3225     }
3226
3227   if (bytes >= 16)
3228     {
3229       words = bytes / 8;
3230
3231       alpha_expand_unaligned_store_words (NULL, orig_dst, words, ofs);
3232
3233       bytes -= words * 8;
3234       ofs += words * 8;
3235     }
3236
3237   /* Next clean up any trailing pieces.  We know from the contiguous
3238      block move that there are no aligned SImode or DImode hunks left.  */
3239
3240   if (! TARGET_BWX && bytes >= 8)
3241     {
3242       alpha_expand_unaligned_store (orig_dst, const0_rtx, 8, ofs);
3243       bytes -= 8;
3244       ofs += 8;
3245     }
3246
3247   if (!TARGET_BWX && bytes >= 4)
3248     {
3249       alpha_expand_unaligned_store (orig_dst, const0_rtx, 4, ofs);
3250       bytes -= 4;
3251       ofs += 4;
3252     }
3253
3254   if (bytes >= 2)
3255     {
3256       if (align >= 16)
3257         {
3258           do {
3259             emit_move_insn (change_address (orig_dst, HImode,
3260                                             plus_constant (XEXP (orig_dst, 0),
3261                                                            ofs)),
3262                             const0_rtx);
3263             bytes -= 2;
3264             ofs += 2;
3265           } while (bytes >= 2);
3266         }
3267       else if (! TARGET_BWX)
3268         {
3269           alpha_expand_unaligned_store (orig_dst, const0_rtx, 2, ofs);
3270           bytes -= 2;
3271           ofs += 2;
3272         }
3273     }
3274
3275   while (bytes > 0)
3276     {
3277       emit_move_insn (change_address (orig_dst, QImode,
3278                                       plus_constant (XEXP (orig_dst, 0),
3279                                                      ofs)),
3280                       const0_rtx);
3281       bytes -= 1;
3282       ofs += 1;
3283     }
3284
3285   return 1;
3286 }
3287 \f
3288 /* Adjust the cost of a scheduling dependency.  Return the new cost of
3289    a dependency LINK or INSN on DEP_INSN.  COST is the current cost.  */
3290
3291 int
3292 alpha_adjust_cost (insn, link, dep_insn, cost)
3293      rtx insn;
3294      rtx link;
3295      rtx dep_insn;
3296      int cost;
3297 {
3298   rtx set, set_src;
3299   enum attr_type insn_type, dep_insn_type;
3300
3301   /* If the dependence is an anti-dependence, there is no cost.  For an
3302      output dependence, there is sometimes a cost, but it doesn't seem
3303      worth handling those few cases.  */
3304
3305   if (REG_NOTE_KIND (link) != 0)
3306     return 0;
3307
3308   /* If we can't recognize the insns, we can't really do anything.  */
3309   if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
3310     return cost;
3311
3312   insn_type = get_attr_type (insn);
3313   dep_insn_type = get_attr_type (dep_insn);
3314
3315   /* Bring in the user-defined memory latency.  */
3316   if (dep_insn_type == TYPE_ILD
3317       || dep_insn_type == TYPE_FLD
3318       || dep_insn_type == TYPE_LDSYM)
3319     cost += alpha_memory_latency-1;
3320
3321   switch (alpha_cpu)
3322     {
3323     case PROCESSOR_EV4:
3324       /* On EV4, if INSN is a store insn and DEP_INSN is setting the data
3325          being stored, we can sometimes lower the cost.  */
3326
3327       if ((insn_type == TYPE_IST || insn_type == TYPE_FST)
3328           && (set = single_set (dep_insn)) != 0
3329           && GET_CODE (PATTERN (insn)) == SET
3330           && rtx_equal_p (SET_DEST (set), SET_SRC (PATTERN (insn))))
3331         {
3332           switch (dep_insn_type)
3333             {
3334             case TYPE_ILD:
3335             case TYPE_FLD:
3336               /* No savings here.  */
3337               return cost;
3338
3339             case TYPE_IMUL:
3340               /* In these cases, we save one cycle.  */
3341               return cost - 1;
3342
3343             default:
3344               /* In all other cases, we save two cycles.  */
3345               return MAX (0, cost - 2);
3346             }
3347         }
3348
3349       /* Another case that needs adjustment is an arithmetic or logical
3350          operation.  It's cost is usually one cycle, but we default it to
3351          two in the MD file.  The only case that it is actually two is
3352          for the address in loads, stores, and jumps.  */
3353
3354       if (dep_insn_type == TYPE_IADD || dep_insn_type == TYPE_ILOG)
3355         {
3356           switch (insn_type)
3357             {
3358             case TYPE_ILD:
3359             case TYPE_IST:
3360             case TYPE_FLD:
3361             case TYPE_FST:
3362             case TYPE_JSR:
3363               return cost;
3364             default:
3365               return 1;
3366             }
3367         }
3368
3369       /* The final case is when a compare feeds into an integer branch;
3370          the cost is only one cycle in that case.  */
3371
3372       if (dep_insn_type == TYPE_ICMP && insn_type == TYPE_IBR)
3373         return 1;
3374       break;
3375
3376     case PROCESSOR_EV5:
3377       /* And the lord DEC saith:  "A special bypass provides an effective
3378          latency of 0 cycles for an ICMP or ILOG insn producing the test
3379          operand of an IBR or ICMOV insn." */
3380
3381       if ((dep_insn_type == TYPE_ICMP || dep_insn_type == TYPE_ILOG)
3382           && (set = single_set (dep_insn)) != 0)
3383         {
3384           /* A branch only has one input.  This must be it.  */
3385           if (insn_type == TYPE_IBR)
3386             return 0;
3387           /* A conditional move has three, make sure it is the test.  */
3388           if (insn_type == TYPE_ICMOV
3389               && GET_CODE (set_src = PATTERN (insn)) == SET
3390               && GET_CODE (set_src = SET_SRC (set_src)) == IF_THEN_ELSE
3391               && rtx_equal_p (SET_DEST (set), XEXP (set_src, 0)))
3392             return 0;
3393         }
3394
3395       /* "The multiplier is unable to receive data from IEU bypass paths.
3396          The instruction issues at the expected time, but its latency is
3397          increased by the time it takes for the input data to become
3398          available to the multiplier" -- which happens in pipeline stage
3399          six, when results are comitted to the register file.  */
3400
3401       if (insn_type == TYPE_IMUL)
3402         {
3403           switch (dep_insn_type)
3404             {
3405             /* These insns produce their results in pipeline stage five.  */
3406             case TYPE_ILD:
3407             case TYPE_ICMOV:
3408             case TYPE_IMUL:
3409             case TYPE_MVI:
3410               return cost + 1;
3411
3412             /* Other integer insns produce results in pipeline stage four.  */
3413             default:
3414               return cost + 2;
3415             }
3416         }
3417       break;
3418
3419     case PROCESSOR_EV6:
3420       /* There is additional latency to move the result of (most) FP
3421          operations anywhere but the FP register file.  */
3422
3423       if ((insn_type == TYPE_FST || insn_type == TYPE_FTOI)
3424           && (dep_insn_type == TYPE_FADD ||
3425               dep_insn_type == TYPE_FMUL ||
3426               dep_insn_type == TYPE_FCMOV))
3427         return cost + 2;
3428
3429       break;
3430     }
3431
3432   /* Otherwise, return the default cost. */
3433   return cost;
3434 }
3435 \f
3436 /* Functions to save and restore alpha_return_addr_rtx.  */
3437
3438 static void
3439 alpha_init_machine_status (p)
3440      struct function *p;
3441 {
3442   p->machine =
3443     (struct machine_function *) xcalloc (1, sizeof (struct machine_function));
3444 }
3445
3446 static void
3447 alpha_mark_machine_status (p)
3448      struct function *p;
3449 {
3450   struct machine_function *machine = p->machine;
3451
3452   ggc_mark_rtx (machine->eh_epilogue_sp_ofs);
3453   ggc_mark_rtx (machine->ra_rtx);
3454 }
3455
3456 /* Start the ball rolling with RETURN_ADDR_RTX.  */
3457
3458 rtx
3459 alpha_return_addr (count, frame)
3460      int count;
3461      rtx frame ATTRIBUTE_UNUSED;
3462 {
3463   rtx init, reg;
3464
3465   if (count != 0)
3466     return const0_rtx;
3467
3468   reg = cfun->machine->ra_rtx;
3469   if (reg == NULL)
3470     {
3471       /* No rtx yet.  Invent one, and initialize it from $26 in
3472          the prologue.  */
3473       reg = gen_reg_rtx (Pmode);
3474       cfun->machine->ra_rtx = reg;
3475       init = gen_rtx_SET (VOIDmode, reg, gen_rtx_REG (Pmode, REG_RA));
3476
3477       /* Emit the insn to the prologue with the other argument copies.  */
3478       push_topmost_sequence ();
3479       emit_insn_after (init, get_insns ());
3480       pop_topmost_sequence ();
3481     }
3482
3483   return reg;
3484 }
3485
3486 static int
3487 alpha_ra_ever_killed ()
3488 {
3489   rtx top;
3490
3491 #ifdef ASM_OUTPUT_MI_THUNK
3492   if (current_function_is_thunk)
3493     return 0;
3494 #endif
3495   if (!cfun->machine->ra_rtx)
3496     return regs_ever_live[REG_RA];
3497
3498   push_topmost_sequence ();
3499   top = get_insns ();
3500   pop_topmost_sequence ();
3501
3502   return reg_set_between_p (gen_rtx_REG (Pmode, REG_RA), top, NULL_RTX);
3503 }
3504
3505 \f
3506 /* Print an operand.  Recognize special options, documented below.  */
3507
3508 void
3509 print_operand (file, x, code)
3510     FILE *file;
3511     rtx x;
3512     int code;
3513 {
3514   int i;
3515
3516   switch (code)
3517     {
3518     case '~':
3519       /* Print the assembler name of the current function.  */
3520       assemble_name (file, alpha_fnname);
3521       break;
3522
3523     case '&':
3524       /* Generates fp-rounding mode suffix: nothing for normal, 'c' for
3525          chopped, 'm' for minus-infinity, and 'd' for dynamic rounding
3526          mode.  alpha_fprm controls which suffix is generated.  */
3527       switch (alpha_fprm)
3528         {
3529         case ALPHA_FPRM_NORM:
3530           break;
3531         case ALPHA_FPRM_MINF:
3532           fputc ('m', file);
3533           break;
3534         case ALPHA_FPRM_CHOP:
3535           fputc ('c', file);
3536           break;
3537         case ALPHA_FPRM_DYN:
3538           fputc ('d', file);
3539           break;
3540         default:
3541           abort ();
3542         }
3543       break;
3544
3545     case '\'':
3546       /* Generates trap-mode suffix for instructions that accept the su
3547          suffix only (cmpt et al).  */
3548       if (alpha_fptm >= ALPHA_FPTM_SU)
3549         fputs ("su", file);
3550       break;
3551
3552     case '`':
3553       /* Generates trap-mode suffix for instructions that accept the
3554          v and sv suffix.  The only instruction that needs this is cvtql.  */
3555       switch (alpha_fptm)
3556         {
3557         case ALPHA_FPTM_N:
3558           break;
3559         case ALPHA_FPTM_U:
3560           fputs ("v", file);
3561           break;
3562         case ALPHA_FPTM_SU:
3563         case ALPHA_FPTM_SUI:
3564           fputs ("sv", file);
3565           break;
3566         }
3567       break;
3568
3569     case '(':
3570       /* Generates trap-mode suffix for instructions that accept the
3571          v, sv, and svi suffix.  The only instruction that needs this
3572          is cvttq.  */
3573       switch (alpha_fptm)
3574         {
3575         case ALPHA_FPTM_N:
3576           break;
3577         case ALPHA_FPTM_U:
3578           fputs ("v", file);
3579           break;
3580         case ALPHA_FPTM_SU:
3581           fputs ("sv", file);
3582           break;
3583         case ALPHA_FPTM_SUI:
3584           fputs ("svi", file);
3585           break;
3586         }
3587       break;
3588
3589     case ')':
3590       /* Generates trap-mode suffix for instructions that accept the u, su,
3591          and sui suffix.  This is the bulk of the IEEE floating point
3592          instructions (addt et al).  */
3593       switch (alpha_fptm)
3594         {
3595         case ALPHA_FPTM_N:
3596           break;
3597         case ALPHA_FPTM_U:
3598           fputc ('u', file);
3599           break;
3600         case ALPHA_FPTM_SU:
3601           fputs ("su", file);
3602           break;
3603         case ALPHA_FPTM_SUI:
3604           fputs ("sui", file);
3605           break;
3606         }
3607       break;
3608
3609     case '+':
3610       /* Generates trap-mode suffix for instructions that accept the sui
3611          suffix (cvtqt and cvtqs).  */
3612       switch (alpha_fptm)
3613         {
3614         case ALPHA_FPTM_N:
3615         case ALPHA_FPTM_U:
3616         case ALPHA_FPTM_SU:     /* cvtqt/cvtqs can't cause underflow */
3617           break;
3618         case ALPHA_FPTM_SUI:
3619           fputs ("sui", file);
3620           break;
3621         }
3622       break;
3623
3624     case ',':
3625       /* Generates single precision instruction suffix.  */
3626       fprintf (file, "%c", (TARGET_FLOAT_VAX ? 'f' : 's'));
3627       break;
3628
3629     case '-':
3630       /* Generates double precision instruction suffix.  */
3631       fprintf (file, "%c", (TARGET_FLOAT_VAX ? 'g' : 't'));
3632       break;
3633
3634     case 'r':
3635       /* If this operand is the constant zero, write it as "$31".  */
3636       if (GET_CODE (x) == REG)
3637         fprintf (file, "%s", reg_names[REGNO (x)]);
3638       else if (x == CONST0_RTX (GET_MODE (x)))
3639         fprintf (file, "$31");
3640       else
3641         output_operand_lossage ("invalid %%r value");
3642
3643       break;
3644
3645     case 'R':
3646       /* Similar, but for floating-point.  */
3647       if (GET_CODE (x) == REG)
3648         fprintf (file, "%s", reg_names[REGNO (x)]);
3649       else if (x == CONST0_RTX (GET_MODE (x)))
3650         fprintf (file, "$f31");
3651       else
3652         output_operand_lossage ("invalid %%R value");
3653
3654       break;
3655
3656     case 'N':
3657       /* Write the 1's complement of a constant.  */
3658       if (GET_CODE (x) != CONST_INT)
3659         output_operand_lossage ("invalid %%N value");
3660
3661       fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x));
3662       break;
3663
3664     case 'P':
3665       /* Write 1 << C, for a constant C.  */
3666       if (GET_CODE (x) != CONST_INT)
3667         output_operand_lossage ("invalid %%P value");
3668
3669       fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT) 1 << INTVAL (x));
3670       break;
3671
3672     case 'h':
3673       /* Write the high-order 16 bits of a constant, sign-extended.  */
3674       if (GET_CODE (x) != CONST_INT)
3675         output_operand_lossage ("invalid %%h value");
3676
3677       fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) >> 16);
3678       break;
3679
3680     case 'L':
3681       /* Write the low-order 16 bits of a constant, sign-extended.  */
3682       if (GET_CODE (x) != CONST_INT)
3683         output_operand_lossage ("invalid %%L value");
3684
3685       fprintf (file, HOST_WIDE_INT_PRINT_DEC,
3686                (INTVAL (x) & 0xffff) - 2 * (INTVAL (x) & 0x8000));
3687       break;
3688
3689     case 'm':
3690       /* Write mask for ZAP insn.  */
3691       if (GET_CODE (x) == CONST_DOUBLE)
3692         {
3693           HOST_WIDE_INT mask = 0;
3694           HOST_WIDE_INT value;
3695
3696           value = CONST_DOUBLE_LOW (x);
3697           for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR;
3698                i++, value >>= 8)
3699             if (value & 0xff)
3700               mask |= (1 << i);
3701
3702           value = CONST_DOUBLE_HIGH (x);
3703           for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR;
3704                i++, value >>= 8)
3705             if (value & 0xff)
3706               mask |= (1 << (i + sizeof (int)));
3707
3708           fprintf (file, HOST_WIDE_INT_PRINT_DEC, mask & 0xff);
3709         }
3710
3711       else if (GET_CODE (x) == CONST_INT)
3712         {
3713           HOST_WIDE_INT mask = 0, value = INTVAL (x);
3714
3715           for (i = 0; i < 8; i++, value >>= 8)
3716             if (value & 0xff)
3717               mask |= (1 << i);
3718
3719           fprintf (file, HOST_WIDE_INT_PRINT_DEC, mask);
3720         }
3721       else
3722         output_operand_lossage ("invalid %%m value");
3723       break;
3724
3725     case 'M':
3726       /* 'b', 'w', 'l', or 'q' as the value of the constant.  */
3727       if (GET_CODE (x) != CONST_INT
3728           || (INTVAL (x) != 8 && INTVAL (x) != 16
3729               && INTVAL (x) != 32 && INTVAL (x) != 64))
3730         output_operand_lossage ("invalid %%M value");
3731
3732       fprintf (file, "%s",
3733                (INTVAL (x) == 8 ? "b"
3734                 : INTVAL (x) == 16 ? "w"
3735                 : INTVAL (x) == 32 ? "l"
3736                 : "q"));
3737       break;
3738
3739     case 'U':
3740       /* Similar, except do it from the mask.  */
3741       if (GET_CODE (x) == CONST_INT && INTVAL (x) == 0xff)
3742         fprintf (file, "b");
3743       else if (GET_CODE (x) == CONST_INT && INTVAL (x) == 0xffff)
3744         fprintf (file, "w");
3745       else if (GET_CODE (x) == CONST_INT && INTVAL (x) == 0xffffffff)
3746         fprintf (file, "l");
3747 #if HOST_BITS_PER_WIDE_INT == 32
3748       else if (GET_CODE (x) == CONST_DOUBLE
3749                && CONST_DOUBLE_HIGH (x) == 0
3750                && CONST_DOUBLE_LOW (x) == -1)
3751         fprintf (file, "l");
3752       else if (GET_CODE (x) == CONST_DOUBLE
3753                && CONST_DOUBLE_HIGH (x) == -1
3754                && CONST_DOUBLE_LOW (x) == -1)
3755         fprintf (file, "q");
3756 #else
3757       else if (GET_CODE (x) == CONST_INT && INTVAL (x) == -1)
3758         fprintf (file, "q");
3759       else if (GET_CODE (x) == CONST_DOUBLE
3760                && CONST_DOUBLE_HIGH (x) == 0
3761                && CONST_DOUBLE_LOW (x) == -1)
3762         fprintf (file, "q");
3763 #endif
3764       else
3765         output_operand_lossage ("invalid %%U value");
3766       break;
3767
3768     case 's':
3769       /* Write the constant value divided by 8.  */
3770       if (GET_CODE (x) != CONST_INT
3771           && (unsigned HOST_WIDE_INT) INTVAL (x) >= 64
3772           && (INTVAL (x) & 7) != 8)
3773         output_operand_lossage ("invalid %%s value");
3774
3775       fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) / 8);
3776       break;
3777
3778     case 'S':
3779       /* Same, except compute (64 - c) / 8 */
3780
3781       if (GET_CODE (x) != CONST_INT
3782           && (unsigned HOST_WIDE_INT) INTVAL (x) >= 64
3783           && (INTVAL (x) & 7) != 8)
3784         output_operand_lossage ("invalid %%s value");
3785
3786       fprintf (file, HOST_WIDE_INT_PRINT_DEC, (64 - INTVAL (x)) / 8);
3787       break;
3788
3789     case 'C': case 'D': case 'c': case 'd':
3790       /* Write out comparison name.  */
3791       {
3792         enum rtx_code c = GET_CODE (x);
3793
3794         if (GET_RTX_CLASS (c) != '<')
3795           output_operand_lossage ("invalid %%C value");
3796
3797         else if (code == 'D')
3798           c = reverse_condition (c);
3799         else if (code == 'c')
3800           c = swap_condition (c);
3801         else if (code == 'd')
3802           c = swap_condition (reverse_condition (c));
3803
3804         if (c == LEU)
3805           fprintf (file, "ule");
3806         else if (c == LTU)
3807           fprintf (file, "ult");
3808         else if (c == UNORDERED)
3809           fprintf (file, "un");
3810         else
3811           fprintf (file, "%s", GET_RTX_NAME (c));
3812       }
3813       break;
3814
3815     case 'E':
3816       /* Write the divide or modulus operator.  */
3817       switch (GET_CODE (x))
3818         {
3819         case DIV:
3820           fprintf (file, "div%s", GET_MODE (x) == SImode ? "l" : "q");
3821           break;
3822         case UDIV:
3823           fprintf (file, "div%su", GET_MODE (x) == SImode ? "l" : "q");
3824           break;
3825         case MOD:
3826           fprintf (file, "rem%s", GET_MODE (x) == SImode ? "l" : "q");
3827           break;
3828         case UMOD:
3829           fprintf (file, "rem%su", GET_MODE (x) == SImode ? "l" : "q");
3830           break;
3831         default:
3832           output_operand_lossage ("invalid %%E value");
3833           break;
3834         }
3835       break;
3836
3837     case 'A':
3838       /* Write "_u" for unaligned access.  */
3839       if (GET_CODE (x) == MEM && GET_CODE (XEXP (x, 0)) == AND)
3840         fprintf (file, "_u");
3841       break;
3842
3843     case 0:
3844       if (GET_CODE (x) == REG)
3845         fprintf (file, "%s", reg_names[REGNO (x)]);
3846       else if (GET_CODE (x) == MEM)
3847         output_address (XEXP (x, 0));
3848       else
3849         output_addr_const (file, x);
3850       break;
3851
3852     default:
3853       output_operand_lossage ("invalid %%xn code");
3854     }
3855 }
3856
3857 void
3858 print_operand_address (file, addr)
3859     FILE *file;
3860      rtx addr;
3861 {
3862   int basereg = 31;
3863   HOST_WIDE_INT offset = 0;
3864
3865   if (GET_CODE (addr) == AND)
3866     addr = XEXP (addr, 0);
3867
3868   if (GET_CODE (addr) == PLUS
3869       && GET_CODE (XEXP (addr, 1)) == CONST_INT)
3870     {
3871       offset = INTVAL (XEXP (addr, 1));
3872       addr = XEXP (addr, 0);
3873     }
3874   if (GET_CODE (addr) == REG)
3875     basereg = REGNO (addr);
3876   else if (GET_CODE (addr) == SUBREG
3877            && GET_CODE (SUBREG_REG (addr)) == REG)
3878     basereg = REGNO (SUBREG_REG (addr)) + SUBREG_WORD (addr);
3879   else if (GET_CODE (addr) == CONST_INT)
3880     offset = INTVAL (addr);
3881   else
3882     abort ();
3883
3884   fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset);
3885   fprintf (file, "($%d)", basereg);
3886 }
3887 \f
3888 /* Emit RTL insns to initialize the variable parts of a trampoline at
3889    TRAMP. FNADDR is an RTX for the address of the function's pure
3890    code.  CXT is an RTX for the static chain value for the function.
3891
3892    The three offset parameters are for the individual template's
3893    layout.  A JMPOFS < 0 indicates that the trampoline does not
3894    contain instructions at all.
3895
3896    We assume here that a function will be called many more times than
3897    its address is taken (e.g., it might be passed to qsort), so we
3898    take the trouble to initialize the "hint" field in the JMP insn.
3899    Note that the hint field is PC (new) + 4 * bits 13:0.  */
3900
3901 void
3902 alpha_initialize_trampoline (tramp, fnaddr, cxt, fnofs, cxtofs, jmpofs)
3903      rtx tramp, fnaddr, cxt;
3904      int fnofs, cxtofs, jmpofs;
3905 {
3906   rtx temp, temp1, addr;
3907   /* VMS really uses DImode pointers in memory at this point.  */
3908   enum machine_mode mode = TARGET_OPEN_VMS ? Pmode : ptr_mode;
3909
3910 #ifdef POINTERS_EXTEND_UNSIGNED
3911   fnaddr = convert_memory_address (mode, fnaddr);
3912   cxt = convert_memory_address (mode, cxt);
3913 #endif
3914
3915   /* Store function address and CXT.  */
3916   addr = memory_address (mode, plus_constant (tramp, fnofs));
3917   emit_move_insn (gen_rtx_MEM (mode, addr), fnaddr);
3918   addr = memory_address (mode, plus_constant (tramp, cxtofs));
3919   emit_move_insn (gen_rtx_MEM (mode, addr), cxt);
3920
3921   /* This has been disabled since the hint only has a 32k range, and in
3922      no existing OS is the stack within 32k of the text segment. */
3923   if (0 && jmpofs >= 0)
3924     {
3925       /* Compute hint value.  */
3926       temp = force_operand (plus_constant (tramp, jmpofs+4), NULL_RTX);
3927       temp = expand_binop (DImode, sub_optab, fnaddr, temp, temp, 1,
3928                            OPTAB_WIDEN);
3929       temp = expand_shift (RSHIFT_EXPR, Pmode, temp,
3930                            build_int_2 (2, 0), NULL_RTX, 1);
3931       temp = expand_and (gen_lowpart (SImode, temp), GEN_INT (0x3fff), 0);
3932
3933       /* Merge in the hint.  */
3934       addr = memory_address (SImode, plus_constant (tramp, jmpofs));
3935       temp1 = force_reg (SImode, gen_rtx_MEM (SImode, addr));
3936       temp1 = expand_and (temp1, GEN_INT (0xffffc000), NULL_RTX);
3937       temp1 = expand_binop (SImode, ior_optab, temp1, temp, temp1, 1,
3938                             OPTAB_WIDEN);
3939       emit_move_insn (gen_rtx_MEM (SImode, addr), temp1);
3940     }
3941
3942 #ifdef TRANSFER_FROM_TRAMPOLINE
3943   emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
3944                      0, VOIDmode, 1, addr, Pmode);
3945 #endif
3946
3947   if (jmpofs >= 0)
3948     emit_insn (gen_imb ());
3949 }
3950 \f
3951 /* Determine where to put an argument to a function.
3952    Value is zero to push the argument on the stack,
3953    or a hard register in which to store the argument.
3954
3955    MODE is the argument's machine mode.
3956    TYPE is the data type of the argument (as a tree).
3957     This is null for libcalls where that information may
3958     not be available.
3959    CUM is a variable of type CUMULATIVE_ARGS which gives info about
3960     the preceding args and about the function being called.
3961    NAMED is nonzero if this argument is a named parameter
3962     (otherwise it is an extra parameter matching an ellipsis).
3963
3964    On Alpha the first 6 words of args are normally in registers
3965    and the rest are pushed.  */
3966
3967 rtx
3968 function_arg (cum, mode, type, named)
3969      CUMULATIVE_ARGS cum;
3970      enum machine_mode mode;
3971      tree type;
3972      int named ATTRIBUTE_UNUSED;
3973 {
3974   int basereg;
3975   int num_args;
3976
3977 #ifndef OPEN_VMS
3978   if (cum >= 6)
3979     return NULL_RTX;
3980   num_args = cum;
3981
3982   /* VOID is passed as a special flag for "last argument".  */
3983   if (type == void_type_node)
3984     basereg = 16;
3985   else if (MUST_PASS_IN_STACK (mode, type))
3986     return NULL_RTX;
3987   else if (FUNCTION_ARG_PASS_BY_REFERENCE (cum, mode, type, named))
3988     basereg = 16;
3989 #else
3990   if (mode == VOIDmode)
3991     return alpha_arg_info_reg_val (cum);
3992
3993   num_args = cum.num_args;
3994   if (num_args >= 6 || MUST_PASS_IN_STACK (mode, type))
3995     return NULL_RTX;
3996 #endif /* OPEN_VMS */
3997   else if (TARGET_FPREGS
3998            && (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
3999                || GET_MODE_CLASS (mode) == MODE_FLOAT))
4000     basereg = 32 + 16;
4001   else
4002     basereg = 16;
4003
4004   return gen_rtx_REG (mode, num_args + basereg);
4005 }
4006
4007 tree
4008 alpha_build_va_list ()
4009 {
4010   tree base, ofs, record, type_decl;
4011
4012   if (TARGET_OPEN_VMS)
4013     return ptr_type_node;
4014
4015   record = make_lang_type (RECORD_TYPE);
4016   type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
4017   TREE_CHAIN (record) = type_decl;
4018   TYPE_NAME (record) = type_decl;
4019
4020   /* C++? SET_IS_AGGR_TYPE (record, 1); */
4021
4022   ofs = build_decl (FIELD_DECL, get_identifier ("__offset"),
4023                     integer_type_node);
4024   DECL_FIELD_CONTEXT (ofs) = record;
4025
4026   base = build_decl (FIELD_DECL, get_identifier ("__base"),
4027                      ptr_type_node);
4028   DECL_FIELD_CONTEXT (base) = record;
4029   TREE_CHAIN (base) = ofs;
4030
4031   TYPE_FIELDS (record) = base;
4032   layout_type (record);
4033
4034   return record;
4035 }
4036
4037 void
4038 alpha_va_start (stdarg_p, valist, nextarg)
4039      int stdarg_p;
4040      tree valist;
4041      rtx nextarg ATTRIBUTE_UNUSED;
4042 {
4043   HOST_WIDE_INT offset;
4044   tree t, offset_field, base_field;
4045
4046   if (TREE_CODE (TREE_TYPE (valist)) == ERROR_MARK)
4047     return;
4048
4049   if (TARGET_OPEN_VMS)
4050     std_expand_builtin_va_start (stdarg_p, valist, nextarg);
4051
4052   /* For Unix, SETUP_INCOMING_VARARGS moves the starting address base
4053      up by 48, storing fp arg registers in the first 48 bytes, and the
4054      integer arg registers in the next 48 bytes.  This is only done,
4055      however, if any integer registers need to be stored.
4056
4057      If no integer registers need be stored, then we must subtract 48
4058      in order to account for the integer arg registers which are counted
4059      in argsize above, but which are not actually stored on the stack.  */
4060
4061   if (NUM_ARGS <= 5 + stdarg_p)
4062     offset = 6 * UNITS_PER_WORD;
4063   else
4064     offset = -6 * UNITS_PER_WORD;
4065
4066   base_field = TYPE_FIELDS (TREE_TYPE (valist));
4067   offset_field = TREE_CHAIN (base_field);
4068
4069   base_field = build (COMPONENT_REF, TREE_TYPE (base_field),
4070                       valist, base_field);
4071   offset_field = build (COMPONENT_REF, TREE_TYPE (offset_field),
4072                         valist, offset_field);
4073
4074   t = make_tree (ptr_type_node, virtual_incoming_args_rtx);
4075   t = build (PLUS_EXPR, ptr_type_node, t, build_int_2 (offset, 0));
4076   t = build (MODIFY_EXPR, TREE_TYPE (base_field), base_field, t);
4077   TREE_SIDE_EFFECTS (t) = 1;
4078   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4079
4080   t = build_int_2 (NUM_ARGS*UNITS_PER_WORD, 0);
4081   t = build (MODIFY_EXPR, TREE_TYPE (offset_field), offset_field, t);
4082   TREE_SIDE_EFFECTS (t) = 1;
4083   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4084 }
4085
4086 rtx
4087 alpha_va_arg (valist, type)
4088      tree valist, type;
4089 {
4090   HOST_WIDE_INT tsize;
4091   rtx addr;
4092   tree t;
4093   tree offset_field, base_field, addr_tree, addend;
4094   tree wide_type, wide_ofs;
4095
4096   if (TARGET_OPEN_VMS)
4097     return std_expand_builtin_va_arg (valist, type);
4098
4099   tsize = ((TREE_INT_CST_LOW (TYPE_SIZE (type)) / BITS_PER_UNIT + 7) / 8) * 8;
4100
4101   base_field = TYPE_FIELDS (TREE_TYPE (valist));
4102   offset_field = TREE_CHAIN (base_field);
4103
4104   base_field = build (COMPONENT_REF, TREE_TYPE (base_field),
4105                       valist, base_field);
4106   offset_field = build (COMPONENT_REF, TREE_TYPE (offset_field),
4107                         valist, offset_field);
4108
4109   wide_type = make_signed_type (64);
4110   wide_ofs = save_expr (build1 (CONVERT_EXPR, wide_type, offset_field));
4111
4112   addend = wide_ofs;
4113   if (FLOAT_TYPE_P (type))
4114     {
4115       tree fpaddend, cond;
4116
4117       fpaddend = fold (build (PLUS_EXPR, TREE_TYPE (addend),
4118                               addend, build_int_2 (-6*8, 0)));
4119
4120       cond = fold (build (LT_EXPR, integer_type_node,
4121                           wide_ofs, build_int_2 (6*8, 0)));
4122
4123       addend = fold (build (COND_EXPR, TREE_TYPE (addend), cond,
4124                             fpaddend, addend));
4125     }
4126
4127   addr_tree = build (PLUS_EXPR, TREE_TYPE (base_field),
4128                      base_field, addend);
4129
4130   addr = expand_expr (addr_tree, NULL_RTX, Pmode, EXPAND_NORMAL);
4131   addr = copy_to_reg (addr);
4132
4133   t = build (MODIFY_EXPR, TREE_TYPE (offset_field), offset_field,
4134              build (PLUS_EXPR, TREE_TYPE (offset_field),
4135                     offset_field, build_int_2 (tsize, 0)));
4136   TREE_SIDE_EFFECTS (t) = 1;
4137   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4138
4139   return addr;
4140 }
4141 \f
4142 /* This page contains routines that are used to determine what the function
4143    prologue and epilogue code will do and write them out.  */
4144
4145 /* Compute the size of the save area in the stack.  */
4146
4147 /* These variables are used for communication between the following functions.
4148    They indicate various things about the current function being compiled
4149    that are used to tell what kind of prologue, epilogue and procedure
4150    descriptor to generate.  */
4151
4152 /* Nonzero if we need a stack procedure.  Set by alpha_sa_size.  */
4153 static int vms_is_stack_procedure;
4154
4155 /* Register number (FP or SP) used to unwind the frame.  Set by alpha_sa_size.  */
4156 static int vms_unwind_regno;
4157
4158 /* Register number used to save FP, or -1 if alpha_sa_size found none.
4159    We need not have one for RA since we don't modify it for register
4160    procedures.  This is only defined for register frame procedures.  */
4161 static int vms_save_fp_regno;
4162
4163 /* Register number (PV or FP) used to reference objects off our PV.  */
4164 static int vms_base_regno;
4165
4166 /* Compute register masks for saved registers.  */
4167
4168 static void
4169 alpha_sa_mask (imaskP, fmaskP)
4170     unsigned long *imaskP;
4171     unsigned long *fmaskP;
4172 {
4173   unsigned long imask = 0;
4174   unsigned long fmask = 0;
4175   int i;
4176
4177 #ifdef ASM_OUTPUT_MI_THUNK
4178   if (!current_function_is_thunk)
4179 #endif
4180     {
4181       if (TARGET_OPEN_VMS && vms_is_stack_procedure)
4182         imask |= (1L << HARD_FRAME_POINTER_REGNUM);
4183
4184       /* One for every register we have to save.  */
4185       for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4186         if (! fixed_regs[i] && ! call_used_regs[i]
4187             && regs_ever_live[i] && i != REG_RA)
4188           {
4189             if (i < 32)
4190               imask |= (1L << i);
4191             else
4192               fmask |= (1L << (i - 32));
4193           }
4194
4195       if (imask || fmask || alpha_ra_ever_killed ())
4196         imask |= (1L << REG_RA);
4197     }
4198
4199   *imaskP = imask;
4200   *fmaskP = fmask;
4201 }
4202
4203 int
4204 alpha_sa_size ()
4205 {
4206   int sa_size = 0;
4207   int i;
4208
4209 #ifdef ASM_OUTPUT_MI_THUNK
4210   if (current_function_is_thunk)
4211     sa_size = 0;
4212   else
4213 #endif
4214     {
4215       /* One for every register we have to save.  */
4216       for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4217         if (! fixed_regs[i] && ! call_used_regs[i]
4218             && regs_ever_live[i] && i != REG_RA)
4219           sa_size++;
4220     }
4221
4222   if (TARGET_OPEN_VMS)
4223     {
4224       /* Start by assuming we can use a register procedure if we don't
4225          make any calls (REG_RA not used) or need to save any
4226          registers and a stack procedure if we do.  */
4227       vms_is_stack_procedure = sa_size != 0 || alpha_ra_ever_killed ();
4228
4229       /* Decide whether to refer to objects off our PV via FP or PV.
4230          If we need FP for something else or if we receive a nonlocal
4231          goto (which expects PV to contain the value), we must use PV.
4232          Otherwise, start by assuming we can use FP.  */
4233       vms_base_regno = (frame_pointer_needed
4234                         || current_function_has_nonlocal_label
4235                         || vms_is_stack_procedure
4236                         || current_function_outgoing_args_size
4237                         ? REG_PV : HARD_FRAME_POINTER_REGNUM);
4238
4239       /* If we want to copy PV into FP, we need to find some register
4240          in which to save FP.  */
4241
4242       vms_save_fp_regno = -1;
4243       if (vms_base_regno == HARD_FRAME_POINTER_REGNUM)
4244         for (i = 0; i < 32; i++)
4245           if (! fixed_regs[i] && call_used_regs[i] && ! regs_ever_live[i])
4246             vms_save_fp_regno = i;
4247
4248       if (vms_save_fp_regno == -1)
4249         vms_base_regno = REG_PV, vms_is_stack_procedure = 1;
4250
4251       /* Stack unwinding should be done via FP unless we use it for PV.  */
4252       vms_unwind_regno = (vms_base_regno == REG_PV
4253                           ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);
4254
4255       /* If this is a stack procedure, allow space for saving FP and RA.  */
4256       if (vms_is_stack_procedure)
4257         sa_size += 2;
4258     }
4259   else
4260     {
4261       /* If some registers were saved but not RA, RA must also be saved,
4262          so leave space for it.  */
4263       if (sa_size != 0 || alpha_ra_ever_killed ())
4264         sa_size++;
4265
4266       /* Our size must be even (multiple of 16 bytes).  */
4267       if (sa_size & 1)
4268         sa_size++;
4269     }
4270
4271   return sa_size * 8;
4272 }
4273
4274 int
4275 alpha_pv_save_size ()
4276 {
4277   alpha_sa_size ();
4278   return vms_is_stack_procedure ? 8 : 0;
4279 }
4280
4281 int
4282 alpha_using_fp ()
4283 {
4284   alpha_sa_size ();
4285   return vms_unwind_regno == HARD_FRAME_POINTER_REGNUM;
4286 }
4287
4288 int
4289 vms_valid_decl_attribute_p (decl, attributes, identifier, args)
4290      tree decl ATTRIBUTE_UNUSED;
4291      tree attributes ATTRIBUTE_UNUSED;
4292      tree identifier;
4293      tree args;
4294 {
4295   if (is_attribute_p ("overlaid", identifier))
4296     return (args == NULL_TREE);
4297   return 0;
4298 }
4299
4300 static int
4301 alpha_does_function_need_gp ()
4302 {
4303   rtx insn;
4304
4305   /* We never need a GP for Windows/NT or VMS.  */
4306   if (TARGET_WINDOWS_NT || TARGET_OPEN_VMS)
4307     return 0;
4308
4309   if (TARGET_PROFILING_NEEDS_GP && profile_flag)
4310     return 1;
4311
4312 #ifdef ASM_OUTPUT_MI_THUNK
4313   if (current_function_is_thunk)
4314     return 1;
4315 #endif
4316
4317   /* If we need a GP (we have a LDSYM insn or a CALL_INSN), load it first.
4318      Even if we are a static function, we still need to do this in case
4319      our address is taken and passed to something like qsort.  */
4320
4321   push_topmost_sequence ();
4322   insn = get_insns ();
4323   pop_topmost_sequence ();
4324
4325   for (; insn; insn = NEXT_INSN (insn))
4326     if (INSN_P (insn)
4327         && GET_CODE (PATTERN (insn)) != USE
4328         && GET_CODE (PATTERN (insn)) != CLOBBER)
4329       {
4330         enum attr_type type = get_attr_type (insn);
4331         if (type == TYPE_LDSYM || type == TYPE_JSR)
4332           return 1;
4333       }
4334
4335   return 0;
4336 }
4337
4338 /* Write a version stamp.  Don't write anything if we are running as a
4339    cross-compiler.  Otherwise, use the versions in /usr/include/stamp.h.  */
4340
4341 #ifdef HAVE_STAMP_H
4342 #include <stamp.h>
4343 #endif
4344
4345 void
4346 alpha_write_verstamp (file)
4347      FILE *file ATTRIBUTE_UNUSED;
4348 {
4349 #ifdef MS_STAMP
4350   fprintf (file, "\t.verstamp %d %d\n", MS_STAMP, LS_STAMP);
4351 #endif
4352 }
4353 \f
4354 /* Helper function to set RTX_FRAME_RELATED_P on instructions, including
4355    sequences.  */
4356
4357 static rtx
4358 set_frame_related_p ()
4359 {
4360   rtx seq = gen_sequence ();
4361   end_sequence ();
4362
4363   if (GET_CODE (seq) == SEQUENCE)
4364     {
4365       int i = XVECLEN (seq, 0);
4366       while (--i >= 0)
4367         RTX_FRAME_RELATED_P (XVECEXP (seq, 0, i)) = 1;
4368      return emit_insn (seq);
4369     }
4370   else
4371     {
4372       seq = emit_insn (seq);
4373       RTX_FRAME_RELATED_P (seq) = 1;
4374       return seq;
4375     }
4376 }
4377
4378 #define FRP(exp)  (start_sequence (), exp, set_frame_related_p ())
4379
4380 /* Write function prologue.  */
4381
4382 /* On vms we have two kinds of functions:
4383
4384    - stack frame (PROC_STACK)
4385         these are 'normal' functions with local vars and which are
4386         calling other functions
4387    - register frame (PROC_REGISTER)
4388         keeps all data in registers, needs no stack
4389
4390    We must pass this to the assembler so it can generate the
4391    proper pdsc (procedure descriptor)
4392    This is done with the '.pdesc' command.
4393
4394    On not-vms, we don't really differentiate between the two, as we can
4395    simply allocate stack without saving registers.  */
4396
4397 void
4398 alpha_expand_prologue ()
4399 {
4400   /* Registers to save.  */
4401   unsigned long imask = 0;
4402   unsigned long fmask = 0;
4403   /* Stack space needed for pushing registers clobbered by us.  */
4404   HOST_WIDE_INT sa_size;
4405   /* Complete stack size needed.  */
4406   HOST_WIDE_INT frame_size;
4407   /* Offset from base reg to register save area.  */
4408   HOST_WIDE_INT reg_offset;
4409   rtx sa_reg, mem;
4410   int i;
4411
4412   sa_size = alpha_sa_size ();
4413
4414   frame_size = get_frame_size ();
4415   if (TARGET_OPEN_VMS)
4416     frame_size = ALPHA_ROUND (sa_size
4417                               + (vms_is_stack_procedure ? 8 : 0)
4418                               + frame_size
4419                               + current_function_pretend_args_size);
4420   else
4421     frame_size = (ALPHA_ROUND (current_function_outgoing_args_size)
4422                   + sa_size
4423                   + ALPHA_ROUND (frame_size
4424                                  + current_function_pretend_args_size));
4425
4426   if (TARGET_OPEN_VMS)
4427     reg_offset = 8;
4428   else
4429     reg_offset = ALPHA_ROUND (current_function_outgoing_args_size);
4430
4431   alpha_sa_mask (&imask, &fmask);
4432
4433   /* Emit an insn to reload GP, if needed.  */
4434   if (!TARGET_OPEN_VMS && !TARGET_WINDOWS_NT)
4435     {
4436       alpha_function_needs_gp = alpha_does_function_need_gp ();
4437       if (alpha_function_needs_gp)
4438         emit_insn (gen_prologue_ldgp ());
4439     }
4440
4441   /* TARGET_PROFILING_NEEDS_GP actually implies that we need to insert
4442      the call to mcount ourselves, rather than having the linker do it
4443      magically in response to -pg.  Since _mcount has special linkage,
4444      don't represent the call as a call.  */
4445   if (TARGET_PROFILING_NEEDS_GP && profile_flag)
4446     emit_insn (gen_prologue_mcount ());
4447
4448   /* Adjust the stack by the frame size.  If the frame size is > 4096
4449      bytes, we need to be sure we probe somewhere in the first and last
4450      4096 bytes (we can probably get away without the latter test) and
4451      every 8192 bytes in between.  If the frame size is > 32768, we
4452      do this in a loop.  Otherwise, we generate the explicit probe
4453      instructions.
4454
4455      Note that we are only allowed to adjust sp once in the prologue.  */
4456
4457   if (frame_size <= 32768)
4458     {
4459       if (frame_size > 4096)
4460         {
4461           int probed = 4096;
4462
4463           do
4464             emit_insn (gen_probe_stack (GEN_INT (-probed)));
4465           while ((probed += 8192) < frame_size);
4466
4467           /* We only have to do this probe if we aren't saving registers.  */
4468           if (sa_size == 0 && probed + 4096 < frame_size)
4469             emit_insn (gen_probe_stack (GEN_INT (-frame_size)));
4470         }
4471
4472       if (frame_size != 0)
4473         {
4474           FRP (emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
4475                                       GEN_INT (-frame_size))));
4476         }
4477     }
4478   else
4479     {
4480       /* Here we generate code to set R22 to SP + 4096 and set R23 to the
4481          number of 8192 byte blocks to probe.  We then probe each block
4482          in the loop and then set SP to the proper location.  If the
4483          amount remaining is > 4096, we have to do one more probe if we
4484          are not saving any registers.  */
4485
4486       HOST_WIDE_INT blocks = (frame_size + 4096) / 8192;
4487       HOST_WIDE_INT leftover = frame_size + 4096 - blocks * 8192;
4488       rtx ptr = gen_rtx_REG (DImode, 22);
4489       rtx count = gen_rtx_REG (DImode, 23);
4490       rtx seq;
4491
4492       emit_move_insn (count, GEN_INT (blocks));
4493       emit_insn (gen_adddi3 (ptr, stack_pointer_rtx, GEN_INT (4096)));
4494
4495       /* Because of the difficulty in emitting a new basic block this
4496          late in the compilation, generate the loop as a single insn.  */
4497       emit_insn (gen_prologue_stack_probe_loop (count, ptr));
4498
4499       if (leftover > 4096 && sa_size == 0)
4500         {
4501           rtx last = gen_rtx_MEM (DImode, plus_constant (ptr, -leftover));
4502           MEM_VOLATILE_P (last) = 1;
4503           emit_move_insn (last, const0_rtx);
4504         }
4505
4506       if (TARGET_WINDOWS_NT)
4507         {
4508           /* For NT stack unwind (done by 'reverse execution'), it's
4509              not OK to take the result of a loop, even though the value
4510              is already in ptr, so we reload it via a single operation
4511              and subtract it to sp.
4512
4513              Yes, that's correct -- we have to reload the whole constant
4514              into a temporary via ldah+lda then subtract from sp.  To
4515              ensure we get ldah+lda, we use a special pattern.  */
4516
4517           HOST_WIDE_INT lo, hi;
4518           lo = ((frame_size & 0xffff) ^ 0x8000) - 0x8000;
4519           hi = frame_size - lo;
4520
4521           emit_move_insn (ptr, GEN_INT (hi));
4522           emit_insn (gen_nt_lda (ptr, GEN_INT (lo)));
4523           seq = emit_insn (gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx,
4524                                        ptr));
4525         }
4526       else
4527         {
4528           seq = emit_insn (gen_adddi3 (stack_pointer_rtx, ptr,
4529                                        GEN_INT (-leftover)));
4530         }
4531
4532       /* This alternative is special, because the DWARF code cannot
4533          possibly intuit through the loop above.  So we invent this
4534          note it looks at instead.  */
4535       RTX_FRAME_RELATED_P (seq) = 1;
4536       REG_NOTES (seq)
4537         = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
4538                              gen_rtx_SET (VOIDmode, stack_pointer_rtx,
4539                                gen_rtx_PLUS (Pmode, stack_pointer_rtx,
4540                                              GEN_INT (-frame_size))),
4541                              REG_NOTES (seq));
4542     }
4543
4544   /* Cope with very large offsets to the register save area.  */
4545   sa_reg = stack_pointer_rtx;
4546   if (reg_offset + sa_size > 0x8000)
4547     {
4548       int low = ((reg_offset & 0xffff) ^ 0x8000) - 0x8000;
4549       HOST_WIDE_INT bias;
4550
4551       if (low + sa_size <= 0x8000)
4552         bias = reg_offset - low, reg_offset = low;
4553       else
4554         bias = reg_offset, reg_offset = 0;
4555
4556       sa_reg = gen_rtx_REG (DImode, 24);
4557       FRP (emit_insn (gen_adddi3 (sa_reg, stack_pointer_rtx, GEN_INT (bias))));
4558     }
4559
4560   /* Save regs in stack order.  Beginning with VMS PV.  */
4561   if (TARGET_OPEN_VMS && vms_is_stack_procedure)
4562     {
4563       mem = gen_rtx_MEM (DImode, stack_pointer_rtx);
4564       MEM_ALIAS_SET (mem) = alpha_sr_alias_set;
4565       FRP (emit_move_insn (mem, gen_rtx_REG (DImode, REG_PV)));
4566     }
4567
4568   /* Save register RA next.  */
4569   if (imask & (1L << REG_RA))
4570     {
4571       mem = gen_rtx_MEM (DImode, plus_constant (sa_reg, reg_offset));
4572       MEM_ALIAS_SET (mem) = alpha_sr_alias_set;
4573       FRP (emit_move_insn (mem, gen_rtx_REG (DImode, REG_RA)));
4574       imask &= ~(1L << REG_RA);
4575       reg_offset += 8;
4576     }
4577
4578   /* Now save any other registers required to be saved.  */
4579   for (i = 0; i < 32; i++)
4580     if (imask & (1L << i))
4581       {
4582         mem = gen_rtx_MEM (DImode, plus_constant (sa_reg, reg_offset));
4583         MEM_ALIAS_SET (mem) = alpha_sr_alias_set;
4584         FRP (emit_move_insn (mem, gen_rtx_REG (DImode, i)));
4585         reg_offset += 8;
4586       }
4587
4588   for (i = 0; i < 32; i++)
4589     if (fmask & (1L << i))
4590       {
4591         mem = gen_rtx_MEM (DFmode, plus_constant (sa_reg, reg_offset));
4592         MEM_ALIAS_SET (mem) = alpha_sr_alias_set;
4593         FRP (emit_move_insn (mem, gen_rtx_REG (DFmode, i+32)));
4594         reg_offset += 8;
4595       }
4596
4597   if (TARGET_OPEN_VMS)
4598     {
4599       if (!vms_is_stack_procedure)
4600         {
4601           /* Register frame procedures fave the fp.  */
4602           FRP (emit_move_insn (gen_rtx_REG (DImode, vms_save_fp_regno),
4603                                hard_frame_pointer_rtx));
4604         }
4605
4606       if (vms_base_regno != REG_PV)
4607         FRP (emit_move_insn (gen_rtx_REG (DImode, vms_base_regno),
4608                              gen_rtx_REG (DImode, REG_PV)));
4609
4610       if (vms_unwind_regno == HARD_FRAME_POINTER_REGNUM)
4611         {
4612           FRP (emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx));
4613         }
4614
4615       /* If we have to allocate space for outgoing args, do it now.  */
4616       if (current_function_outgoing_args_size != 0)
4617         {
4618           FRP (emit_move_insn (stack_pointer_rtx,
4619                 plus_constant (hard_frame_pointer_rtx,
4620                  - ALPHA_ROUND (current_function_outgoing_args_size))));
4621         }
4622     }
4623   else
4624     {
4625       /* If we need a frame pointer, set it from the stack pointer.  */
4626       if (frame_pointer_needed)
4627         {
4628           if (TARGET_CAN_FAULT_IN_PROLOGUE)
4629             FRP (emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx));
4630           else
4631             {
4632               /* This must always be the last instruction in the
4633                  prologue, thus we emit a special move + clobber.  */
4634               FRP (emit_insn (gen_init_fp (hard_frame_pointer_rtx,
4635                                            stack_pointer_rtx, sa_reg)));
4636             }
4637         }
4638     }
4639
4640   /* The ABIs for VMS and OSF/1 say that while we can schedule insns into
4641      the prologue, for exception handling reasons, we cannot do this for
4642      any insn that might fault.  We could prevent this for mems with a
4643      (clobber:BLK (scratch)), but this doesn't work for fp insns.  So we
4644      have to prevent all such scheduling with a blockage.
4645
4646      Linux, on the other hand, never bothered to implement OSF/1's
4647      exception handling, and so doesn't care about such things.  Anyone
4648      planning to use dwarf2 frame-unwind info can also omit the blockage.  */
4649
4650   if (! TARGET_CAN_FAULT_IN_PROLOGUE)
4651     emit_insn (gen_blockage ());
4652 }
4653
4654 /* Output the textual info surrounding the prologue.  */
4655
4656 void
4657 alpha_start_function (file, fnname, decl)
4658      FILE *file;
4659      const char *fnname;
4660      tree decl ATTRIBUTE_UNUSED;
4661 {
4662   unsigned long imask = 0;
4663   unsigned long fmask = 0;
4664   /* Stack space needed for pushing registers clobbered by us.  */
4665   HOST_WIDE_INT sa_size;
4666   /* Complete stack size needed.  */
4667   HOST_WIDE_INT frame_size;
4668   /* Offset from base reg to register save area.  */
4669   HOST_WIDE_INT reg_offset;
4670   char *entry_label = (char *) alloca (strlen (fnname) + 6);
4671   int i;
4672
4673   alpha_fnname = fnname;
4674   sa_size = alpha_sa_size ();
4675
4676   frame_size = get_frame_size ();
4677   if (TARGET_OPEN_VMS)
4678     frame_size = ALPHA_ROUND (sa_size
4679                               + (vms_is_stack_procedure ? 8 : 0)
4680                               + frame_size
4681                               + current_function_pretend_args_size);
4682   else
4683     frame_size = (ALPHA_ROUND (current_function_outgoing_args_size)
4684                   + sa_size
4685                   + ALPHA_ROUND (frame_size
4686                                  + current_function_pretend_args_size));
4687
4688   if (TARGET_OPEN_VMS)
4689     reg_offset = 8;
4690   else
4691     reg_offset = ALPHA_ROUND (current_function_outgoing_args_size);
4692
4693   alpha_sa_mask (&imask, &fmask);
4694
4695   /* Ecoff can handle multiple .file directives, so put out file and lineno.
4696      We have to do that before the .ent directive as we cannot switch
4697      files within procedures with native ecoff because line numbers are
4698      linked to procedure descriptors.
4699      Outputting the lineno helps debugging of one line functions as they
4700      would otherwise get no line number at all. Please note that we would
4701      like to put out last_linenum from final.c, but it is not accessible.  */
4702
4703   if (write_symbols == SDB_DEBUG)
4704     {
4705       ASM_OUTPUT_SOURCE_FILENAME (file,
4706                                   DECL_SOURCE_FILE (current_function_decl));
4707       if (debug_info_level != DINFO_LEVEL_TERSE)
4708         ASM_OUTPUT_SOURCE_LINE (file,
4709                                 DECL_SOURCE_LINE (current_function_decl));
4710     }
4711
4712   /* Issue function start and label.  */
4713   if (TARGET_OPEN_VMS || !flag_inhibit_size_directive)
4714     {
4715       fputs ("\t.ent ", file);
4716       assemble_name (file, fnname);
4717       putc ('\n', file);
4718
4719       /* If the function needs GP, we'll write the "..ng" label there.
4720          Otherwise, do it here.  */
4721       if (! TARGET_OPEN_VMS && ! TARGET_WINDOWS_NT
4722           && ! alpha_function_needs_gp)
4723         {
4724           putc ('$', file);
4725           assemble_name (file, fnname);
4726           fputs ("..ng:\n", file);
4727         }
4728     }
4729
4730   strcpy (entry_label, fnname);
4731   if (TARGET_OPEN_VMS)
4732     strcat (entry_label, "..en");
4733   ASM_OUTPUT_LABEL (file, entry_label);
4734   inside_function = TRUE;
4735
4736   if (TARGET_OPEN_VMS)
4737     fprintf (file, "\t.base $%d\n", vms_base_regno);
4738
4739   if (!TARGET_OPEN_VMS && TARGET_IEEE_CONFORMANT
4740       && !flag_inhibit_size_directive)
4741     {
4742       /* Set flags in procedure descriptor to request IEEE-conformant
4743          math-library routines.  The value we set it to is PDSC_EXC_IEEE
4744          (/usr/include/pdsc.h). */
4745       fputs ("\t.eflag 48\n", file);
4746     }
4747
4748   /* Set up offsets to alpha virtual arg/local debugging pointer.  */
4749   alpha_auto_offset = -frame_size + current_function_pretend_args_size;
4750   alpha_arg_offset = -frame_size + 48;
4751
4752   /* Describe our frame.  If the frame size is larger than an integer,
4753      print it as zero to avoid an assembler error.  We won't be
4754      properly describing such a frame, but that's the best we can do.  */
4755   if (TARGET_OPEN_VMS)
4756     {
4757       fprintf (file, "\t.frame $%d,", vms_unwind_regno);
4758       fprintf (file, HOST_WIDE_INT_PRINT_DEC,
4759                frame_size >= (1l << 31) ? 0 : frame_size);
4760       fputs (",$26,", file);
4761       fprintf (file, HOST_WIDE_INT_PRINT_DEC, reg_offset);
4762       fputs ("\n", file);
4763     }
4764   else if (!flag_inhibit_size_directive)
4765     {
4766       fprintf (file, "\t.frame $%d,",
4767                (frame_pointer_needed
4768                 ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM));
4769       fprintf (file, HOST_WIDE_INT_PRINT_DEC,
4770                frame_size >= (1l << 31) ? 0 : frame_size);
4771       fprintf (file, ",$26,%d\n", current_function_pretend_args_size);
4772     }
4773
4774   /* Describe which registers were spilled.  */
4775   if (TARGET_OPEN_VMS)
4776     {
4777       if (imask)
4778         /* ??? Does VMS care if mask contains ra?  The old code did'nt
4779            set it, so I don't here.  */
4780         fprintf (file, "\t.mask 0x%lx,0\n", imask & ~(1L << REG_RA));
4781       if (fmask)
4782         fprintf (file, "\t.fmask 0x%lx,0\n", fmask);
4783       if (!vms_is_stack_procedure)
4784         fprintf (file, "\t.fp_save $%d\n", vms_save_fp_regno);
4785     }
4786   else if (!flag_inhibit_size_directive)
4787     {
4788       if (imask)
4789         {
4790           fprintf (file, "\t.mask 0x%lx,", imask);
4791           fprintf (file, HOST_WIDE_INT_PRINT_DEC,
4792                    frame_size >= (1l << 31) ? 0 : reg_offset - frame_size);
4793           putc ('\n', file);
4794
4795           for (i = 0; i < 32; ++i)
4796             if (imask & (1L << i))
4797               reg_offset += 8;
4798         }
4799
4800       if (fmask)
4801         {
4802           fprintf (file, "\t.fmask 0x%lx,", fmask);
4803           fprintf (file, HOST_WIDE_INT_PRINT_DEC,
4804                    frame_size >= (1l << 31) ? 0 : reg_offset - frame_size);
4805           putc ('\n', file);
4806         }
4807     }
4808
4809 #ifdef OPEN_VMS
4810   /* Ifdef'ed cause readonly_section and link_section are only
4811      available then.  */
4812   readonly_section ();
4813   fprintf (file, "\t.align 3\n");
4814   assemble_name (file, fnname); fputs ("..na:\n", file);
4815   fputs ("\t.ascii \"", file);
4816   assemble_name (file, fnname);
4817   fputs ("\\0\"\n", file);
4818
4819   link_section ();
4820   fprintf (file, "\t.align 3\n");
4821   fputs ("\t.name ", file);
4822   assemble_name (file, fnname);
4823   fputs ("..na\n", file);
4824   ASM_OUTPUT_LABEL (file, fnname);
4825   fprintf (file, "\t.pdesc ");
4826   assemble_name (file, fnname);
4827   fprintf (file, "..en,%s\n", vms_is_stack_procedure ? "stack" : "reg");
4828   alpha_need_linkage (fnname, 1);
4829   text_section ();
4830 #endif
4831 }
4832
4833 /* Emit the .prologue note at the scheduled end of the prologue.  */
4834
4835 void
4836 output_end_prologue (file)
4837      FILE *file;
4838 {
4839   if (TARGET_OPEN_VMS)
4840     fputs ("\t.prologue\n", file);
4841   else if (TARGET_WINDOWS_NT)
4842     fputs ("\t.prologue 0\n", file);
4843   else if (!flag_inhibit_size_directive)
4844     fprintf (file, "\t.prologue %d\n", alpha_function_needs_gp);
4845 }
4846
4847 /* Write function epilogue.  */
4848
4849 /* ??? At some point we will want to support full unwind, and so will
4850    need to mark the epilogue as well.  At the moment, we just confuse
4851    dwarf2out.  */
4852 #undef FRP
4853 #define FRP(exp) exp
4854
4855 void
4856 alpha_expand_epilogue ()
4857 {
4858   /* Registers to save.  */
4859   unsigned long imask = 0;
4860   unsigned long fmask = 0;
4861   /* Stack space needed for pushing registers clobbered by us.  */
4862   HOST_WIDE_INT sa_size;
4863   /* Complete stack size needed.  */
4864   HOST_WIDE_INT frame_size;
4865   /* Offset from base reg to register save area.  */
4866   HOST_WIDE_INT reg_offset;
4867   int fp_is_frame_pointer, fp_offset;
4868   rtx sa_reg, sa_reg_exp = NULL;
4869   rtx sp_adj1, sp_adj2, mem;
4870   rtx eh_ofs;
4871   int i;
4872
4873   sa_size = alpha_sa_size ();
4874
4875   frame_size = get_frame_size ();
4876   if (TARGET_OPEN_VMS)
4877     frame_size = ALPHA_ROUND (sa_size
4878                               + (vms_is_stack_procedure ? 8 : 0)
4879                               + frame_size
4880                               + current_function_pretend_args_size);
4881   else
4882     frame_size = (ALPHA_ROUND (current_function_outgoing_args_size)
4883                   + sa_size
4884                   + ALPHA_ROUND (frame_size
4885                                  + current_function_pretend_args_size));
4886
4887   if (TARGET_OPEN_VMS)
4888     reg_offset = 8;
4889   else
4890     reg_offset = ALPHA_ROUND (current_function_outgoing_args_size);
4891
4892   alpha_sa_mask (&imask, &fmask);
4893
4894   fp_is_frame_pointer = ((TARGET_OPEN_VMS && vms_is_stack_procedure)
4895                          || (!TARGET_OPEN_VMS && frame_pointer_needed));
4896   fp_offset = 0;
4897   sa_reg = stack_pointer_rtx;
4898
4899   eh_ofs = cfun->machine->eh_epilogue_sp_ofs;
4900   if (sa_size)
4901     {
4902       /* If we have a frame pointer, restore SP from it.  */
4903       if ((TARGET_OPEN_VMS
4904            && vms_unwind_regno == HARD_FRAME_POINTER_REGNUM)
4905           || (!TARGET_OPEN_VMS && frame_pointer_needed))
4906         {
4907           FRP (emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx));
4908         }
4909
4910       /* Cope with very large offsets to the register save area.  */
4911       if (reg_offset + sa_size > 0x8000)
4912         {
4913           int low = ((reg_offset & 0xffff) ^ 0x8000) - 0x8000;
4914           HOST_WIDE_INT bias;
4915
4916           if (low + sa_size <= 0x8000)
4917             bias = reg_offset - low, reg_offset = low;
4918           else
4919             bias = reg_offset, reg_offset = 0;
4920
4921           sa_reg = gen_rtx_REG (DImode, 22);
4922           sa_reg_exp = plus_constant (stack_pointer_rtx, bias);
4923
4924           FRP (emit_move_insn (sa_reg, sa_reg_exp));
4925         }
4926
4927       /* Restore registers in order, excepting a true frame pointer. */
4928
4929       if (! eh_ofs)
4930         {
4931           mem = gen_rtx_MEM (DImode, plus_constant(sa_reg, reg_offset));
4932           MEM_ALIAS_SET (mem) = alpha_sr_alias_set;
4933           FRP (emit_move_insn (gen_rtx_REG (DImode, REG_RA), mem));
4934         }
4935       reg_offset += 8;
4936       imask &= ~(1L << REG_RA);
4937
4938       for (i = 0; i < 32; ++i)
4939         if (imask & (1L << i))
4940           {
4941             if (i == HARD_FRAME_POINTER_REGNUM && fp_is_frame_pointer)
4942               fp_offset = reg_offset;
4943             else
4944               {
4945                 mem = gen_rtx_MEM (DImode, plus_constant(sa_reg, reg_offset));
4946                 MEM_ALIAS_SET (mem) = alpha_sr_alias_set;
4947                 FRP (emit_move_insn (gen_rtx_REG (DImode, i), mem));
4948               }
4949             reg_offset += 8;
4950           }
4951
4952       for (i = 0; i < 32; ++i)
4953         if (fmask & (1L << i))
4954           {
4955             mem = gen_rtx_MEM (DFmode, plus_constant(sa_reg, reg_offset));
4956             MEM_ALIAS_SET (mem) = alpha_sr_alias_set;
4957             FRP (emit_move_insn (gen_rtx_REG (DFmode, i+32), mem));
4958             reg_offset += 8;
4959           }
4960     }
4961
4962   if (frame_size || eh_ofs)
4963     {
4964       sp_adj1 = stack_pointer_rtx;
4965
4966       if (eh_ofs)
4967         {
4968           sp_adj1 = gen_rtx_REG (DImode, 23);
4969           emit_move_insn (sp_adj1,
4970                           gen_rtx_PLUS (Pmode, stack_pointer_rtx, eh_ofs));
4971         }
4972
4973       /* If the stack size is large, begin computation into a temporary
4974          register so as not to interfere with a potential fp restore,
4975          which must be consecutive with an SP restore.  */
4976       if (frame_size < 32768)
4977         sp_adj2 = GEN_INT (frame_size);
4978       else if (frame_size < 0x40007fffL)
4979         {
4980           int low = ((frame_size & 0xffff) ^ 0x8000) - 0x8000;
4981
4982           sp_adj2 = plus_constant (sp_adj1, frame_size - low);
4983           if (sa_reg_exp && rtx_equal_p (sa_reg_exp, sp_adj2))
4984             sp_adj1 = sa_reg;
4985           else
4986             {
4987               sp_adj1 = gen_rtx_REG (DImode, 23);
4988               FRP (emit_move_insn (sp_adj1, sp_adj2));
4989             }
4990           sp_adj2 = GEN_INT (low);
4991         }
4992       else
4993         {
4994           rtx tmp = gen_rtx_REG (DImode, 23);
4995           FRP (sp_adj2 = alpha_emit_set_const (tmp, DImode, frame_size, 3));
4996           if (!sp_adj2)
4997             {
4998               /* We can't drop new things to memory this late, afaik,
4999                  so build it up by pieces.  */
5000               FRP (sp_adj2 = alpha_emit_set_long_const (tmp, frame_size,
5001                                                         -(frame_size < 0)));
5002               if (!sp_adj2)
5003                 abort ();
5004             }
5005         }
5006
5007       /* From now on, things must be in order.  So emit blockages.  */
5008
5009       /* Restore the frame pointer.  */
5010       if (fp_is_frame_pointer)
5011         {
5012           emit_insn (gen_blockage ());
5013           mem = gen_rtx_MEM (DImode, plus_constant(sa_reg, fp_offset));
5014           MEM_ALIAS_SET (mem) = alpha_sr_alias_set;
5015           FRP (emit_move_insn (hard_frame_pointer_rtx, mem));
5016         }
5017       else if (TARGET_OPEN_VMS)
5018         {
5019           emit_insn (gen_blockage ());
5020           FRP (emit_move_insn (hard_frame_pointer_rtx,
5021                                gen_rtx_REG (DImode, vms_save_fp_regno)));
5022         }
5023
5024       /* Restore the stack pointer.  */
5025       emit_insn (gen_blockage ());
5026       FRP (emit_move_insn (stack_pointer_rtx,
5027                            gen_rtx_PLUS (DImode, sp_adj1, sp_adj2)));
5028     }
5029   else
5030     {
5031       if (TARGET_OPEN_VMS && !vms_is_stack_procedure)
5032         {
5033           emit_insn (gen_blockage ());
5034           FRP (emit_move_insn (hard_frame_pointer_rtx,
5035                                gen_rtx_REG (DImode, vms_save_fp_regno)));
5036         }
5037     }
5038 }
5039
5040 /* Output the rest of the textual info surrounding the epilogue.  */
5041
5042 void
5043 alpha_end_function (file, fnname, decl)
5044      FILE *file;
5045      const char *fnname;
5046      tree decl ATTRIBUTE_UNUSED;
5047 {
5048   /* End the function.  */
5049   if (!flag_inhibit_size_directive)
5050     {
5051       fputs ("\t.end ", file);
5052       assemble_name (file, fnname);
5053       putc ('\n', file);
5054     }
5055   inside_function = FALSE;
5056
5057   /* Show that we know this function if it is called again.
5058
5059      Don't do this for global functions in object files destined for a
5060      shared library because the function may be overridden by the application
5061      or other libraries.  Similarly, don't do this for weak functions.  */
5062
5063   if (!DECL_WEAK (current_function_decl)
5064       && (!flag_pic || !TREE_PUBLIC (current_function_decl)))
5065     SYMBOL_REF_FLAG (XEXP (DECL_RTL (current_function_decl), 0)) = 1;
5066 }
5067 \f
5068 /* Debugging support.  */
5069
5070 #include "gstab.h"
5071
5072 /* Count the number of sdb related labels are generated (to find block
5073    start and end boundaries).  */
5074
5075 int sdb_label_count = 0;
5076
5077 /* Next label # for each statement.  */
5078
5079 static int sym_lineno = 0;
5080
5081 /* Count the number of .file directives, so that .loc is up to date.  */
5082
5083 static int num_source_filenames = 0;
5084
5085 /* Name of the file containing the current function.  */
5086
5087 static const char *current_function_file = "";
5088
5089 /* Offsets to alpha virtual arg/local debugging pointers.  */
5090
5091 long alpha_arg_offset;
5092 long alpha_auto_offset;
5093 \f
5094 /* Emit a new filename to a stream.  */
5095
5096 void
5097 alpha_output_filename (stream, name)
5098      FILE *stream;
5099      const char *name;
5100 {
5101   static int first_time = TRUE;
5102   char ltext_label_name[100];
5103
5104   if (first_time)
5105     {
5106       first_time = FALSE;
5107       ++num_source_filenames;
5108       current_function_file = name;
5109       fprintf (stream, "\t.file\t%d ", num_source_filenames);
5110       output_quoted_string (stream, name);
5111       fprintf (stream, "\n");
5112       if (!TARGET_GAS && write_symbols == DBX_DEBUG)
5113         fprintf (stream, "\t#@stabs\n");
5114     }
5115
5116   else if (write_symbols == DBX_DEBUG)
5117     {
5118       ASM_GENERATE_INTERNAL_LABEL (ltext_label_name, "Ltext", 0);
5119       fprintf (stream, "%s", ASM_STABS_OP);
5120       output_quoted_string (stream, name);
5121       fprintf (stream, ",%d,0,0,%s\n", N_SOL, &ltext_label_name[1]);
5122     }
5123
5124   else if (name != current_function_file
5125            && strcmp (name, current_function_file) != 0)
5126     {
5127       if (inside_function && ! TARGET_GAS)
5128         fprintf (stream, "\t#.file\t%d ", num_source_filenames);
5129       else
5130         {
5131           ++num_source_filenames;
5132           current_function_file = name;
5133           fprintf (stream, "\t.file\t%d ", num_source_filenames);
5134         }
5135
5136       output_quoted_string (stream, name);
5137       fprintf (stream, "\n");
5138     }
5139 }
5140 \f
5141 /* Emit a linenumber to a stream.  */
5142
5143 void
5144 alpha_output_lineno (stream, line)
5145      FILE *stream;
5146      int line;
5147 {
5148   if (write_symbols == DBX_DEBUG)
5149     {
5150       /* mips-tfile doesn't understand .stabd directives.  */
5151       ++sym_lineno;
5152       fprintf (stream, "$LM%d:\n%s%d,0,%d,$LM%d\n",
5153                sym_lineno, ASM_STABN_OP, N_SLINE, line, sym_lineno);
5154     }
5155   else
5156     fprintf (stream, "\n\t.loc\t%d %d\n", num_source_filenames, line);
5157 }
5158 \f
5159 /* Structure to show the current status of registers and memory.  */
5160
5161 struct shadow_summary
5162 {
5163   struct {
5164     unsigned int i     : 31;    /* Mask of int regs */
5165     unsigned int fp    : 31;    /* Mask of fp regs */
5166     unsigned int mem   :  1;    /* mem == imem | fpmem */
5167   } used, defd;
5168 };
5169
5170 static void summarize_insn PARAMS ((rtx, struct shadow_summary *, int));
5171 static void alpha_handle_trap_shadows PARAMS ((rtx));
5172
5173 /* Summary the effects of expression X on the machine.  Update SUM, a pointer
5174    to the summary structure.  SET is nonzero if the insn is setting the
5175    object, otherwise zero.  */
5176
5177 static void
5178 summarize_insn (x, sum, set)
5179      rtx x;
5180      struct shadow_summary *sum;
5181      int set;
5182 {
5183   const char *format_ptr;
5184   int i, j;
5185
5186   if (x == 0)
5187     return;
5188
5189   switch (GET_CODE (x))
5190     {
5191       /* ??? Note that this case would be incorrect if the Alpha had a
5192          ZERO_EXTRACT in SET_DEST.  */
5193     case SET:
5194       summarize_insn (SET_SRC (x), sum, 0);
5195       summarize_insn (SET_DEST (x), sum, 1);
5196       break;
5197
5198     case CLOBBER:
5199       summarize_insn (XEXP (x, 0), sum, 1);
5200       break;
5201
5202     case USE:
5203       summarize_insn (XEXP (x, 0), sum, 0);
5204       break;
5205
5206     case ASM_OPERANDS:
5207       for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; i--)
5208         summarize_insn (ASM_OPERANDS_INPUT (x, i), sum, 0);
5209       break;
5210
5211     case PARALLEL:
5212       for (i = XVECLEN (x, 0) - 1; i >= 0; i--)
5213         summarize_insn (XVECEXP (x, 0, i), sum, 0);
5214       break;
5215
5216     case SUBREG:
5217       summarize_insn (SUBREG_REG (x), sum, 0);
5218       break;
5219
5220     case REG:
5221       {
5222         int regno = REGNO (x);
5223         unsigned long mask = ((unsigned long) 1) << (regno % 32);
5224
5225         if (regno == 31 || regno == 63)
5226           break;
5227
5228         if (set)
5229           {
5230             if (regno < 32)
5231               sum->defd.i |= mask;
5232             else
5233               sum->defd.fp |= mask;
5234           }
5235         else
5236           {
5237             if (regno < 32)
5238               sum->used.i  |= mask;
5239             else
5240               sum->used.fp |= mask;
5241           }
5242         }
5243       break;
5244
5245     case MEM:
5246       if (set)
5247         sum->defd.mem = 1;
5248       else
5249         sum->used.mem = 1;
5250
5251       /* Find the regs used in memory address computation: */
5252       summarize_insn (XEXP (x, 0), sum, 0);
5253       break;
5254
5255     case CONST_INT:   case CONST_DOUBLE:
5256     case SYMBOL_REF:  case LABEL_REF:     case CONST:
5257     case SCRATCH:
5258       break;
5259
5260       /* Handle common unary and binary ops for efficiency.  */
5261     case COMPARE:  case PLUS:    case MINUS:   case MULT:      case DIV:
5262     case MOD:      case UDIV:    case UMOD:    case AND:       case IOR:
5263     case XOR:      case ASHIFT:  case ROTATE:  case ASHIFTRT:  case LSHIFTRT:
5264     case ROTATERT: case SMIN:    case SMAX:    case UMIN:      case UMAX:
5265     case NE:       case EQ:      case GE:      case GT:        case LE:
5266     case LT:       case GEU:     case GTU:     case LEU:       case LTU:
5267       summarize_insn (XEXP (x, 0), sum, 0);
5268       summarize_insn (XEXP (x, 1), sum, 0);
5269       break;
5270
5271     case NEG:  case NOT:  case SIGN_EXTEND:  case ZERO_EXTEND:
5272     case TRUNCATE:  case FLOAT_EXTEND:  case FLOAT_TRUNCATE:  case FLOAT:
5273     case FIX:  case UNSIGNED_FLOAT:  case UNSIGNED_FIX:  case ABS:
5274     case SQRT:  case FFS:
5275       summarize_insn (XEXP (x, 0), sum, 0);
5276       break;
5277
5278     default:
5279       format_ptr = GET_RTX_FORMAT (GET_CODE (x));
5280       for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
5281         switch (format_ptr[i])
5282           {
5283           case 'e':
5284             summarize_insn (XEXP (x, i), sum, 0);
5285             break;
5286
5287           case 'E':
5288             for (j = XVECLEN (x, i) - 1; j >= 0; j--)
5289               summarize_insn (XVECEXP (x, i, j), sum, 0);
5290             break;
5291
5292           case 'i':
5293             break;
5294
5295           default:
5296             abort ();
5297           }
5298     }
5299 }
5300
5301 /* Ensure a sufficient number of `trapb' insns are in the code when
5302    the user requests code with a trap precision of functions or
5303    instructions.
5304
5305    In naive mode, when the user requests a trap-precision of
5306    "instruction", a trapb is needed after every instruction that may
5307    generate a trap.  This ensures that the code is resumption safe but
5308    it is also slow.
5309
5310    When optimizations are turned on, we delay issuing a trapb as long
5311    as possible.  In this context, a trap shadow is the sequence of
5312    instructions that starts with a (potentially) trap generating
5313    instruction and extends to the next trapb or call_pal instruction
5314    (but GCC never generates call_pal by itself).  We can delay (and
5315    therefore sometimes omit) a trapb subject to the following
5316    conditions:
5317
5318    (a) On entry to the trap shadow, if any Alpha register or memory
5319    location contains a value that is used as an operand value by some
5320    instruction in the trap shadow (live on entry), then no instruction
5321    in the trap shadow may modify the register or memory location.
5322
5323    (b) Within the trap shadow, the computation of the base register
5324    for a memory load or store instruction may not involve using the
5325    result of an instruction that might generate an UNPREDICTABLE
5326    result.
5327
5328    (c) Within the trap shadow, no register may be used more than once
5329    as a destination register.  (This is to make life easier for the
5330    trap-handler.)
5331
5332    (d) The trap shadow may not include any branch instructions.  */
5333
5334 static void
5335 alpha_handle_trap_shadows (insns)
5336      rtx insns;
5337 {
5338   struct shadow_summary shadow;
5339   int trap_pending, exception_nesting;
5340   rtx i, n;
5341
5342   trap_pending = 0;
5343   exception_nesting = 0;
5344   shadow.used.i = 0;
5345   shadow.used.fp = 0;
5346   shadow.used.mem = 0;
5347   shadow.defd = shadow.used;
5348
5349   for (i = insns; i ; i = NEXT_INSN (i))
5350     {
5351       if (GET_CODE (i) == NOTE)
5352         {
5353           switch (NOTE_LINE_NUMBER (i))
5354             {
5355             case NOTE_INSN_EH_REGION_BEG:
5356               exception_nesting++;
5357               if (trap_pending)
5358                 goto close_shadow;
5359               break;
5360
5361             case NOTE_INSN_EH_REGION_END:
5362               exception_nesting--;
5363               if (trap_pending)
5364                 goto close_shadow;
5365               break;
5366
5367             case NOTE_INSN_EPILOGUE_BEG:
5368               if (trap_pending && alpha_tp >= ALPHA_TP_FUNC)
5369                 goto close_shadow;
5370               break;
5371             }
5372         }
5373       else if (trap_pending)
5374         {
5375           if (alpha_tp == ALPHA_TP_FUNC)
5376             {
5377               if (GET_CODE (i) == JUMP_INSN
5378                   && GET_CODE (PATTERN (i)) == RETURN)
5379                 goto close_shadow;
5380             }
5381           else if (alpha_tp == ALPHA_TP_INSN)
5382             {
5383               if (optimize > 0)
5384                 {
5385                   struct shadow_summary sum;
5386
5387                   sum.used.i = 0;
5388                   sum.used.fp = 0;
5389                   sum.used.mem = 0;
5390                   sum.defd = sum.used;
5391
5392                   switch (GET_CODE (i))
5393                     {
5394                     case INSN:
5395                       /* Annoyingly, get_attr_trap will abort on these.  */
5396                       if (GET_CODE (PATTERN (i)) == USE
5397                           || GET_CODE (PATTERN (i)) == CLOBBER)
5398                         break;
5399
5400                       summarize_insn (PATTERN (i), &sum, 0);
5401
5402                       if ((sum.defd.i & shadow.defd.i)
5403                           || (sum.defd.fp & shadow.defd.fp))
5404                         {
5405                           /* (c) would be violated */
5406                           goto close_shadow;
5407                         }
5408
5409                       /* Combine shadow with summary of current insn: */
5410                       shadow.used.i   |= sum.used.i;
5411                       shadow.used.fp  |= sum.used.fp;
5412                       shadow.used.mem |= sum.used.mem;
5413                       shadow.defd.i   |= sum.defd.i;
5414                       shadow.defd.fp  |= sum.defd.fp;
5415                       shadow.defd.mem |= sum.defd.mem;
5416
5417                       if ((sum.defd.i & shadow.used.i)
5418                           || (sum.defd.fp & shadow.used.fp)
5419                           || (sum.defd.mem & shadow.used.mem))
5420                         {
5421                           /* (a) would be violated (also takes care of (b))  */
5422                           if (get_attr_trap (i) == TRAP_YES
5423                               && ((sum.defd.i & sum.used.i)
5424                                   || (sum.defd.fp & sum.used.fp)))
5425                             abort ();
5426
5427                           goto close_shadow;
5428                         }
5429                       break;
5430
5431                     case JUMP_INSN:
5432                     case CALL_INSN:
5433                     case CODE_LABEL:
5434                       goto close_shadow;
5435
5436                     default:
5437                       abort ();
5438                     }
5439                 }
5440               else
5441                 {
5442                 close_shadow:
5443                   n = emit_insn_before (gen_trapb (), i);
5444                   PUT_MODE (n, TImode);
5445                   PUT_MODE (i, TImode);
5446                   trap_pending = 0;
5447                   shadow.used.i = 0;
5448                   shadow.used.fp = 0;
5449                   shadow.used.mem = 0;
5450                   shadow.defd = shadow.used;
5451                 }
5452             }
5453         }
5454
5455       if ((exception_nesting > 0 || alpha_tp >= ALPHA_TP_FUNC)
5456           && GET_CODE (i) == INSN
5457           && GET_CODE (PATTERN (i)) != USE
5458           && GET_CODE (PATTERN (i)) != CLOBBER
5459           && get_attr_trap (i) == TRAP_YES)
5460         {
5461           if (optimize && !trap_pending)
5462             summarize_insn (PATTERN (i), &shadow, 0);
5463           trap_pending = 1;
5464         }
5465     }
5466 }
5467 \f
5468 /* Alpha can only issue instruction groups simultaneously if they are
5469    suitibly aligned.  This is very processor-specific.  */
5470
5471 enum alphaev4_pipe {
5472   EV4_STOP = 0,
5473   EV4_IB0 = 1,
5474   EV4_IB1 = 2,
5475   EV4_IBX = 4
5476 };
5477
5478 enum alphaev5_pipe {
5479   EV5_STOP = 0,
5480   EV5_NONE = 1,
5481   EV5_E01 = 2,
5482   EV5_E0 = 4,
5483   EV5_E1 = 8,
5484   EV5_FAM = 16,
5485   EV5_FA = 32,
5486   EV5_FM = 64
5487 };
5488
5489 static enum alphaev4_pipe alphaev4_insn_pipe PARAMS ((rtx));
5490 static enum alphaev5_pipe alphaev5_insn_pipe PARAMS ((rtx));
5491 static rtx alphaev4_next_group PARAMS ((rtx, int *, int *));
5492 static rtx alphaev5_next_group PARAMS ((rtx, int *, int *));
5493 static rtx alphaev4_next_nop PARAMS ((int *));
5494 static rtx alphaev5_next_nop PARAMS ((int *));
5495
5496 static void alpha_align_insns
5497   PARAMS ((rtx, unsigned int, rtx (*)(rtx, int *, int *), rtx (*)(int *)));
5498
5499 static enum alphaev4_pipe
5500 alphaev4_insn_pipe (insn)
5501      rtx insn;
5502 {
5503   if (recog_memoized (insn) < 0)
5504     return EV4_STOP;
5505   if (get_attr_length (insn) != 4)
5506     return EV4_STOP;
5507
5508   switch (get_attr_type (insn))
5509     {
5510     case TYPE_ILD:
5511     case TYPE_FLD:
5512       return EV4_IBX;
5513
5514     case TYPE_LDSYM:
5515     case TYPE_IADD:
5516     case TYPE_ILOG:
5517     case TYPE_ICMOV:
5518     case TYPE_ICMP:
5519     case TYPE_IST:
5520     case TYPE_FST:
5521     case TYPE_SHIFT:
5522     case TYPE_IMUL:
5523     case TYPE_FBR:
5524       return EV4_IB0;
5525
5526     case TYPE_MISC:
5527     case TYPE_IBR:
5528     case TYPE_JSR:
5529     case TYPE_FCPYS:
5530     case TYPE_FCMOV:
5531     case TYPE_FADD:
5532     case TYPE_FDIV:
5533     case TYPE_FMUL:
5534       return EV4_IB1;
5535
5536     default:
5537       abort ();
5538     }
5539 }
5540
5541 static enum alphaev5_pipe
5542 alphaev5_insn_pipe (insn)
5543      rtx insn;
5544 {
5545   if (recog_memoized (insn) < 0)
5546     return EV5_STOP;
5547   if (get_attr_length (insn) != 4)
5548     return EV5_STOP;
5549
5550   switch (get_attr_type (insn))
5551     {
5552     case TYPE_ILD:
5553     case TYPE_FLD:
5554     case TYPE_LDSYM:
5555     case TYPE_IADD:
5556     case TYPE_ILOG:
5557     case TYPE_ICMOV:
5558     case TYPE_ICMP:
5559       return EV5_E01;
5560
5561     case TYPE_IST:
5562     case TYPE_FST:
5563     case TYPE_SHIFT:
5564     case TYPE_IMUL:
5565     case TYPE_MISC:
5566     case TYPE_MVI:
5567       return EV5_E0;
5568
5569     case TYPE_IBR:
5570     case TYPE_JSR:
5571       return EV5_E1;
5572
5573     case TYPE_FCPYS:
5574       return EV5_FAM;
5575
5576     case TYPE_FBR:
5577     case TYPE_FCMOV:
5578     case TYPE_FADD:
5579     case TYPE_FDIV:
5580       return EV5_FA;
5581
5582     case TYPE_FMUL:
5583       return EV5_FM;
5584
5585     default:
5586       abort();
5587     }
5588 }
5589
5590 /* IN_USE is a mask of the slots currently filled within the insn group.
5591    The mask bits come from alphaev4_pipe above.  If EV4_IBX is set, then
5592    the insn in EV4_IB0 can be swapped by the hardware into EV4_IB1.
5593
5594    LEN is, of course, the length of the group in bytes.  */
5595
5596 static rtx
5597 alphaev4_next_group (insn, pin_use, plen)
5598      rtx insn;
5599      int *pin_use, *plen;
5600 {
5601   int len, in_use;
5602
5603   len = in_use = 0;
5604
5605   if (! INSN_P (insn)
5606       || GET_CODE (PATTERN (insn)) == CLOBBER
5607       || GET_CODE (PATTERN (insn)) == USE)
5608     goto next_and_done;
5609
5610   while (1)
5611     {
5612       enum alphaev4_pipe pipe;
5613
5614       pipe = alphaev4_insn_pipe (insn);
5615       switch (pipe)
5616         {
5617         case EV4_STOP:
5618           /* Force complex instructions to start new groups.  */
5619           if (in_use)
5620             goto done;
5621
5622           /* If this is a completely unrecognized insn, its an asm.
5623              We don't know how long it is, so record length as -1 to
5624              signal a needed realignment.  */
5625           if (recog_memoized (insn) < 0)
5626             len = -1;
5627           else
5628             len = get_attr_length (insn);
5629           goto next_and_done;
5630
5631         case EV4_IBX:
5632           if (in_use & EV4_IB0)
5633             {
5634               if (in_use & EV4_IB1)
5635                 goto done;
5636               in_use |= EV4_IB1;
5637             }
5638           else
5639             in_use |= EV4_IB0 | EV4_IBX;
5640           break;
5641
5642         case EV4_IB0:
5643           if (in_use & EV4_IB0)
5644             {
5645               if (!(in_use & EV4_IBX) || (in_use & EV4_IB1))
5646                 goto done;
5647               in_use |= EV4_IB1;
5648             }
5649           in_use |= EV4_IB0;
5650           break;
5651
5652         case EV4_IB1:
5653           if (in_use & EV4_IB1)
5654             goto done;
5655           in_use |= EV4_IB1;
5656           break;
5657
5658         default:
5659           abort();
5660         }
5661       len += 4;
5662
5663       /* Haifa doesn't do well scheduling branches.  */
5664       if (GET_CODE (insn) == JUMP_INSN)
5665         goto next_and_done;
5666
5667     next:
5668       insn = next_nonnote_insn (insn);
5669
5670       if (!insn || ! INSN_P (insn))
5671         goto done;
5672
5673       /* Let Haifa tell us where it thinks insn group boundaries are.  */
5674       if (GET_MODE (insn) == TImode)
5675         goto done;
5676
5677       if (GET_CODE (insn) == CLOBBER || GET_CODE (insn) == USE)
5678         goto next;
5679     }
5680
5681  next_and_done:
5682   insn = next_nonnote_insn (insn);
5683
5684  done:
5685   *plen = len;
5686   *pin_use = in_use;
5687   return insn;
5688 }
5689
5690 /* IN_USE is a mask of the slots currently filled within the insn group.
5691    The mask bits come from alphaev5_pipe above.  If EV5_E01 is set, then
5692    the insn in EV5_E0 can be swapped by the hardware into EV5_E1.
5693
5694    LEN is, of course, the length of the group in bytes.  */
5695
5696 static rtx
5697 alphaev5_next_group (insn, pin_use, plen)
5698      rtx insn;
5699      int *pin_use, *plen;
5700 {
5701   int len, in_use;
5702
5703   len = in_use = 0;
5704
5705   if (! INSN_P (insn)
5706       || GET_CODE (PATTERN (insn)) == CLOBBER
5707       || GET_CODE (PATTERN (insn)) == USE)
5708     goto next_and_done;
5709
5710   while (1)
5711     {
5712       enum alphaev5_pipe pipe;
5713
5714       pipe = alphaev5_insn_pipe (insn);
5715       switch (pipe)
5716         {
5717         case EV5_STOP:
5718           /* Force complex instructions to start new groups.  */
5719           if (in_use)
5720             goto done;
5721
5722           /* If this is a completely unrecognized insn, its an asm.
5723              We don't know how long it is, so record length as -1 to
5724              signal a needed realignment.  */
5725           if (recog_memoized (insn) < 0)
5726             len = -1;
5727           else
5728             len = get_attr_length (insn);
5729           goto next_and_done;
5730
5731         /* ??? Most of the places below, we would like to abort, as
5732            it would indicate an error either in Haifa, or in the
5733            scheduling description.  Unfortunately, Haifa never
5734            schedules the last instruction of the BB, so we don't
5735            have an accurate TI bit to go off.  */
5736         case EV5_E01:
5737           if (in_use & EV5_E0)
5738             {
5739               if (in_use & EV5_E1)
5740                 goto done;
5741               in_use |= EV5_E1;
5742             }
5743           else
5744             in_use |= EV5_E0 | EV5_E01;
5745           break;
5746
5747         case EV5_E0:
5748           if (in_use & EV5_E0)
5749             {
5750               if (!(in_use & EV5_E01) || (in_use & EV5_E1))
5751                 goto done;
5752               in_use |= EV5_E1;
5753             }
5754           in_use |= EV5_E0;
5755           break;
5756
5757         case EV5_E1:
5758           if (in_use & EV5_E1)
5759             goto done;
5760           in_use |= EV5_E1;
5761           break;
5762
5763         case EV5_FAM:
5764           if (in_use & EV5_FA)
5765             {
5766               if (in_use & EV5_FM)
5767                 goto done;
5768               in_use |= EV5_FM;
5769             }
5770           else
5771             in_use |= EV5_FA | EV5_FAM;
5772           break;
5773
5774         case EV5_FA:
5775           if (in_use & EV5_FA)
5776             goto done;
5777           in_use |= EV5_FA;
5778           break;
5779
5780         case EV5_FM:
5781           if (in_use & EV5_FM)
5782             goto done;
5783           in_use |= EV5_FM;
5784           break;
5785
5786         case EV5_NONE:
5787           break;
5788
5789         default:
5790           abort();
5791         }
5792       len += 4;
5793
5794       /* Haifa doesn't do well scheduling branches.  */
5795       /* ??? If this is predicted not-taken, slotting continues, except
5796          that no more IBR, FBR, or JSR insns may be slotted.  */
5797       if (GET_CODE (insn) == JUMP_INSN)
5798         goto next_and_done;
5799
5800     next:
5801       insn = next_nonnote_insn (insn);
5802
5803       if (!insn || ! INSN_P (insn))
5804         goto done;
5805
5806       /* Let Haifa tell us where it thinks insn group boundaries are.  */
5807       if (GET_MODE (insn) == TImode)
5808         goto done;
5809
5810       if (GET_CODE (insn) == CLOBBER || GET_CODE (insn) == USE)
5811         goto next;
5812     }
5813
5814  next_and_done:
5815   insn = next_nonnote_insn (insn);
5816
5817  done:
5818   *plen = len;
5819   *pin_use = in_use;
5820   return insn;
5821 }
5822
5823 static rtx
5824 alphaev4_next_nop (pin_use)
5825      int *pin_use;
5826 {
5827   int in_use = *pin_use;
5828   rtx nop;
5829
5830   if (!(in_use & EV4_IB0))
5831     {
5832       in_use |= EV4_IB0;
5833       nop = gen_nop ();
5834     }
5835   else if ((in_use & (EV4_IBX|EV4_IB1)) == EV4_IBX)
5836     {
5837       in_use |= EV4_IB1;
5838       nop = gen_nop ();
5839     }
5840   else if (TARGET_FP && !(in_use & EV4_IB1))
5841     {
5842       in_use |= EV4_IB1;
5843       nop = gen_fnop ();
5844     }
5845   else
5846     nop = gen_unop ();
5847
5848   *pin_use = in_use;
5849   return nop;
5850 }
5851
5852 static rtx
5853 alphaev5_next_nop (pin_use)
5854      int *pin_use;
5855 {
5856   int in_use = *pin_use;
5857   rtx nop;
5858
5859   if (!(in_use & EV5_E1))
5860     {
5861       in_use |= EV5_E1;
5862       nop = gen_nop ();
5863     }
5864   else if (TARGET_FP && !(in_use & EV5_FA))
5865     {
5866       in_use |= EV5_FA;
5867       nop = gen_fnop ();
5868     }
5869   else if (TARGET_FP && !(in_use & EV5_FM))
5870     {
5871       in_use |= EV5_FM;
5872       nop = gen_fnop ();
5873     }
5874   else
5875     nop = gen_unop ();
5876
5877   *pin_use = in_use;
5878   return nop;
5879 }
5880
5881 /* The instruction group alignment main loop.  */
5882
5883 static void
5884 alpha_align_insns (insns, max_align, next_group, next_nop)
5885      rtx insns;
5886      unsigned int max_align;
5887      rtx (*next_group) PARAMS ((rtx, int *, int *));
5888      rtx (*next_nop) PARAMS ((int *));
5889 {
5890   /* ALIGN is the known alignment for the insn group.  */
5891   unsigned int align;
5892   /* OFS is the offset of the current insn in the insn group.  */
5893   int ofs;
5894   int prev_in_use, in_use, len;
5895   rtx i, next;
5896
5897   /* Let shorten branches care for assigning alignments to code labels.  */
5898   shorten_branches (insns);
5899
5900   align = (FUNCTION_BOUNDARY / BITS_PER_UNIT < max_align
5901            ? FUNCTION_BOUNDARY / BITS_PER_UNIT : max_align);
5902
5903   ofs = prev_in_use = 0;
5904   i = insns;
5905   if (GET_CODE (i) == NOTE)
5906     i = next_nonnote_insn (i);
5907
5908   while (i)
5909     {
5910       next = (*next_group) (i, &in_use, &len);
5911
5912       /* When we see a label, resync alignment etc.  */
5913       if (GET_CODE (i) == CODE_LABEL)
5914         {
5915           unsigned int new_align = 1 << label_to_alignment (i);
5916
5917           if (new_align >= align)
5918             {
5919               align = new_align < max_align ? new_align : max_align;
5920               ofs = 0;
5921             }
5922
5923           else if (ofs & (new_align-1))
5924             ofs = (ofs | (new_align-1)) + 1;
5925           if (len != 0)
5926             abort();
5927         }
5928
5929       /* Handle complex instructions special.  */
5930       else if (in_use == 0)
5931         {
5932           /* Asms will have length < 0.  This is a signal that we have
5933              lost alignment knowledge.  Assume, however, that the asm
5934              will not mis-align instructions.  */
5935           if (len < 0)
5936             {
5937               ofs = 0;
5938               align = 4;
5939               len = 0;
5940             }
5941         }
5942
5943       /* If the known alignment is smaller than the recognized insn group,
5944          realign the output.  */
5945       else if (align < len)
5946         {
5947           unsigned int new_log_align = len > 8 ? 4 : 3;
5948           rtx where;
5949
5950           where = prev_nonnote_insn (i);
5951           if (!where || GET_CODE (where) != CODE_LABEL)
5952             where = i;
5953
5954           emit_insn_before (gen_realign (GEN_INT (new_log_align)), where);
5955           align = 1 << new_log_align;
5956           ofs = 0;
5957         }
5958
5959       /* If the group won't fit in the same INT16 as the previous,
5960          we need to add padding to keep the group together.  Rather
5961          than simply leaving the insn filling to the assembler, we
5962          can make use of the knowledge of what sorts of instructions
5963          were issued in the previous group to make sure that all of
5964          the added nops are really free.  */
5965       else if (ofs + len > align)
5966         {
5967           int nop_count = (align - ofs) / 4;
5968           rtx where;
5969
5970           /* Insert nops before labels and branches to truely merge the
5971              execution of the nops with the previous instruction group.  */
5972           where = prev_nonnote_insn (i);
5973           if (where)
5974             {
5975               if (GET_CODE (where) == CODE_LABEL)
5976                 {
5977                   rtx where2 = prev_nonnote_insn (where);
5978                   if (where2 && GET_CODE (where2) == JUMP_INSN)
5979                     where = where2;
5980                 }
5981               else if (GET_CODE (where) != JUMP_INSN)
5982                 where = i;
5983             }
5984           else
5985             where = i;
5986
5987           do
5988             emit_insn_before ((*next_nop)(&prev_in_use), where);
5989           while (--nop_count);
5990           ofs = 0;
5991         }
5992
5993       ofs = (ofs + len) & (align - 1);
5994       prev_in_use = in_use;
5995       i = next;
5996     }
5997 }
5998 \f
5999 /* Machine dependant reorg pass.  */
6000
6001 void
6002 alpha_reorg (insns)
6003      rtx insns;
6004 {
6005   if (alpha_tp != ALPHA_TP_PROG || flag_exceptions)
6006     alpha_handle_trap_shadows (insns);
6007
6008   /* Due to the number of extra trapb insns, don't bother fixing up
6009      alignment when trap precision is instruction.  Moreover, we can
6010      only do our job when sched2 is run.  */
6011   if (optimize && !optimize_size
6012       && alpha_tp != ALPHA_TP_INSN
6013       && flag_schedule_insns_after_reload)
6014     {
6015       if (alpha_cpu == PROCESSOR_EV4)
6016         alpha_align_insns (insns, 8, alphaev4_next_group, alphaev4_next_nop);
6017       else if (alpha_cpu == PROCESSOR_EV5)
6018         alpha_align_insns (insns, 16, alphaev5_next_group, alphaev5_next_nop);
6019     }
6020 }
6021 \f
6022 /* Check a floating-point value for validity for a particular machine mode.  */
6023
6024 static const char * const float_strings[] =
6025 {
6026   /* These are for FLOAT_VAX.  */
6027    "1.70141173319264430e+38", /* 2^127 (2^24 - 1) / 2^24 */
6028   "-1.70141173319264430e+38",
6029    "2.93873587705571877e-39", /* 2^-128 */
6030   "-2.93873587705571877e-39",
6031   /* These are for the default broken IEEE mode, which traps
6032      on infinity or denormal numbers.  */
6033    "3.402823466385288598117e+38", /* 2^128 (1 - 2^-24) */
6034   "-3.402823466385288598117e+38",
6035    "1.1754943508222875079687e-38", /* 2^-126 */
6036   "-1.1754943508222875079687e-38",
6037 };
6038
6039 static REAL_VALUE_TYPE float_values[8];
6040 static int inited_float_values = 0;
6041
6042 int
6043 check_float_value (mode, d, overflow)
6044      enum machine_mode mode;
6045      REAL_VALUE_TYPE *d;
6046      int overflow ATTRIBUTE_UNUSED;
6047 {
6048
6049   if (TARGET_IEEE || TARGET_IEEE_CONFORMANT || TARGET_IEEE_WITH_INEXACT)
6050     return 0;
6051
6052   if (inited_float_values == 0)
6053     {
6054       int i;
6055       for (i = 0; i < 8; i++)
6056         float_values[i] = REAL_VALUE_ATOF (float_strings[i], DFmode);
6057
6058       inited_float_values = 1;
6059     }
6060
6061   if (mode == SFmode)
6062     {
6063       REAL_VALUE_TYPE r;
6064       REAL_VALUE_TYPE *fvptr;
6065
6066       if (TARGET_FLOAT_VAX)
6067         fvptr = &float_values[0];
6068       else
6069         fvptr = &float_values[4];
6070
6071       memcpy (&r, d, sizeof (REAL_VALUE_TYPE));
6072       if (REAL_VALUES_LESS (fvptr[0], r))
6073         {
6074           bcopy ((char *) &fvptr[0], (char *) d,
6075                  sizeof (REAL_VALUE_TYPE));
6076           return 1;
6077         }
6078       else if (REAL_VALUES_LESS (r, fvptr[1]))
6079         {
6080           bcopy ((char *) &fvptr[1], (char *) d,
6081                  sizeof (REAL_VALUE_TYPE));
6082           return 1;
6083         }
6084       else if (REAL_VALUES_LESS (dconst0, r)
6085                 && REAL_VALUES_LESS (r, fvptr[2]))
6086         {
6087           bcopy ((char *) &dconst0, (char *) d, sizeof (REAL_VALUE_TYPE));
6088           return 1;
6089         }
6090       else if (REAL_VALUES_LESS (r, dconst0)
6091                 && REAL_VALUES_LESS (fvptr[3], r))
6092         {
6093           bcopy ((char *) &dconst0, (char *) d, sizeof (REAL_VALUE_TYPE));
6094           return 1;
6095         }
6096     }
6097
6098   return 0;
6099 }
6100
6101 #if OPEN_VMS
6102
6103 /* Return the VMS argument type corresponding to MODE.  */
6104
6105 enum avms_arg_type
6106 alpha_arg_type (mode)
6107      enum machine_mode mode;
6108 {
6109   switch (mode)
6110     {
6111     case SFmode:
6112       return TARGET_FLOAT_VAX ? FF : FS;
6113     case DFmode:
6114       return TARGET_FLOAT_VAX ? FD : FT;
6115     default:
6116       return I64;
6117     }
6118 }
6119
6120 /* Return an rtx for an integer representing the VMS Argument Information
6121    register value.  */
6122
6123 rtx
6124 alpha_arg_info_reg_val (cum)
6125      CUMULATIVE_ARGS cum;
6126 {
6127   unsigned HOST_WIDE_INT regval = cum.num_args;
6128   int i;
6129
6130   for (i = 0; i < 6; i++)
6131     regval |= ((int) cum.atypes[i]) << (i * 3 + 8);
6132
6133   return GEN_INT (regval);
6134 }
6135 \f
6136 #include <splay-tree.h>
6137
/* Structure to collect function names for final output
   in link section.  */

/* How a recorded name has been seen so far.  alpha_need_linkage starts
   a name as KIND_UNUSED when it is first seen in a definition and as
   KIND_EXTERN when it is first seen in a call; a name seen both ways
   becomes KIND_LOCAL.  alpha_write_one_linkage emits nothing for
   KIND_UNUSED, a .quad pair for KIND_LOCAL and a .linkage request
   otherwise.  */
enum links_kind {KIND_UNUSED, KIND_LOCAL, KIND_EXTERN};

struct alpha_links
{
  /* SYMBOL_REF for the "$<name>..lk" linkage symbol.  */
  rtx linkage;
  /* Current classification of the name.  */
  enum links_kind kind;
};

/* Maps a function name to its struct alpha_links.  Created on the first
   call to alpha_need_linkage, so it is null until then.  */
static splay_tree alpha_links;
6150
6151 static int mark_alpha_links_node        PARAMS ((splay_tree_node, void *));
6152 static void mark_alpha_links            PARAMS ((void *));
6153 static int alpha_write_one_linkage      PARAMS ((splay_tree_node, void *));
6154
6155 /* Protect alpha_links from garbage collection.  */
6156
6157 static int
6158 mark_alpha_links_node (node, data)
6159      splay_tree_node node;
6160      void *data ATTRIBUTE_UNUSED;
6161 {
6162   struct alpha_links *links = (struct alpha_links *) node->value;
6163   ggc_mark_rtx (links->linkage);
6164   return 0;
6165 }
6166
6167 static void
6168 mark_alpha_links (ptr)
6169      void *ptr;
6170 {
6171   splay_tree tree = *(splay_tree *) ptr;
6172   splay_tree_foreach (tree, mark_alpha_links_node, NULL);
6173 }
6174
6175 /* Make (or fake) .linkage entry for function call.
6176
6177    IS_LOCAL is 0 if name is used in call, 1 if name is used in definition.
6178
6179    Return an SYMBOL_REF rtx for the linkage.  */
6180
6181 rtx
6182 alpha_need_linkage (name, is_local)
6183     const char *name;
6184     int is_local;
6185 {
6186   splay_tree_node node;
6187   struct alpha_links *al;
6188
6189   if (name[0] == '*')
6190     name++;
6191
6192   if (alpha_links)
6193     {
6194       /* Is this name already defined?  */
6195
6196       node = splay_tree_lookup (alpha_links, (splay_tree_key) name);
6197       if (node)
6198         {
6199           al = (struct alpha_links *) node->value;
6200           if (is_local)
6201             {
6202               /* Defined here but external assumed.  */
6203               if (al->kind == KIND_EXTERN)
6204                 al->kind = KIND_LOCAL;
6205             }
6206           else
6207             {
6208               /* Used here but unused assumed.  */
6209               if (al->kind == KIND_UNUSED)
6210                 al->kind = KIND_LOCAL;
6211             }
6212           return al->linkage;
6213         }
6214     }
6215   else
6216     {
6217       alpha_links = splay_tree_new ((splay_tree_compare_fn) strcmp,
6218                                     (splay_tree_delete_key_fn) free,
6219                                     (splay_tree_delete_key_fn) free);
6220       ggc_add_root (&alpha_links, 1, 1, mark_alpha_links);
6221     }
6222
6223   al = (struct alpha_links *) xmalloc (sizeof (struct alpha_links));
6224   name = xstrdup (name);
6225
6226   /* Assume external if no definition.  */
6227   al->kind = (is_local ? KIND_UNUSED : KIND_EXTERN);
6228
6229   /* Ensure we have an IDENTIFIER so assemble_name can mark it used.  */
6230   get_identifier (name);
6231
6232   /* Construct a SYMBOL_REF for us to call.  */
6233   {
6234     size_t name_len = strlen (name);
6235     char *linksym = ggc_alloc_string (NULL, name_len + 6);
6236
6237     linksym[0] = '$';
6238     memcpy (linksym + 1, name, name_len);
6239     memcpy (linksym + 1 + name_len, "..lk", 5);
6240     al->linkage = gen_rtx_SYMBOL_REF (Pmode, linksym);
6241   }
6242
6243   splay_tree_insert (alpha_links, (splay_tree_key) name,
6244                      (splay_tree_value) al);
6245
6246   return al->linkage;
6247 }
6248
6249 static int
6250 alpha_write_one_linkage (node, data)
6251      splay_tree_node node;
6252      void *data;
6253 {
6254   const char *name = (const char *) node->key;
6255   struct alpha_links *links = (struct alpha_links *) node->value;
6256   FILE *stream = (FILE *) data;
6257
6258   if (links->kind == KIND_UNUSED
6259       || ! TREE_SYMBOL_REFERENCED (get_identifier (name)))
6260     return 0;
6261
6262   fprintf (stream, "$%s..lk:\n", name);
6263   if (links->kind == KIND_LOCAL)
6264     {
6265       /* Local and used, build linkage pair.  */
6266       fprintf (stream, "\t.quad %s..en\n", name);
6267       fprintf (stream, "\t.quad %s\n", name);
6268     }
6269   else
6270     {
6271       /* External and used, request linkage pair.  */
6272       fprintf (stream, "\t.linkage %s\n", name);
6273     }
6274
6275   return 0;
6276 }
6277
6278 void
6279 alpha_write_linkage (stream)
6280     FILE *stream;
6281 {
6282   readonly_section ();
6283   fprintf (stream, "\t.align 3\n");
6284   splay_tree_foreach (alpha_links, alpha_write_one_linkage, stream);
6285 }
6286
6287 #else
6288
6289 rtx
6290 alpha_need_linkage (name, is_local)
6291      const char *name ATTRIBUTE_UNUSED;
6292      int is_local ATTRIBUTE_UNUSED;
6293 {
6294   return NULL_RTX;
6295 }
6296
6297 #endif /* OPEN_VMS */