gcc/fwprop.c

   1 /* RTL-based forward propagation pass for GNU compiler.
   2    Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010
   3    Free Software Foundation, Inc.
   4    Contributed by Paolo Bonzini and Steven Bosscher.
   5
   6 This file is part of GCC.
   7
   8 GCC is free software; you can redistribute it and/or modify it under
   9 the terms of the GNU General Public License as published by the Free
  10 Software Foundation; either version 3, or (at your option) any later
  11 version.
  12
  13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  16 for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GCC; see the file COPYING3.  If not see
  20 <http://www.gnu.org/licenses/>.  */
  21
  22 #include "config.h"
  23 #include "system.h"
  24 #include "coretypes.h"
  25 #include "tm.h"
  26 #include "diagnostic-core.h"
  27 #include "toplev.h"
  28
  29 #include "sparseset.h"
  30 #include "timevar.h"
  31 #include "rtl.h"
  32 #include "tm_p.h"
  33 #include "insn-config.h"
  34 #include "recog.h"
  35 #include "flags.h"
  36 #include "obstack.h"
  37 #include "basic-block.h"
  38 #include "output.h"
  39 #include "df.h"
  40 #include "target.h"
  41 #include "cfgloop.h"
  42 #include "tree-pass.h"
  43 #include "domwalk.h"
  44 #include "emit-rtl.h"
  45
  46
  47 /* This pass does simple forward propagation and simplification when an
  48    operand of an insn can only come from a single def.  This pass uses
  49    df.c, so it is global.  However, we only do limited analysis of
  50    available expressions.
  51
  52    1) The pass tries to propagate the source of the def into the use,
  53    and checks if the result is independent of the substituted value.
  54    For example, the high word of a (zero_extend:DI (reg:SI M)) is always
  55    zero, independent of the source register.
  56
  57    In particular, we propagate constants into the use site.  Sometimes
  58    RTL expansion did not put the constant in the same insn on purpose,
  59    to satisfy a predicate, and the result will fail to be recognized;
  60    but this happens rarely and in this case we can still create a
  61    REG_EQUAL note.  For multi-word operations, this
  62
  63       (set (subreg:SI (reg:DI 120) 0) (const_int 0))
  64       (set (subreg:SI (reg:DI 120) 4) (const_int -1))
  65       (set (subreg:SI (reg:DI 122) 0)
  66          (ior:SI (subreg:SI (reg:DI 119) 0) (subreg:SI (reg:DI 120) 0)))
  67       (set (subreg:SI (reg:DI 122) 4)
  68          (ior:SI (subreg:SI (reg:DI 119) 4) (subreg:SI (reg:DI 120) 4)))
  69
  70    can be simplified to the much simpler
  71
  72       (set (subreg:SI (reg:DI 122) 0) (subreg:SI (reg:DI 119) 0))
  73       (set (subreg:SI (reg:DI 122) 4) (const_int -1))
  74
  75    This particular propagation is also effective at putting together
  76    complex addressing modes.  We are more aggressive inside MEMs, in
  77    that all definitions are propagated if the use is in a MEM; if the
  78    result is a valid memory address we check address_cost to decide
  79    whether the substitution is worthwhile.
  80
  81    2) The pass propagates register copies.  This is not as effective as
  82    the copy propagation done by CSE's canon_reg, which works by walking
  83    the instruction chain, but it can help the other transformations.
  84
  85    We should consider removing this optimization, and instead reorder the
  86    RTL passes, because GCSE does this transformation too.  With some luck,
  87    the CSE pass at the end of rest_of_handle_gcse could also go away.
  88
  89    3) The pass looks for paradoxical subregs that are actually unnecessary.
  90    Things like this:
  91
  92      (set (reg:QI 120) (subreg:QI (reg:SI 118) 0))
  93      (set (reg:QI 121) (subreg:QI (reg:SI 119) 0))
  94      (set (reg:SI 122) (plus:SI (subreg:SI (reg:QI 120) 0)
  95                                 (subreg:SI (reg:QI 121) 0)))
  96
  97    are very common on machines that can only do word-sized operations.
  98    For each use of a paradoxical subreg (subreg:WIDER (reg:NARROW N) 0),
  99    if it has a single def and it is (subreg:NARROW (reg:WIDE M) 0),
 100    we can replace the paradoxical subreg with simply (reg:WIDE M).  The
 101    above will simplify this to
 102
 103      (set (reg:QI 120) (subreg:QI (reg:SI 118) 0))
 104      (set (reg:QI 121) (subreg:QI (reg:SI 119) 0))
 105      (set (reg:SI 122) (plus:SI (reg:SI 118) (reg:SI 119)))
 106
 107    where the first two insns are now dead.
 108
 109    We used to use reaching definitions to find which uses have a
 110    single reaching definition (sounds obvious...), but this is too
 111    complex a problem in nasty testcases like PR33928.  Now we use the
 112    multiple definitions problem in df-problems.c.  The similarity
 113    between that problem and SSA form creation is taken further, in
 114    that fwprop does a dominator walk to create its chains; however,
 115    instead of creating a PHI function where multiple definitions meet
 116    I just punt and record only singleton use-def chains, which is
 117    all that is needed by fwprop.  */
 118
 119
 120 static int num_changes;
 121
 122 DEF_VEC_P(df_ref);
 123 DEF_VEC_ALLOC_P(df_ref,heap);
 124 static VEC(df_ref,heap) *use_def_ref;     /* Def per use; index DF_REF_ID.  */
 125 static VEC(df_ref,heap) *reg_defs;        /* Current def; index is regno.  */
 126 static VEC(df_ref,heap) *reg_defs_stack;  /* Saved defs; NULL marks a block.  */
 127
 128 /* The MD bitmaps are trimmed to include only live registers to cut
 129    memory usage on testcases like insn-recog.c.  Track live registers
 130    in the basic block and do not perform forward propagation if the
 131    destination is a dead pseudo occurring in a note.  */
 132 static bitmap local_md;
 133 static bitmap local_lr;
 134
 135 /* Return the def recorded for USE in use_def_ref (indexed by DF_REF_ID),
 136    or NULL if process_uses did not record a single def for it.  */
 137
 138 static inline df_ref
 139 get_def_for_use (df_ref use)
 140 {
 141   return VEC_index (df_ref, use_def_ref, DF_REF_ID (use));
 142 }
 143
 144
 145 /* Update the reg_defs vector with non-partial definitions in DEF_REC.
 146    TOP_FLAG (DF_REF_AT_TOP or 0) selects which defs are processed; this
 147    matters when DEF_REC is an artificial def vector.  LOCAL_MD is modified
 148    as after a df_md_simulate_* function; we do more or less the same
 149    processing done there, so we do not use those functions.  */
 150 /* Defs with any of these flags set LOCAL_MD and clear the reg_defs entry.  */
 151 #define DF_MD_GEN_FLAGS \
 152         (DF_REF_PARTIAL | DF_REF_CONDITIONAL | DF_REF_MAY_CLOBBER)
 153
 154 static void
 155 process_defs (df_ref *def_rec, int top_flag)
 156 {
 157   df_ref def;
 158   while ((def = *def_rec++) != NULL)
 159     {
 160       df_ref curr_def = VEC_index (df_ref, reg_defs, DF_REF_REGNO (def));
 161       unsigned int dregno;
 162
 163       if ((DF_REF_FLAGS (def) & DF_REF_AT_TOP) != top_flag)
 164         continue;
 165
 166       dregno = DF_REF_REGNO (def);
 167       if (curr_def)
 168         VEC_safe_push (df_ref, heap, reg_defs_stack, curr_def);
 169       else
 170         {
 171           /* Do not store anything if "transitioning" from NULL to NULL.  But
 172              otherwise, push DEF itself as a special entry on the stack to tell
 173              the leave_block callback that the entry in reg_defs was NULL.  */
 174           if (DF_REF_FLAGS (def) & DF_MD_GEN_FLAGS)
 175             ;
 176           else
 177             VEC_safe_push (df_ref, heap, reg_defs_stack, def);
 178         }
 179
 180       if (DF_REF_FLAGS (def) & DF_MD_GEN_FLAGS)
 181         {
 182           bitmap_set_bit (local_md, dregno);
 183           VEC_replace (df_ref, reg_defs, dregno, NULL);
 184         }
 185       else
 186         {
 187           bitmap_clear_bit (local_md, dregno);
 188           VEC_replace (df_ref, reg_defs, dregno, def);
 189         }
 190     }
 191 }
 192
 193
 194 /* Fill the use_def_ref vector with values for the uses in USE_REC,
 195    taking reaching definitions info from LOCAL_MD and REG_DEFS.  A use is
 196    skipped if its register is in LOCAL_MD or is not in LOCAL_LR.  TOP_FLAG
 197    selects which uses are processed (for artificial use vectors).  */
 198
 199 static void
 200 process_uses (df_ref *use_rec, int top_flag)
 201 {
 202   df_ref use;
 203   while ((use = *use_rec++) != NULL)
 204     if ((DF_REF_FLAGS (use) & DF_REF_AT_TOP) == top_flag)
 205       {
 206         unsigned int uregno = DF_REF_REGNO (use);
 207         if (VEC_index (df_ref, reg_defs, uregno)
 208             && !bitmap_bit_p (local_md, uregno)
 209             && bitmap_bit_p (local_lr, uregno))
 210           VEC_replace (df_ref, use_def_ref, DF_REF_ID (use),
 211                        VEC_index (df_ref, reg_defs, uregno));
 212       }
 213 }
 214
 215 /* Dominator walk "before children" callback: process the uses and defs of BB.  */
 216 static void
 217 single_def_use_enter_block (struct dom_walk_data *walk_data ATTRIBUTE_UNUSED,
 218                             basic_block bb)
 219 {
 220   int bb_index = bb->index;
 221   struct df_md_bb_info *md_bb_info = df_md_get_bb_info (bb_index);
 222   struct df_lr_bb_info *lr_bb_info = df_lr_get_bb_info (bb_index);
 223   rtx insn;
 224
 225   bitmap_copy (local_md, &md_bb_info->in);
 226   bitmap_copy (local_lr, &lr_bb_info->in);
 227
 228   /* Push a marker for the leave_block callback.  */
 229   VEC_safe_push (df_ref, heap, reg_defs_stack, NULL);
 230
 231   process_uses (df_get_artificial_uses (bb_index), DF_REF_AT_TOP);
 232   process_defs (df_get_artificial_defs (bb_index), DF_REF_AT_TOP);
 233
 234   /* We don't call df_simulate_initialize_forwards, as it may overestimate
 235      the live registers if there are unused artificial defs.  We prefer
 236      liveness to be underestimated.  */
 237
 238   FOR_BB_INSNS (bb, insn)
 239     if (INSN_P (insn))
 240       {
 241         unsigned int uid = INSN_UID (insn);
 242         process_uses (DF_INSN_UID_USES (uid), 0);
 243         process_uses (DF_INSN_UID_EQ_USES (uid), 0);
 244         process_defs (DF_INSN_UID_DEFS (uid), 0);
 245         df_simulate_one_insn_forwards (bb, insn, local_lr);
 246       }
 247
 248   process_uses (df_get_artificial_uses (bb_index), 0);
 249   process_defs (df_get_artificial_defs (bb_index), 0);
 250 }
 251
 252 /* Pop the definitions created in this basic block when leaving its
 253    dominated parts, restoring reg_defs down to the NULL block marker.  */
 254
 255 static void
 256 single_def_use_leave_block (struct dom_walk_data *walk_data ATTRIBUTE_UNUSED,
 257                             basic_block bb ATTRIBUTE_UNUSED)
 258 {
 259   df_ref saved_def;
 260   while ((saved_def = VEC_pop (df_ref, reg_defs_stack)) != NULL)
 261     {
 262       unsigned int dregno = DF_REF_REGNO (saved_def);
 263
 264       /* A popped entry equal to the current def restores NULL; see process_defs.  */
 265       if (saved_def == VEC_index (df_ref, reg_defs, dregno))
 266         VEC_replace (df_ref, reg_defs, dregno, NULL);
 267       else
 268         VEC_replace (df_ref, reg_defs, dregno, saved_def);
 269     }
 270 }
 271
 272
 273 /* Build use_def_ref, a vector holding the reaching definitions of uses reached
 274    by a single dominating definition.  use_def_ref is not freed here.  */
 275
 276 static void
 277 build_single_def_use_links (void)
 278 {
 279   struct dom_walk_data walk_data;
 280
 281   /* We use the multiple definitions problem to compute our restricted
 282      use-def chains.  */
 283   df_set_flags (DF_EQ_NOTES);
 284   df_md_add_problem ();
 285   df_note_add_problem ();
 286   df_analyze ();
 287   df_maybe_reorganize_use_refs (DF_REF_ORDER_BY_INSN_WITH_NOTES);
 288
 289   use_def_ref = VEC_alloc (df_ref, heap, DF_USES_TABLE_SIZE ());
 290   VEC_safe_grow_cleared (df_ref, heap, use_def_ref, DF_USES_TABLE_SIZE ());
 291
 292   reg_defs = VEC_alloc (df_ref, heap, max_reg_num ());
 293   VEC_safe_grow_cleared (df_ref, heap, reg_defs, max_reg_num ());
 294
 295   reg_defs_stack = VEC_alloc (df_ref, heap, n_basic_blocks * 10);
 296   local_md = BITMAP_ALLOC (NULL);
 297   local_lr = BITMAP_ALLOC (NULL);
 298
 299   /* Walk the dominator tree looking for single reaching definitions
 300      dominating the uses.  This is similar to how SSA form is built.  */
 301   walk_data.dom_direction = CDI_DOMINATORS;
 302   walk_data.initialize_block_local_data = NULL;
 303   walk_data.before_dom_children = single_def_use_enter_block;
 304   walk_data.after_dom_children = single_def_use_leave_block;
 305
 306   init_walk_dominator_tree (&walk_data);
 307   walk_dominator_tree (&walk_data, ENTRY_BLOCK_PTR);
 308   fini_walk_dominator_tree (&walk_data);
 309
 310   BITMAP_FREE (local_lr);
 311   BITMAP_FREE (local_md);
 312   VEC_free (df_ref, heap, reg_defs);
 313   VEC_free (df_ref, heap, reg_defs_stack);
 314 }
 315
 316 \f
 317 /* Do not try to replace constant addresses or addresses of local and
 318    argument slots.  These MEM expressions are made only once and inserted
 319    in many instructions, as well as being used to control symbol table
 320    output.  It is not safe to clobber them.  Return false for such ADDRs.
 321
 322    There are some uncommon cases where the address is already in a register
 323    for some reason, but we cannot take advantage of that because we have
 324    no easy way to unshare the MEM.  In addition, looking up all stack
 325    addresses is costly.  */
 326
 327 static bool
 328 can_simplify_addr (rtx addr)
 329 {
 330   rtx reg;
 331
 332   if (CONSTANT_ADDRESS_P (addr))
 333     return false;
 334
 335   if (GET_CODE (addr) == PLUS)
 336     reg = XEXP (addr, 0);
 337   else
 338     reg = addr;
 339
 340   return (!REG_P (reg)
 341           || (REGNO (reg) != FRAME_POINTER_REGNUM
 342               && REGNO (reg) != HARD_FRAME_POINTER_REGNUM
 343               && REGNO (reg) != ARG_POINTER_REGNUM));
 344 }
 345
 346 /* Canonicalize the address X in place, from the point of view
 347    that all multiplications are represented as MULT instead of the multiply
 348    by a power of 2 being represented as ASHIFT.
 349
 350    Every ASHIFT we find has been made by simplify_gen_binary and was not
 351    there before, so it is not shared.  So we can do this in place.  */
 352
 353 static void
 354 canonicalize_address (rtx x)
 355 {
 356   for (;;)
 357     switch (GET_CODE (x))
 358       {
 359       case ASHIFT:
 360         if (CONST_INT_P (XEXP (x, 1))
 361             && INTVAL (XEXP (x, 1)) < GET_MODE_BITSIZE (GET_MODE (x))
 362             && INTVAL (XEXP (x, 1)) >= 0)
 363           {
 364             HOST_WIDE_INT shift = INTVAL (XEXP (x, 1));
 365             PUT_CODE (x, MULT);
 366             XEXP (x, 1) = gen_int_mode ((HOST_WIDE_INT) 1 << shift,
 367                                         GET_MODE (x));
 368           }
 369
 370         x = XEXP (x, 0);
 371         break;
 372
 373       case PLUS:
 374         if (GET_CODE (XEXP (x, 0)) == PLUS
 375             || GET_CODE (XEXP (x, 0)) == ASHIFT
 376             || GET_CODE (XEXP (x, 0)) == CONST)
 377           canonicalize_address (XEXP (x, 0));
 378
 379         x = XEXP (x, 1);
 380         break;
 381
 382       case CONST:
 383         x = XEXP (x, 0);
 384         break;
 385
 386       default:
 387         return;
 388       }
 389 }
 390
 391 /* OLD_RTX is a memory address.  Return whether it is good to use NEW_RTX
 392    instead, for a memory access in the given MODE and address space AS.  */
 393
 394 static bool
 395 should_replace_address (rtx old_rtx, rtx new_rtx, enum machine_mode mode,
 396                         addr_space_t as, bool speed)
 397 {
 398   int gain;
 399
 400   if (rtx_equal_p (old_rtx, new_rtx)
 401       || !memory_address_addr_space_p (mode, new_rtx, as))
 402     return false;
 403
 404   /* Copy propagation is always ok.  */
 405   if (REG_P (old_rtx) && REG_P (new_rtx))
 406     return true;
 407
 408   /* Prefer the new address if it is less expensive.  */
 409   gain = (address_cost (old_rtx, mode, as, speed)
 410           - address_cost (new_rtx, mode, as, speed));
 411
 412   /* If the addresses have equivalent cost, prefer the new address
 413      if it has the highest `rtx_cost'.  That has the potential of
 414      eliminating the most insns without additional costs, and it
 415      is the same that cse.c used to do.  */
 416   if (gain == 0)
 417     gain = rtx_cost (new_rtx, SET, speed) - rtx_cost (old_rtx, SET, speed);
 418
 419   return (gain > 0);
 420 }
 421
 422
 423 /* Flags for the last parameter of propagate_rtx_1.  */
 424
 425 enum {
 426   /* If PR_CAN_APPEAR is true, propagate_rtx_1 always returns true;
 427      if it is false, propagate_rtx_1 returns false if, for at least
 428      one occurrence of OLD, it failed to collapse the result to a constant.
 429      For example, (mult:M (reg:M A) (minus:M (reg:M B) (reg:M A))) may
 430      collapse to zero if replacing (reg:M B) with (reg:M A).
 431
 432      PR_CAN_APPEAR is disregarded inside MEMs: in that case,
 433      propagate_rtx_1 just tries to make cheaper and valid memory
 434      addresses.  */
 435   PR_CAN_APPEAR = 1,
 436
 437   /* If PR_HANDLE_MEM is not set, propagate_rtx_1 won't attempt any replacement
 438      outside memory addresses.  This is needed because propagate_rtx_1 does
 439      not do any analysis on memory; thus it is very conservative and in general
 440      it will fail if non-read-only MEMs are found in the source expression.
 441
 442      PR_HANDLE_MEM is set when the source of the propagation was not
 443      another MEM.  Then, it is safe not to treat non-read-only MEMs as
 444      ``opaque'' objects.  */
 445   PR_HANDLE_MEM = 2,
 446
 447   /* Set when costs should be optimized for speed.  */
 448   PR_OPTIMIZE_FOR_SPEED = 4
 449 };
 450
 451
 452 /* Replace all occurrences of OLD_RTX in *PX with NEW_RTX and try to simplify
 453    the resulting expression.  Replace *PX with a new RTL expression if an
 454    occurrence of OLD_RTX was found.  FLAGS is a mask of the PR_* flags.
 455
 456    This is only a wrapper around simplify-rtx.c: do not add any pattern
 457    matching code here.  (The sole exception is the handling of LO_SUM, but
 458    that is because there is no simplify_gen_* function for LO_SUM).  */
 459
 460 static bool
 461 propagate_rtx_1 (rtx *px, rtx old_rtx, rtx new_rtx, int flags)
 462 {
 463   rtx x = *px, tem = NULL_RTX, op0, op1, op2;
 464   enum rtx_code code = GET_CODE (x);
 465   enum machine_mode mode = GET_MODE (x);
 466   enum machine_mode op_mode;
 467   bool can_appear = (flags & PR_CAN_APPEAR) != 0;
 468   bool valid_ops = true;
 469
 470   if (!(flags & PR_HANDLE_MEM) && MEM_P (x) && !MEM_READONLY_P (x))
 471     {
 472       /* If unsafe, change MEMs to CLOBBERs or SCRATCHes (to preserve whether
 473          they have side effects or not).  */
 474       *px = (side_effects_p (x)
 475              ? gen_rtx_CLOBBER (GET_MODE (x), const0_rtx)
 476              : gen_rtx_SCRATCH (GET_MODE (x)));
 477       return false;
 478     }
 479
 480   /* If X is OLD_RTX, replace it with NEW_RTX.  The replacement is reported
 481      as valid only if PR_CAN_APPEAR is set (it always is inside a MEM).  */
 482   if (x == old_rtx)
 483     {
 484       *px = new_rtx;
 485       return can_appear;
 486     }
 487
 488   /* If this is an expression, try recursive substitution.  */
 489   switch (GET_RTX_CLASS (code))
 490     {
 491     case RTX_UNARY:
 492       op0 = XEXP (x, 0);
 493       op_mode = GET_MODE (op0);
 494       valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags);
 495       if (op0 == XEXP (x, 0))
 496         return true;
 497       tem = simplify_gen_unary (code, mode, op0, op_mode);
 498       break;
 499
 500     case RTX_BIN_ARITH:
 501     case RTX_COMM_ARITH:
 502       op0 = XEXP (x, 0);
 503       op1 = XEXP (x, 1);
 504       valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags);
 505       valid_ops &= propagate_rtx_1 (&op1, old_rtx, new_rtx, flags);
 506       if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1))
 507         return true;
 508       tem = simplify_gen_binary (code, mode, op0, op1);
 509       break;
 510
 511     case RTX_COMPARE:
 512     case RTX_COMM_COMPARE:
 513       op0 = XEXP (x, 0);
 514       op1 = XEXP (x, 1);
 515       op_mode = GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
 516       valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags);
 517       valid_ops &= propagate_rtx_1 (&op1, old_rtx, new_rtx, flags);
 518       if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1))
 519         return true;
 520       tem = simplify_gen_relational (code, mode, op_mode, op0, op1);
 521       break;
 522
 523     case RTX_TERNARY:
 524     case RTX_BITFIELD_OPS:
 525       op0 = XEXP (x, 0);
 526       op1 = XEXP (x, 1);
 527       op2 = XEXP (x, 2);
 528       op_mode = GET_MODE (op0);
 529       valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags);
 530       valid_ops &= propagate_rtx_1 (&op1, old_rtx, new_rtx, flags);
 531       valid_ops &= propagate_rtx_1 (&op2, old_rtx, new_rtx, flags);
 532       if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1) && op2 == XEXP (x, 2))
 533         return true;
 534       if (op_mode == VOIDmode)
 535         op_mode = GET_MODE (op0);
 536       tem = simplify_gen_ternary (code, mode, op_mode, op0, op1, op2);
 537       break;
 538
 539     case RTX_EXTRA:
 540       /* The only case we try to handle is a SUBREG.  */
 541       if (code == SUBREG)
 542         {
 543           op0 = XEXP (x, 0);
 544           valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags);
 545           if (op0 == XEXP (x, 0))
 546             return true;
 547           tem = simplify_gen_subreg (mode, op0, GET_MODE (SUBREG_REG (x)),
 548                                      SUBREG_BYTE (x));
 549         }
 550       break;
 551
 552     case RTX_OBJ:
 553       if (code == MEM && x != new_rtx)
 554         {
 555           rtx new_op0;
 556           op0 = XEXP (x, 0);
 557
 558           /* There are some addresses that we cannot work on.  */
 559           if (!can_simplify_addr (op0))
 560             return true;
 561
 562           op0 = new_op0 = targetm.delegitimize_address (op0);
 563           valid_ops &= propagate_rtx_1 (&new_op0, old_rtx, new_rtx,
 564                                         flags | PR_CAN_APPEAR);
 565
 566           /* Dismiss transformation that we do not want to carry on.  */
 567           if (!valid_ops
 568               || new_op0 == op0
 569               || !(GET_MODE (new_op0) == GET_MODE (op0)
 570                    || GET_MODE (new_op0) == VOIDmode))
 571             return true;
 572
 573           canonicalize_address (new_op0);
 574
 575           /* Copy propagations are always ok.  Otherwise check the costs.  */
 576           if (!(REG_P (old_rtx) && REG_P (new_rtx))
 577               && !should_replace_address (op0, new_op0, GET_MODE (x),
 578                                           MEM_ADDR_SPACE (x),
 579                                           flags & PR_OPTIMIZE_FOR_SPEED))
 580             return true;
 581
 582           tem = replace_equiv_address_nv (x, new_op0);
 583         }
 584
 585       else if (code == LO_SUM)
 586         {
 587           op0 = XEXP (x, 0);
 588           op1 = XEXP (x, 1);
 589
 590           /* The only simplification we do attempts to remove references to op0
 591              or make it constant -- in both cases, op0's invalidity will not
 592              make the result invalid.  */
 593           propagate_rtx_1 (&op0, old_rtx, new_rtx, flags | PR_CAN_APPEAR);
 594           valid_ops &= propagate_rtx_1 (&op1, old_rtx, new_rtx, flags);
 595           if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1))
 596             return true;
 597
 598           /* (lo_sum (high x) x) -> x  */
 599           if (GET_CODE (op0) == HIGH && rtx_equal_p (XEXP (op0, 0), op1))
 600             tem = op1;
 601           else
 602             tem = gen_rtx_LO_SUM (mode, op0, op1);
 603
 604           /* OP1 is likely not a legitimate address, otherwise there would have
 605              been no LO_SUM.  We want it to disappear if it is invalid, return
 606              false in that case.  */
 607           return memory_address_p (mode, tem);
 608         }
 609
 610       else if (code == REG)
 611         {
 612           if (rtx_equal_p (x, old_rtx))
 613             {
 614               *px = new_rtx;
 615               return can_appear;
 616             }
 617         }
 618       break;
 619
 620     default:
 621       break;
 622     }
 623
 624   /* No change, no trouble.  */
 625   if (tem == NULL_RTX)
 626     return true;
 627
 628   *px = tem;
 629
 630   /* The replacement we made so far is valid, if all of the recursive
 631      replacements were valid, or we could simplify everything to
 632      a constant.  */
 633   return valid_ops || can_appear || CONSTANT_P (tem);
 634 }
 635
 636
 637 /* for_each_rtx traversal function that returns 1 if BODY points to
 638    a MEM that is not read-only (MEM_READONLY_P is false).  */
 639
 640 static int
 641 varying_mem_p (rtx *body, void *data ATTRIBUTE_UNUSED)
 642 {
 643   rtx x = *body;
 644   return MEM_P (x) && !MEM_READONLY_P (x);
 645 }
 646
 647
 648 /* Replace all occurrences of OLD_RTX in X with NEW_RTX and try to simplify
 649    the resulting expression (in mode MODE).  Return the new expression if
 650    the replacement is accepted, otherwise NULL_RTX.
 651
 652    Simplifications where occurrences of NEW_RTX collapse to a constant are always
 653    accepted.  All simplifications are accepted if NEW_RTX is a pseudo too.
 654    Otherwise, we accept simplifications that have a lower or equal cost.  */
 655
 656 static rtx
 657 propagate_rtx (rtx x, enum machine_mode mode, rtx old_rtx, rtx new_rtx,
 658                bool speed)
 659 {
 660   rtx tem;
 661   bool collapsed;
 662   int flags;
 663
 664   if (REG_P (new_rtx) && REGNO (new_rtx) < FIRST_PSEUDO_REGISTER)
 665     return NULL_RTX;
 666
 667   flags = 0;
 668   if (REG_P (new_rtx) || CONSTANT_P (new_rtx))
 669     flags |= PR_CAN_APPEAR;
 670   if (!for_each_rtx (&new_rtx, varying_mem_p, NULL))
 671     flags |= PR_HANDLE_MEM;
 672
 673   if (speed)
 674     flags |= PR_OPTIMIZE_FOR_SPEED;
 675
 676   tem = x;
 677   collapsed = propagate_rtx_1 (&tem, old_rtx, copy_rtx (new_rtx), flags);
 678   if (tem == x || !collapsed)
 679     return NULL_RTX;
 680
 681   /* gen_lowpart_common will not be able to process VOIDmode entities other
 682      than CONST_INTs.  */
 683   if (GET_MODE (tem) == VOIDmode && !CONST_INT_P (tem))
 684     return NULL_RTX;
 685
 686   if (GET_MODE (tem) == VOIDmode)
 687     tem = rtl_hooks.gen_lowpart_no_emit (mode, tem);
 688   else
 689     gcc_assert (GET_MODE (tem) == mode);
 690
 691   return tem;
 692 }
 693
 694
 695 \f
 696
 697 /* Return true if the register from reference REF is defined by an insn
 698    in the range starting at FROM and ending before (not including) TO.  */
 699
 700 static bool
 701 local_ref_killed_between_p (df_ref ref, rtx from, rtx to)
 702 {
 703   rtx insn;
 704
 705   for (insn = from; insn != to; insn = NEXT_INSN (insn))
 706     {
 707       df_ref *def_rec;
 708       if (!INSN_P (insn))
 709         continue;
 710
 711       for (def_rec = DF_INSN_DEFS (insn); *def_rec; def_rec++)
 712         {
 713           df_ref def = *def_rec;
 714           if (DF_REF_REGNO (ref) == DF_REF_REGNO (def))
 715             return true;
 716         }
 717     }
 718   return false;
 719 }
 720
 721
 722 /* Return true if the register of USE may be killed between DEF_INSN and
 723    TARGET_INSN.  We check only restricted conditions, else assume "killed":
 724    - if the register is a pseudo with a sole definition, it is not killed;
 725    - in the same basic block, we look for definitions of the register from
 726      DEF_INSN up to TARGET_INSN;
 727    - if TARGET_INSN's basic block has DEF_INSN's basic block as the sole
 728      predecessor, we look for definitions at or after DEF_INSN in its block
 729      and before TARGET_INSN in its block.  */
 730 static bool
 731 use_killed_between (df_ref use, rtx def_insn, rtx target_insn)
 732 {
 733   basic_block def_bb = BLOCK_FOR_INSN (def_insn);
 734   basic_block target_bb = BLOCK_FOR_INSN (target_insn);
 735   int regno;
 736   df_ref def;
 737
 738   /* We used to have a def reaching a use that is _before_ the def,
 739      with the def not dominating the use even though the use and def
 740      are in the same basic block, when a register may be used
 741      uninitialized in a loop.  This should not happen anymore since
 742      we do not use reaching definitions, but still we test for such
 743      cases and assume that DEF is not available.  */
 744   if (def_bb == target_bb
 745       ? DF_INSN_LUID (def_insn) >= DF_INSN_LUID (target_insn)
 746       : !dominated_by_p (CDI_DOMINATORS, target_bb, def_bb))
 747     return true;
 748
 749   /* Check if the reg in USE has only one definition.  We already
 750      know that this definition reaches use, or we wouldn't be here.
 751      However, this is invalid for hard registers because if they are
 752      live at the beginning of the function it does not mean that we
 753      have an uninitialized access.  */
 754   regno = DF_REF_REGNO (use);
 755   def = DF_REG_DEF_CHAIN (regno);
 756   if (def
 757       && DF_REF_NEXT_REG (def) == NULL
 758       && regno >= FIRST_PSEUDO_REGISTER)
 759     return false;
 760
 761   /* Check locally if we are in the same basic block.  */
 762   if (def_bb == target_bb)
 763     return local_ref_killed_between_p (use, def_insn, target_insn);
 764
 765   /* Finally, if DEF_BB is the sole predecessor of TARGET_BB.  */
 766   if (single_pred_p (target_bb)
 767       && single_pred (target_bb) == def_bb)
 768     {
 769       df_ref x;
 770
 771       /* See if USE is killed between DEF_INSN and the last insn in the
 772          basic block containing DEF_INSN.  */
 773       x = df_bb_regno_last_def_find (def_bb, regno);
 774       if (x && DF_INSN_LUID (DF_REF_INSN (x)) >= DF_INSN_LUID (def_insn))
 775         return true;
 776
 777       /* See if USE is killed between TARGET_INSN and the first insn in the
 778          basic block containing TARGET_INSN.  */
 779       x = df_bb_regno_first_def_find (target_bb, regno);
 780       if (x && DF_INSN_LUID (DF_REF_INSN (x)) < DF_INSN_LUID (target_insn))
 781         return true;
 782
 783       return false;
 784     }
 785
 786   /* Otherwise assume the worst case.  */
 787   return true;
 788 }
 789
 790
 791 /* Return true if all uses in DEF_INSN can be used in TARGET_INSN.  A
 792    precise answer would require full computation of available expressions;
 793    we check only restricted conditions, see use_killed_between.  */
 794 static bool
 795 all_uses_available_at (rtx def_insn, rtx target_insn)
 796 {
 797   df_ref *use_rec;
 798   struct df_insn_info *insn_info = DF_INSN_INFO_GET (def_insn);
 799   rtx def_set = single_set (def_insn);
 800
 801   gcc_assert (def_set);
 802
 803   /* If target_insn comes right after def_insn, which is very common
 804      for addresses, we can use a quicker test.  */
 805   if (NEXT_INSN (def_insn) == target_insn
 806       && REG_P (SET_DEST (def_set)))
 807     {
 808       rtx def_reg = SET_DEST (def_set);
 809
 810       /* If the insn uses the reg that it defines, the substitution is
 811          invalid.  */
 812       for (use_rec = DF_INSN_INFO_USES (insn_info); *use_rec; use_rec++)
 813         {
 814           df_ref use = *use_rec;
 815           if (rtx_equal_p (DF_REF_REG (use), def_reg))
 816             return false;
 817         }
 818       for (use_rec = DF_INSN_INFO_EQ_USES (insn_info); *use_rec; use_rec++)
 819         {
 820           df_ref use = *use_rec;
 821           if (rtx_equal_p (DF_REF_REG (use), def_reg))
 822             return false;
 823         }
 824     }
 825   else
 826     {
 827       rtx def_reg = REG_P (SET_DEST (def_set)) ? SET_DEST (def_set) : NULL_RTX;
 828
 829       /* Look at all the uses of DEF_INSN, and see if they are not
 830          killed between DEF_INSN and TARGET_INSN.  */
 831       for (use_rec = DF_INSN_INFO_USES (insn_info); *use_rec; use_rec++)
 832         {
 833           df_ref use = *use_rec;
 834           if (def_reg && rtx_equal_p (DF_REF_REG (use), def_reg))
 835             return false;
 836           if (use_killed_between (use, def_insn, target_insn))
 837             return false;
 838         }
 839       for (use_rec = DF_INSN_INFO_EQ_USES (insn_info); *use_rec; use_rec++)
 840         {
 841           df_ref use = *use_rec;
 842           if (def_reg && rtx_equal_p (DF_REF_REG (use), def_reg))
 843             return false;
 844           if (use_killed_between (use, def_insn, target_insn))
 845             return false;
 846         }
 847     }
 848
 849   return true;
 850 }
 851
 852 \f
 853 static df_ref *active_defs;
 854 #ifdef ENABLE_CHECKING
 855 static sparseset active_defs_check;
 856 #endif
 857
 858 /* Fill the ACTIVE_DEFS array with the use->def link for the registers
 859    mentioned in USE_REC.  Register the valid entries in ACTIVE_DEFS_CHECK
 860    too, for checking purposes.  */
 861
 862 static void
 863 register_active_defs (df_ref *use_rec)
 864 {
 865   while (*use_rec)
 866     {
 867       df_ref use = *use_rec++;
 868       df_ref def = get_def_for_use (use);
 869       int regno = DF_REF_REGNO (use);
 870
 871 #ifdef ENABLE_CHECKING
 872       sparseset_set_bit (active_defs_check, regno);
 873 #endif
 874       active_defs[regno] = def;
 875     }
 876 }
 877
 878
 879 /* Build the use->def links that we use to update the dataflow info
 880    for new uses.  Note that building the links is very cheap and if
 881    it were done earlier, they could be used to rule out invalid
 882    propagations (in addition to what is done in all_uses_available_at).
 883    I'm not doing this yet, though.  */
 884
 885 static void
 886 update_df_init (rtx def_insn, rtx insn)
 887 {
 888 #ifdef ENABLE_CHECKING
 889   sparseset_clear (active_defs_check);
 890 #endif
 891   register_active_defs (DF_INSN_USES (def_insn));
 892   register_active_defs (DF_INSN_USES (insn));
 893   register_active_defs (DF_INSN_EQ_USES (insn));
 894 }
 895
 896
 897 /* Update the USE_DEF_REF array for the given use, using the active definitions
 898    in the ACTIVE_DEFS array to match pseudos to their def. */
 899
 900 static inline void
 901 update_uses (df_ref *use_rec)
 902 {
 903   while (*use_rec)
 904     {
 905       df_ref use = *use_rec++;
 906       int regno = DF_REF_REGNO (use);
 907
 908       /* Set up the use-def chain.  */
 909       if (DF_REF_ID (use) >= (int) VEC_length (df_ref, use_def_ref))
 910         VEC_safe_grow_cleared (df_ref, heap, use_def_ref,
 911                                DF_REF_ID (use) + 1);
 912
 913 #ifdef ENABLE_CHECKING
 914       gcc_assert (sparseset_bit_p (active_defs_check, regno));
 915 #endif
 916       VEC_replace (df_ref, use_def_ref, DF_REF_ID (use), active_defs[regno]);
 917     }
 918 }
 919
 920
 921 /* Update the USE_DEF_REF array for the uses in INSN.  Only update note
 922    uses if NOTES_ONLY is true.  */
 923
 924 static void
 925 update_df (rtx insn, rtx note)
 926 {
 927   struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
 928
 929   if (note)
 930     {
 931       df_uses_create (&XEXP (note, 0), insn, DF_REF_IN_NOTE);
 932       df_notes_rescan (insn);
 933     }
 934   else
 935     {
 936       df_uses_create (&PATTERN (insn), insn, 0);
 937       df_insn_rescan (insn);
 938       update_uses (DF_INSN_INFO_USES (insn_info));
 939     }
 940
 941   update_uses (DF_INSN_INFO_EQ_USES (insn_info));
 942 }
 943
 944
 945 /* Try substituting NEW into LOC, which originated from forward propagation
 946    of USE's value from DEF_INSN.  SET_REG_EQUAL says whether we are
 947    substituting the whole SET_SRC, so we can set a REG_EQUAL note if the
 948    new insn is not recognized.  Return whether the substitution was
 949    performed.  */
 950
 951 static bool
 952 try_fwprop_subst (df_ref use, rtx *loc, rtx new_rtx, rtx def_insn, bool set_reg_equal)
 953 {
 954   rtx insn = DF_REF_INSN (use);
 955   rtx set = single_set (insn);
 956   rtx note = NULL_RTX;
 957   bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn));
 958   int old_cost = 0;
 959   bool ok;
 960
 961   update_df_init (def_insn, insn);
 962
 963   /* forward_propagate_subreg may be operating on an instruction with
 964      multiple sets.  If so, assume the cost of the new instruction is
 965      not greater than the old one.  */
 966   if (set)
 967     old_cost = rtx_cost (SET_SRC (set), SET, speed);
 968   if (dump_file)
 969     {
 970       fprintf (dump_file, "\nIn insn %d, replacing\n ", INSN_UID (insn));
 971       print_inline_rtx (dump_file, *loc, 2);
 972       fprintf (dump_file, "\n with ");
 973       print_inline_rtx (dump_file, new_rtx, 2);
 974       fprintf (dump_file, "\n");
 975     }
 976
 977   validate_unshare_change (insn, loc, new_rtx, true);
 978   if (!verify_changes (0))
 979     {
 980       if (dump_file)
 981         fprintf (dump_file, "Changes to insn %d not recognized\n",
 982                  INSN_UID (insn));
 983       ok = false;
 984     }
 985
 986   else if (DF_REF_TYPE (use) == DF_REF_REG_USE
 987            && set
 988            && rtx_cost (SET_SRC (set), SET, speed) > old_cost)
 989     {
 990       if (dump_file)
 991         fprintf (dump_file, "Changes to insn %d not profitable\n",
 992                  INSN_UID (insn));
 993       ok = false;
 994     }
 995
 996   else
 997     {
 998       if (dump_file)
 999         fprintf (dump_file, "Changed insn %d\n", INSN_UID (insn));
1000       ok = true;
1001     }
1002
1003   if (ok)
1004     {
1005       confirm_change_group ();
1006       num_changes++;
1007     }
1008   else
1009     {
1010       cancel_changes (0);
1011
1012       /* Can also record a simplified value in a REG_EQUAL note,
1013          making a new one if one does not already exist.  */
1014       if (set_reg_equal)
1015         {
1016           if (dump_file)
1017             fprintf (dump_file, " Setting REG_EQUAL note\n");
1018
1019           note = set_unique_reg_note (insn, REG_EQUAL, copy_rtx (new_rtx));
1020         }
1021     }
1022
1023   if ((ok || note) && !CONSTANT_P (new_rtx))
1024     update_df (insn, note);
1025
1026   return ok;
1027 }
1028
1029 /* For the given single_set INSN, containing SRC known to be a
1030    ZERO_EXTEND or SIGN_EXTEND of a register, return true if INSN
1031    is redundant due to the register being set by a LOAD_EXTEND_OP
1032    load from memory.  */
1033
1034 static bool
1035 free_load_extend (rtx src, rtx insn)
1036 {
1037   rtx reg;
1038   df_ref *use_vec;
1039   df_ref use = 0, def;
1040
1041   reg = XEXP (src, 0);
1042 #ifdef LOAD_EXTEND_OP
1043   if (LOAD_EXTEND_OP (GET_MODE (reg)) != GET_CODE (src))
1044 #endif
1045     return false;
1046
1047   for (use_vec = DF_INSN_USES (insn); *use_vec; use_vec++)
1048     {
1049       use = *use_vec;
1050
1051       if (!DF_REF_IS_ARTIFICIAL (use)
1052           && DF_REF_TYPE (use) == DF_REF_REG_USE
1053           && DF_REF_REG (use) == reg)
1054         break;
1055     }
1056   if (!use)
1057     return false;
1058
1059   def = get_def_for_use (use);
1060   if (!def)
1061     return false;
1062
1063   if (DF_REF_IS_ARTIFICIAL (def))
1064     return false;
1065
1066   if (NONJUMP_INSN_P (DF_REF_INSN (def)))
1067     {
1068       rtx patt = PATTERN (DF_REF_INSN (def));
1069
1070       if (GET_CODE (patt) == SET
1071           && GET_CODE (SET_SRC (patt)) == MEM
1072           && rtx_equal_p (SET_DEST (patt), reg))
1073         return true;
1074     }
1075   return false;
1076 }
1077
1078 /* If USE is a subreg, see if it can be replaced by a pseudo.  */
1079
1080 static bool
1081 forward_propagate_subreg (df_ref use, rtx def_insn, rtx def_set)
1082 {
1083   rtx use_reg = DF_REF_REG (use);
1084   rtx use_insn, src;
1085
1086   /* Only consider subregs... */
1087   enum machine_mode use_mode = GET_MODE (use_reg);
1088   if (GET_CODE (use_reg) != SUBREG
1089       || !REG_P (SET_DEST (def_set)))
1090     return false;
1091
1092   /* If this is a paradoxical SUBREG...  */
1093   if (GET_MODE_SIZE (use_mode)
1094       > GET_MODE_SIZE (GET_MODE (SUBREG_REG (use_reg))))
1095     {
1096       /* If this is a paradoxical SUBREG, we have no idea what value the
1097          extra bits would have.  However, if the operand is equivalent to
1098          a SUBREG whose operand is the same as our mode, and all the modes
1099          are within a word, we can just use the inner operand because
1100          these SUBREGs just say how to treat the register.  */
1101       use_insn = DF_REF_INSN (use);
1102       src = SET_SRC (def_set);
1103       if (GET_CODE (src) == SUBREG
1104           && REG_P (SUBREG_REG (src))
1105           && GET_MODE (SUBREG_REG (src)) == use_mode
1106           && subreg_lowpart_p (src)
1107           && all_uses_available_at (def_insn, use_insn))
1108         return try_fwprop_subst (use, DF_REF_LOC (use), SUBREG_REG (src),
1109                                  def_insn, false);
1110     }
1111
1112   /* If this is a SUBREG of a ZERO_EXTEND or SIGN_EXTEND, and the SUBREG
1113      is the low part of the reg being extended then just use the inner
1114      operand.  Don't do this if the ZERO_EXTEND or SIGN_EXTEND insn will
1115      be removed due to it matching a LOAD_EXTEND_OP load from memory.  */
1116   else if (subreg_lowpart_p (use_reg))
1117     {
1118       use_insn = DF_REF_INSN (use);
1119       src = SET_SRC (def_set);
1120       if ((GET_CODE (src) == ZERO_EXTEND
1121            || GET_CODE (src) == SIGN_EXTEND)
1122           && REG_P (XEXP (src, 0))
1123           && GET_MODE (XEXP (src, 0)) == use_mode
1124           && !free_load_extend (src, def_insn)
1125           && all_uses_available_at (def_insn, use_insn))
1126         return try_fwprop_subst (use, DF_REF_LOC (use), XEXP (src, 0),
1127                                  def_insn, false);
1128     }
1129
1130   return false;
1131 }
1132
1133 /* Try to replace USE with SRC (defined in DEF_INSN) in __asm.  */
1134
1135 static bool
1136 forward_propagate_asm (df_ref use, rtx def_insn, rtx def_set, rtx reg)
1137 {
1138   rtx use_insn = DF_REF_INSN (use), src, use_pat, asm_operands, new_rtx, *loc;
1139   int speed_p, i;
1140   df_ref *use_vec;
1141
1142   gcc_assert ((DF_REF_FLAGS (use) & DF_REF_IN_NOTE) == 0);
1143
1144   src = SET_SRC (def_set);
1145   use_pat = PATTERN (use_insn);
1146
1147   /* In __asm don't replace if src might need more registers than
1148      reg, as that could increase register pressure on the __asm.  */
1149   use_vec = DF_INSN_USES (def_insn);
1150   if (use_vec[0] && use_vec[1])
1151     return false;
1152
1153   update_df_init (def_insn, use_insn);
1154   speed_p = optimize_bb_for_speed_p (BLOCK_FOR_INSN (use_insn));
1155   asm_operands = NULL_RTX;
1156   switch (GET_CODE (use_pat))
1157     {
1158     case ASM_OPERANDS:
1159       asm_operands = use_pat;
1160       break;
1161     case SET:
1162       if (MEM_P (SET_DEST (use_pat)))
1163         {
1164           loc = &SET_DEST (use_pat);
1165           new_rtx = propagate_rtx (*loc, GET_MODE (*loc), reg, src, speed_p);
1166           if (new_rtx)
1167             validate_unshare_change (use_insn, loc, new_rtx, true);
1168         }
1169       asm_operands = SET_SRC (use_pat);
1170       break;
1171     case PARALLEL:
1172       for (i = 0; i < XVECLEN (use_pat, 0); i++)
1173         if (GET_CODE (XVECEXP (use_pat, 0, i)) == SET)
1174           {
1175             if (MEM_P (SET_DEST (XVECEXP (use_pat, 0, i))))
1176               {
1177                 loc = &SET_DEST (XVECEXP (use_pat, 0, i));
1178                 new_rtx = propagate_rtx (*loc, GET_MODE (*loc), reg,
1179                                          src, speed_p);
1180                 if (new_rtx)
1181                   validate_unshare_change (use_insn, loc, new_rtx, true);
1182               }
1183             asm_operands = SET_SRC (XVECEXP (use_pat, 0, i));
1184           }
1185         else if (GET_CODE (XVECEXP (use_pat, 0, i)) == ASM_OPERANDS)
1186           asm_operands = XVECEXP (use_pat, 0, i);
1187       break;
1188     default:
1189       gcc_unreachable ();
1190     }
1191
1192   gcc_assert (asm_operands && GET_CODE (asm_operands) == ASM_OPERANDS);
1193   for (i = 0; i < ASM_OPERANDS_INPUT_LENGTH (asm_operands); i++)
1194     {
1195       loc = &ASM_OPERANDS_INPUT (asm_operands, i);
1196       new_rtx = propagate_rtx (*loc, GET_MODE (*loc), reg, src, speed_p);
1197       if (new_rtx)
1198         validate_unshare_change (use_insn, loc, new_rtx, true);
1199     }
1200
1201   if (num_changes_pending () == 0 || !apply_change_group ())
1202     return false;
1203
1204   update_df (use_insn, NULL);
1205   num_changes++;
1206   return true;
1207 }
1208
1209 /* Try to replace USE with SRC (defined in DEF_INSN) and simplify the
1210    result.  */
1211
1212 static bool
1213 forward_propagate_and_simplify (df_ref use, rtx def_insn, rtx def_set)
1214 {
1215   rtx use_insn = DF_REF_INSN (use);
1216   rtx use_set = single_set (use_insn);
1217   rtx src, reg, new_rtx, *loc;
1218   bool set_reg_equal;
1219   enum machine_mode mode;
1220   int asm_use = -1;
1221
1222   if (INSN_CODE (use_insn) < 0)
1223     asm_use = asm_noperands (PATTERN (use_insn));
1224
1225   if (!use_set && asm_use < 0 && !DEBUG_INSN_P (use_insn))
1226     return false;
1227
1228   /* Do not propagate into PC, CC0, etc.  */
1229   if (use_set && GET_MODE (SET_DEST (use_set)) == VOIDmode)
1230     return false;
1231
1232   /* If def and use are subreg, check if they match.  */
1233   reg = DF_REF_REG (use);
1234   if (GET_CODE (reg) == SUBREG
1235       && GET_CODE (SET_DEST (def_set)) == SUBREG
1236       && (SUBREG_BYTE (SET_DEST (def_set)) != SUBREG_BYTE (reg)
1237           || GET_MODE (SET_DEST (def_set)) != GET_MODE (reg)))
1238     return false;
1239
1240   /* Check if the def had a subreg, but the use has the whole reg.  */
1241   if (REG_P (reg) && GET_CODE (SET_DEST (def_set)) == SUBREG)
1242     return false;
1243
1244   /* Check if the use has a subreg, but the def had the whole reg.  Unlike the
1245      previous case, the optimization is possible and often useful indeed.  */
1246   if (GET_CODE (reg) == SUBREG && REG_P (SET_DEST (def_set)))
1247     reg = SUBREG_REG (reg);
1248
1249   /* Check if the substitution is valid (last, because it's the most
1250      expensive check!).  */
1251   src = SET_SRC (def_set);
1252   if (!CONSTANT_P (src) && !all_uses_available_at (def_insn, use_insn))
1253     return false;
1254
1255   /* Check if the def is loading something from the constant pool; in this
1256      case we would undo optimization such as compress_float_constant.
1257      Still, we can set a REG_EQUAL note.  */
1258   if (MEM_P (src) && MEM_READONLY_P (src))
1259     {
1260       rtx x = avoid_constant_pool_reference (src);
1261       if (x != src && use_set)
1262         {
1263           rtx note = find_reg_note (use_insn, REG_EQUAL, NULL_RTX);
1264           rtx old_rtx = note ? XEXP (note, 0) : SET_SRC (use_set);
1265           rtx new_rtx = simplify_replace_rtx (old_rtx, src, x);
1266           if (old_rtx != new_rtx)
1267             set_unique_reg_note (use_insn, REG_EQUAL, copy_rtx (new_rtx));
1268         }
1269       return false;
1270     }
1271
1272   if (asm_use >= 0)
1273     return forward_propagate_asm (use, def_insn, def_set, reg);
1274
1275   /* Else try simplifying.  */
1276
1277   if (DF_REF_TYPE (use) == DF_REF_REG_MEM_STORE)
1278     {
1279       loc = &SET_DEST (use_set);
1280       set_reg_equal = false;
1281     }
1282   else if (!use_set)
1283     {
1284       loc = &INSN_VAR_LOCATION_LOC (use_insn);
1285       set_reg_equal = false;
1286     }
1287   else
1288     {
1289       rtx note = find_reg_note (use_insn, REG_EQUAL, NULL_RTX);
1290       if (DF_REF_FLAGS (use) & DF_REF_IN_NOTE)
1291         loc = &XEXP (note, 0);
1292       else
1293         loc = &SET_SRC (use_set);
1294
1295       /* Do not replace an existing REG_EQUAL note if the insn is not
1296          recognized.  Either we're already replacing in the note, or we'll
1297          separately try plugging the definition in the note and simplifying.
1298          And only install a REQ_EQUAL note when the destination is a REG,
1299          as the note would be invalid otherwise.  */
1300       set_reg_equal = (note == NULL_RTX && REG_P (SET_DEST (use_set)));
1301     }
1302
1303   if (GET_MODE (*loc) == VOIDmode)
1304     mode = GET_MODE (SET_DEST (use_set));
1305   else
1306     mode = GET_MODE (*loc);
1307
1308   new_rtx = propagate_rtx (*loc, mode, reg, src,
1309                            optimize_bb_for_speed_p (BLOCK_FOR_INSN (use_insn)));
1310
1311   if (!new_rtx)
1312     return false;
1313
1314   return try_fwprop_subst (use, loc, new_rtx, def_insn, set_reg_equal);
1315 }
1316
1317
1318 /* Given a use USE of an insn, if it has a single reaching
1319    definition, try to forward propagate it into that insn.  */
1320
1321 static void
1322 forward_propagate_into (df_ref use)
1323 {
1324   df_ref def;
1325   rtx def_insn, def_set, use_insn;
1326   rtx parent;
1327
1328   if (DF_REF_FLAGS (use) & DF_REF_READ_WRITE)
1329     return;
1330   if (DF_REF_IS_ARTIFICIAL (use))
1331     return;
1332
1333   /* Only consider uses that have a single definition.  */
1334   def = get_def_for_use (use);
1335   if (!def)
1336     return;
1337   if (DF_REF_FLAGS (def) & DF_REF_READ_WRITE)
1338     return;
1339   if (DF_REF_IS_ARTIFICIAL (def))
1340     return;
1341
1342   /* Do not propagate loop invariant definitions inside the loop.  */
1343   if (DF_REF_BB (def)->loop_father != DF_REF_BB (use)->loop_father)
1344     return;
1345
1346   /* Check if the use is still present in the insn!  */
1347   use_insn = DF_REF_INSN (use);
1348   if (DF_REF_FLAGS (use) & DF_REF_IN_NOTE)
1349     parent = find_reg_note (use_insn, REG_EQUAL, NULL_RTX);
1350   else
1351     parent = PATTERN (use_insn);
1352
1353   if (!reg_mentioned_p (DF_REF_REG (use), parent))
1354     return;
1355
1356   def_insn = DF_REF_INSN (def);
1357   if (multiple_sets (def_insn))
1358     return;
1359   def_set = single_set (def_insn);
1360   if (!def_set)
1361     return;
1362
1363   /* Only try one kind of propagation.  If two are possible, we'll
1364      do it on the following iterations.  */
1365   if (!forward_propagate_and_simplify (use, def_insn, def_set))
1366     forward_propagate_subreg (use, def_insn, def_set);
1367 }
1368
1369 \f
1370 static void
1371 fwprop_init (void)
1372 {
1373   num_changes = 0;
1374   calculate_dominance_info (CDI_DOMINATORS);
1375
1376   /* We do not always want to propagate into loops, so we have to find
1377      loops and be careful about them.  But we have to call flow_loops_find
1378      before df_analyze, because flow_loops_find may introduce new jump
1379      insns (sadly) if we are not working in cfglayout mode.  */
1380   loop_optimizer_init (0);
1381
1382   build_single_def_use_links ();
1383   df_set_flags (DF_DEFER_INSN_RESCAN);
1384
1385   active_defs = XNEWVEC (df_ref, max_reg_num ());
1386 #ifdef ENABLE_CHECKING
1387   active_defs_check = sparseset_alloc (max_reg_num ());
1388 #endif
1389 }
1390
1391 static void
1392 fwprop_done (void)
1393 {
1394   loop_optimizer_finalize ();
1395
1396   VEC_free (df_ref, heap, use_def_ref);
1397   free (active_defs);
1398 #ifdef ENABLE_CHECKING
1399   sparseset_free (active_defs_check);
1400 #endif
1401
1402   free_dominance_info (CDI_DOMINATORS);
1403   cleanup_cfg (0);
1404   delete_trivially_dead_insns (get_insns (), max_reg_num ());
1405
1406   if (dump_file)
1407     fprintf (dump_file,
1408              "\nNumber of successful forward propagations: %d\n\n",
1409              num_changes);
1410 }
1411
1412
1413 /* Main entry point.  */
1414
1415 static bool
1416 gate_fwprop (void)
1417 {
1418   return optimize > 0 && flag_forward_propagate;
1419 }
1420
1421 static unsigned int
1422 fwprop (void)
1423 {
1424   unsigned i;
1425
1426   fwprop_init ();
1427
1428   /* Go through all the uses.  df_uses_create will create new ones at the
1429      end, and we'll go through them as well.
1430
1431      Do not forward propagate addresses into loops until after unrolling.
1432      CSE did so because it was able to fix its own mess, but we are not.  */
1433
1434   for (i = 0; i < DF_USES_TABLE_SIZE (); i++)
1435     {
1436       df_ref use = DF_USES_GET (i);
1437       if (use)
1438         if (DF_REF_TYPE (use) == DF_REF_REG_USE
1439             || DF_REF_BB (use)->loop_father == NULL
1440             /* The outer most loop is not really a loop.  */
1441             || loop_outer (DF_REF_BB (use)->loop_father) == NULL)
1442           forward_propagate_into (use);
1443     }
1444
1445   fwprop_done ();
1446   return 0;
1447 }
1448
1449 struct rtl_opt_pass pass_rtl_fwprop =
1450 {
1451  {
1452   RTL_PASS,
1453   "fwprop1",                            /* name */
1454   gate_fwprop,                          /* gate */
1455   fwprop,                               /* execute */
1456   NULL,                                 /* sub */
1457   NULL,                                 /* next */
1458   0,                                    /* static_pass_number */
1459   TV_FWPROP,                            /* tv_id */
1460   0,                                    /* properties_required */
1461   0,                                    /* properties_provided */
1462   0,                                    /* properties_destroyed */
1463   0,                                    /* todo_flags_start */
1464   TODO_df_finish | TODO_verify_rtl_sharing |
1465   TODO_dump_func                        /* todo_flags_finish */
1466  }
1467 };
1468
1469 static unsigned int
1470 fwprop_addr (void)
1471 {
1472   unsigned i;
1473   fwprop_init ();
1474
1475   /* Go through all the uses.  df_uses_create will create new ones at the
1476      end, and we'll go through them as well.  */
1477   for (i = 0; i < DF_USES_TABLE_SIZE (); i++)
1478     {
1479       df_ref use = DF_USES_GET (i);
1480       if (use)
1481         if (DF_REF_TYPE (use) != DF_REF_REG_USE
1482             && DF_REF_BB (use)->loop_father != NULL
1483             /* The outer most loop is not really a loop.  */
1484             && loop_outer (DF_REF_BB (use)->loop_father) != NULL)
1485           forward_propagate_into (use);
1486     }
1487
1488   fwprop_done ();
1489
1490   return 0;
1491 }
1492
1493 struct rtl_opt_pass pass_rtl_fwprop_addr =
1494 {
1495  {
1496   RTL_PASS,
1497   "fwprop2",                            /* name */
1498   gate_fwprop,                          /* gate */
1499   fwprop_addr,                          /* execute */
1500   NULL,                                 /* sub */
1501   NULL,                                 /* next */
1502   0,                                    /* static_pass_number */
1503   TV_FWPROP,                            /* tv_id */
1504   0,                                    /* properties_required */
1505   0,                                    /* properties_provided */
1506   0,                                    /* properties_destroyed */
1507   0,                                    /* todo_flags_start */
1508   TODO_df_finish | TODO_verify_rtl_sharing |
1509   TODO_dump_func                        /* todo_flags_finish */
1510  }
1511 };