gcc/gcse.c

   1 /* Global common subexpression elimination/Partial redundancy elimination
   2    and global constant/copy propagation for GNU compiler.
   3    Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005,
   4    2006, 2007, 2008, 2009 Free Software Foundation, Inc.
   5
   6 This file is part of GCC.
   7
   8 GCC is free software; you can redistribute it and/or modify it under
   9 the terms of the GNU General Public License as published by the Free
  10 Software Foundation; either version 3, or (at your option) any later
  11 version.
  12
  13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  16 for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GCC; see the file COPYING3.  If not see
  20 <http://www.gnu.org/licenses/>.  */
  21
  22 /* TODO
  23    - reordering of memory allocation and freeing to be more space efficient
  24    - do rough calc of how many regs are needed in each block, and a rough
  25      calc of how many regs are available in each class and use that to
  26      throttle back the code in cases where RTX_COST is minimal.
  27    - a store to the same address as a load does not kill the load if the
  28      source of the store is also the destination of the load.  Handling this
  29      allows more load motion, particularly out of loops.
  30
  31 */
  32
  33 /* References searched while implementing this.
  34
  35    Compilers Principles, Techniques and Tools
  36    Aho, Sethi, Ullman
  37    Addison-Wesley, 1988
  38
  39    Global Optimization by Suppression of Partial Redundancies
  40    E. Morel, C. Renvoise
  41    communications of the acm, Vol. 22, Num. 2, Feb. 1979
  42
  43    A Portable Machine-Independent Global Optimizer - Design and Measurements
  44    Frederick Chow
  45    Stanford Ph.D. thesis, Dec. 1983
  46
  47    A Fast Algorithm for Code Movement Optimization
  48    D.M. Dhamdhere
  49    SIGPLAN Notices, Vol. 23, Num. 10, Oct. 1988
  50
  51    A Solution to a Problem with Morel and Renvoise's
  52    Global Optimization by Suppression of Partial Redundancies
  53    K-H Drechsler, M.P. Stadel
  54    ACM TOPLAS, Vol. 10, Num. 4, Oct. 1988
  55
  56    Practical Adaptation of the Global Optimization
  57    Algorithm of Morel and Renvoise
  58    D.M. Dhamdhere
  59    ACM TOPLAS, Vol. 13, Num. 2. Apr. 1991
  60
  61    Efficiently Computing Static Single Assignment Form and the Control
  62    Dependence Graph
  63    R. Cytron, J. Ferrante, B.K. Rosen, M.N. Wegman, and F.K. Zadeck
  64    ACM TOPLAS, Vol. 13, Num. 4, Oct. 1991
  65
  66    Lazy Code Motion
  67    J. Knoop, O. Ruthing, B. Steffen
  68    ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI
  69
  70    What's In a Region?  Or Computing Control Dependence Regions in Near-Linear
  71    Time for Reducible Flow Control
  72    Thomas Ball
  73    ACM Letters on Programming Languages and Systems,
  74    Vol. 2, Num. 1-4, Mar-Dec 1993
  75
  76    An Efficient Representation for Sparse Sets
  77    Preston Briggs, Linda Torczon
  78    ACM Letters on Programming Languages and Systems,
  79    Vol. 2, Num. 1-4, Mar-Dec 1993
  80
  81    A Variation of Knoop, Ruthing, and Steffen's Lazy Code Motion
  82    K-H Drechsler, M.P. Stadel
  83    ACM SIGPLAN Notices, Vol. 28, Num. 5, May 1993
  84
  85    Partial Dead Code Elimination
  86    J. Knoop, O. Ruthing, B. Steffen
  87    ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
  88
  89    Effective Partial Redundancy Elimination
  90    P. Briggs, K.D. Cooper
  91    ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
  92
  93    The Program Structure Tree: Computing Control Regions in Linear Time
  94    R. Johnson, D. Pearson, K. Pingali
  95    ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
  96
  97    Optimal Code Motion: Theory and Practice
  98    J. Knoop, O. Ruthing, B. Steffen
  99    ACM TOPLAS, Vol. 16, Num. 4, Jul. 1994
 100
 101    The power of assignment motion
 102    J. Knoop, O. Ruthing, B. Steffen
 103    ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI
 104
 105    Global code motion / global value numbering
 106    C. Click
 107    ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI
 108
 109    Value Driven Redundancy Elimination
 110    L.T. Simpson
 111    Rice University Ph.D. thesis, Apr. 1996
 112
 113    Value Numbering
 114    L.T. Simpson
 115    Massively Scalar Compiler Project, Rice University, Sep. 1996
 116
 117    High Performance Compilers for Parallel Computing
 118    Michael Wolfe
 119    Addison-Wesley, 1996
 120
 121    Advanced Compiler Design and Implementation
 122    Steven Muchnick
 123    Morgan Kaufmann, 1997
 124
 125    Building an Optimizing Compiler
 126    Robert Morgan
 127    Digital Press, 1998
 128
 129    People wishing to speed up the code here should read:
 130      Elimination Algorithms for Data Flow Analysis
 131      B.G. Ryder, M.C. Paull
 132      ACM Computing Surveys, Vol. 18, Num. 3, Sep. 1986
 133
 134      How to Analyze Large Programs Efficiently and Informatively
 135      D.M. Dhamdhere, B.K. Rosen, F.K. Zadeck
 136      ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI
 137
 138    People wishing to do something different can find various possibilities
 139    in the above papers and elsewhere.
 140 */
 141
 142 #include "config.h"
 143 #include "system.h"
 144 #include "coretypes.h"
 145 #include "tm.h"
 146 #include "toplev.h"
 147
 148 #include "rtl.h"
 149 #include "tree.h"
 150 #include "tm_p.h"
 151 #include "regs.h"
 152 #include "hard-reg-set.h"
 153 #include "flags.h"
 154 #include "real.h"
 155 #include "insn-config.h"
 156 #include "recog.h"
 157 #include "basic-block.h"
 158 #include "output.h"
 159 #include "function.h"
 160 #include "expr.h"
 161 #include "except.h"
 162 #include "ggc.h"
 163 #include "params.h"
 164 #include "cselib.h"
 165 #include "intl.h"
 166 #include "obstack.h"
 167 #include "timevar.h"
 168 #include "tree-pass.h"
 169 #include "hashtab.h"
 170 #include "df.h"
 171 #include "dbgcnt.h"
 172 #include "target.h"
 173
 174 /* Propagate flow information through back edges and thus enable PRE's
 175    moving loop invariant calculations out of loops.
 176
 177    Originally this tended to create worse overall code, but several
 178    improvements during the development of PRE seem to have made following
 179    back edges generally a win.
 180
 181    Note much of the loop invariant code motion done here would normally
 182    be done by loop.c, which has more heuristics for when to move invariants
 183    out of loops.  At some point we might need to move some of those
 184    heuristics into gcse.c.  */
 185
 186 /* We support GCSE via Partial Redundancy Elimination.  PRE optimizations
 187    are a superset of those done by GCSE.
 188
 189    We perform the following steps:
 190
 191    1) Compute table of places where registers are set.
 192
 193    2) Perform copy/constant propagation.
 194
 195    3) Perform global cse using lazy code motion if not optimizing
 196       for size, or code hoisting if we are.
 197
 198    4) Perform another pass of copy/constant propagation.  Try to bypass
 199       conditional jumps if the condition can be computed from a value of
 200       an incoming edge.
 201
 202    5) Perform store motion.
 203
 204    Two passes of copy/constant propagation are done because the first one
 205    enables more GCSE and the second one helps to clean up the copies that
 206    GCSE creates.  This is needed more for PRE than for Classic because Classic
 207    GCSE will try to use an existing register containing the common
 208    subexpression rather than create a new one.  This is harder to do for PRE
 209    because of the code motion (which Classic GCSE doesn't do).
 210
 211    Expressions we are interested in GCSE-ing are of the form
 212    (set (pseudo-reg) (expression)).
 213    Function want_to_gcse_p says what these are.
 214
 215    In addition, expressions in REG_EQUAL notes are candidates for GXSE-ing.
 216    This allows PRE to hoist expressions that are expressed in multiple insns,
 217    such as comprex address calculations (e.g. for PIC code, or loads with a
 218    high part and as lowe part).
 219
 220    PRE handles moving invariant expressions out of loops (by treating them as
 221    partially redundant).
 222
 223    Eventually it would be nice to replace cse.c/gcse.c with SSA (static single
 224    assignment) based GVN (global value numbering).  L. T. Simpson's paper
 225    (Rice University) on value numbering is a useful reference for this.
 226
 227    **********************
 228
 229    We used to support multiple passes but there are diminishing returns in
 230    doing so.  The first pass usually makes 90% of the changes that are doable.
 231    A second pass can make a few more changes made possible by the first pass.
 232    Experiments show any further passes don't make enough changes to justify
 233    the expense.
 234
 235    A study of spec92 using an unlimited number of passes:
 236    [1 pass] = 1208 substitutions, [2] = 577, [3] = 202, [4] = 192, [5] = 83,
 237    [6] = 34, [7] = 17, [8] = 9, [9] = 4, [10] = 4, [11] = 2,
 238    [12] = 2, [13] = 1, [15] = 1, [16] = 2, [41] = 1
 239
 240    It was found doing copy propagation between each pass enables further
 241    substitutions.
 242
 243    This study was done before expressions in REG_EQUAL notes were added as
 244    candidate expressions for optimization, and before the GIMPLE optimizers
 245    were added.  Probably, multiple passes is even less efficient now than
 246    at the time when the study was conducted.
 247
 248    PRE is quite expensive in complicated functions because the DFA can take
 249    a while to converge.  Hence we only perform one pass.
 250
 251    **********************
 252
 253    The steps for PRE are:
 254
 255    1) Build the hash table of expressions we wish to GCSE (expr_hash_table).
 256
 257    2) Perform the data flow analysis for PRE.
 258
 259    3) Delete the redundant instructions
 260
 261    4) Insert the required copies [if any] that make the partially
 262       redundant instructions fully redundant.
 263
 264    5) For other reaching expressions, insert an instruction to copy the value
 265       to a newly created pseudo that will reach the redundant instruction.
 266
 267    The deletion is done first so that when we do insertions we
 268    know which pseudo reg to use.
 269
 270    Various papers have argued that PRE DFA is expensive (O(n^2)) and others
 271    argue it is not.  The number of iterations for the algorithm to converge
 272    is typically 2-4 so I don't view it as that expensive (relatively speaking).
 273
 274    PRE GCSE depends heavily on the second CSE pass to clean up the copies
 275    we create.  To make an expression reach the place where it's redundant,
 276    the result of the expression is copied to a new register, and the redundant
 277    expression is deleted by replacing it with this new register.  Classic GCSE
 278    doesn't have this problem as much as it computes the reaching defs of
 279    each register in each block and thus can try to use an existing
 280    register.  */
 281 \f
 282 /* GCSE global vars.  */
 283
 284 /* Set to non-zero if CSE should run after all GCSE optimizations are done.  */
 285 int flag_rerun_cse_after_global_opts;
 286
 287 /* An obstack for our working variables.  */
 288 static struct obstack gcse_obstack;
 289
 290 struct reg_use {rtx reg_rtx; };
 291
 292 /* Hash table of expressions.  */
 293
 294 struct expr
 295 {
 296   /* The expression (SET_SRC for expressions, PATTERN for assignments).  */
 297   rtx expr;
 298   /* Index in the available expression bitmaps.  */
 299   int bitmap_index;
 300   /* Next entry with the same hash.  */
 301   struct expr *next_same_hash;
 302   /* List of anticipatable occurrences in basic blocks in the function.
 303      An "anticipatable occurrence" is one that is the first occurrence in the
 304      basic block, the operands are not modified in the basic block prior
 305      to the occurrence and the output is not used between the start of
 306      the block and the occurrence.  */
 307   struct occr *antic_occr;
 308   /* List of available occurrence in basic blocks in the function.
 309      An "available occurrence" is one that is the last occurrence in the
 310      basic block and the operands are not modified by following statements in
 311      the basic block [including this insn].  */
 312   struct occr *avail_occr;
 313   /* Non-null if the computation is PRE redundant.
 314      The value is the newly created pseudo-reg to record a copy of the
 315      expression in all the places that reach the redundant copy.  */
 316   rtx reaching_reg;
 317 };
 318
 319 /* Occurrence of an expression.
 320    There is one per basic block.  If a pattern appears more than once the
 321    last appearance is used [or first for anticipatable expressions].  */
 322
 323 struct occr
 324 {
 325   /* Next occurrence of this expression.  */
 326   struct occr *next;
 327   /* The insn that computes the expression.  */
 328   rtx insn;
 329   /* Nonzero if this [anticipatable] occurrence has been deleted.  */
 330   char deleted_p;
 331   /* Nonzero if this [available] occurrence has been copied to
 332      reaching_reg.  */
 333   /* ??? This is mutually exclusive with deleted_p, so they could share
 334      the same byte.  */
 335   char copied_p;
 336 };
 337
 338 /* Expression and copy propagation hash tables.
 339    Each hash table is an array of buckets.
 340    ??? It is known that if it were an array of entries, structure elements
 341    `next_same_hash' and `bitmap_index' wouldn't be necessary.  However, it is
 342    not clear whether in the final analysis a sufficient amount of memory would
 343    be saved as the size of the available expression bitmaps would be larger
 344    [one could build a mapping table without holes afterwards though].
 345    Someday I'll perform the computation and figure it out.  */
 346
 347 struct hash_table_d
 348 {
 349   /* The table itself.
 350      This is an array of `expr_hash_table_size' elements.  */
 351   struct expr **table;
 352
 353   /* Size of the hash table, in elements.  */
 354   unsigned int size;
 355
 356   /* Number of hash table elements.  */
 357   unsigned int n_elems;
 358
 359   /* Whether the table is expression of copy propagation one.  */
 360   int set_p;
 361 };
 362
 363 /* Expression hash table.  */
 364 static struct hash_table_d expr_hash_table;
 365
 366 /* Copy propagation hash table.  */
 367 static struct hash_table_d set_hash_table;
 368
 369 /* This is a list of expressions which are MEMs and will be used by load
 370    or store motion.
 371    Load motion tracks MEMs which aren't killed by
 372    anything except itself. (i.e., loads and stores to a single location).
 373    We can then allow movement of these MEM refs with a little special
 374    allowance. (all stores copy the same value to the reaching reg used
 375    for the loads).  This means all values used to store into memory must have
 376    no side effects so we can re-issue the setter value.
 377    Store Motion uses this structure as an expression table to track stores
 378    which look interesting, and might be moveable towards the exit block.  */
 379
 380 struct ls_expr
 381 {
 382   struct expr * expr;           /* Gcse expression reference for LM.  */
 383   rtx pattern;                  /* Pattern of this mem.  */
 384   rtx pattern_regs;             /* List of registers mentioned by the mem.  */
 385   rtx loads;                    /* INSN list of loads seen.  */
 386   rtx stores;                   /* INSN list of stores seen.  */
 387   struct ls_expr * next;        /* Next in the list.  */
 388   int invalid;                  /* Invalid for some reason.  */
 389   int index;                    /* If it maps to a bitmap index.  */
 390   unsigned int hash_index;      /* Index when in a hash table.  */
 391   rtx reaching_reg;             /* Register to use when re-writing.  */
 392 };
 393
 394 /* Array of implicit set patterns indexed by basic block index.  */
 395 static rtx *implicit_sets;
 396
 397 /* Head of the list of load/store memory refs.  */
 398 static struct ls_expr * pre_ldst_mems = NULL;
 399
 400 /* Hashtable for the load/store memory refs.  */
 401 static htab_t pre_ldst_table = NULL;
 402
 403 /* Bitmap containing one bit for each register in the program.
 404    Used when performing GCSE to track which registers have been set since
 405    the start of the basic block.  */
 406 static regset reg_set_bitmap;
 407
 408 /* Array, indexed by basic block number for a list of insns which modify
 409    memory within that block.  */
 410 static rtx * modify_mem_list;
 411 static bitmap modify_mem_list_set;
 412
 413 /* This array parallels modify_mem_list, but is kept canonicalized.  */
 414 static rtx * canon_modify_mem_list;
 415
 416 /* Bitmap indexed by block numbers to record which blocks contain
 417    function calls.  */
 418 static bitmap blocks_with_calls;
 419
 420 /* Various variables for statistics gathering.  */
 421
 422 /* Memory used in a pass.
 423    This isn't intended to be absolutely precise.  Its intent is only
 424    to keep an eye on memory usage.  */
 425 static int bytes_used;
 426
 427 /* GCSE substitutions made.  */
 428 static int gcse_subst_count;
 429 /* Number of copy instructions created.  */
 430 static int gcse_create_count;
 431 /* Number of local constants propagated.  */
 432 static int local_const_prop_count;
 433 /* Number of local copies propagated.  */
 434 static int local_copy_prop_count;
 435 /* Number of global constants propagated.  */
 436 static int global_const_prop_count;
 437 /* Number of global copies propagated.  */
 438 static int global_copy_prop_count;
 439 \f
 440 /* For available exprs */
 441 static sbitmap *ae_kill;
 442 \f
 443 static void compute_can_copy (void);
 444 static void *gmalloc (size_t) ATTRIBUTE_MALLOC;
 445 static void *gcalloc (size_t, size_t) ATTRIBUTE_MALLOC;
 446 static void *gcse_alloc (unsigned long);
 447 static void alloc_gcse_mem (void);
 448 static void free_gcse_mem (void);
 449 static void hash_scan_insn (rtx, struct hash_table_d *);
 450 static void hash_scan_set (rtx, rtx, struct hash_table_d *);
 451 static void hash_scan_clobber (rtx, rtx, struct hash_table_d *);
 452 static void hash_scan_call (rtx, rtx, struct hash_table_d *);
 453 static int want_to_gcse_p (rtx);
 454 static bool gcse_constant_p (const_rtx);
 455 static int oprs_unchanged_p (const_rtx, const_rtx, int);
 456 static int oprs_anticipatable_p (const_rtx, const_rtx);
 457 static int oprs_available_p (const_rtx, const_rtx);
 458 static void insert_expr_in_table (rtx, enum machine_mode, rtx, int, int,
 459                                   struct hash_table_d *);
 460 static void insert_set_in_table (rtx, rtx, struct hash_table_d *);
 461 static unsigned int hash_expr (const_rtx, enum machine_mode, int *, int);
 462 static unsigned int hash_set (int, int);
 463 static int expr_equiv_p (const_rtx, const_rtx);
 464 static void record_last_reg_set_info (rtx, int);
 465 static void record_last_mem_set_info (rtx);
 466 static void record_last_set_info (rtx, const_rtx, void *);
 467 static void compute_hash_table (struct hash_table_d *);
 468 static void alloc_hash_table (struct hash_table_d *, int);
 469 static void free_hash_table (struct hash_table_d *);
 470 static void compute_hash_table_work (struct hash_table_d *);
 471 static void dump_hash_table (FILE *, const char *, struct hash_table_d *);
 472 static struct expr *lookup_set (unsigned int, struct hash_table_d *);
 473 static struct expr *next_set (unsigned int, struct expr *);
 474 static void reset_opr_set_tables (void);
 475 static int oprs_not_set_p (const_rtx, const_rtx);
 476 static void mark_call (rtx);
 477 static void mark_set (rtx, rtx);
 478 static void mark_clobber (rtx, rtx);
 479 static void mark_oprs_set (rtx);
 480 static void alloc_cprop_mem (int, int);
 481 static void free_cprop_mem (void);
 482 static void compute_transp (const_rtx, int, sbitmap *, int);
 483 static void compute_transpout (void);
 484 static void compute_local_properties (sbitmap *, sbitmap *, sbitmap *,
 485                                       struct hash_table_d *);
 486 static void compute_cprop_data (void);
 487 static void find_used_regs (rtx *, void *);
 488 static int try_replace_reg (rtx, rtx, rtx);
 489 static struct expr *find_avail_set (int, rtx);
 490 static int cprop_jump (basic_block, rtx, rtx, rtx, rtx);
 491 static void mems_conflict_for_gcse_p (rtx, const_rtx, void *);
 492 static int load_killed_in_block_p (const_basic_block, int, const_rtx, int);
 493 static void canon_list_insert (rtx, const_rtx, void *);
 494 static int cprop_insn (rtx);
 495 static void find_implicit_sets (void);
 496 static int one_cprop_pass (void);
 497 static bool constprop_register (rtx, rtx, rtx);
 498 static struct expr *find_bypass_set (int, int);
 499 static bool reg_killed_on_edge (const_rtx, const_edge);
 500 static int bypass_block (basic_block, rtx, rtx);
 501 static int bypass_conditional_jumps (void);
 502 static void alloc_pre_mem (int, int);
 503 static void free_pre_mem (void);
 504 static void compute_pre_data (void);
 505 static int pre_expr_reaches_here_p (basic_block, struct expr *,
 506                                     basic_block);
 507 static void insert_insn_end_basic_block (struct expr *, basic_block, int);
 508 static void pre_insert_copy_insn (struct expr *, rtx);
 509 static void pre_insert_copies (void);
 510 static int pre_delete (void);
 511 static int pre_gcse (void);
 512 static int one_pre_gcse_pass (void);
 513 static void add_label_notes (rtx, rtx);
 514 static void alloc_code_hoist_mem (int, int);
 515 static void free_code_hoist_mem (void);
 516 static void compute_code_hoist_vbeinout (void);
 517 static void compute_code_hoist_data (void);
 518 static int hoist_expr_reaches_here_p (basic_block, int, basic_block, char *);
 519 static int hoist_code (void);
 520 static int one_code_hoisting_pass (void);
 521 static rtx process_insert_insn (struct expr *);
 522 static int pre_edge_insert (struct edge_list *, struct expr **);
 523 static int pre_expr_reaches_here_p_work (basic_block, struct expr *,
 524                                          basic_block, char *);
 525 static struct ls_expr * ldst_entry (rtx);
 526 static void free_ldst_entry (struct ls_expr *);
 527 static void free_ldst_mems (void);
 528 static void print_ldst_list (FILE *);
 529 static struct ls_expr * find_rtx_in_ldst (rtx);
 530 static inline struct ls_expr * first_ls_expr (void);
 531 static inline struct ls_expr * next_ls_expr (struct ls_expr *);
 532 static int simple_mem (const_rtx);
 533 static void invalidate_any_buried_refs (rtx);
 534 static void compute_ld_motion_mems (void);
 535 static void trim_ld_motion_mems (void);
 536 static void update_ld_motion_stores (struct expr *);
 537 static void free_insn_expr_list_list (rtx *);
 538 static void clear_modify_mem_tables (void);
 539 static void free_modify_mem_tables (void);
 540 static rtx gcse_emit_move_after (rtx, rtx, rtx);
 541 static void local_cprop_find_used_regs (rtx *, void *);
 542 static bool do_local_cprop (rtx, rtx);
 543 static int local_cprop_pass (void);
 544 static bool is_too_expensive (const char *);
 545
 546 #define GNEW(T)                 ((T *) gmalloc (sizeof (T)))
 547 #define GCNEW(T)                ((T *) gcalloc (1, sizeof (T)))
 548
 549 #define GNEWVEC(T, N)           ((T *) gmalloc (sizeof (T) * (N)))
 550 #define GCNEWVEC(T, N)          ((T *) gcalloc ((N), sizeof (T)))
 551
 552 #define GNEWVAR(T, S)           ((T *) gmalloc ((S)))
 553 #define GCNEWVAR(T, S)          ((T *) gcalloc (1, (S)))
 554
 555 #define GOBNEW(T)               ((T *) gcse_alloc (sizeof (T)))
 556 #define GOBNEWVAR(T, S)         ((T *) gcse_alloc ((S)))
 557 \f
 558 /* Misc. utilities.  */
 559
 560 /* Nonzero for each mode that supports (set (reg) (reg)).
 561    This is trivially true for integer and floating point values.
 562    It may or may not be true for condition codes.  */
 563 static char can_copy[(int) NUM_MACHINE_MODES];
 564
 565 /* Compute which modes support reg/reg copy operations.  */
 566
 567 static void
 568 compute_can_copy (void)
 569 {
 570   int i;
 571 #ifndef AVOID_CCMODE_COPIES
 572   rtx reg, insn;
 573 #endif
 574   memset (can_copy, 0, NUM_MACHINE_MODES);
 575
 576   start_sequence ();
 577   for (i = 0; i < NUM_MACHINE_MODES; i++)
 578     if (GET_MODE_CLASS (i) == MODE_CC)
 579       {
 580 #ifdef AVOID_CCMODE_COPIES
 581         can_copy[i] = 0;
 582 #else
 583         reg = gen_rtx_REG ((enum machine_mode) i, LAST_VIRTUAL_REGISTER + 1);
 584         insn = emit_insn (gen_rtx_SET (VOIDmode, reg, reg));
 585         if (recog (PATTERN (insn), insn, NULL) >= 0)
 586           can_copy[i] = 1;
 587 #endif
 588       }
 589     else
 590       can_copy[i] = 1;
 591
 592   end_sequence ();
 593 }
 594
 595 /* Returns whether the mode supports reg/reg copy operations.  */
 596
 597 bool
 598 can_copy_p (enum machine_mode mode)
 599 {
 600   static bool can_copy_init_p = false;
 601
 602   if (! can_copy_init_p)
 603     {
 604       compute_can_copy ();
 605       can_copy_init_p = true;
 606     }
 607
 608   return can_copy[mode] != 0;
 609 }
 610
 611 \f
 612 /* Cover function to xmalloc to record bytes allocated.  */
 613
 614 static void *
 615 gmalloc (size_t size)
 616 {
 617   bytes_used += size;
 618   return xmalloc (size);
 619 }
 620
 621 /* Cover function to xcalloc to record bytes allocated.  */
 622
 623 static void *
 624 gcalloc (size_t nelem, size_t elsize)
 625 {
 626   bytes_used += nelem * elsize;
 627   return xcalloc (nelem, elsize);
 628 }
 629
 630 /* Cover function to obstack_alloc.  */
 631
 632 static void *
 633 gcse_alloc (unsigned long size)
 634 {
 635   bytes_used += size;
 636   return obstack_alloc (&gcse_obstack, size);
 637 }
 638
 639 /* Allocate memory for the reg/memory set tracking tables.
 640    This is called at the start of each pass.  */
 641
 642 static void
 643 alloc_gcse_mem (void)
 644 {
 645   /* Allocate vars to track sets of regs.  */
 646   reg_set_bitmap = BITMAP_ALLOC (NULL);
 647
 648   /* Allocate array to keep a list of insns which modify memory in each
 649      basic block.  */
 650   modify_mem_list = GCNEWVEC (rtx, last_basic_block);
 651   canon_modify_mem_list = GCNEWVEC (rtx, last_basic_block);
 652   modify_mem_list_set = BITMAP_ALLOC (NULL);
 653   blocks_with_calls = BITMAP_ALLOC (NULL);
 654 }
 655
 656 /* Free memory allocated by alloc_gcse_mem.  */
 657
 658 static void
 659 free_gcse_mem (void)
 660 {
 661   free_modify_mem_tables ();
 662   BITMAP_FREE (modify_mem_list_set);
 663   BITMAP_FREE (blocks_with_calls);
 664 }
 665 \f
 666 /* Compute the local properties of each recorded expression.
 667
 668    Local properties are those that are defined by the block, irrespective of
 669    other blocks.
 670
 671    An expression is transparent in a block if its operands are not modified
 672    in the block.
 673
 674    An expression is computed (locally available) in a block if it is computed
 675    at least once and expression would contain the same value if the
 676    computation was moved to the end of the block.
 677
 678    An expression is locally anticipatable in a block if it is computed at
 679    least once and expression would contain the same value if the computation
 680    was moved to the beginning of the block.
 681
 682    We call this routine for cprop, pre and code hoisting.  They all compute
 683    basically the same information and thus can easily share this code.
 684
 685    TRANSP, COMP, and ANTLOC are destination sbitmaps for recording local
 686    properties.  If NULL, then it is not necessary to compute or record that
 687    particular property.
 688
 689    TABLE controls which hash table to look at.  If it is  set hash table,
 690    additionally, TRANSP is computed as ~TRANSP, since this is really cprop's
 691    ABSALTERED.  */
 692
 693 static void
 694 compute_local_properties (sbitmap *transp, sbitmap *comp, sbitmap *antloc,
 695                           struct hash_table_d *table)
 696 {
 697   unsigned int i;
 698
 699   /* Initialize any bitmaps that were passed in.  */
 700   if (transp)
 701     {
 702       if (table->set_p)
 703         sbitmap_vector_zero (transp, last_basic_block);
 704       else
 705         sbitmap_vector_ones (transp, last_basic_block);
 706     }
 707
 708   if (comp)
 709     sbitmap_vector_zero (comp, last_basic_block);
 710   if (antloc)
 711     sbitmap_vector_zero (antloc, last_basic_block);
 712
 713   for (i = 0; i < table->size; i++)
 714     {
 715       struct expr *expr;
 716
 717       for (expr = table->table[i]; expr != NULL; expr = expr->next_same_hash)
 718         {
 719           int indx = expr->bitmap_index;
 720           struct occr *occr;
 721
 722           /* The expression is transparent in this block if it is not killed.
 723              We start by assuming all are transparent [none are killed], and
 724              then reset the bits for those that are.  */
 725           if (transp)
 726             compute_transp (expr->expr, indx, transp, table->set_p);
 727
 728           /* The occurrences recorded in antic_occr are exactly those that
 729              we want to set to nonzero in ANTLOC.  */
 730           if (antloc)
 731             for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
 732               {
 733                 SET_BIT (antloc[BLOCK_FOR_INSN (occr->insn)->index], indx);
 734
 735                 /* While we're scanning the table, this is a good place to
 736                    initialize this.  */
 737                 occr->deleted_p = 0;
 738               }
 739
 740           /* The occurrences recorded in avail_occr are exactly those that
 741              we want to set to nonzero in COMP.  */
 742           if (comp)
 743             for (occr = expr->avail_occr; occr != NULL; occr = occr->next)
 744               {
 745                 SET_BIT (comp[BLOCK_FOR_INSN (occr->insn)->index], indx);
 746
 747                 /* While we're scanning the table, this is a good place to
 748                    initialize this.  */
 749                 occr->copied_p = 0;
 750               }
 751
 752           /* While we're scanning the table, this is a good place to
 753              initialize this.  */
 754           expr->reaching_reg = 0;
 755         }
 756     }
 757 }
 758 \f
 759 /* Hash table support.  */
 760
 761 struct reg_avail_info
 762 {
 763   basic_block last_bb;
 764   int first_set;
 765   int last_set;
 766 };
 767
 768 static struct reg_avail_info *reg_avail_info;
 769 static basic_block current_bb;
 770
 771
 772 /* See whether X, the source of a set, is something we want to consider for
 773    GCSE.  */
 774
 775 static int
 776 want_to_gcse_p (rtx x)
 777 {
 778 #ifdef STACK_REGS
 779   /* On register stack architectures, don't GCSE constants from the
 780      constant pool, as the benefits are often swamped by the overhead
 781      of shuffling the register stack between basic blocks.  */
 782   if (IS_STACK_MODE (GET_MODE (x)))
 783     x = avoid_constant_pool_reference (x);
 784 #endif
 785
 786   switch (GET_CODE (x))
 787     {
 788     case REG:
 789     case SUBREG:
 790     case CONST_INT:
 791     case CONST_DOUBLE:
 792     case CONST_FIXED:
 793     case CONST_VECTOR:
 794     case CALL:
 795       return 0;
 796
 797     default:
 798       return can_assign_to_reg_without_clobbers_p (x);
 799     }
 800 }
 801
 802 /* Used internally by can_assign_to_reg_without_clobbers_p.  */
 803
 804 static GTY(()) rtx test_insn;
 805
 806 /* Return true if we can assign X to a pseudo register such that the
 807    resulting insn does not result in clobbering a hard register as a
 808    side-effect.
 809
 810    Additionally, if the target requires it, check that the resulting insn
 811    can be copied.  If it cannot, this means that X is special and probably
 812    has hidden side-effects we don't want to mess with.
 813
 814    This function is typically used by code motion passes, to verify
 815    that it is safe to insert an insn without worrying about clobbering
 816    maybe live hard regs.  */
 817
 818 bool
 819 can_assign_to_reg_without_clobbers_p (rtx x)
 820 {
 821   int num_clobbers = 0;
 822   int icode;
 823
 824   /* If this is a valid operand, we are OK.  If it's VOIDmode, we aren't.  */
 825   if (general_operand (x, GET_MODE (x)))
 826     return 1;
 827   else if (GET_MODE (x) == VOIDmode)
 828     return 0;
 829
 830   /* Otherwise, check if we can make a valid insn from it.  First initialize
 831      our test insn if we haven't already.  */
 832   if (test_insn == 0)
 833     {
 834       test_insn
 835         = make_insn_raw (gen_rtx_SET (VOIDmode,
 836                                       gen_rtx_REG (word_mode,
 837                                                    FIRST_PSEUDO_REGISTER * 2),
 838                                       const0_rtx));
 839       NEXT_INSN (test_insn) = PREV_INSN (test_insn) = 0;
 840     }
 841
 842   /* Now make an insn like the one we would make when GCSE'ing and see if
 843      valid.  */
 844   PUT_MODE (SET_DEST (PATTERN (test_insn)), GET_MODE (x));
 845   SET_SRC (PATTERN (test_insn)) = x;
 846
 847   icode = recog (PATTERN (test_insn), test_insn, &num_clobbers);
 848   if (icode < 0)
 849     return false;
 850
 851   if (num_clobbers > 0 && added_clobbers_hard_reg_p (icode))
 852     return false;
 853
 854   if (targetm.cannot_copy_insn_p && targetm.cannot_copy_insn_p (test_insn))
 855     return false;
 856
 857   return true;
 858 }
 859
 860 /* Return nonzero if the operands of expression X are unchanged from the
 861    start of INSN's basic block up to but not including INSN (if AVAIL_P == 0),
 862    or from INSN to the end of INSN's basic block (if AVAIL_P != 0).  */
 863
 864 static int
 865 oprs_unchanged_p (const_rtx x, const_rtx insn, int avail_p)
 866 {
 867   int i, j;
 868   enum rtx_code code;
 869   const char *fmt;
 870
 871   if (x == 0)
 872     return 1;
 873
 874   code = GET_CODE (x);
 875   switch (code)
 876     {
 877     case REG:
 878       {
 879         struct reg_avail_info *info = &reg_avail_info[REGNO (x)];
 880
 881         if (info->last_bb != current_bb)
 882           return 1;
 883         if (avail_p)
 884           return info->last_set < DF_INSN_LUID (insn);
 885         else
 886           return info->first_set >= DF_INSN_LUID (insn);
 887       }
 888
 889     case MEM:
 890       if (load_killed_in_block_p (current_bb, DF_INSN_LUID (insn),
 891                                   x, avail_p))
 892         return 0;
 893       else
 894         return oprs_unchanged_p (XEXP (x, 0), insn, avail_p);
 895
 896     case PRE_DEC:
 897     case PRE_INC:
 898     case POST_DEC:
 899     case POST_INC:
 900     case PRE_MODIFY:
 901     case POST_MODIFY:
 902       return 0;
 903
 904     case PC:
 905     case CC0: /*FIXME*/
 906     case CONST:
 907     case CONST_INT:
 908     case CONST_DOUBLE:
 909     case CONST_FIXED:
 910     case CONST_VECTOR:
 911     case SYMBOL_REF:
 912     case LABEL_REF:
 913     case ADDR_VEC:
 914     case ADDR_DIFF_VEC:
 915       return 1;
 916
 917     default:
 918       break;
 919     }
 920
 921   for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
 922     {
 923       if (fmt[i] == 'e')
 924         {
 925           /* If we are about to do the last recursive call needed at this
 926              level, change it into iteration.  This function is called enough
 927              to be worth it.  */
 928           if (i == 0)
 929             return oprs_unchanged_p (XEXP (x, i), insn, avail_p);
 930
 931           else if (! oprs_unchanged_p (XEXP (x, i), insn, avail_p))
 932             return 0;
 933         }
 934       else if (fmt[i] == 'E')
 935         for (j = 0; j < XVECLEN (x, i); j++)
 936           if (! oprs_unchanged_p (XVECEXP (x, i, j), insn, avail_p))
 937             return 0;
 938     }
 939
 940   return 1;
 941 }
 942
 943 /* Used for communication between mems_conflict_for_gcse_p and
 944    load_killed_in_block_p.  Nonzero if mems_conflict_for_gcse_p finds a
 945    conflict between two memory references.  */
 946 static int gcse_mems_conflict_p;
 947
 948 /* Used for communication between mems_conflict_for_gcse_p and
 949    load_killed_in_block_p.  A memory reference for a load instruction,
 950    mems_conflict_for_gcse_p will see if a memory store conflicts with
 951    this memory load.  */
 952 static const_rtx gcse_mem_operand;
 953
 954 /* DEST is the output of an instruction.  If it is a memory reference, and
 955    possibly conflicts with the load found in gcse_mem_operand, then set
 956    gcse_mems_conflict_p to a nonzero value.  */
 957
 958 static void
 959 mems_conflict_for_gcse_p (rtx dest, const_rtx setter ATTRIBUTE_UNUSED,
 960                           void *data ATTRIBUTE_UNUSED)
 961 {
 962   while (GET_CODE (dest) == SUBREG
 963          || GET_CODE (dest) == ZERO_EXTRACT
 964          || GET_CODE (dest) == STRICT_LOW_PART)
 965     dest = XEXP (dest, 0);
 966
 967   /* If DEST is not a MEM, then it will not conflict with the load.  Note
 968      that function calls are assumed to clobber memory, but are handled
 969      elsewhere.  */
 970   if (! MEM_P (dest))
 971     return;
 972
 973   /* If we are setting a MEM in our list of specially recognized MEMs,
 974      don't mark as killed this time.  */
 975
 976   if (expr_equiv_p (dest, gcse_mem_operand) && pre_ldst_mems != NULL)
 977     {
 978       if (!find_rtx_in_ldst (dest))
 979         gcse_mems_conflict_p = 1;
 980       return;
 981     }
 982
 983   if (true_dependence (dest, GET_MODE (dest), gcse_mem_operand,
 984                        rtx_addr_varies_p))
 985     gcse_mems_conflict_p = 1;
 986 }
 987
 988 /* Return nonzero if the expression in X (a memory reference) is killed
 989    in block BB before or after the insn with the LUID in UID_LIMIT.
 990    AVAIL_P is nonzero for kills after UID_LIMIT, and zero for kills
 991    before UID_LIMIT.
 992
 993    To check the entire block, set UID_LIMIT to max_uid + 1 and
 994    AVAIL_P to 0.  */
 995
 996 static int
 997 load_killed_in_block_p (const_basic_block bb, int uid_limit, const_rtx x, int avail_p)
 998 {
 999   rtx list_entry = modify_mem_list[bb->index];
1000
1001   /* If this is a readonly then we aren't going to be changing it.  */
1002   if (MEM_READONLY_P (x))
1003     return 0;
1004
1005   while (list_entry)
1006     {
1007       rtx setter;
1008       /* Ignore entries in the list that do not apply.  */
1009       if ((avail_p
1010            && DF_INSN_LUID (XEXP (list_entry, 0)) < uid_limit)
1011           || (! avail_p
1012               && DF_INSN_LUID (XEXP (list_entry, 0)) > uid_limit))
1013         {
1014           list_entry = XEXP (list_entry, 1);
1015           continue;
1016         }
1017
1018       setter = XEXP (list_entry, 0);
1019
1020       /* If SETTER is a call everything is clobbered.  Note that calls
1021          to pure functions are never put on the list, so we need not
1022          worry about them.  */
1023       if (CALL_P (setter))
1024         return 1;
1025
1026       /* SETTER must be an INSN of some kind that sets memory.  Call
1027          note_stores to examine each hunk of memory that is modified.
1028
1029          The note_stores interface is pretty limited, so we have to
1030          communicate via global variables.  Yuk.  */
1031       gcse_mem_operand = x;
1032       gcse_mems_conflict_p = 0;
1033       note_stores (PATTERN (setter), mems_conflict_for_gcse_p, NULL);
1034       if (gcse_mems_conflict_p)
1035         return 1;
1036       list_entry = XEXP (list_entry, 1);
1037     }
1038   return 0;
1039 }
1040
1041 /* Return nonzero if the operands of expression X are unchanged from
1042    the start of INSN's basic block up to but not including INSN.  */
1043
1044 static int
1045 oprs_anticipatable_p (const_rtx x, const_rtx insn)
1046 {
1047   return oprs_unchanged_p (x, insn, 0);
1048 }
1049
1050 /* Return nonzero if the operands of expression X are unchanged from
1051    INSN to the end of INSN's basic block.  */
1052
1053 static int
1054 oprs_available_p (const_rtx x, const_rtx insn)
1055 {
1056   return oprs_unchanged_p (x, insn, 1);
1057 }
1058
1059 /* Hash expression X.
1060
1061    MODE is only used if X is a CONST_INT.  DO_NOT_RECORD_P is a boolean
1062    indicating if a volatile operand is found or if the expression contains
1063    something we don't want to insert in the table.  HASH_TABLE_SIZE is
1064    the current size of the hash table to be probed.  */
1065
1066 static unsigned int
1067 hash_expr (const_rtx x, enum machine_mode mode, int *do_not_record_p,
1068            int hash_table_size)
1069 {
1070   unsigned int hash;
1071
1072   *do_not_record_p = 0;
1073
1074   hash = hash_rtx (x, mode, do_not_record_p,
1075                    NULL,  /*have_reg_qty=*/false);
1076   return hash % hash_table_size;
1077 }
1078
1079 /* Hash a set of register REGNO.
1080
1081    Sets are hashed on the register that is set.  This simplifies the PRE copy
1082    propagation code.
1083
1084    ??? May need to make things more elaborate.  Later, as necessary.  */
1085
1086 static unsigned int
1087 hash_set (int regno, int hash_table_size)
1088 {
1089   unsigned int hash;
1090
1091   hash = regno;
1092   return hash % hash_table_size;
1093 }
1094
1095 /* Return nonzero if exp1 is equivalent to exp2.  */
1096
1097 static int
1098 expr_equiv_p (const_rtx x, const_rtx y)
1099 {
1100   return exp_equiv_p (x, y, 0, true);
1101 }
1102
1103 /* Insert expression X in INSN in the hash TABLE.
1104    If it is already present, record it as the last occurrence in INSN's
1105    basic block.
1106
1107    MODE is the mode of the value X is being stored into.
1108    It is only used if X is a CONST_INT.
1109
1110    ANTIC_P is nonzero if X is an anticipatable expression.
1111    AVAIL_P is nonzero if X is an available expression.  */
1112
1113 static void
1114 insert_expr_in_table (rtx x, enum machine_mode mode, rtx insn, int antic_p,
1115                       int avail_p, struct hash_table_d *table)
1116 {
1117   int found, do_not_record_p;
1118   unsigned int hash;
1119   struct expr *cur_expr, *last_expr = NULL;
1120   struct occr *antic_occr, *avail_occr;
1121
1122   hash = hash_expr (x, mode, &do_not_record_p, table->size);
1123
1124   /* Do not insert expression in table if it contains volatile operands,
1125      or if hash_expr determines the expression is something we don't want
1126      to or can't handle.  */
1127   if (do_not_record_p)
1128     return;
1129
1130   cur_expr = table->table[hash];
1131   found = 0;
1132
1133   while (cur_expr && 0 == (found = expr_equiv_p (cur_expr->expr, x)))
1134     {
1135       /* If the expression isn't found, save a pointer to the end of
1136          the list.  */
1137       last_expr = cur_expr;
1138       cur_expr = cur_expr->next_same_hash;
1139     }
1140
1141   if (! found)
1142     {
1143       cur_expr = GOBNEW (struct expr);
1144       bytes_used += sizeof (struct expr);
1145       if (table->table[hash] == NULL)
1146         /* This is the first pattern that hashed to this index.  */
1147         table->table[hash] = cur_expr;
1148       else
1149         /* Add EXPR to end of this hash chain.  */
1150         last_expr->next_same_hash = cur_expr;
1151
1152       /* Set the fields of the expr element.  */
1153       cur_expr->expr = x;
1154       cur_expr->bitmap_index = table->n_elems++;
1155       cur_expr->next_same_hash = NULL;
1156       cur_expr->antic_occr = NULL;
1157       cur_expr->avail_occr = NULL;
1158     }
1159
1160   /* Now record the occurrence(s).  */
1161   if (antic_p)
1162     {
1163       antic_occr = cur_expr->antic_occr;
1164
1165       if (antic_occr
1166           && BLOCK_FOR_INSN (antic_occr->insn) != BLOCK_FOR_INSN (insn))
1167         antic_occr = NULL;
1168
1169       if (antic_occr)
1170         /* Found another instance of the expression in the same basic block.
1171            Prefer the currently recorded one.  We want the first one in the
1172            block and the block is scanned from start to end.  */
1173         ; /* nothing to do */
1174       else
1175         {
1176           /* First occurrence of this expression in this basic block.  */
1177           antic_occr = GOBNEW (struct occr);
1178           bytes_used += sizeof (struct occr);
1179           antic_occr->insn = insn;
1180           antic_occr->next = cur_expr->antic_occr;
1181           antic_occr->deleted_p = 0;
1182           cur_expr->antic_occr = antic_occr;
1183         }
1184     }
1185
1186   if (avail_p)
1187     {
1188       avail_occr = cur_expr->avail_occr;
1189
1190       if (avail_occr
1191           && BLOCK_FOR_INSN (avail_occr->insn) == BLOCK_FOR_INSN (insn))
1192         {
1193           /* Found another instance of the expression in the same basic block.
1194              Prefer this occurrence to the currently recorded one.  We want
1195              the last one in the block and the block is scanned from start
1196              to end.  */
1197           avail_occr->insn = insn;
1198         }
1199       else
1200         {
1201           /* First occurrence of this expression in this basic block.  */
1202           avail_occr = GOBNEW (struct occr);
1203           bytes_used += sizeof (struct occr);
1204           avail_occr->insn = insn;
1205           avail_occr->next = cur_expr->avail_occr;
1206           avail_occr->deleted_p = 0;
1207           cur_expr->avail_occr = avail_occr;
1208         }
1209     }
1210 }
1211
1212 /* Insert pattern X in INSN in the hash table.
1213    X is a SET of a reg to either another reg or a constant.
1214    If it is already present, record it as the last occurrence in INSN's
1215    basic block.  */
1216
1217 static void
1218 insert_set_in_table (rtx x, rtx insn, struct hash_table_d *table)
1219 {
1220   int found;
1221   unsigned int hash;
1222   struct expr *cur_expr, *last_expr = NULL;
1223   struct occr *cur_occr;
1224
1225   gcc_assert (GET_CODE (x) == SET && REG_P (SET_DEST (x)));
1226
1227   hash = hash_set (REGNO (SET_DEST (x)), table->size);
1228
1229   cur_expr = table->table[hash];
1230   found = 0;
1231
1232   while (cur_expr && 0 == (found = expr_equiv_p (cur_expr->expr, x)))
1233     {
1234       /* If the expression isn't found, save a pointer to the end of
1235          the list.  */
1236       last_expr = cur_expr;
1237       cur_expr = cur_expr->next_same_hash;
1238     }
1239
1240   if (! found)
1241     {
1242       cur_expr = GOBNEW (struct expr);
1243       bytes_used += sizeof (struct expr);
1244       if (table->table[hash] == NULL)
1245         /* This is the first pattern that hashed to this index.  */
1246         table->table[hash] = cur_expr;
1247       else
1248         /* Add EXPR to end of this hash chain.  */
1249         last_expr->next_same_hash = cur_expr;
1250
1251       /* Set the fields of the expr element.
1252          We must copy X because it can be modified when copy propagation is
1253          performed on its operands.  */
1254       cur_expr->expr = copy_rtx (x);
1255       cur_expr->bitmap_index = table->n_elems++;
1256       cur_expr->next_same_hash = NULL;
1257       cur_expr->antic_occr = NULL;
1258       cur_expr->avail_occr = NULL;
1259     }
1260
1261   /* Now record the occurrence.  */
1262   cur_occr = cur_expr->avail_occr;
1263
1264   if (cur_occr
1265       && BLOCK_FOR_INSN (cur_occr->insn) == BLOCK_FOR_INSN (insn))
1266     {
1267       /* Found another instance of the expression in the same basic block.
1268          Prefer this occurrence to the currently recorded one.  We want
1269          the last one in the block and the block is scanned from start
1270          to end.  */
1271       cur_occr->insn = insn;
1272     }
1273   else
1274     {
1275       /* First occurrence of this expression in this basic block.  */
1276       cur_occr = GOBNEW (struct occr);
1277       bytes_used += sizeof (struct occr);
1278       cur_occr->insn = insn;
1279       cur_occr->next = cur_expr->avail_occr;
1280       cur_occr->deleted_p = 0;
1281       cur_expr->avail_occr = cur_occr;
1282     }
1283 }
1284
1285 /* Determine whether the rtx X should be treated as a constant for
1286    the purposes of GCSE's constant propagation.  */
1287
1288 static bool
1289 gcse_constant_p (const_rtx x)
1290 {
1291   /* Consider a COMPARE of two integers constant.  */
1292   if (GET_CODE (x) == COMPARE
1293       && CONST_INT_P (XEXP (x, 0))
1294       && CONST_INT_P (XEXP (x, 1)))
1295     return true;
1296
1297   /* Consider a COMPARE of the same registers is a constant
1298      if they are not floating point registers.  */
1299   if (GET_CODE(x) == COMPARE
1300       && REG_P (XEXP (x, 0)) && REG_P (XEXP (x, 1))
1301       && REGNO (XEXP (x, 0)) == REGNO (XEXP (x, 1))
1302       && ! FLOAT_MODE_P (GET_MODE (XEXP (x, 0)))
1303       && ! FLOAT_MODE_P (GET_MODE (XEXP (x, 1))))
1304     return true;
1305
1306   /* Since X might be inserted more than once we have to take care that it
1307      is sharable.  */
1308   return CONSTANT_P (x) && (GET_CODE (x) != CONST || shared_const_p (x));
1309 }
1310
1311 /* Scan pattern PAT of INSN and add an entry to the hash TABLE (set or
1312    expression one).  */
1313
1314 static void
1315 hash_scan_set (rtx pat, rtx insn, struct hash_table_d *table)
1316 {
1317   rtx src = SET_SRC (pat);
1318   rtx dest = SET_DEST (pat);
1319   rtx note;
1320
1321   if (GET_CODE (src) == CALL)
1322     hash_scan_call (src, insn, table);
1323
1324   else if (REG_P (dest))
1325     {
1326       unsigned int regno = REGNO (dest);
1327       rtx tmp;
1328
1329       /* See if a REG_EQUAL note shows this equivalent to a simpler expression.
1330
1331          This allows us to do a single GCSE pass and still eliminate
1332          redundant constants, addresses or other expressions that are
1333          constructed with multiple instructions.
1334
1335          However, keep the original SRC if INSN is a simple reg-reg move.  In
1336          In this case, there will almost always be a REG_EQUAL note on the
1337          insn that sets SRC.  By recording the REG_EQUAL value here as SRC
1338          for INSN, we miss copy propagation opportunities and we perform the
1339          same PRE GCSE operation repeatedly on the same REG_EQUAL value if we
1340          do more than one PRE GCSE pass.
1341
1342          Note that this does not impede profitable constant propagations.  We
1343          "look through" reg-reg sets in lookup_avail_set.  */
1344       note = find_reg_equal_equiv_note (insn);
1345       if (note != 0
1346           && REG_NOTE_KIND (note) == REG_EQUAL
1347           && !REG_P (src)
1348           && (table->set_p
1349               ? gcse_constant_p (XEXP (note, 0))
1350               : want_to_gcse_p (XEXP (note, 0))))
1351         src = XEXP (note, 0), pat = gen_rtx_SET (VOIDmode, dest, src);
1352
1353       /* Only record sets of pseudo-regs in the hash table.  */
1354       if (! table->set_p
1355           && regno >= FIRST_PSEUDO_REGISTER
1356           /* Don't GCSE something if we can't do a reg/reg copy.  */
1357           && can_copy_p (GET_MODE (dest))
1358           /* GCSE commonly inserts instruction after the insn.  We can't
1359              do that easily for EH edges so disable GCSE on these for now.  */
1360           /* ??? We can now easily create new EH landing pads at the
1361              gimple level, for splitting edges; there's no reason we
1362              can't do the same thing at the rtl level.  */
1363           && !can_throw_internal (insn)
1364           /* Is SET_SRC something we want to gcse?  */
1365           && want_to_gcse_p (src)
1366           /* Don't CSE a nop.  */
1367           && ! set_noop_p (pat)
1368           /* Don't GCSE if it has attached REG_EQUIV note.
1369              At this point this only function parameters should have
1370              REG_EQUIV notes and if the argument slot is used somewhere
1371              explicitly, it means address of parameter has been taken,
1372              so we should not extend the lifetime of the pseudo.  */
1373           && (note == NULL_RTX || ! MEM_P (XEXP (note, 0))))
1374         {
1375           /* An expression is not anticipatable if its operands are
1376              modified before this insn or if this is not the only SET in
1377              this insn.  The latter condition does not have to mean that
1378              SRC itself is not anticipatable, but we just will not be
1379              able to handle code motion of insns with multiple sets.  */
1380           int antic_p = oprs_anticipatable_p (src, insn)
1381                         && !multiple_sets (insn);
1382           /* An expression is not available if its operands are
1383              subsequently modified, including this insn.  It's also not
1384              available if this is a branch, because we can't insert
1385              a set after the branch.  */
1386           int avail_p = (oprs_available_p (src, insn)
1387                          && ! JUMP_P (insn));
1388
1389           insert_expr_in_table (src, GET_MODE (dest), insn, antic_p, avail_p, table);
1390         }
1391
1392       /* Record sets for constant/copy propagation.  */
1393       else if (table->set_p
1394                && regno >= FIRST_PSEUDO_REGISTER
1395                && ((REG_P (src)
1396                     && REGNO (src) >= FIRST_PSEUDO_REGISTER
1397                     && can_copy_p (GET_MODE (dest))
1398                     && REGNO (src) != regno)
1399                    || gcse_constant_p (src))
1400                /* A copy is not available if its src or dest is subsequently
1401                   modified.  Here we want to search from INSN+1 on, but
1402                   oprs_available_p searches from INSN on.  */
1403                && (insn == BB_END (BLOCK_FOR_INSN (insn))
1404                    || (tmp = next_nonnote_insn (insn)) == NULL_RTX
1405                    || BLOCK_FOR_INSN (tmp) != BLOCK_FOR_INSN (insn)
1406                    || oprs_available_p (pat, tmp)))
1407         insert_set_in_table (pat, insn, table);
1408     }
1409   /* In case of store we want to consider the memory value as available in
1410      the REG stored in that memory. This makes it possible to remove
1411      redundant loads from due to stores to the same location.  */
1412   else if (flag_gcse_las && REG_P (src) && MEM_P (dest))
1413       {
1414         unsigned int regno = REGNO (src);
1415
1416         /* Do not do this for constant/copy propagation.  */
1417         if (! table->set_p
1418             /* Only record sets of pseudo-regs in the hash table.  */
1419             && regno >= FIRST_PSEUDO_REGISTER
1420            /* Don't GCSE something if we can't do a reg/reg copy.  */
1421            && can_copy_p (GET_MODE (src))
1422            /* GCSE commonly inserts instruction after the insn.  We can't
1423               do that easily for EH edges so disable GCSE on these for now.  */
1424            && !can_throw_internal (insn)
1425            /* Is SET_DEST something we want to gcse?  */
1426            && want_to_gcse_p (dest)
1427            /* Don't CSE a nop.  */
1428            && ! set_noop_p (pat)
1429            /* Don't GCSE if it has attached REG_EQUIV note.
1430               At this point this only function parameters should have
1431               REG_EQUIV notes and if the argument slot is used somewhere
1432               explicitly, it means address of parameter has been taken,
1433               so we should not extend the lifetime of the pseudo.  */
1434            && ((note = find_reg_note (insn, REG_EQUIV, NULL_RTX)) == 0
1435                || ! MEM_P (XEXP (note, 0))))
1436              {
1437                /* Stores are never anticipatable.  */
1438                int antic_p = 0;
1439                /* An expression is not available if its operands are
1440                   subsequently modified, including this insn.  It's also not
1441                   available if this is a branch, because we can't insert
1442                   a set after the branch.  */
1443                int avail_p = oprs_available_p (dest, insn)
1444                              && ! JUMP_P (insn);
1445
1446                /* Record the memory expression (DEST) in the hash table.  */
1447                insert_expr_in_table (dest, GET_MODE (dest), insn,
1448                                      antic_p, avail_p, table);
1449              }
1450       }
1451 }
1452
1453 static void
1454 hash_scan_clobber (rtx x ATTRIBUTE_UNUSED, rtx insn ATTRIBUTE_UNUSED,
1455                    struct hash_table_d *table ATTRIBUTE_UNUSED)
1456 {
1457   /* Currently nothing to do.  */
1458 }
1459
1460 static void
1461 hash_scan_call (rtx x ATTRIBUTE_UNUSED, rtx insn ATTRIBUTE_UNUSED,
1462                 struct hash_table_d *table ATTRIBUTE_UNUSED)
1463 {
1464   /* Currently nothing to do.  */
1465 }
1466
1467 /* Process INSN and add hash table entries as appropriate.
1468
1469    Only available expressions that set a single pseudo-reg are recorded.
1470
1471    Single sets in a PARALLEL could be handled, but it's an extra complication
1472    that isn't dealt with right now.  The trick is handling the CLOBBERs that
1473    are also in the PARALLEL.  Later.
1474
1475    If SET_P is nonzero, this is for the assignment hash table,
1476    otherwise it is for the expression hash table.  */
1477
1478 static void
1479 hash_scan_insn (rtx insn, struct hash_table_d *table)
1480 {
1481   rtx pat = PATTERN (insn);
1482   int i;
1483
1484   /* Pick out the sets of INSN and for other forms of instructions record
1485      what's been modified.  */
1486
1487   if (GET_CODE (pat) == SET)
1488     hash_scan_set (pat, insn, table);
1489   else if (GET_CODE (pat) == PARALLEL)
1490     for (i = 0; i < XVECLEN (pat, 0); i++)
1491       {
1492         rtx x = XVECEXP (pat, 0, i);
1493
1494         if (GET_CODE (x) == SET)
1495           hash_scan_set (x, insn, table);
1496         else if (GET_CODE (x) == CLOBBER)
1497           hash_scan_clobber (x, insn, table);
1498         else if (GET_CODE (x) == CALL)
1499           hash_scan_call (x, insn, table);
1500       }
1501
1502   else if (GET_CODE (pat) == CLOBBER)
1503     hash_scan_clobber (pat, insn, table);
1504   else if (GET_CODE (pat) == CALL)
1505     hash_scan_call (pat, insn, table);
1506 }
1507
1508 static void
1509 dump_hash_table (FILE *file, const char *name, struct hash_table_d *table)
1510 {
1511   int i;
1512   /* Flattened out table, so it's printed in proper order.  */
1513   struct expr **flat_table;
1514   unsigned int *hash_val;
1515   struct expr *expr;
1516
1517   flat_table = XCNEWVEC (struct expr *, table->n_elems);
1518   hash_val = XNEWVEC (unsigned int, table->n_elems);
1519
1520   for (i = 0; i < (int) table->size; i++)
1521     for (expr = table->table[i]; expr != NULL; expr = expr->next_same_hash)
1522       {
1523         flat_table[expr->bitmap_index] = expr;
1524         hash_val[expr->bitmap_index] = i;
1525       }
1526
1527   fprintf (file, "%s hash table (%d buckets, %d entries)\n",
1528            name, table->size, table->n_elems);
1529
1530   for (i = 0; i < (int) table->n_elems; i++)
1531     if (flat_table[i] != 0)
1532       {
1533         expr = flat_table[i];
1534         fprintf (file, "Index %d (hash value %d)\n  ",
1535                  expr->bitmap_index, hash_val[i]);
1536         print_rtl (file, expr->expr);
1537         fprintf (file, "\n");
1538       }
1539
1540   fprintf (file, "\n");
1541
1542   free (flat_table);
1543   free (hash_val);
1544 }
1545
1546 /* Record register first/last/block set information for REGNO in INSN.
1547
1548    first_set records the first place in the block where the register
1549    is set and is used to compute "anticipatability".
1550
1551    last_set records the last place in the block where the register
1552    is set and is used to compute "availability".
1553
1554    last_bb records the block for which first_set and last_set are
1555    valid, as a quick test to invalidate them.  */
1556
1557 static void
1558 record_last_reg_set_info (rtx insn, int regno)
1559 {
1560   struct reg_avail_info *info = &reg_avail_info[regno];
1561   int luid = DF_INSN_LUID (insn);
1562
1563   info->last_set = luid;
1564   if (info->last_bb != current_bb)
1565     {
1566       info->last_bb = current_bb;
1567       info->first_set = luid;
1568     }
1569 }
1570
1571
1572 /* Record all of the canonicalized MEMs of record_last_mem_set_info's insn.
1573    Note we store a pair of elements in the list, so they have to be
1574    taken off pairwise.  */
1575
1576 static void
1577 canon_list_insert (rtx dest ATTRIBUTE_UNUSED, const_rtx unused1 ATTRIBUTE_UNUSED,
1578                    void * v_insn)
1579 {
1580   rtx dest_addr, insn;
1581   int bb;
1582
1583   while (GET_CODE (dest) == SUBREG
1584       || GET_CODE (dest) == ZERO_EXTRACT
1585       || GET_CODE (dest) == STRICT_LOW_PART)
1586     dest = XEXP (dest, 0);
1587
1588   /* If DEST is not a MEM, then it will not conflict with a load.  Note
1589      that function calls are assumed to clobber memory, but are handled
1590      elsewhere.  */
1591
1592   if (! MEM_P (dest))
1593     return;
1594
1595   dest_addr = get_addr (XEXP (dest, 0));
1596   dest_addr = canon_rtx (dest_addr);
1597   insn = (rtx) v_insn;
1598   bb = BLOCK_FOR_INSN (insn)->index;
1599
1600   canon_modify_mem_list[bb] =
1601     alloc_EXPR_LIST (VOIDmode, dest_addr, canon_modify_mem_list[bb]);
1602   canon_modify_mem_list[bb] =
1603     alloc_EXPR_LIST (VOIDmode, dest, canon_modify_mem_list[bb]);
1604 }
1605
1606 /* Record memory modification information for INSN.  We do not actually care
1607    about the memory location(s) that are set, or even how they are set (consider
1608    a CALL_INSN).  We merely need to record which insns modify memory.  */
1609
1610 static void
1611 record_last_mem_set_info (rtx insn)
1612 {
1613   int bb = BLOCK_FOR_INSN (insn)->index;
1614
1615   /* load_killed_in_block_p will handle the case of calls clobbering
1616      everything.  */
1617   modify_mem_list[bb] = alloc_INSN_LIST (insn, modify_mem_list[bb]);
1618   bitmap_set_bit (modify_mem_list_set, bb);
1619
1620   if (CALL_P (insn))
1621     {
1622       /* Note that traversals of this loop (other than for free-ing)
1623          will break after encountering a CALL_INSN.  So, there's no
1624          need to insert a pair of items, as canon_list_insert does.  */
1625       canon_modify_mem_list[bb] =
1626         alloc_INSN_LIST (insn, canon_modify_mem_list[bb]);
1627       bitmap_set_bit (blocks_with_calls, bb);
1628     }
1629   else
1630     note_stores (PATTERN (insn), canon_list_insert, (void*) insn);
1631 }
1632
1633 /* Called from compute_hash_table via note_stores to handle one
1634    SET or CLOBBER in an insn.  DATA is really the instruction in which
1635    the SET is taking place.  */
1636
1637 static void
1638 record_last_set_info (rtx dest, const_rtx setter ATTRIBUTE_UNUSED, void *data)
1639 {
1640   rtx last_set_insn = (rtx) data;
1641
1642   if (GET_CODE (dest) == SUBREG)
1643     dest = SUBREG_REG (dest);
1644
1645   if (REG_P (dest))
1646     record_last_reg_set_info (last_set_insn, REGNO (dest));
1647   else if (MEM_P (dest)
1648            /* Ignore pushes, they clobber nothing.  */
1649            && ! push_operand (dest, GET_MODE (dest)))
1650     record_last_mem_set_info (last_set_insn);
1651 }
1652
1653 /* Top level function to create an expression or assignment hash table.
1654
1655    Expression entries are placed in the hash table if
1656    - they are of the form (set (pseudo-reg) src),
1657    - src is something we want to perform GCSE on,
1658    - none of the operands are subsequently modified in the block
1659
1660    Assignment entries are placed in the hash table if
1661    - they are of the form (set (pseudo-reg) src),
1662    - src is something we want to perform const/copy propagation on,
1663    - none of the operands or target are subsequently modified in the block
1664
1665    Currently src must be a pseudo-reg or a const_int.
1666
1667    TABLE is the table computed.  */
1668
1669 static void
1670 compute_hash_table_work (struct hash_table_d *table)
1671 {
1672   int i;
1673
1674   /* re-Cache any INSN_LIST nodes we have allocated.  */
1675   clear_modify_mem_tables ();
1676   /* Some working arrays used to track first and last set in each block.  */
1677   reg_avail_info = GNEWVEC (struct reg_avail_info, max_reg_num ());
1678
1679   for (i = 0; i < max_reg_num (); ++i)
1680     reg_avail_info[i].last_bb = NULL;
1681
1682   FOR_EACH_BB (current_bb)
1683     {
1684       rtx insn;
1685       unsigned int regno;
1686
1687       /* First pass over the instructions records information used to
1688          determine when registers and memory are first and last set.  */
1689       FOR_BB_INSNS (current_bb, insn)
1690         {
1691           if (! INSN_P (insn))
1692             continue;
1693
1694           if (CALL_P (insn))
1695             {
1696               for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
1697                 if (TEST_HARD_REG_BIT (regs_invalidated_by_call, regno))
1698                   record_last_reg_set_info (insn, regno);
1699
1700               mark_call (insn);
1701             }
1702
1703           note_stores (PATTERN (insn), record_last_set_info, insn);
1704         }
1705
1706       /* Insert implicit sets in the hash table.  */
1707       if (table->set_p
1708           && implicit_sets[current_bb->index] != NULL_RTX)
1709         hash_scan_set (implicit_sets[current_bb->index],
1710                        BB_HEAD (current_bb), table);
1711
1712       /* The next pass builds the hash table.  */
1713       FOR_BB_INSNS (current_bb, insn)
1714         if (INSN_P (insn))
1715           hash_scan_insn (insn, table);
1716     }
1717
1718   free (reg_avail_info);
1719   reg_avail_info = NULL;
1720 }
1721
1722 /* Allocate space for the set/expr hash TABLE.
1723    It is used to determine the number of buckets to use.
1724    SET_P determines whether set or expression table will
1725    be created.  */
1726
1727 static void
1728 alloc_hash_table (struct hash_table_d *table, int set_p)
1729 {
1730   int n;
1731
1732   n = get_max_insn_count ();
1733
1734   table->size = n / 4;
1735   if (table->size < 11)
1736     table->size = 11;
1737
1738   /* Attempt to maintain efficient use of hash table.
1739      Making it an odd number is simplest for now.
1740      ??? Later take some measurements.  */
1741   table->size |= 1;
1742   n = table->size * sizeof (struct expr *);
1743   table->table = GNEWVAR (struct expr *, n);
1744   table->set_p = set_p;
1745 }
1746
1747 /* Free things allocated by alloc_hash_table.  */
1748
1749 static void
1750 free_hash_table (struct hash_table_d *table)
1751 {
1752   free (table->table);
1753 }
1754
1755 /* Compute the hash TABLE for doing copy/const propagation or
1756    expression hash table.  */
1757
1758 static void
1759 compute_hash_table (struct hash_table_d *table)
1760 {
1761   /* Initialize count of number of entries in hash table.  */
1762   table->n_elems = 0;
1763   memset (table->table, 0, table->size * sizeof (struct expr *));
1764
1765   compute_hash_table_work (table);
1766 }
1767 \f
1768 /* Expression tracking support.  */
1769
1770 /* Lookup REGNO in the set TABLE.  The result is a pointer to the
1771    table entry, or NULL if not found.  */
1772
1773 static struct expr *
1774 lookup_set (unsigned int regno, struct hash_table_d *table)
1775 {
1776   unsigned int hash = hash_set (regno, table->size);
1777   struct expr *expr;
1778
1779   expr = table->table[hash];
1780
1781   while (expr && REGNO (SET_DEST (expr->expr)) != regno)
1782     expr = expr->next_same_hash;
1783
1784   return expr;
1785 }
1786
1787 /* Return the next entry for REGNO in list EXPR.  */
1788
1789 static struct expr *
1790 next_set (unsigned int regno, struct expr *expr)
1791 {
1792   do
1793     expr = expr->next_same_hash;
1794   while (expr && REGNO (SET_DEST (expr->expr)) != regno);
1795
1796   return expr;
1797 }
1798
1799 /* Like free_INSN_LIST_list or free_EXPR_LIST_list, except that the node
1800    types may be mixed.  */
1801
1802 static void
1803 free_insn_expr_list_list (rtx *listp)
1804 {
1805   rtx list, next;
1806
1807   for (list = *listp; list ; list = next)
1808     {
1809       next = XEXP (list, 1);
1810       if (GET_CODE (list) == EXPR_LIST)
1811         free_EXPR_LIST_node (list);
1812       else
1813         free_INSN_LIST_node (list);
1814     }
1815
1816   *listp = NULL;
1817 }
1818
1819 /* Clear canon_modify_mem_list and modify_mem_list tables.  */
1820 static void
1821 clear_modify_mem_tables (void)
1822 {
1823   unsigned i;
1824   bitmap_iterator bi;
1825
1826   EXECUTE_IF_SET_IN_BITMAP (modify_mem_list_set, 0, i, bi)
1827     {
1828       free_INSN_LIST_list (modify_mem_list + i);
1829       free_insn_expr_list_list (canon_modify_mem_list + i);
1830     }
1831   bitmap_clear (modify_mem_list_set);
1832   bitmap_clear (blocks_with_calls);
1833 }
1834
1835 /* Release memory used by modify_mem_list_set.  */
1836
1837 static void
1838 free_modify_mem_tables (void)
1839 {
1840   clear_modify_mem_tables ();
1841   free (modify_mem_list);
1842   free (canon_modify_mem_list);
1843   modify_mem_list = 0;
1844   canon_modify_mem_list = 0;
1845 }
1846
1847 /* Reset tables used to keep track of what's still available [since the
1848    start of the block].  */
1849
1850 static void
1851 reset_opr_set_tables (void)
1852 {
1853   /* Maintain a bitmap of which regs have been set since beginning of
1854      the block.  */
1855   CLEAR_REG_SET (reg_set_bitmap);
1856
1857   /* Also keep a record of the last instruction to modify memory.
1858      For now this is very trivial, we only record whether any memory
1859      location has been modified.  */
1860   clear_modify_mem_tables ();
1861 }
1862
1863 /* Return nonzero if the operands of X are not set before INSN in
1864    INSN's basic block.  */
1865
1866 static int
1867 oprs_not_set_p (const_rtx x, const_rtx insn)
1868 {
1869   int i, j;
1870   enum rtx_code code;
1871   const char *fmt;
1872
1873   if (x == 0)
1874     return 1;
1875
1876   code = GET_CODE (x);
1877   switch (code)
1878     {
1879     case PC:
1880     case CC0:
1881     case CONST:
1882     case CONST_INT:
1883     case CONST_DOUBLE:
1884     case CONST_FIXED:
1885     case CONST_VECTOR:
1886     case SYMBOL_REF:
1887     case LABEL_REF:
1888     case ADDR_VEC:
1889     case ADDR_DIFF_VEC:
1890       return 1;
1891
1892     case MEM:
1893       if (load_killed_in_block_p (BLOCK_FOR_INSN (insn),
1894                                   DF_INSN_LUID (insn), x, 0))
1895         return 0;
1896       else
1897         return oprs_not_set_p (XEXP (x, 0), insn);
1898
1899     case REG:
1900       return ! REGNO_REG_SET_P (reg_set_bitmap, REGNO (x));
1901
1902     default:
1903       break;
1904     }
1905
1906   for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
1907     {
1908       if (fmt[i] == 'e')
1909         {
1910           /* If we are about to do the last recursive call
1911              needed at this level, change it into iteration.
1912              This function is called enough to be worth it.  */
1913           if (i == 0)
1914             return oprs_not_set_p (XEXP (x, i), insn);
1915
1916           if (! oprs_not_set_p (XEXP (x, i), insn))
1917             return 0;
1918         }
1919       else if (fmt[i] == 'E')
1920         for (j = 0; j < XVECLEN (x, i); j++)
1921           if (! oprs_not_set_p (XVECEXP (x, i, j), insn))
1922             return 0;
1923     }
1924
1925   return 1;
1926 }
1927
1928 /* Mark things set by a CALL.  */
1929
1930 static void
1931 mark_call (rtx insn)
1932 {
1933   if (! RTL_CONST_OR_PURE_CALL_P (insn))
1934     record_last_mem_set_info (insn);
1935 }
1936
1937 /* Mark things set by a SET.  */
1938
1939 static void
1940 mark_set (rtx pat, rtx insn)
1941 {
1942   rtx dest = SET_DEST (pat);
1943
1944   while (GET_CODE (dest) == SUBREG
1945          || GET_CODE (dest) == ZERO_EXTRACT
1946          || GET_CODE (dest) == STRICT_LOW_PART)
1947     dest = XEXP (dest, 0);
1948
1949   if (REG_P (dest))
1950     SET_REGNO_REG_SET (reg_set_bitmap, REGNO (dest));
1951   else if (MEM_P (dest))
1952     record_last_mem_set_info (insn);
1953
1954   if (GET_CODE (SET_SRC (pat)) == CALL)
1955     mark_call (insn);
1956 }
1957
1958 /* Record things set by a CLOBBER.  */
1959
1960 static void
1961 mark_clobber (rtx pat, rtx insn)
1962 {
1963   rtx clob = XEXP (pat, 0);
1964
1965   while (GET_CODE (clob) == SUBREG || GET_CODE (clob) == STRICT_LOW_PART)
1966     clob = XEXP (clob, 0);
1967
1968   if (REG_P (clob))
1969     SET_REGNO_REG_SET (reg_set_bitmap, REGNO (clob));
1970   else
1971     record_last_mem_set_info (insn);
1972 }
1973
1974 /* Record things set by INSN.
1975    This data is used by oprs_not_set_p.  */
1976
1977 static void
1978 mark_oprs_set (rtx insn)
1979 {
1980   rtx pat = PATTERN (insn);
1981   int i;
1982
1983   if (GET_CODE (pat) == SET)
1984     mark_set (pat, insn);
1985   else if (GET_CODE (pat) == PARALLEL)
1986     for (i = 0; i < XVECLEN (pat, 0); i++)
1987       {
1988         rtx x = XVECEXP (pat, 0, i);
1989
1990         if (GET_CODE (x) == SET)
1991           mark_set (x, insn);
1992         else if (GET_CODE (x) == CLOBBER)
1993           mark_clobber (x, insn);
1994         else if (GET_CODE (x) == CALL)
1995           mark_call (insn);
1996       }
1997
1998   else if (GET_CODE (pat) == CLOBBER)
1999     mark_clobber (pat, insn);
2000   else if (GET_CODE (pat) == CALL)
2001     mark_call (insn);
2002 }
2003
2004 \f
2005 /* Compute copy/constant propagation working variables.  */
2006
2007 /* Local properties of assignments.  */
2008 static sbitmap *cprop_pavloc;
2009 static sbitmap *cprop_absaltered;
2010
2011 /* Global properties of assignments (computed from the local properties).  */
2012 static sbitmap *cprop_avin;
2013 static sbitmap *cprop_avout;
2014
2015 /* Allocate vars used for copy/const propagation.  N_BLOCKS is the number of
2016    basic blocks.  N_SETS is the number of sets.  */
2017
2018 static void
2019 alloc_cprop_mem (int n_blocks, int n_sets)
2020 {
2021   cprop_pavloc = sbitmap_vector_alloc (n_blocks, n_sets);
2022   cprop_absaltered = sbitmap_vector_alloc (n_blocks, n_sets);
2023
2024   cprop_avin = sbitmap_vector_alloc (n_blocks, n_sets);
2025   cprop_avout = sbitmap_vector_alloc (n_blocks, n_sets);
2026 }
2027
2028 /* Free vars used by copy/const propagation.  */
2029
2030 static void
2031 free_cprop_mem (void)
2032 {
2033   sbitmap_vector_free (cprop_pavloc);
2034   sbitmap_vector_free (cprop_absaltered);
2035   sbitmap_vector_free (cprop_avin);
2036   sbitmap_vector_free (cprop_avout);
2037 }
2038
2039 /* For each block, compute whether X is transparent.  X is either an
2040    expression or an assignment [though we don't care which, for this context
2041    an assignment is treated as an expression].  For each block where an
2042    element of X is modified, set (SET_P == 1) or reset (SET_P == 0) the INDX
2043    bit in BMAP.  */
2044
2045 static void
2046 compute_transp (const_rtx x, int indx, sbitmap *bmap, int set_p)
2047 {
2048   int i, j;
2049   enum rtx_code code;
2050   const char *fmt;
2051
2052   /* repeat is used to turn tail-recursion into iteration since GCC
2053      can't do it when there's no return value.  */
2054  repeat:
2055
2056   if (x == 0)
2057     return;
2058
2059   code = GET_CODE (x);
2060   switch (code)
2061     {
2062     case REG:
2063       if (set_p)
2064         {
2065           df_ref def;
2066           for (def = DF_REG_DEF_CHAIN (REGNO (x));
2067                def;
2068                def = DF_REF_NEXT_REG (def))
2069             SET_BIT (bmap[DF_REF_BB (def)->index], indx);
2070         }
2071       else
2072         {
2073           df_ref def;
2074           for (def = DF_REG_DEF_CHAIN (REGNO (x));
2075                def;
2076                def = DF_REF_NEXT_REG (def))
2077             RESET_BIT (bmap[DF_REF_BB (def)->index], indx);
2078         }
2079
2080       return;
2081
2082     case MEM:
2083       if (! MEM_READONLY_P (x))
2084         {
2085           bitmap_iterator bi;
2086           unsigned bb_index;
2087
2088           /* First handle all the blocks with calls.  We don't need to
2089              do any list walking for them.  */
2090           EXECUTE_IF_SET_IN_BITMAP (blocks_with_calls, 0, bb_index, bi)
2091             {
2092               if (set_p)
2093                 SET_BIT (bmap[bb_index], indx);
2094               else
2095                 RESET_BIT (bmap[bb_index], indx);
2096             }
2097
2098             /* Now iterate over the blocks which have memory modifications
2099                but which do not have any calls.  */
2100             EXECUTE_IF_AND_COMPL_IN_BITMAP (modify_mem_list_set,
2101                                             blocks_with_calls,
2102                                             0, bb_index, bi)
2103               {
2104                 rtx list_entry = canon_modify_mem_list[bb_index];
2105
2106                 while (list_entry)
2107                   {
2108                     rtx dest, dest_addr;
2109
2110                     /* LIST_ENTRY must be an INSN of some kind that sets memory.
2111                        Examine each hunk of memory that is modified.  */
2112
2113                     dest = XEXP (list_entry, 0);
2114                     list_entry = XEXP (list_entry, 1);
2115                     dest_addr = XEXP (list_entry, 0);
2116
2117                     if (canon_true_dependence (dest, GET_MODE (dest), dest_addr,
2118                                                x, NULL_RTX, rtx_addr_varies_p))
2119                       {
2120                         if (set_p)
2121                           SET_BIT (bmap[bb_index], indx);
2122                         else
2123                           RESET_BIT (bmap[bb_index], indx);
2124                         break;
2125                       }
2126                     list_entry = XEXP (list_entry, 1);
2127                   }
2128               }
2129         }
2130
2131       x = XEXP (x, 0);
2132       goto repeat;
2133
2134     case PC:
2135     case CC0: /*FIXME*/
2136     case CONST:
2137     case CONST_INT:
2138     case CONST_DOUBLE:
2139     case CONST_FIXED:
2140     case CONST_VECTOR:
2141     case SYMBOL_REF:
2142     case LABEL_REF:
2143     case ADDR_VEC:
2144     case ADDR_DIFF_VEC:
2145       return;
2146
2147     default:
2148       break;
2149     }
2150
2151   for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
2152     {
2153       if (fmt[i] == 'e')
2154         {
2155           /* If we are about to do the last recursive call
2156              needed at this level, change it into iteration.
2157              This function is called enough to be worth it.  */
2158           if (i == 0)
2159             {
2160               x = XEXP (x, i);
2161               goto repeat;
2162             }
2163
2164           compute_transp (XEXP (x, i), indx, bmap, set_p);
2165         }
2166       else if (fmt[i] == 'E')
2167         for (j = 0; j < XVECLEN (x, i); j++)
2168           compute_transp (XVECEXP (x, i, j), indx, bmap, set_p);
2169     }
2170 }
2171
2172 /* Top level routine to do the dataflow analysis needed by copy/const
2173    propagation.  */
2174
2175 static void
2176 compute_cprop_data (void)
2177 {
2178   compute_local_properties (cprop_absaltered, cprop_pavloc, NULL, &set_hash_table);
2179   compute_available (cprop_pavloc, cprop_absaltered,
2180                      cprop_avout, cprop_avin);
2181 }
2182 \f
2183 /* Copy/constant propagation.  */
2184
2185 /* Maximum number of register uses in an insn that we handle.  */
2186 #define MAX_USES 8
2187
2188 /* Table of uses found in an insn.
2189    Allocated statically to avoid alloc/free complexity and overhead.  */
2190 static struct reg_use reg_use_table[MAX_USES];
2191
2192 /* Index into `reg_use_table' while building it.  */
2193 static int reg_use_count;
2194
2195 /* Set up a list of register numbers used in INSN.  The found uses are stored
2196    in `reg_use_table'.  `reg_use_count' is initialized to zero before entry,
2197    and contains the number of uses in the table upon exit.
2198
2199    ??? If a register appears multiple times we will record it multiple times.
2200    This doesn't hurt anything but it will slow things down.  */
2201
2202 static void
2203 find_used_regs (rtx *xptr, void *data ATTRIBUTE_UNUSED)
2204 {
2205   int i, j;
2206   enum rtx_code code;
2207   const char *fmt;
2208   rtx x = *xptr;
2209
2210   /* repeat is used to turn tail-recursion into iteration since GCC
2211      can't do it when there's no return value.  */
2212  repeat:
2213   if (x == 0)
2214     return;
2215
2216   code = GET_CODE (x);
2217   if (REG_P (x))
2218     {
2219       if (reg_use_count == MAX_USES)
2220         return;
2221
2222       reg_use_table[reg_use_count].reg_rtx = x;
2223       reg_use_count++;
2224     }
2225
2226   /* Recursively scan the operands of this expression.  */
2227
2228   for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
2229     {
2230       if (fmt[i] == 'e')
2231         {
2232           /* If we are about to do the last recursive call
2233              needed at this level, change it into iteration.
2234              This function is called enough to be worth it.  */
2235           if (i == 0)
2236             {
2237               x = XEXP (x, 0);
2238               goto repeat;
2239             }
2240
2241           find_used_regs (&XEXP (x, i), data);
2242         }
2243       else if (fmt[i] == 'E')
2244         for (j = 0; j < XVECLEN (x, i); j++)
2245           find_used_regs (&XVECEXP (x, i, j), data);
2246     }
2247 }
2248
2249 /* Try to replace all non-SET_DEST occurrences of FROM in INSN with TO.
2250    Returns nonzero is successful.  */
2251
2252 static int
2253 try_replace_reg (rtx from, rtx to, rtx insn)
2254 {
2255   rtx note = find_reg_equal_equiv_note (insn);
2256   rtx src = 0;
2257   int success = 0;
2258   rtx set = single_set (insn);
2259
2260   /* Usually we substitute easy stuff, so we won't copy everything.
2261      We however need to take care to not duplicate non-trivial CONST
2262      expressions.  */
2263   to = copy_rtx (to);
2264
2265   validate_replace_src_group (from, to, insn);
2266   if (num_changes_pending () && apply_change_group ())
2267     success = 1;
2268
2269   /* Try to simplify SET_SRC if we have substituted a constant.  */
2270   if (success && set && CONSTANT_P (to))
2271     {
2272       src = simplify_rtx (SET_SRC (set));
2273
2274       if (src)
2275         validate_change (insn, &SET_SRC (set), src, 0);
2276     }
2277
2278   /* If there is already a REG_EQUAL note, update the expression in it
2279      with our replacement.  */
2280   if (note != 0 && REG_NOTE_KIND (note) == REG_EQUAL)
2281     set_unique_reg_note (insn, REG_EQUAL,
2282                          simplify_replace_rtx (XEXP (note, 0), from, to));
2283   if (!success && set && reg_mentioned_p (from, SET_SRC (set)))
2284     {
2285       /* If above failed and this is a single set, try to simplify the source of
2286          the set given our substitution.  We could perhaps try this for multiple
2287          SETs, but it probably won't buy us anything.  */
2288       src = simplify_replace_rtx (SET_SRC (set), from, to);
2289
2290       if (!rtx_equal_p (src, SET_SRC (set))
2291           && validate_change (insn, &SET_SRC (set), src, 0))
2292         success = 1;
2293
2294       /* If we've failed to do replacement, have a single SET, don't already
2295          have a note, and have no special SET, add a REG_EQUAL note to not
2296          lose information.  */
2297       if (!success && note == 0 && set != 0
2298           && GET_CODE (SET_DEST (set)) != ZERO_EXTRACT
2299           && GET_CODE (SET_DEST (set)) != STRICT_LOW_PART)
2300         note = set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
2301     }
2302
2303   /* REG_EQUAL may get simplified into register.
2304      We don't allow that. Remove that note. This code ought
2305      not to happen, because previous code ought to synthesize
2306      reg-reg move, but be on the safe side.  */
2307   if (note && REG_NOTE_KIND (note) == REG_EQUAL && REG_P (XEXP (note, 0)))
2308     remove_note (insn, note);
2309
2310   return success;
2311 }
2312
2313 /* Find a set of REGNOs that are available on entry to INSN's block.  Returns
2314    NULL no such set is found.  */
2315
2316 static struct expr *
2317 find_avail_set (int regno, rtx insn)
2318 {
2319   /* SET1 contains the last set found that can be returned to the caller for
2320      use in a substitution.  */
2321   struct expr *set1 = 0;
2322
2323   /* Loops are not possible here.  To get a loop we would need two sets
2324      available at the start of the block containing INSN.  i.e. we would
2325      need two sets like this available at the start of the block:
2326
2327        (set (reg X) (reg Y))
2328        (set (reg Y) (reg X))
2329
2330      This can not happen since the set of (reg Y) would have killed the
2331      set of (reg X) making it unavailable at the start of this block.  */
2332   while (1)
2333     {
2334       rtx src;
2335       struct expr *set = lookup_set (regno, &set_hash_table);
2336
2337       /* Find a set that is available at the start of the block
2338          which contains INSN.  */
2339       while (set)
2340         {
2341           if (TEST_BIT (cprop_avin[BLOCK_FOR_INSN (insn)->index],
2342                         set->bitmap_index))
2343             break;
2344           set = next_set (regno, set);
2345         }
2346
2347       /* If no available set was found we've reached the end of the
2348          (possibly empty) copy chain.  */
2349       if (set == 0)
2350         break;
2351
2352       gcc_assert (GET_CODE (set->expr) == SET);
2353
2354       src = SET_SRC (set->expr);
2355
2356       /* We know the set is available.
2357          Now check that SRC is ANTLOC (i.e. none of the source operands
2358          have changed since the start of the block).
2359
2360          If the source operand changed, we may still use it for the next
2361          iteration of this loop, but we may not use it for substitutions.  */
2362
2363       if (gcse_constant_p (src) || oprs_not_set_p (src, insn))
2364         set1 = set;
2365
2366       /* If the source of the set is anything except a register, then
2367          we have reached the end of the copy chain.  */
2368       if (! REG_P (src))
2369         break;
2370
2371       /* Follow the copy chain, i.e. start another iteration of the loop
2372          and see if we have an available copy into SRC.  */
2373       regno = REGNO (src);
2374     }
2375
2376   /* SET1 holds the last set that was available and anticipatable at
2377      INSN.  */
2378   return set1;
2379 }
2380
2381 /* Subroutine of cprop_insn that tries to propagate constants into
2382    JUMP_INSNS.  JUMP must be a conditional jump.  If SETCC is non-NULL
2383    it is the instruction that immediately precedes JUMP, and must be a
2384    single SET of a register.  FROM is what we will try to replace,
2385    SRC is the constant we will try to substitute for it.  Returns nonzero
2386    if a change was made.  */
2387
2388 static int
2389 cprop_jump (basic_block bb, rtx setcc, rtx jump, rtx from, rtx src)
2390 {
2391   rtx new_rtx, set_src, note_src;
2392   rtx set = pc_set (jump);
2393   rtx note = find_reg_equal_equiv_note (jump);
2394
2395   if (note)
2396     {
2397       note_src = XEXP (note, 0);
2398       if (GET_CODE (note_src) == EXPR_LIST)
2399         note_src = NULL_RTX;
2400     }
2401   else note_src = NULL_RTX;
2402
2403   /* Prefer REG_EQUAL notes except those containing EXPR_LISTs.  */
2404   set_src = note_src ? note_src : SET_SRC (set);
2405
2406   /* First substitute the SETCC condition into the JUMP instruction,
2407      then substitute that given values into this expanded JUMP.  */
2408   if (setcc != NULL_RTX
2409       && !modified_between_p (from, setcc, jump)
2410       && !modified_between_p (src, setcc, jump))
2411     {
2412       rtx setcc_src;
2413       rtx setcc_set = single_set (setcc);
2414       rtx setcc_note = find_reg_equal_equiv_note (setcc);
2415       setcc_src = (setcc_note && GET_CODE (XEXP (setcc_note, 0)) != EXPR_LIST)
2416                 ? XEXP (setcc_note, 0) : SET_SRC (setcc_set);
2417       set_src = simplify_replace_rtx (set_src, SET_DEST (setcc_set),
2418                                       setcc_src);
2419     }
2420   else
2421     setcc = NULL_RTX;
2422
2423   new_rtx = simplify_replace_rtx (set_src, from, src);
2424
2425   /* If no simplification can be made, then try the next register.  */
2426   if (rtx_equal_p (new_rtx, SET_SRC (set)))
2427     return 0;
2428
2429   /* If this is now a no-op delete it, otherwise this must be a valid insn.  */
2430   if (new_rtx == pc_rtx)
2431     delete_insn (jump);
2432   else
2433     {
2434       /* Ensure the value computed inside the jump insn to be equivalent
2435          to one computed by setcc.  */
2436       if (setcc && modified_in_p (new_rtx, setcc))
2437         return 0;
2438       if (! validate_unshare_change (jump, &SET_SRC (set), new_rtx, 0))
2439         {
2440           /* When (some) constants are not valid in a comparison, and there
2441              are two registers to be replaced by constants before the entire
2442              comparison can be folded into a constant, we need to keep
2443              intermediate information in REG_EQUAL notes.  For targets with
2444              separate compare insns, such notes are added by try_replace_reg.
2445              When we have a combined compare-and-branch instruction, however,
2446              we need to attach a note to the branch itself to make this
2447              optimization work.  */
2448
2449           if (!rtx_equal_p (new_rtx, note_src))
2450             set_unique_reg_note (jump, REG_EQUAL, copy_rtx (new_rtx));
2451           return 0;
2452         }
2453
2454       /* Remove REG_EQUAL note after simplification.  */
2455       if (note_src)
2456         remove_note (jump, note);
2457      }
2458
2459 #ifdef HAVE_cc0
2460   /* Delete the cc0 setter.  */
2461   if (setcc != NULL && CC0_P (SET_DEST (single_set (setcc))))
2462     delete_insn (setcc);
2463 #endif
2464
2465   global_const_prop_count++;
2466   if (dump_file != NULL)
2467     {
2468       fprintf (dump_file,
2469                "GLOBAL CONST-PROP: Replacing reg %d in jump_insn %d with constant ",
2470                REGNO (from), INSN_UID (jump));
2471       print_rtl (dump_file, src);
2472       fprintf (dump_file, "\n");
2473     }
2474   purge_dead_edges (bb);
2475
2476   /* If a conditional jump has been changed into unconditional jump, remove
2477      the jump and make the edge fallthru - this is always called in
2478      cfglayout mode.  */
2479   if (new_rtx != pc_rtx && simplejump_p (jump))
2480     {
2481       edge e;
2482       edge_iterator ei;
2483
2484       for (ei = ei_start (bb->succs); (e = ei_safe_edge (ei)); ei_next (&ei))
2485         if (e->dest != EXIT_BLOCK_PTR
2486             && BB_HEAD (e->dest) == JUMP_LABEL (jump))
2487           {
2488             e->flags |= EDGE_FALLTHRU;
2489             break;
2490           }
2491       delete_insn (jump);
2492     }
2493
2494   return 1;
2495 }
2496
2497 static bool
2498 constprop_register (rtx insn, rtx from, rtx to)
2499 {
2500   rtx sset;
2501
2502   /* Check for reg or cc0 setting instructions followed by
2503      conditional branch instructions first.  */
2504   if ((sset = single_set (insn)) != NULL
2505       && NEXT_INSN (insn)
2506       && any_condjump_p (NEXT_INSN (insn)) && onlyjump_p (NEXT_INSN (insn)))
2507     {
2508       rtx dest = SET_DEST (sset);
2509       if ((REG_P (dest) || CC0_P (dest))
2510           && cprop_jump (BLOCK_FOR_INSN (insn), insn, NEXT_INSN (insn), from, to))
2511         return 1;
2512     }
2513
2514   /* Handle normal insns next.  */
2515   if (NONJUMP_INSN_P (insn)
2516       && try_replace_reg (from, to, insn))
2517     return 1;
2518
2519   /* Try to propagate a CONST_INT into a conditional jump.
2520      We're pretty specific about what we will handle in this
2521      code, we can extend this as necessary over time.
2522
2523      Right now the insn in question must look like
2524      (set (pc) (if_then_else ...))  */
2525   else if (any_condjump_p (insn) && onlyjump_p (insn))
2526     return cprop_jump (BLOCK_FOR_INSN (insn), NULL, insn, from, to);
2527   return 0;
2528 }
2529
2530 /* Perform constant and copy propagation on INSN.
2531    The result is nonzero if a change was made.  */
2532
2533 static int
2534 cprop_insn (rtx insn)
2535 {
2536   struct reg_use *reg_used;
2537   int changed = 0;
2538   rtx note;
2539
2540   if (!INSN_P (insn))
2541     return 0;
2542
2543   reg_use_count = 0;
2544   note_uses (&PATTERN (insn), find_used_regs, NULL);
2545
2546   note = find_reg_equal_equiv_note (insn);
2547
2548   /* We may win even when propagating constants into notes.  */
2549   if (note)
2550     find_used_regs (&XEXP (note, 0), NULL);
2551
2552   for (reg_used = &reg_use_table[0]; reg_use_count > 0;
2553        reg_used++, reg_use_count--)
2554     {
2555       unsigned int regno = REGNO (reg_used->reg_rtx);
2556       rtx pat, src;
2557       struct expr *set;
2558
2559       /* If the register has already been set in this block, there's
2560          nothing we can do.  */
2561       if (! oprs_not_set_p (reg_used->reg_rtx, insn))
2562         continue;
2563
2564       /* Find an assignment that sets reg_used and is available
2565          at the start of the block.  */
2566       set = find_avail_set (regno, insn);
2567       if (! set)
2568         continue;
2569
2570       pat = set->expr;
2571       /* ??? We might be able to handle PARALLELs.  Later.  */
2572       gcc_assert (GET_CODE (pat) == SET);
2573
2574       src = SET_SRC (pat);
2575
2576       /* Constant propagation.  */
2577       if (gcse_constant_p (src))
2578         {
2579           if (constprop_register (insn, reg_used->reg_rtx, src))
2580             {
2581               changed = 1;
2582               global_const_prop_count++;
2583               if (dump_file != NULL)
2584                 {
2585                   fprintf (dump_file, "GLOBAL CONST-PROP: Replacing reg %d in ", regno);
2586                   fprintf (dump_file, "insn %d with constant ", INSN_UID (insn));
2587                   print_rtl (dump_file, src);
2588                   fprintf (dump_file, "\n");
2589                 }
2590               if (INSN_DELETED_P (insn))
2591                 return 1;
2592             }
2593         }
2594       else if (REG_P (src)
2595                && REGNO (src) >= FIRST_PSEUDO_REGISTER
2596                && REGNO (src) != regno)
2597         {
2598           if (try_replace_reg (reg_used->reg_rtx, src, insn))
2599             {
2600               changed = 1;
2601               global_copy_prop_count++;
2602               if (dump_file != NULL)
2603                 {
2604                   fprintf (dump_file, "GLOBAL COPY-PROP: Replacing reg %d in insn %d",
2605                            regno, INSN_UID (insn));
2606                   fprintf (dump_file, " with reg %d\n", REGNO (src));
2607                 }
2608
2609               /* The original insn setting reg_used may or may not now be
2610                  deletable.  We leave the deletion to flow.  */
2611               /* FIXME: If it turns out that the insn isn't deletable,
2612                  then we may have unnecessarily extended register lifetimes
2613                  and made things worse.  */
2614             }
2615         }
2616     }
2617
2618   if (changed && DEBUG_INSN_P (insn))
2619     return 0;
2620
2621   return changed;
2622 }
2623
2624 /* Like find_used_regs, but avoid recording uses that appear in
2625    input-output contexts such as zero_extract or pre_dec.  This
2626    restricts the cases we consider to those for which local cprop
2627    can legitimately make replacements.  */
2628
2629 static void
2630 local_cprop_find_used_regs (rtx *xptr, void *data)
2631 {
2632   rtx x = *xptr;
2633
2634   if (x == 0)
2635     return;
2636
2637   switch (GET_CODE (x))
2638     {
2639     case ZERO_EXTRACT:
2640     case SIGN_EXTRACT:
2641     case STRICT_LOW_PART:
2642       return;
2643
2644     case PRE_DEC:
2645     case PRE_INC:
2646     case POST_DEC:
2647     case POST_INC:
2648     case PRE_MODIFY:
2649     case POST_MODIFY:
2650       /* Can only legitimately appear this early in the context of
2651          stack pushes for function arguments, but handle all of the
2652          codes nonetheless.  */
2653       return;
2654
2655     case SUBREG:
2656       /* Setting a subreg of a register larger than word_mode leaves
2657          the non-written words unchanged.  */
2658       if (GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (x))) > BITS_PER_WORD)
2659         return;
2660       break;
2661
2662     default:
2663       break;
2664     }
2665
2666   find_used_regs (xptr, data);
2667 }
2668
2669 /* Try to perform local const/copy propagation on X in INSN.  */
2670
2671 static bool
2672 do_local_cprop (rtx x, rtx insn)
2673 {
2674   rtx newreg = NULL, newcnst = NULL;
2675
2676   /* Rule out USE instructions and ASM statements as we don't want to
2677      change the hard registers mentioned.  */
2678   if (REG_P (x)
2679       && (REGNO (x) >= FIRST_PSEUDO_REGISTER
2680           || (GET_CODE (PATTERN (insn)) != USE
2681               && asm_noperands (PATTERN (insn)) < 0)))
2682     {
2683       cselib_val *val = cselib_lookup (x, GET_MODE (x), 0);
2684       struct elt_loc_list *l;
2685
2686       if (!val)
2687         return false;
2688       for (l = val->locs; l; l = l->next)
2689         {
2690           rtx this_rtx = l->loc;
2691           rtx note;
2692
2693           if (gcse_constant_p (this_rtx))
2694             newcnst = this_rtx;
2695           if (REG_P (this_rtx) && REGNO (this_rtx) >= FIRST_PSEUDO_REGISTER
2696               /* Don't copy propagate if it has attached REG_EQUIV note.
2697                  At this point this only function parameters should have
2698                  REG_EQUIV notes and if the argument slot is used somewhere
2699                  explicitly, it means address of parameter has been taken,
2700                  so we should not extend the lifetime of the pseudo.  */
2701               && (!(note = find_reg_note (l->setting_insn, REG_EQUIV, NULL_RTX))
2702                   || ! MEM_P (XEXP (note, 0))))
2703             newreg = this_rtx;
2704         }
2705       if (newcnst && constprop_register (insn, x, newcnst))
2706         {
2707           if (dump_file != NULL)
2708             {
2709               fprintf (dump_file, "LOCAL CONST-PROP: Replacing reg %d in ",
2710                        REGNO (x));
2711               fprintf (dump_file, "insn %d with constant ",
2712                        INSN_UID (insn));
2713               print_rtl (dump_file, newcnst);
2714               fprintf (dump_file, "\n");
2715             }
2716           local_const_prop_count++;
2717           return true;
2718         }
2719       else if (newreg && newreg != x && try_replace_reg (x, newreg, insn))
2720         {
2721           if (dump_file != NULL)
2722             {
2723               fprintf (dump_file,
2724                        "LOCAL COPY-PROP: Replacing reg %d in insn %d",
2725                        REGNO (x), INSN_UID (insn));
2726               fprintf (dump_file, " with reg %d\n", REGNO (newreg));
2727             }
2728           local_copy_prop_count++;
2729           return true;
2730         }
2731     }
2732   return false;
2733 }
2734
2735 /* Do local const/copy propagation (i.e. within each basic block).  */
2736
2737 static int
2738 local_cprop_pass (void)
2739 {
2740   basic_block bb;
2741   rtx insn;
2742   struct reg_use *reg_used;
2743   bool changed = false;
2744
2745   cselib_init (false);
2746   FOR_EACH_BB (bb)
2747     {
2748       FOR_BB_INSNS (bb, insn)
2749         {
2750           if (INSN_P (insn))
2751             {
2752               rtx note = find_reg_equal_equiv_note (insn);
2753               do
2754                 {
2755                   reg_use_count = 0;
2756                   note_uses (&PATTERN (insn), local_cprop_find_used_regs,
2757                              NULL);
2758                   if (note)
2759                     local_cprop_find_used_regs (&XEXP (note, 0), NULL);
2760
2761                   for (reg_used = &reg_use_table[0]; reg_use_count > 0;
2762                        reg_used++, reg_use_count--)
2763                     {
2764                       if (do_local_cprop (reg_used->reg_rtx, insn))
2765                         {
2766                           changed = true;
2767                           break;
2768                         }
2769                     }
2770                   if (INSN_DELETED_P (insn))
2771                     break;
2772                 }
2773               while (reg_use_count);
2774             }
2775           cselib_process_insn (insn);
2776         }
2777
2778       /* Forget everything at the end of a basic block.  */
2779       cselib_clear_table ();
2780     }
2781
2782   cselib_finish ();
2783
2784   return changed;
2785 }
2786
2787 /* Similar to get_condition, only the resulting condition must be
2788    valid at JUMP, instead of at EARLIEST.
2789
2790    This differs from noce_get_condition in ifcvt.c in that we prefer not to
2791    settle for the condition variable in the jump instruction being integral.
2792    We prefer to be able to record the value of a user variable, rather than
2793    the value of a temporary used in a condition.  This could be solved by
2794    recording the value of *every* register scanned by canonicalize_condition,
2795    but this would require some code reorganization.  */
2796
2797 rtx
2798 fis_get_condition (rtx jump)
2799 {
2800   return get_condition (jump, NULL, false, true);
2801 }
2802
2803 /* Check the comparison COND to see if we can safely form an implicit set from
2804    it.  COND is either an EQ or NE comparison.  */
2805
2806 static bool
2807 implicit_set_cond_p (const_rtx cond)
2808 {
2809   const enum machine_mode mode = GET_MODE (XEXP (cond, 0));
2810   const_rtx cst = XEXP (cond, 1);
2811
2812   /* We can't perform this optimization if either operand might be or might
2813      contain a signed zero.  */
2814   if (HONOR_SIGNED_ZEROS (mode))
2815     {
2816       /* It is sufficient to check if CST is or contains a zero.  We must
2817          handle float, complex, and vector.  If any subpart is a zero, then
2818          the optimization can't be performed.  */
2819       /* ??? The complex and vector checks are not implemented yet.  We just
2820          always return zero for them.  */
2821       if (GET_CODE (cst) == CONST_DOUBLE)
2822         {
2823           REAL_VALUE_TYPE d;
2824           REAL_VALUE_FROM_CONST_DOUBLE (d, cst);
2825           if (REAL_VALUES_EQUAL (d, dconst0))
2826             return 0;
2827         }
2828       else
2829         return 0;
2830     }
2831
2832   return gcse_constant_p (cst);
2833 }
2834
2835 /* Find the implicit sets of a function.  An "implicit set" is a constraint
2836    on the value of a variable, implied by a conditional jump.  For example,
2837    following "if (x == 2)", the then branch may be optimized as though the
2838    conditional performed an "explicit set", in this example, "x = 2".  This
2839    function records the set patterns that are implicit at the start of each
2840    basic block.
2841
2842    FIXME: This would be more effective if critical edges are pre-split.  As
2843           it is now, we can't record implicit sets for blocks that have
2844           critical successor edges.  This results in missed optimizations
2845           and in more (unnecessary) work in cfgcleanup.c:thread_jump().  */
2846
2847 static void
2848 find_implicit_sets (void)
2849 {
2850   basic_block bb, dest;
2851   unsigned int count;
2852   rtx cond, new_rtx;
2853
2854   count = 0;
2855   FOR_EACH_BB (bb)
2856     /* Check for more than one successor.  */
2857     if (EDGE_COUNT (bb->succs) > 1)
2858       {
2859         cond = fis_get_condition (BB_END (bb));
2860
2861         if (cond
2862             && (GET_CODE (cond) == EQ || GET_CODE (cond) == NE)
2863             && REG_P (XEXP (cond, 0))
2864             && REGNO (XEXP (cond, 0)) >= FIRST_PSEUDO_REGISTER
2865             && implicit_set_cond_p (cond))
2866           {
2867             dest = GET_CODE (cond) == EQ ? BRANCH_EDGE (bb)->dest
2868                                          : FALLTHRU_EDGE (bb)->dest;
2869
2870             if (dest
2871                 /* Record nothing for a critical edge.  */
2872                 && single_pred_p (dest)
2873                 && dest != EXIT_BLOCK_PTR)
2874               {
2875                 new_rtx = gen_rtx_SET (VOIDmode, XEXP (cond, 0),
2876                                              XEXP (cond, 1));
2877                 implicit_sets[dest->index] = new_rtx;
2878                 if (dump_file)
2879                   {
2880                     fprintf(dump_file, "Implicit set of reg %d in ",
2881                             REGNO (XEXP (cond, 0)));
2882                     fprintf(dump_file, "basic block %d\n", dest->index);
2883                   }
2884                 count++;
2885               }
2886           }
2887       }
2888
2889   if (dump_file)
2890     fprintf (dump_file, "Found %d implicit sets\n", count);
2891 }
2892
2893 /* Bypass conditional jumps.  */
2894
2895 /* The value of last_basic_block at the beginning of the jump_bypass
2896    pass.  The use of redirect_edge_and_branch_force may introduce new
2897    basic blocks, but the data flow analysis is only valid for basic
2898    block indices less than bypass_last_basic_block.  */
2899
2900 static int bypass_last_basic_block;
2901
2902 /* Find a set of REGNO to a constant that is available at the end of basic
2903    block BB.  Returns NULL if no such set is found.  Based heavily upon
2904    find_avail_set.  */
2905
2906 static struct expr *
2907 find_bypass_set (int regno, int bb)
2908 {
2909   struct expr *result = 0;
2910
2911   for (;;)
2912     {
2913       rtx src;
2914       struct expr *set = lookup_set (regno, &set_hash_table);
2915
2916       while (set)
2917         {
2918           if (TEST_BIT (cprop_avout[bb], set->bitmap_index))
2919             break;
2920           set = next_set (regno, set);
2921         }
2922
2923       if (set == 0)
2924         break;
2925
2926       gcc_assert (GET_CODE (set->expr) == SET);
2927
2928       src = SET_SRC (set->expr);
2929       if (gcse_constant_p (src))
2930         result = set;
2931
2932       if (! REG_P (src))
2933         break;
2934
2935       regno = REGNO (src);
2936     }
2937   return result;
2938 }
2939
2940
2941 /* Subroutine of bypass_block that checks whether a pseudo is killed by
2942    any of the instructions inserted on an edge.  Jump bypassing places
2943    condition code setters on CFG edges using insert_insn_on_edge.  This
2944    function is required to check that our data flow analysis is still
2945    valid prior to commit_edge_insertions.  */
2946
2947 static bool
2948 reg_killed_on_edge (const_rtx reg, const_edge e)
2949 {
2950   rtx insn;
2951
2952   for (insn = e->insns.r; insn; insn = NEXT_INSN (insn))
2953     if (INSN_P (insn) && reg_set_p (reg, insn))
2954       return true;
2955
2956   return false;
2957 }
2958
2959 /* Subroutine of bypass_conditional_jumps that attempts to bypass the given
2960    basic block BB which has more than one predecessor.  If not NULL, SETCC
2961    is the first instruction of BB, which is immediately followed by JUMP_INSN
2962    JUMP.  Otherwise, SETCC is NULL, and JUMP is the first insn of BB.
2963    Returns nonzero if a change was made.
2964
2965    During the jump bypassing pass, we may place copies of SETCC instructions
2966    on CFG edges.  The following routine must be careful to pay attention to
2967    these inserted insns when performing its transformations.  */
2968
2969 static int
2970 bypass_block (basic_block bb, rtx setcc, rtx jump)
2971 {
2972   rtx insn, note;
2973   edge e, edest;
2974   int i, change;
2975   int may_be_loop_header;
2976   unsigned removed_p;
2977   edge_iterator ei;
2978
2979   insn = (setcc != NULL) ? setcc : jump;
2980
2981   /* Determine set of register uses in INSN.  */
2982   reg_use_count = 0;
2983   note_uses (&PATTERN (insn), find_used_regs, NULL);
2984   note = find_reg_equal_equiv_note (insn);
2985   if (note)
2986     find_used_regs (&XEXP (note, 0), NULL);
2987
2988   may_be_loop_header = false;
2989   FOR_EACH_EDGE (e, ei, bb->preds)
2990     if (e->flags & EDGE_DFS_BACK)
2991       {
2992         may_be_loop_header = true;
2993         break;
2994       }
2995
2996   change = 0;
2997   for (ei = ei_start (bb->preds); (e = ei_safe_edge (ei)); )
2998     {
2999       removed_p = 0;
3000
3001       if (e->flags & EDGE_COMPLEX)
3002         {
3003           ei_next (&ei);
3004           continue;
3005         }
3006
3007       /* We can't redirect edges from new basic blocks.  */
3008       if (e->src->index >= bypass_last_basic_block)
3009         {
3010           ei_next (&ei);
3011           continue;
3012         }
3013
3014       /* The irreducible loops created by redirecting of edges entering the
3015          loop from outside would decrease effectiveness of some of the following
3016          optimizations, so prevent this.  */
3017       if (may_be_loop_header
3018           && !(e->flags & EDGE_DFS_BACK))
3019         {
3020           ei_next (&ei);
3021           continue;
3022         }
3023
3024       for (i = 0; i < reg_use_count; i++)
3025         {
3026           struct reg_use *reg_used = &reg_use_table[i];
3027           unsigned int regno = REGNO (reg_used->reg_rtx);
3028           basic_block dest, old_dest;
3029           struct expr *set;
3030           rtx src, new_rtx;
3031
3032           set = find_bypass_set (regno, e->src->index);
3033
3034           if (! set)
3035             continue;
3036
3037           /* Check the data flow is valid after edge insertions.  */
3038           if (e->insns.r && reg_killed_on_edge (reg_used->reg_rtx, e))
3039             continue;
3040
3041           src = SET_SRC (pc_set (jump));
3042
3043           if (setcc != NULL)
3044             src = simplify_replace_rtx (src,
3045                                         SET_DEST (PATTERN (setcc)),
3046                                         SET_SRC (PATTERN (setcc)));
3047
3048           new_rtx = simplify_replace_rtx (src, reg_used->reg_rtx,
3049                                           SET_SRC (set->expr));
3050
3051           /* Jump bypassing may have already placed instructions on
3052              edges of the CFG.  We can't bypass an outgoing edge that
3053              has instructions associated with it, as these insns won't
3054              get executed if the incoming edge is redirected.  */
3055
3056           if (new_rtx == pc_rtx)
3057             {
3058               edest = FALLTHRU_EDGE (bb);
3059               dest = edest->insns.r ? NULL : edest->dest;
3060             }
3061           else if (GET_CODE (new_rtx) == LABEL_REF)
3062             {
3063               dest = BLOCK_FOR_INSN (XEXP (new_rtx, 0));
3064               /* Don't bypass edges containing instructions.  */
3065               edest = find_edge (bb, dest);
3066               if (edest && edest->insns.r)
3067                 dest = NULL;
3068             }
3069           else
3070             dest = NULL;
3071
3072           /* Avoid unification of the edge with other edges from original
3073              branch.  We would end up emitting the instruction on "both"
3074              edges.  */
3075
3076           if (dest && setcc && !CC0_P (SET_DEST (PATTERN (setcc)))
3077               && find_edge (e->src, dest))
3078             dest = NULL;
3079
3080           old_dest = e->dest;
3081           if (dest != NULL
3082               && dest != old_dest
3083               && dest != EXIT_BLOCK_PTR)
3084             {
3085               redirect_edge_and_branch_force (e, dest);
3086
3087               /* Copy the register setter to the redirected edge.
3088                  Don't copy CC0 setters, as CC0 is dead after jump.  */
3089               if (setcc)
3090                 {
3091                   rtx pat = PATTERN (setcc);
3092                   if (!CC0_P (SET_DEST (pat)))
3093                     insert_insn_on_edge (copy_insn (pat), e);
3094                 }
3095
3096               if (dump_file != NULL)
3097                 {
3098                   fprintf (dump_file, "JUMP-BYPASS: Proved reg %d "
3099                                       "in jump_insn %d equals constant ",
3100                            regno, INSN_UID (jump));
3101                   print_rtl (dump_file, SET_SRC (set->expr));
3102                   fprintf (dump_file, "\nBypass edge from %d->%d to %d\n",
3103                            e->src->index, old_dest->index, dest->index);
3104                 }
3105               change = 1;
3106               removed_p = 1;
3107               break;
3108             }
3109         }
3110       if (!removed_p)
3111         ei_next (&ei);
3112     }
3113   return change;
3114 }
3115
3116 /* Find basic blocks with more than one predecessor that only contain a
3117    single conditional jump.  If the result of the comparison is known at
3118    compile-time from any incoming edge, redirect that edge to the
3119    appropriate target.  Returns nonzero if a change was made.
3120
3121    This function is now mis-named, because we also handle indirect jumps.  */
3122
3123 static int
3124 bypass_conditional_jumps (void)
3125 {
3126   basic_block bb;
3127   int changed;
3128   rtx setcc;
3129   rtx insn;
3130   rtx dest;
3131
3132   /* Note we start at block 1.  */
3133   if (ENTRY_BLOCK_PTR->next_bb == EXIT_BLOCK_PTR)
3134     return 0;
3135
3136   bypass_last_basic_block = last_basic_block;
3137   mark_dfs_back_edges ();
3138
3139   changed = 0;
3140   FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR->next_bb->next_bb,
3141                   EXIT_BLOCK_PTR, next_bb)
3142     {
3143       /* Check for more than one predecessor.  */
3144       if (!single_pred_p (bb))
3145         {
3146           setcc = NULL_RTX;
3147           FOR_BB_INSNS (bb, insn)
3148             if (DEBUG_INSN_P (insn))
3149               continue;
3150             else if (NONJUMP_INSN_P (insn))
3151               {
3152                 if (setcc)
3153                   break;
3154                 if (GET_CODE (PATTERN (insn)) != SET)
3155                   break;
3156
3157                 dest = SET_DEST (PATTERN (insn));
3158                 if (REG_P (dest) || CC0_P (dest))
3159                   setcc = insn;
3160                 else
3161                   break;
3162               }
3163             else if (JUMP_P (insn))
3164               {
3165                 if ((any_condjump_p (insn) || computed_jump_p (insn))
3166                     && onlyjump_p (insn))
3167                   changed |= bypass_block (bb, setcc, insn);
3168                 break;
3169               }
3170             else if (INSN_P (insn))
3171               break;
3172         }
3173     }
3174
3175   /* If we bypassed any register setting insns, we inserted a
3176      copy on the redirected edge.  These need to be committed.  */
3177   if (changed)
3178     commit_edge_insertions ();
3179
3180   return changed;
3181 }
3182 \f
3183 /* Compute PRE+LCM working variables.  */
3184
3185 /* Local properties of expressions.  */
3186 /* Nonzero for expressions that are transparent in the block.  */
3187 static sbitmap *transp;
3188
3189 /* Nonzero for expressions that are transparent at the end of the block.
3190    This is only zero for expressions killed by abnormal critical edge
3191    created by a calls.  */
3192 static sbitmap *transpout;
3193
3194 /* Nonzero for expressions that are computed (available) in the block.  */
3195 static sbitmap *comp;
3196
3197 /* Nonzero for expressions that are locally anticipatable in the block.  */
3198 static sbitmap *antloc;
3199
3200 /* Nonzero for expressions where this block is an optimal computation
3201    point.  */
3202 static sbitmap *pre_optimal;
3203
3204 /* Nonzero for expressions which are redundant in a particular block.  */
3205 static sbitmap *pre_redundant;
3206
3207 /* Nonzero for expressions which should be inserted on a specific edge.  */
3208 static sbitmap *pre_insert_map;
3209
3210 /* Nonzero for expressions which should be deleted in a specific block.  */
3211 static sbitmap *pre_delete_map;
3212
3213 /* Contains the edge_list returned by pre_edge_lcm.  */
3214 static struct edge_list *edge_list;
3215
3216 /* Allocate vars used for PRE analysis.  */
3217
3218 static void
3219 alloc_pre_mem (int n_blocks, int n_exprs)
3220 {
3221   transp = sbitmap_vector_alloc (n_blocks, n_exprs);
3222   comp = sbitmap_vector_alloc (n_blocks, n_exprs);
3223   antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
3224
3225   pre_optimal = NULL;
3226   pre_redundant = NULL;
3227   pre_insert_map = NULL;
3228   pre_delete_map = NULL;
3229   ae_kill = sbitmap_vector_alloc (n_blocks, n_exprs);
3230
3231   /* pre_insert and pre_delete are allocated later.  */
3232 }
3233
3234 /* Free vars used for PRE analysis.  */
3235
3236 static void
3237 free_pre_mem (void)
3238 {
3239   sbitmap_vector_free (transp);
3240   sbitmap_vector_free (comp);
3241
3242   /* ANTLOC and AE_KILL are freed just after pre_lcm finishes.  */
3243
3244   if (pre_optimal)
3245     sbitmap_vector_free (pre_optimal);
3246   if (pre_redundant)
3247     sbitmap_vector_free (pre_redundant);
3248   if (pre_insert_map)
3249     sbitmap_vector_free (pre_insert_map);
3250   if (pre_delete_map)
3251     sbitmap_vector_free (pre_delete_map);
3252
3253   transp = comp = NULL;
3254   pre_optimal = pre_redundant = pre_insert_map = pre_delete_map = NULL;
3255 }
3256
3257 /* Top level routine to do the dataflow analysis needed by PRE.  */
3258
3259 static void
3260 compute_pre_data (void)
3261 {
3262   sbitmap trapping_expr;
3263   basic_block bb;
3264   unsigned int ui;
3265
3266   compute_local_properties (transp, comp, antloc, &expr_hash_table);
3267   sbitmap_vector_zero (ae_kill, last_basic_block);
3268
3269   /* Collect expressions which might trap.  */
3270   trapping_expr = sbitmap_alloc (expr_hash_table.n_elems);
3271   sbitmap_zero (trapping_expr);
3272   for (ui = 0; ui < expr_hash_table.size; ui++)
3273     {
3274       struct expr *e;
3275       for (e = expr_hash_table.table[ui]; e != NULL; e = e->next_same_hash)
3276         if (may_trap_p (e->expr))
3277           SET_BIT (trapping_expr, e->bitmap_index);
3278     }
3279
3280   /* Compute ae_kill for each basic block using:
3281
3282      ~(TRANSP | COMP)
3283   */
3284
3285   FOR_EACH_BB (bb)
3286     {
3287       edge e;
3288       edge_iterator ei;
3289
3290       /* If the current block is the destination of an abnormal edge, we
3291          kill all trapping expressions because we won't be able to properly
3292          place the instruction on the edge.  So make them neither
3293          anticipatable nor transparent.  This is fairly conservative.  */
3294       FOR_EACH_EDGE (e, ei, bb->preds)
3295         if (e->flags & EDGE_ABNORMAL)
3296           {
3297             sbitmap_difference (antloc[bb->index], antloc[bb->index], trapping_expr);
3298             sbitmap_difference (transp[bb->index], transp[bb->index], trapping_expr);
3299             break;
3300           }
3301
3302       sbitmap_a_or_b (ae_kill[bb->index], transp[bb->index], comp[bb->index]);
3303       sbitmap_not (ae_kill[bb->index], ae_kill[bb->index]);
3304     }
3305
3306   edge_list = pre_edge_lcm (expr_hash_table.n_elems, transp, comp, antloc,
3307                             ae_kill, &pre_insert_map, &pre_delete_map);
3308   sbitmap_vector_free (antloc);
3309   antloc = NULL;
3310   sbitmap_vector_free (ae_kill);
3311   ae_kill = NULL;
3312   sbitmap_free (trapping_expr);
3313 }
3314 \f
3315 /* PRE utilities */
3316
3317 /* Return nonzero if an occurrence of expression EXPR in OCCR_BB would reach
3318    block BB.
3319
3320    VISITED is a pointer to a working buffer for tracking which BB's have
3321    been visited.  It is NULL for the top-level call.
3322
3323    We treat reaching expressions that go through blocks containing the same
3324    reaching expression as "not reaching".  E.g. if EXPR is generated in blocks
3325    2 and 3, INSN is in block 4, and 2->3->4, we treat the expression in block
3326    2 as not reaching.  The intent is to improve the probability of finding
3327    only one reaching expression and to reduce register lifetimes by picking
3328    the closest such expression.  */
3329
3330 static int
3331 pre_expr_reaches_here_p_work (basic_block occr_bb, struct expr *expr, basic_block bb, char *visited)
3332 {
3333   edge pred;
3334   edge_iterator ei;
3335
3336   FOR_EACH_EDGE (pred, ei, bb->preds)
3337     {
3338       basic_block pred_bb = pred->src;
3339
3340       if (pred->src == ENTRY_BLOCK_PTR
3341           /* Has predecessor has already been visited?  */
3342           || visited[pred_bb->index])
3343         ;/* Nothing to do.  */
3344
3345       /* Does this predecessor generate this expression?  */
3346       else if (TEST_BIT (comp[pred_bb->index], expr->bitmap_index))
3347         {
3348           /* Is this the occurrence we're looking for?
3349              Note that there's only one generating occurrence per block
3350              so we just need to check the block number.  */
3351           if (occr_bb == pred_bb)
3352             return 1;
3353
3354           visited[pred_bb->index] = 1;
3355         }
3356       /* Ignore this predecessor if it kills the expression.  */
3357       else if (! TEST_BIT (transp[pred_bb->index], expr->bitmap_index))
3358         visited[pred_bb->index] = 1;
3359
3360       /* Neither gen nor kill.  */
3361       else
3362         {
3363           visited[pred_bb->index] = 1;
3364           if (pre_expr_reaches_here_p_work (occr_bb, expr, pred_bb, visited))
3365             return 1;
3366         }
3367     }
3368
3369   /* All paths have been checked.  */
3370   return 0;
3371 }
3372
3373 /* The wrapper for pre_expr_reaches_here_work that ensures that any
3374    memory allocated for that function is returned.  */
3375
3376 static int
3377 pre_expr_reaches_here_p (basic_block occr_bb, struct expr *expr, basic_block bb)
3378 {
3379   int rval;
3380   char *visited = XCNEWVEC (char, last_basic_block);
3381
3382   rval = pre_expr_reaches_here_p_work (occr_bb, expr, bb, visited);
3383
3384   free (visited);
3385   return rval;
3386 }
3387 \f
3388
3389 /* Given an expr, generate RTL which we can insert at the end of a BB,
3390    or on an edge.  Set the block number of any insns generated to
3391    the value of BB.  */
3392
3393 static rtx
3394 process_insert_insn (struct expr *expr)
3395 {
3396   rtx reg = expr->reaching_reg;
3397   rtx exp = copy_rtx (expr->expr);
3398   rtx pat;
3399
3400   start_sequence ();
3401
3402   /* If the expression is something that's an operand, like a constant,
3403      just copy it to a register.  */
3404   if (general_operand (exp, GET_MODE (reg)))
3405     emit_move_insn (reg, exp);
3406
3407   /* Otherwise, make a new insn to compute this expression and make sure the
3408      insn will be recognized (this also adds any needed CLOBBERs).  Copy the
3409      expression to make sure we don't have any sharing issues.  */
3410   else
3411     {
3412       rtx insn = emit_insn (gen_rtx_SET (VOIDmode, reg, exp));
3413
3414       if (insn_invalid_p (insn))
3415         gcc_unreachable ();
3416     }
3417
3418
3419   pat = get_insns ();
3420   end_sequence ();
3421
3422   return pat;
3423 }
3424
3425 /* Add EXPR to the end of basic block BB.
3426
3427    This is used by both the PRE and code hoisting.
3428
3429    For PRE, we want to verify that the expr is either transparent
3430    or locally anticipatable in the target block.  This check makes
3431    no sense for code hoisting.  */
3432
3433 static void
3434 insert_insn_end_basic_block (struct expr *expr, basic_block bb, int pre)
3435 {
3436   rtx insn = BB_END (bb);
3437   rtx new_insn;
3438   rtx reg = expr->reaching_reg;
3439   int regno = REGNO (reg);
3440   rtx pat, pat_end;
3441
3442   pat = process_insert_insn (expr);
3443   gcc_assert (pat && INSN_P (pat));
3444
3445   pat_end = pat;
3446   while (NEXT_INSN (pat_end) != NULL_RTX)
3447     pat_end = NEXT_INSN (pat_end);
3448
3449   /* If the last insn is a jump, insert EXPR in front [taking care to
3450      handle cc0, etc. properly].  Similarly we need to care trapping
3451      instructions in presence of non-call exceptions.  */
3452
3453   if (JUMP_P (insn)
3454       || (NONJUMP_INSN_P (insn)
3455           && (!single_succ_p (bb)
3456               || single_succ_edge (bb)->flags & EDGE_ABNORMAL)))
3457     {
3458 #ifdef HAVE_cc0
3459       rtx note;
3460 #endif
3461       /* It should always be the case that we can put these instructions
3462          anywhere in the basic block with performing PRE optimizations.
3463          Check this.  */
3464       gcc_assert (!NONJUMP_INSN_P (insn) || !pre
3465                   || TEST_BIT (antloc[bb->index], expr->bitmap_index)
3466                   || TEST_BIT (transp[bb->index], expr->bitmap_index));
3467
3468       /* If this is a jump table, then we can't insert stuff here.  Since
3469          we know the previous real insn must be the tablejump, we insert
3470          the new instruction just before the tablejump.  */
3471       if (GET_CODE (PATTERN (insn)) == ADDR_VEC
3472           || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
3473         insn = prev_real_insn (insn);
3474
3475 #ifdef HAVE_cc0
3476       /* FIXME: 'twould be nice to call prev_cc0_setter here but it aborts
3477          if cc0 isn't set.  */
3478       note = find_reg_note (insn, REG_CC_SETTER, NULL_RTX);
3479       if (note)
3480         insn = XEXP (note, 0);
3481       else
3482         {
3483           rtx maybe_cc0_setter = prev_nonnote_insn (insn);
3484           if (maybe_cc0_setter
3485               && INSN_P (maybe_cc0_setter)
3486               && sets_cc0_p (PATTERN (maybe_cc0_setter)))
3487             insn = maybe_cc0_setter;
3488         }
3489 #endif
3490       /* FIXME: What if something in cc0/jump uses value set in new insn?  */
3491       new_insn = emit_insn_before_noloc (pat, insn, bb);
3492     }
3493
3494   /* Likewise if the last insn is a call, as will happen in the presence
3495      of exception handling.  */
3496   else if (CALL_P (insn)
3497            && (!single_succ_p (bb)
3498                || single_succ_edge (bb)->flags & EDGE_ABNORMAL))
3499     {
3500       /* Keeping in mind SMALL_REGISTER_CLASSES and parameters in registers,
3501          we search backward and place the instructions before the first
3502          parameter is loaded.  Do this for everyone for consistency and a
3503          presumption that we'll get better code elsewhere as well.
3504
3505          It should always be the case that we can put these instructions
3506          anywhere in the basic block with performing PRE optimizations.
3507          Check this.  */
3508
3509       gcc_assert (!pre
3510                   || TEST_BIT (antloc[bb->index], expr->bitmap_index)
3511                   || TEST_BIT (transp[bb->index], expr->bitmap_index));
3512
3513       /* Since different machines initialize their parameter registers
3514          in different orders, assume nothing.  Collect the set of all
3515          parameter registers.  */
3516       insn = find_first_parameter_load (insn, BB_HEAD (bb));
3517
3518       /* If we found all the parameter loads, then we want to insert
3519          before the first parameter load.
3520
3521          If we did not find all the parameter loads, then we might have
3522          stopped on the head of the block, which could be a CODE_LABEL.
3523          If we inserted before the CODE_LABEL, then we would be putting
3524          the insn in the wrong basic block.  In that case, put the insn
3525          after the CODE_LABEL.  Also, respect NOTE_INSN_BASIC_BLOCK.  */
3526       while (LABEL_P (insn)
3527              || NOTE_INSN_BASIC_BLOCK_P (insn))
3528         insn = NEXT_INSN (insn);
3529
3530       new_insn = emit_insn_before_noloc (pat, insn, bb);
3531     }
3532   else
3533     new_insn = emit_insn_after_noloc (pat, insn, bb);
3534
3535   while (1)
3536     {
3537       if (INSN_P (pat))
3538         add_label_notes (PATTERN (pat), new_insn);
3539       if (pat == pat_end)
3540         break;
3541       pat = NEXT_INSN (pat);
3542     }
3543
3544   gcse_create_count++;
3545
3546   if (dump_file)
3547     {
3548       fprintf (dump_file, "PRE/HOIST: end of bb %d, insn %d, ",
3549                bb->index, INSN_UID (new_insn));
3550       fprintf (dump_file, "copying expression %d to reg %d\n",
3551                expr->bitmap_index, regno);
3552     }
3553 }
3554
3555 /* Insert partially redundant expressions on edges in the CFG to make
3556    the expressions fully redundant.  */
3557
3558 static int
3559 pre_edge_insert (struct edge_list *edge_list, struct expr **index_map)
3560 {
3561   int e, i, j, num_edges, set_size, did_insert = 0;
3562   sbitmap *inserted;
3563
3564   /* Where PRE_INSERT_MAP is nonzero, we add the expression on that edge
3565      if it reaches any of the deleted expressions.  */
3566
3567   set_size = pre_insert_map[0]->size;
3568   num_edges = NUM_EDGES (edge_list);
3569   inserted = sbitmap_vector_alloc (num_edges, expr_hash_table.n_elems);
3570   sbitmap_vector_zero (inserted, num_edges);
3571
3572   for (e = 0; e < num_edges; e++)
3573     {
3574       int indx;
3575       basic_block bb = INDEX_EDGE_PRED_BB (edge_list, e);
3576
3577       for (i = indx = 0; i < set_size; i++, indx += SBITMAP_ELT_BITS)
3578         {
3579           SBITMAP_ELT_TYPE insert = pre_insert_map[e]->elms[i];
3580
3581           for (j = indx; insert && j < (int) expr_hash_table.n_elems; j++, insert >>= 1)
3582             if ((insert & 1) != 0 && index_map[j]->reaching_reg != NULL_RTX)
3583               {
3584                 struct expr *expr = index_map[j];
3585                 struct occr *occr;
3586
3587                 /* Now look at each deleted occurrence of this expression.  */
3588                 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
3589                   {
3590                     if (! occr->deleted_p)
3591                       continue;
3592
3593                     /* Insert this expression on this edge if it would
3594                        reach the deleted occurrence in BB.  */
3595                     if (!TEST_BIT (inserted[e], j))
3596                       {
3597                         rtx insn;
3598                         edge eg = INDEX_EDGE (edge_list, e);
3599
3600                         /* We can't insert anything on an abnormal and
3601                            critical edge, so we insert the insn at the end of
3602                            the previous block. There are several alternatives
3603                            detailed in Morgans book P277 (sec 10.5) for
3604                            handling this situation.  This one is easiest for
3605                            now.  */
3606
3607                         if (eg->flags & EDGE_ABNORMAL)
3608                           insert_insn_end_basic_block (index_map[j], bb, 0);
3609                         else
3610                           {
3611                             insn = process_insert_insn (index_map[j]);
3612                             insert_insn_on_edge (insn, eg);
3613                           }
3614
3615                         if (dump_file)
3616                           {
3617                             fprintf (dump_file, "PRE: edge (%d,%d), ",
3618                                      bb->index,
3619                                      INDEX_EDGE_SUCC_BB (edge_list, e)->index);
3620                             fprintf (dump_file, "copy expression %d\n",
3621                                      expr->bitmap_index);
3622                           }
3623
3624                         update_ld_motion_stores (expr);
3625                         SET_BIT (inserted[e], j);
3626                         did_insert = 1;
3627                         gcse_create_count++;
3628                       }
3629                   }
3630               }
3631         }
3632     }
3633
3634   sbitmap_vector_free (inserted);
3635   return did_insert;
3636 }
3637
3638 /* Copy the result of EXPR->EXPR generated by INSN to EXPR->REACHING_REG.
3639    Given "old_reg <- expr" (INSN), instead of adding after it
3640      reaching_reg <- old_reg
3641    it's better to do the following:
3642      reaching_reg <- expr
3643      old_reg      <- reaching_reg
3644    because this way copy propagation can discover additional PRE
3645    opportunities.  But if this fails, we try the old way.
3646    When "expr" is a store, i.e.
3647    given "MEM <- old_reg", instead of adding after it
3648      reaching_reg <- old_reg
3649    it's better to add it before as follows:
3650      reaching_reg <- old_reg
3651      MEM          <- reaching_reg.  */
3652
3653 static void
3654 pre_insert_copy_insn (struct expr *expr, rtx insn)
3655 {
3656   rtx reg = expr->reaching_reg;
3657   int regno = REGNO (reg);
3658   int indx = expr->bitmap_index;
3659   rtx pat = PATTERN (insn);
3660   rtx set, first_set, new_insn;
3661   rtx old_reg;
3662   int i;
3663
3664   /* This block matches the logic in hash_scan_insn.  */
3665   switch (GET_CODE (pat))
3666     {
3667     case SET:
3668       set = pat;
3669       break;
3670
3671     case PARALLEL:
3672       /* Search through the parallel looking for the set whose
3673          source was the expression that we're interested in.  */
3674       first_set = NULL_RTX;
3675       set = NULL_RTX;
3676       for (i = 0; i < XVECLEN (pat, 0); i++)
3677         {
3678           rtx x = XVECEXP (pat, 0, i);
3679           if (GET_CODE (x) == SET)
3680             {
3681               /* If the source was a REG_EQUAL or REG_EQUIV note, we
3682                  may not find an equivalent expression, but in this
3683                  case the PARALLEL will have a single set.  */
3684               if (first_set == NULL_RTX)
3685                 first_set = x;
3686               if (expr_equiv_p (SET_SRC (x), expr->expr))
3687                 {
3688                   set = x;
3689                   break;
3690                 }
3691             }
3692         }
3693
3694       gcc_assert (first_set);
3695       if (set == NULL_RTX)
3696         set = first_set;
3697       break;
3698
3699     default:
3700       gcc_unreachable ();
3701     }
3702
3703   if (REG_P (SET_DEST (set)))
3704     {
3705       old_reg = SET_DEST (set);
3706       /* Check if we can modify the set destination in the original insn.  */
3707       if (validate_change (insn, &SET_DEST (set), reg, 0))
3708         {
3709           new_insn = gen_move_insn (old_reg, reg);
3710           new_insn = emit_insn_after (new_insn, insn);
3711         }
3712       else
3713         {
3714           new_insn = gen_move_insn (reg, old_reg);
3715           new_insn = emit_insn_after (new_insn, insn);
3716         }
3717     }
3718   else /* This is possible only in case of a store to memory.  */
3719     {
3720       old_reg = SET_SRC (set);
3721       new_insn = gen_move_insn (reg, old_reg);
3722
3723       /* Check if we can modify the set source in the original insn.  */
3724       if (validate_change (insn, &SET_SRC (set), reg, 0))
3725         new_insn = emit_insn_before (new_insn, insn);
3726       else
3727         new_insn = emit_insn_after (new_insn, insn);
3728     }
3729
3730   gcse_create_count++;
3731
3732   if (dump_file)
3733     fprintf (dump_file,
3734              "PRE: bb %d, insn %d, copy expression %d in insn %d to reg %d\n",
3735               BLOCK_FOR_INSN (insn)->index, INSN_UID (new_insn), indx,
3736               INSN_UID (insn), regno);
3737 }
3738
3739 /* Copy available expressions that reach the redundant expression
3740    to `reaching_reg'.  */
3741
3742 static void
3743 pre_insert_copies (void)
3744 {
3745   unsigned int i, added_copy;
3746   struct expr *expr;
3747   struct occr *occr;
3748   struct occr *avail;
3749
3750   /* For each available expression in the table, copy the result to
3751      `reaching_reg' if the expression reaches a deleted one.
3752
3753      ??? The current algorithm is rather brute force.
3754      Need to do some profiling.  */
3755
3756   for (i = 0; i < expr_hash_table.size; i++)
3757     for (expr = expr_hash_table.table[i]; expr != NULL; expr = expr->next_same_hash)
3758       {
3759         /* If the basic block isn't reachable, PPOUT will be TRUE.  However,
3760            we don't want to insert a copy here because the expression may not
3761            really be redundant.  So only insert an insn if the expression was
3762            deleted.  This test also avoids further processing if the
3763            expression wasn't deleted anywhere.  */
3764         if (expr->reaching_reg == NULL)
3765           continue;
3766
3767         /* Set when we add a copy for that expression.  */
3768         added_copy = 0;
3769
3770         for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
3771           {
3772             if (! occr->deleted_p)
3773               continue;
3774
3775             for (avail = expr->avail_occr; avail != NULL; avail = avail->next)
3776               {
3777                 rtx insn = avail->insn;
3778
3779                 /* No need to handle this one if handled already.  */
3780                 if (avail->copied_p)
3781                   continue;
3782
3783                 /* Don't handle this one if it's a redundant one.  */
3784                 if (INSN_DELETED_P (insn))
3785                   continue;
3786
3787                 /* Or if the expression doesn't reach the deleted one.  */
3788                 if (! pre_expr_reaches_here_p (BLOCK_FOR_INSN (avail->insn),
3789                                                expr,
3790                                                BLOCK_FOR_INSN (occr->insn)))
3791                   continue;
3792
3793                 added_copy = 1;
3794
3795                 /* Copy the result of avail to reaching_reg.  */
3796                 pre_insert_copy_insn (expr, insn);
3797                 avail->copied_p = 1;
3798               }
3799           }
3800
3801           if (added_copy)
3802             update_ld_motion_stores (expr);
3803       }
3804 }
3805
3806 /* Emit move from SRC to DEST noting the equivalence with expression computed
3807    in INSN.  */
3808 static rtx
3809 gcse_emit_move_after (rtx src, rtx dest, rtx insn)
3810 {
3811   rtx new_rtx;
3812   rtx set = single_set (insn), set2;
3813   rtx note;
3814   rtx eqv;
3815
3816   /* This should never fail since we're creating a reg->reg copy
3817      we've verified to be valid.  */
3818
3819   new_rtx = emit_insn_after (gen_move_insn (dest, src), insn);
3820
3821   /* Note the equivalence for local CSE pass.  */
3822   set2 = single_set (new_rtx);
3823   if (!set2 || !rtx_equal_p (SET_DEST (set2), dest))
3824     return new_rtx;
3825   if ((note = find_reg_equal_equiv_note (insn)))
3826     eqv = XEXP (note, 0);
3827   else
3828     eqv = SET_SRC (set);
3829
3830   set_unique_reg_note (new_rtx, REG_EQUAL, copy_insn_1 (eqv));
3831
3832   return new_rtx;
3833 }
3834
3835 /* Delete redundant computations.
3836    Deletion is done by changing the insn to copy the `reaching_reg' of
3837    the expression into the result of the SET.  It is left to later passes
3838    (cprop, cse2, flow, combine, regmove) to propagate the copy or eliminate it.
3839
3840    Returns nonzero if a change is made.  */
3841
3842 static int
3843 pre_delete (void)
3844 {
3845   unsigned int i;
3846   int changed;
3847   struct expr *expr;
3848   struct occr *occr;
3849
3850   changed = 0;
3851   for (i = 0; i < expr_hash_table.size; i++)
3852     for (expr = expr_hash_table.table[i];
3853          expr != NULL;
3854          expr = expr->next_same_hash)
3855       {
3856         int indx = expr->bitmap_index;
3857
3858         /* We only need to search antic_occr since we require
3859            ANTLOC != 0.  */
3860
3861         for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
3862           {
3863             rtx insn = occr->insn;
3864             rtx set;
3865             basic_block bb = BLOCK_FOR_INSN (insn);
3866
3867             /* We only delete insns that have a single_set.  */
3868             if (TEST_BIT (pre_delete_map[bb->index], indx)
3869                 && (set = single_set (insn)) != 0
3870                 && dbg_cnt (pre_insn))
3871               {
3872                 /* Create a pseudo-reg to store the result of reaching
3873                    expressions into.  Get the mode for the new pseudo from
3874                    the mode of the original destination pseudo.  */
3875                 if (expr->reaching_reg == NULL)
3876                   expr->reaching_reg = gen_reg_rtx_and_attrs (SET_DEST (set));
3877
3878                 gcse_emit_move_after (expr->reaching_reg, SET_DEST (set), insn);
3879                 delete_insn (insn);
3880                 occr->deleted_p = 1;
3881                 changed = 1;
3882                 gcse_subst_count++;
3883
3884                 if (dump_file)
3885                   {
3886                     fprintf (dump_file,
3887                              "PRE: redundant insn %d (expression %d) in ",
3888                                INSN_UID (insn), indx);
3889                     fprintf (dump_file, "bb %d, reaching reg is %d\n",
3890                              bb->index, REGNO (expr->reaching_reg));
3891                   }
3892               }
3893           }
3894       }
3895
3896   return changed;
3897 }
3898
3899 /* Perform GCSE optimizations using PRE.
3900    This is called by one_pre_gcse_pass after all the dataflow analysis
3901    has been done.
3902
3903    This is based on the original Morel-Renvoise paper Fred Chow's thesis, and
3904    lazy code motion from Knoop, Ruthing and Steffen as described in Advanced
3905    Compiler Design and Implementation.
3906
3907    ??? A new pseudo reg is created to hold the reaching expression.  The nice
3908    thing about the classical approach is that it would try to use an existing
3909    reg.  If the register can't be adequately optimized [i.e. we introduce
3910    reload problems], one could add a pass here to propagate the new register
3911    through the block.
3912
3913    ??? We don't handle single sets in PARALLELs because we're [currently] not
3914    able to copy the rest of the parallel when we insert copies to create full
3915    redundancies from partial redundancies.  However, there's no reason why we
3916    can't handle PARALLELs in the cases where there are no partial
3917    redundancies.  */
3918
3919 static int
3920 pre_gcse (void)
3921 {
3922   unsigned int i;
3923   int did_insert, changed;
3924   struct expr **index_map;
3925   struct expr *expr;
3926
3927   /* Compute a mapping from expression number (`bitmap_index') to
3928      hash table entry.  */
3929
3930   index_map = XCNEWVEC (struct expr *, expr_hash_table.n_elems);
3931   for (i = 0; i < expr_hash_table.size; i++)
3932     for (expr = expr_hash_table.table[i]; expr != NULL; expr = expr->next_same_hash)
3933       index_map[expr->bitmap_index] = expr;
3934
3935   /* Delete the redundant insns first so that
3936      - we know what register to use for the new insns and for the other
3937        ones with reaching expressions
3938      - we know which insns are redundant when we go to create copies  */
3939
3940   changed = pre_delete ();
3941   did_insert = pre_edge_insert (edge_list, index_map);
3942
3943   /* In other places with reaching expressions, copy the expression to the
3944      specially allocated pseudo-reg that reaches the redundant expr.  */
3945   pre_insert_copies ();
3946   if (did_insert)
3947     {
3948       commit_edge_insertions ();
3949       changed = 1;
3950     }
3951
3952   free (index_map);
3953   return changed;
3954 }
3955
3956 /* Top level routine to perform one PRE GCSE pass.
3957
3958    Return nonzero if a change was made.  */
3959
3960 static int
3961 one_pre_gcse_pass (void)
3962 {
3963   int changed = 0;
3964
3965   gcse_subst_count = 0;
3966   gcse_create_count = 0;
3967
3968   /* Return if there's nothing to do, or it is too expensive.  */
3969   if (n_basic_blocks <= NUM_FIXED_BLOCKS + 1
3970       || is_too_expensive (_("PRE disabled")))
3971     return 0;
3972
3973   /* We need alias.  */
3974   init_alias_analysis ();
3975
3976   bytes_used = 0;
3977   gcc_obstack_init (&gcse_obstack);
3978   alloc_gcse_mem ();
3979
3980   alloc_hash_table (&expr_hash_table, 0);
3981   add_noreturn_fake_exit_edges ();
3982   if (flag_gcse_lm)
3983     compute_ld_motion_mems ();
3984
3985   compute_hash_table (&expr_hash_table);
3986   trim_ld_motion_mems ();
3987   if (dump_file)
3988     dump_hash_table (dump_file, "Expression", &expr_hash_table);
3989
3990   if (expr_hash_table.n_elems > 0)
3991     {
3992       alloc_pre_mem (last_basic_block, expr_hash_table.n_elems);
3993       compute_pre_data ();
3994       changed |= pre_gcse ();
3995       free_edge_list (edge_list);
3996       free_pre_mem ();
3997     }
3998
3999   free_ldst_mems ();
4000   remove_fake_exit_edges ();
4001   free_hash_table (&expr_hash_table);
4002
4003   free_gcse_mem ();
4004   obstack_free (&gcse_obstack, NULL);
4005
4006   /* We are finished with alias.  */
4007   end_alias_analysis ();
4008
4009   if (dump_file)
4010     {
4011       fprintf (dump_file, "PRE GCSE of %s, %d basic blocks, %d bytes needed, ",
4012                current_function_name (), n_basic_blocks, bytes_used);
4013       fprintf (dump_file, "%d substs, %d insns created\n",
4014                gcse_subst_count, gcse_create_count);
4015     }
4016
4017   return changed;
4018 }
4019 \f
4020 /* If X contains any LABEL_REF's, add REG_LABEL_OPERAND notes for them
4021    to INSN.  If such notes are added to an insn which references a
4022    CODE_LABEL, the LABEL_NUSES count is incremented.  We have to add
4023    that note, because the following loop optimization pass requires
4024    them.  */
4025
4026 /* ??? If there was a jump optimization pass after gcse and before loop,
4027    then we would not need to do this here, because jump would add the
4028    necessary REG_LABEL_OPERAND and REG_LABEL_TARGET notes.  */
4029
4030 static void
4031 add_label_notes (rtx x, rtx insn)
4032 {
4033   enum rtx_code code = GET_CODE (x);
4034   int i, j;
4035   const char *fmt;
4036
4037   if (code == LABEL_REF && !LABEL_REF_NONLOCAL_P (x))
4038     {
4039       /* This code used to ignore labels that referred to dispatch tables to
4040          avoid flow generating (slightly) worse code.
4041
4042          We no longer ignore such label references (see LABEL_REF handling in
4043          mark_jump_label for additional information).  */
4044
4045       /* There's no reason for current users to emit jump-insns with
4046          such a LABEL_REF, so we don't have to handle REG_LABEL_TARGET
4047          notes.  */
4048       gcc_assert (!JUMP_P (insn));
4049       add_reg_note (insn, REG_LABEL_OPERAND, XEXP (x, 0));
4050
4051       if (LABEL_P (XEXP (x, 0)))
4052         LABEL_NUSES (XEXP (x, 0))++;
4053
4054       return;
4055     }
4056
4057   for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
4058     {
4059       if (fmt[i] == 'e')
4060         add_label_notes (XEXP (x, i), insn);
4061       else if (fmt[i] == 'E')
4062         for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4063           add_label_notes (XVECEXP (x, i, j), insn);
4064     }
4065 }
4066
4067 /* Compute transparent outgoing information for each block.
4068
4069    An expression is transparent to an edge unless it is killed by
4070    the edge itself.  This can only happen with abnormal control flow,
4071    when the edge is traversed through a call.  This happens with
4072    non-local labels and exceptions.
4073
4074    This would not be necessary if we split the edge.  While this is
4075    normally impossible for abnormal critical edges, with some effort
4076    it should be possible with exception handling, since we still have
4077    control over which handler should be invoked.  But due to increased
4078    EH table sizes, this may not be worthwhile.  */
4079
4080 static void
4081 compute_transpout (void)
4082 {
4083   basic_block bb;
4084   unsigned int i;
4085   struct expr *expr;
4086
4087   sbitmap_vector_ones (transpout, last_basic_block);
4088
4089   FOR_EACH_BB (bb)
4090     {
4091       /* Note that flow inserted a nop at the end of basic blocks that
4092          end in call instructions for reasons other than abnormal
4093          control flow.  */
4094       if (! CALL_P (BB_END (bb)))
4095         continue;
4096
4097       for (i = 0; i < expr_hash_table.size; i++)
4098         for (expr = expr_hash_table.table[i]; expr ; expr = expr->next_same_hash)
4099           if (MEM_P (expr->expr))
4100             {
4101               if (GET_CODE (XEXP (expr->expr, 0)) == SYMBOL_REF
4102                   && CONSTANT_POOL_ADDRESS_P (XEXP (expr->expr, 0)))
4103                 continue;
4104
4105               /* ??? Optimally, we would use interprocedural alias
4106                  analysis to determine if this mem is actually killed
4107                  by this call.  */
4108               RESET_BIT (transpout[bb->index], expr->bitmap_index);
4109             }
4110     }
4111 }
4112
4113 /* Code Hoisting variables and subroutines.  */
4114
4115 /* Very busy expressions.  */
4116 static sbitmap *hoist_vbein;
4117 static sbitmap *hoist_vbeout;
4118
4119 /* Hoistable expressions.  */
4120 static sbitmap *hoist_exprs;
4121
4122 /* ??? We could compute post dominators and run this algorithm in
4123    reverse to perform tail merging, doing so would probably be
4124    more effective than the tail merging code in jump.c.
4125
4126    It's unclear if tail merging could be run in parallel with
4127    code hoisting.  It would be nice.  */
4128
4129 /* Allocate vars used for code hoisting analysis.  */
4130
4131 static void
4132 alloc_code_hoist_mem (int n_blocks, int n_exprs)
4133 {
4134   antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
4135   transp = sbitmap_vector_alloc (n_blocks, n_exprs);
4136   comp = sbitmap_vector_alloc (n_blocks, n_exprs);
4137
4138   hoist_vbein = sbitmap_vector_alloc (n_blocks, n_exprs);
4139   hoist_vbeout = sbitmap_vector_alloc (n_blocks, n_exprs);
4140   hoist_exprs = sbitmap_vector_alloc (n_blocks, n_exprs);
4141   transpout = sbitmap_vector_alloc (n_blocks, n_exprs);
4142 }
4143
4144 /* Free vars used for code hoisting analysis.  */
4145
4146 static void
4147 free_code_hoist_mem (void)
4148 {
4149   sbitmap_vector_free (antloc);
4150   sbitmap_vector_free (transp);
4151   sbitmap_vector_free (comp);
4152
4153   sbitmap_vector_free (hoist_vbein);
4154   sbitmap_vector_free (hoist_vbeout);
4155   sbitmap_vector_free (hoist_exprs);
4156   sbitmap_vector_free (transpout);
4157
4158   free_dominance_info (CDI_DOMINATORS);
4159 }
4160
4161 /* Compute the very busy expressions at entry/exit from each block.
4162
4163    An expression is very busy if all paths from a given point
4164    compute the expression.  */
4165
4166 static void
4167 compute_code_hoist_vbeinout (void)
4168 {
4169   int changed, passes;
4170   basic_block bb;
4171
4172   sbitmap_vector_zero (hoist_vbeout, last_basic_block);
4173   sbitmap_vector_zero (hoist_vbein, last_basic_block);
4174
4175   passes = 0;
4176   changed = 1;
4177
4178   while (changed)
4179     {
4180       changed = 0;
4181
4182       /* We scan the blocks in the reverse order to speed up
4183          the convergence.  */
4184       FOR_EACH_BB_REVERSE (bb)
4185         {
4186           if (bb->next_bb != EXIT_BLOCK_PTR)
4187             sbitmap_intersection_of_succs (hoist_vbeout[bb->index],
4188                                            hoist_vbein, bb->index);
4189
4190           changed |= sbitmap_a_or_b_and_c_cg (hoist_vbein[bb->index],
4191                                               antloc[bb->index],
4192                                               hoist_vbeout[bb->index],
4193                                               transp[bb->index]);
4194         }
4195
4196       passes++;
4197     }
4198
4199   if (dump_file)
4200     fprintf (dump_file, "hoisting vbeinout computation: %d passes\n", passes);
4201 }
4202
4203 /* Top level routine to do the dataflow analysis needed by code hoisting.  */
4204
4205 static void
4206 compute_code_hoist_data (void)
4207 {
4208   compute_local_properties (transp, comp, antloc, &expr_hash_table);
4209   compute_transpout ();
4210   compute_code_hoist_vbeinout ();
4211   calculate_dominance_info (CDI_DOMINATORS);
4212   if (dump_file)
4213     fprintf (dump_file, "\n");
4214 }
4215
4216 /* Determine if the expression identified by EXPR_INDEX would
4217    reach BB unimpared if it was placed at the end of EXPR_BB.
4218
4219    It's unclear exactly what Muchnick meant by "unimpared".  It seems
4220    to me that the expression must either be computed or transparent in
4221    *every* block in the path(s) from EXPR_BB to BB.  Any other definition
4222    would allow the expression to be hoisted out of loops, even if
4223    the expression wasn't a loop invariant.
4224
4225    Contrast this to reachability for PRE where an expression is
4226    considered reachable if *any* path reaches instead of *all*
4227    paths.  */
4228
4229 static int
4230 hoist_expr_reaches_here_p (basic_block expr_bb, int expr_index, basic_block bb, char *visited)
4231 {
4232   edge pred;
4233   edge_iterator ei;
4234   int visited_allocated_locally = 0;
4235
4236
4237   if (visited == NULL)
4238     {
4239       visited_allocated_locally = 1;
4240       visited = XCNEWVEC (char, last_basic_block);
4241     }
4242
4243   FOR_EACH_EDGE (pred, ei, bb->preds)
4244     {
4245       basic_block pred_bb = pred->src;
4246
4247       if (pred->src == ENTRY_BLOCK_PTR)
4248         break;
4249       else if (pred_bb == expr_bb)
4250         continue;
4251       else if (visited[pred_bb->index])
4252         continue;
4253
4254       /* Does this predecessor generate this expression?  */
4255       else if (TEST_BIT (comp[pred_bb->index], expr_index))
4256         break;
4257       else if (! TEST_BIT (transp[pred_bb->index], expr_index))
4258         break;
4259
4260       /* Not killed.  */
4261       else
4262         {
4263           visited[pred_bb->index] = 1;
4264           if (! hoist_expr_reaches_here_p (expr_bb, expr_index,
4265                                            pred_bb, visited))
4266             break;
4267         }
4268     }
4269   if (visited_allocated_locally)
4270     free (visited);
4271
4272   return (pred == NULL);
4273 }
4274 \f
4275 /* Actually perform code hoisting.  */
4276
4277 static int
4278 hoist_code (void)
4279 {
4280   basic_block bb, dominated;
4281   VEC (basic_block, heap) *domby;
4282   unsigned int i,j;
4283   struct expr **index_map;
4284   struct expr *expr;
4285   int changed = 0;
4286
4287   sbitmap_vector_zero (hoist_exprs, last_basic_block);
4288
4289   /* Compute a mapping from expression number (`bitmap_index') to
4290      hash table entry.  */
4291
4292   index_map = XCNEWVEC (struct expr *, expr_hash_table.n_elems);
4293   for (i = 0; i < expr_hash_table.size; i++)
4294     for (expr = expr_hash_table.table[i]; expr != NULL; expr = expr->next_same_hash)
4295       index_map[expr->bitmap_index] = expr;
4296
4297   /* Walk over each basic block looking for potentially hoistable
4298      expressions, nothing gets hoisted from the entry block.  */
4299   FOR_EACH_BB (bb)
4300     {
4301       int found = 0;
4302       int insn_inserted_p;
4303
4304       domby = get_dominated_by (CDI_DOMINATORS, bb);
4305       /* Examine each expression that is very busy at the exit of this
4306          block.  These are the potentially hoistable expressions.  */
4307       for (i = 0; i < hoist_vbeout[bb->index]->n_bits; i++)
4308         {
4309           int hoistable = 0;
4310
4311           if (TEST_BIT (hoist_vbeout[bb->index], i)
4312               && TEST_BIT (transpout[bb->index], i))
4313             {
4314               /* We've found a potentially hoistable expression, now
4315                  we look at every block BB dominates to see if it
4316                  computes the expression.  */
4317               for (j = 0; VEC_iterate (basic_block, domby, j, dominated); j++)
4318                 {
4319                   /* Ignore self dominance.  */
4320                   if (bb == dominated)
4321                     continue;
4322                   /* We've found a dominated block, now see if it computes
4323                      the busy expression and whether or not moving that
4324                      expression to the "beginning" of that block is safe.  */
4325                   if (!TEST_BIT (antloc[dominated->index], i))
4326                     continue;
4327
4328                   /* Note if the expression would reach the dominated block
4329                      unimpared if it was placed at the end of BB.
4330
4331                      Keep track of how many times this expression is hoistable
4332                      from a dominated block into BB.  */
4333                   if (hoist_expr_reaches_here_p (bb, i, dominated, NULL))
4334                     hoistable++;
4335                 }
4336
4337               /* If we found more than one hoistable occurrence of this
4338                  expression, then note it in the bitmap of expressions to
4339                  hoist.  It makes no sense to hoist things which are computed
4340                  in only one BB, and doing so tends to pessimize register
4341                  allocation.  One could increase this value to try harder
4342                  to avoid any possible code expansion due to register
4343                  allocation issues; however experiments have shown that
4344                  the vast majority of hoistable expressions are only movable
4345                  from two successors, so raising this threshold is likely
4346                  to nullify any benefit we get from code hoisting.  */
4347               if (hoistable > 1)
4348                 {
4349                   SET_BIT (hoist_exprs[bb->index], i);
4350                   found = 1;
4351                 }
4352             }
4353         }
4354       /* If we found nothing to hoist, then quit now.  */
4355       if (! found)
4356         {
4357           VEC_free (basic_block, heap, domby);
4358           continue;
4359         }
4360
4361       /* Loop over all the hoistable expressions.  */
4362       for (i = 0; i < hoist_exprs[bb->index]->n_bits; i++)
4363         {
4364           /* We want to insert the expression into BB only once, so
4365              note when we've inserted it.  */
4366           insn_inserted_p = 0;
4367
4368           /* These tests should be the same as the tests above.  */
4369           if (TEST_BIT (hoist_exprs[bb->index], i))
4370             {
4371               /* We've found a potentially hoistable expression, now
4372                  we look at every block BB dominates to see if it
4373                  computes the expression.  */
4374               for (j = 0; VEC_iterate (basic_block, domby, j, dominated); j++)
4375                 {
4376                   /* Ignore self dominance.  */
4377                   if (bb == dominated)
4378                     continue;
4379
4380                   /* We've found a dominated block, now see if it computes
4381                      the busy expression and whether or not moving that
4382                      expression to the "beginning" of that block is safe.  */
4383                   if (!TEST_BIT (antloc[dominated->index], i))
4384                     continue;
4385
4386                   /* The expression is computed in the dominated block and
4387                      it would be safe to compute it at the start of the
4388                      dominated block.  Now we have to determine if the
4389                      expression would reach the dominated block if it was
4390                      placed at the end of BB.  */
4391                   if (hoist_expr_reaches_here_p (bb, i, dominated, NULL))
4392                     {
4393                       struct expr *expr = index_map[i];
4394                       struct occr *occr = expr->antic_occr;
4395                       rtx insn;
4396                       rtx set;
4397
4398                       /* Find the right occurrence of this expression.  */
4399                       while (BLOCK_FOR_INSN (occr->insn) != dominated && occr)
4400                         occr = occr->next;
4401
4402                       gcc_assert (occr);
4403                       insn = occr->insn;
4404                       set = single_set (insn);
4405                       gcc_assert (set);
4406
4407                       /* Create a pseudo-reg to store the result of reaching
4408                          expressions into.  Get the mode for the new pseudo
4409                          from the mode of the original destination pseudo.  */
4410                       if (expr->reaching_reg == NULL)
4411                         expr->reaching_reg
4412                           = gen_reg_rtx_and_attrs (SET_DEST (set));
4413
4414                       gcse_emit_move_after (expr->reaching_reg, SET_DEST (set), insn);
4415                       delete_insn (insn);
4416                       occr->deleted_p = 1;
4417                       changed = 1;
4418                       gcse_subst_count++;
4419
4420                       if (!insn_inserted_p)
4421                         {
4422                           insert_insn_end_basic_block (index_map[i], bb, 0);
4423                           insn_inserted_p = 1;
4424                         }
4425                     }
4426                 }
4427             }
4428         }
4429       VEC_free (basic_block, heap, domby);
4430     }
4431
4432   free (index_map);
4433
4434   return changed;
4435 }
4436
4437 /* Top level routine to perform one code hoisting (aka unification) pass
4438
4439    Return nonzero if a change was made.  */
4440
4441 static int
4442 one_code_hoisting_pass (void)
4443 {
4444   int changed = 0;
4445
4446   gcse_subst_count = 0;
4447   gcse_create_count = 0;
4448
4449   /* Return if there's nothing to do, or it is too expensive.  */
4450   if (n_basic_blocks <= NUM_FIXED_BLOCKS + 1
4451       || is_too_expensive (_("GCSE disabled")))
4452     return 0;
4453
4454   /* We need alias.  */
4455   init_alias_analysis ();
4456
4457   bytes_used = 0;
4458   gcc_obstack_init (&gcse_obstack);
4459   alloc_gcse_mem ();
4460
4461   alloc_hash_table (&expr_hash_table, 0);
4462   compute_hash_table (&expr_hash_table);
4463   if (dump_file)
4464     dump_hash_table (dump_file, "Code Hosting Expressions", &expr_hash_table);
4465
4466   if (expr_hash_table.n_elems > 0)
4467     {
4468       alloc_code_hoist_mem (last_basic_block, expr_hash_table.n_elems);
4469       compute_code_hoist_data ();
4470       changed = hoist_code ();
4471       free_code_hoist_mem ();
4472     }
4473
4474   free_hash_table (&expr_hash_table);
4475   free_gcse_mem ();
4476   obstack_free (&gcse_obstack, NULL);
4477
4478   /* We are finished with alias.  */
4479   end_alias_analysis ();
4480
4481   if (dump_file)
4482     {
4483       fprintf (dump_file, "HOIST of %s, %d basic blocks, %d bytes needed, ",
4484                current_function_name (), n_basic_blocks, bytes_used);
4485       fprintf (dump_file, "%d substs, %d insns created\n",
4486                gcse_subst_count, gcse_create_count);
4487     }
4488
4489   return changed;
4490 }
4491 \f
4492 /*  Here we provide the things required to do store motion towards
4493     the exit. In order for this to be effective, gcse also needed to
4494     be taught how to move a load when it is kill only by a store to itself.
4495
4496             int i;
4497             float a[10];
4498
4499             void foo(float scale)
4500             {
4501               for (i=0; i<10; i++)
4502                 a[i] *= scale;
4503             }
4504
4505     'i' is both loaded and stored to in the loop. Normally, gcse cannot move
4506     the load out since its live around the loop, and stored at the bottom
4507     of the loop.
4508
4509       The 'Load Motion' referred to and implemented in this file is
4510     an enhancement to gcse which when using edge based lcm, recognizes
4511     this situation and allows gcse to move the load out of the loop.
4512
4513       Once gcse has hoisted the load, store motion can then push this
4514     load towards the exit, and we end up with no loads or stores of 'i'
4515     in the loop.  */
4516
4517 static hashval_t
4518 pre_ldst_expr_hash (const void *p)
4519 {
4520   int do_not_record_p = 0;
4521   const struct ls_expr *const x = (const struct ls_expr *) p;
4522   return hash_rtx (x->pattern, GET_MODE (x->pattern), &do_not_record_p, NULL, false);
4523 }
4524
4525 static int
4526 pre_ldst_expr_eq (const void *p1, const void *p2)
4527 {
4528   const struct ls_expr *const ptr1 = (const struct ls_expr *) p1,
4529     *const ptr2 = (const struct ls_expr *) p2;
4530   return expr_equiv_p (ptr1->pattern, ptr2->pattern);
4531 }
4532
4533 /* This will search the ldst list for a matching expression. If it
4534    doesn't find one, we create one and initialize it.  */
4535
4536 static struct ls_expr *
4537 ldst_entry (rtx x)
4538 {
4539   int do_not_record_p = 0;
4540   struct ls_expr * ptr;
4541   unsigned int hash;
4542   void **slot;
4543   struct ls_expr e;
4544
4545   hash = hash_rtx (x, GET_MODE (x), &do_not_record_p,
4546                    NULL,  /*have_reg_qty=*/false);
4547
4548   e.pattern = x;
4549   slot = htab_find_slot_with_hash (pre_ldst_table, &e, hash, INSERT);
4550   if (*slot)
4551     return (struct ls_expr *)*slot;
4552
4553   ptr = XNEW (struct ls_expr);
4554
4555   ptr->next         = pre_ldst_mems;
4556   ptr->expr         = NULL;
4557   ptr->pattern      = x;
4558   ptr->pattern_regs = NULL_RTX;
4559   ptr->loads        = NULL_RTX;
4560   ptr->stores       = NULL_RTX;
4561   ptr->reaching_reg = NULL_RTX;
4562   ptr->invalid      = 0;
4563   ptr->index        = 0;
4564   ptr->hash_index   = hash;
4565   pre_ldst_mems     = ptr;
4566   *slot = ptr;
4567
4568   return ptr;
4569 }
4570
4571 /* Free up an individual ldst entry.  */
4572
4573 static void
4574 free_ldst_entry (struct ls_expr * ptr)
4575 {
4576   free_INSN_LIST_list (& ptr->loads);
4577   free_INSN_LIST_list (& ptr->stores);
4578
4579   free (ptr);
4580 }
4581
4582 /* Free up all memory associated with the ldst list.  */
4583
4584 static void
4585 free_ldst_mems (void)
4586 {
4587   if (pre_ldst_table)
4588     htab_delete (pre_ldst_table);
4589   pre_ldst_table = NULL;
4590
4591   while (pre_ldst_mems)
4592     {
4593       struct ls_expr * tmp = pre_ldst_mems;
4594
4595       pre_ldst_mems = pre_ldst_mems->next;
4596
4597       free_ldst_entry (tmp);
4598     }
4599
4600   pre_ldst_mems = NULL;
4601 }
4602
4603 /* Dump debugging info about the ldst list.  */
4604
4605 static void
4606 print_ldst_list (FILE * file)
4607 {
4608   struct ls_expr * ptr;
4609
4610   fprintf (file, "LDST list: \n");
4611
4612   for (ptr = first_ls_expr (); ptr != NULL; ptr = next_ls_expr (ptr))
4613     {
4614       fprintf (file, "  Pattern (%3d): ", ptr->index);
4615
4616       print_rtl (file, ptr->pattern);
4617
4618       fprintf (file, "\n         Loads : ");
4619
4620       if (ptr->loads)
4621         print_rtl (file, ptr->loads);
4622       else
4623         fprintf (file, "(nil)");
4624
4625       fprintf (file, "\n        Stores : ");
4626
4627       if (ptr->stores)
4628         print_rtl (file, ptr->stores);
4629       else
4630         fprintf (file, "(nil)");
4631
4632       fprintf (file, "\n\n");
4633     }
4634
4635   fprintf (file, "\n");
4636 }
4637
4638 /* Returns 1 if X is in the list of ldst only expressions.  */
4639
4640 static struct ls_expr *
4641 find_rtx_in_ldst (rtx x)
4642 {
4643   struct ls_expr e;
4644   void **slot;
4645   if (!pre_ldst_table)
4646     return NULL;
4647   e.pattern = x;
4648   slot = htab_find_slot (pre_ldst_table, &e, NO_INSERT);
4649   if (!slot || ((struct ls_expr *)*slot)->invalid)
4650     return NULL;
4651   return (struct ls_expr *) *slot;
4652 }
4653
4654 /* Return first item in the list.  */
4655
4656 static inline struct ls_expr *
4657 first_ls_expr (void)
4658 {
4659   return pre_ldst_mems;
4660 }
4661
4662 /* Return the next item in the list after the specified one.  */
4663
4664 static inline struct ls_expr *
4665 next_ls_expr (struct ls_expr * ptr)
4666 {
4667   return ptr->next;
4668 }
4669 \f
4670 /* Load Motion for loads which only kill themselves.  */
4671
4672 /* Return true if x is a simple MEM operation, with no registers or
4673    side effects. These are the types of loads we consider for the
4674    ld_motion list, otherwise we let the usual aliasing take care of it.  */
4675
4676 static int
4677 simple_mem (const_rtx x)
4678 {
4679   if (! MEM_P (x))
4680     return 0;
4681
4682   if (MEM_VOLATILE_P (x))
4683     return 0;
4684
4685   if (GET_MODE (x) == BLKmode)
4686     return 0;
4687
4688   /* If we are handling exceptions, we must be careful with memory references
4689      that may trap. If we are not, the behavior is undefined, so we may just
4690      continue.  */
4691   if (flag_non_call_exceptions && may_trap_p (x))
4692     return 0;
4693
4694   if (side_effects_p (x))
4695     return 0;
4696
4697   /* Do not consider function arguments passed on stack.  */
4698   if (reg_mentioned_p (stack_pointer_rtx, x))
4699     return 0;
4700
4701   if (flag_float_store && FLOAT_MODE_P (GET_MODE (x)))
4702     return 0;
4703
4704   return 1;
4705 }
4706
4707 /* Make sure there isn't a buried reference in this pattern anywhere.
4708    If there is, invalidate the entry for it since we're not capable
4709    of fixing it up just yet.. We have to be sure we know about ALL
4710    loads since the aliasing code will allow all entries in the
4711    ld_motion list to not-alias itself.  If we miss a load, we will get
4712    the wrong value since gcse might common it and we won't know to
4713    fix it up.  */
4714
4715 static void
4716 invalidate_any_buried_refs (rtx x)
4717 {
4718   const char * fmt;
4719   int i, j;
4720   struct ls_expr * ptr;
4721
4722   /* Invalidate it in the list.  */
4723   if (MEM_P (x) && simple_mem (x))
4724     {
4725       ptr = ldst_entry (x);
4726       ptr->invalid = 1;
4727     }
4728
4729   /* Recursively process the insn.  */
4730   fmt = GET_RTX_FORMAT (GET_CODE (x));
4731
4732   for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
4733     {
4734       if (fmt[i] == 'e')
4735         invalidate_any_buried_refs (XEXP (x, i));
4736       else if (fmt[i] == 'E')
4737         for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4738           invalidate_any_buried_refs (XVECEXP (x, i, j));
4739     }
4740 }
4741
4742 /* Find all the 'simple' MEMs which are used in LOADs and STORES.  Simple
4743    being defined as MEM loads and stores to symbols, with no side effects
4744    and no registers in the expression.  For a MEM destination, we also
4745    check that the insn is still valid if we replace the destination with a
4746    REG, as is done in update_ld_motion_stores.  If there are any uses/defs
4747    which don't match this criteria, they are invalidated and trimmed out
4748    later.  */
4749
4750 static void
4751 compute_ld_motion_mems (void)
4752 {
4753   struct ls_expr * ptr;
4754   basic_block bb;
4755   rtx insn;
4756
4757   pre_ldst_mems = NULL;
4758   pre_ldst_table = htab_create (13, pre_ldst_expr_hash,
4759                                 pre_ldst_expr_eq, NULL);
4760
4761   FOR_EACH_BB (bb)
4762     {
4763       FOR_BB_INSNS (bb, insn)
4764         {
4765           if (NONDEBUG_INSN_P (insn))
4766             {
4767               if (GET_CODE (PATTERN (insn)) == SET)
4768                 {
4769                   rtx src = SET_SRC (PATTERN (insn));
4770                   rtx dest = SET_DEST (PATTERN (insn));
4771
4772                   /* Check for a simple LOAD...  */
4773                   if (MEM_P (src) && simple_mem (src))
4774                     {
4775                       ptr = ldst_entry (src);
4776                       if (REG_P (dest))
4777                         ptr->loads = alloc_INSN_LIST (insn, ptr->loads);
4778                       else
4779                         ptr->invalid = 1;
4780                     }
4781                   else
4782                     {
4783                       /* Make sure there isn't a buried load somewhere.  */
4784                       invalidate_any_buried_refs (src);
4785                     }
4786
4787                   /* Check for stores. Don't worry about aliased ones, they
4788                      will block any movement we might do later. We only care
4789                      about this exact pattern since those are the only
4790                      circumstance that we will ignore the aliasing info.  */
4791                   if (MEM_P (dest) && simple_mem (dest))
4792                     {
4793                       ptr = ldst_entry (dest);
4794
4795                       if (! MEM_P (src)
4796                           && GET_CODE (src) != ASM_OPERANDS
4797                           /* Check for REG manually since want_to_gcse_p
4798                              returns 0 for all REGs.  */
4799                           && can_assign_to_reg_without_clobbers_p (src))
4800                         ptr->stores = alloc_INSN_LIST (insn, ptr->stores);
4801                       else
4802                         ptr->invalid = 1;
4803                     }
4804                 }
4805               else
4806                 invalidate_any_buried_refs (PATTERN (insn));
4807             }
4808         }
4809     }
4810 }
4811
4812 /* Remove any references that have been either invalidated or are not in the
4813    expression list for pre gcse.  */
4814
4815 static void
4816 trim_ld_motion_mems (void)
4817 {
4818   struct ls_expr * * last = & pre_ldst_mems;
4819   struct ls_expr * ptr = pre_ldst_mems;
4820
4821   while (ptr != NULL)
4822     {
4823       struct expr * expr;
4824
4825       /* Delete if entry has been made invalid.  */
4826       if (! ptr->invalid)
4827         {
4828           /* Delete if we cannot find this mem in the expression list.  */
4829           unsigned int hash = ptr->hash_index % expr_hash_table.size;
4830
4831           for (expr = expr_hash_table.table[hash];
4832                expr != NULL;
4833                expr = expr->next_same_hash)
4834             if (expr_equiv_p (expr->expr, ptr->pattern))
4835               break;
4836         }
4837       else
4838         expr = (struct expr *) 0;
4839
4840       if (expr)
4841         {
4842           /* Set the expression field if we are keeping it.  */
4843           ptr->expr = expr;
4844           last = & ptr->next;
4845           ptr = ptr->next;
4846         }
4847       else
4848         {
4849           *last = ptr->next;
4850           htab_remove_elt_with_hash (pre_ldst_table, ptr, ptr->hash_index);
4851           free_ldst_entry (ptr);
4852           ptr = * last;
4853         }
4854     }
4855
4856   /* Show the world what we've found.  */
4857   if (dump_file && pre_ldst_mems != NULL)
4858     print_ldst_list (dump_file);
4859 }
4860
4861 /* This routine will take an expression which we are replacing with
4862    a reaching register, and update any stores that are needed if
4863    that expression is in the ld_motion list.  Stores are updated by
4864    copying their SRC to the reaching register, and then storing
4865    the reaching register into the store location. These keeps the
4866    correct value in the reaching register for the loads.  */
4867
4868 static void
4869 update_ld_motion_stores (struct expr * expr)
4870 {
4871   struct ls_expr * mem_ptr;
4872
4873   if ((mem_ptr = find_rtx_in_ldst (expr->expr)))
4874     {
4875       /* We can try to find just the REACHED stores, but is shouldn't
4876          matter to set the reaching reg everywhere...  some might be
4877          dead and should be eliminated later.  */
4878
4879       /* We replace (set mem expr) with (set reg expr) (set mem reg)
4880          where reg is the reaching reg used in the load.  We checked in
4881          compute_ld_motion_mems that we can replace (set mem expr) with
4882          (set reg expr) in that insn.  */
4883       rtx list = mem_ptr->stores;
4884
4885       for ( ; list != NULL_RTX; list = XEXP (list, 1))
4886         {
4887           rtx insn = XEXP (list, 0);
4888           rtx pat = PATTERN (insn);
4889           rtx src = SET_SRC (pat);
4890           rtx reg = expr->reaching_reg;
4891           rtx copy;
4892
4893           /* If we've already copied it, continue.  */
4894           if (expr->reaching_reg == src)
4895             continue;
4896
4897           if (dump_file)
4898             {
4899               fprintf (dump_file, "PRE:  store updated with reaching reg ");
4900               print_rtl (dump_file, expr->reaching_reg);
4901               fprintf (dump_file, ":\n  ");
4902               print_inline_rtx (dump_file, insn, 8);
4903               fprintf (dump_file, "\n");
4904             }
4905
4906           copy = gen_move_insn (reg, copy_rtx (SET_SRC (pat)));
4907           emit_insn_before (copy, insn);
4908           SET_SRC (pat) = reg;
4909           df_insn_rescan (insn);
4910
4911           /* un-recognize this pattern since it's probably different now.  */
4912           INSN_CODE (insn) = -1;
4913           gcse_create_count++;
4914         }
4915     }
4916 }
4917 \f
4918 /* Return true if the graph is too expensive to optimize. PASS is the
4919    optimization about to be performed.  */
4920
4921 static bool
4922 is_too_expensive (const char *pass)
4923 {
4924   /* Trying to perform global optimizations on flow graphs which have
4925      a high connectivity will take a long time and is unlikely to be
4926      particularly useful.
4927
4928      In normal circumstances a cfg should have about twice as many
4929      edges as blocks.  But we do not want to punish small functions
4930      which have a couple switch statements.  Rather than simply
4931      threshold the number of blocks, uses something with a more
4932      graceful degradation.  */
4933   if (n_edges > 20000 + n_basic_blocks * 4)
4934     {
4935       warning (OPT_Wdisabled_optimization,
4936                "%s: %d basic blocks and %d edges/basic block",
4937                pass, n_basic_blocks, n_edges / n_basic_blocks);
4938
4939       return true;
4940     }
4941
4942   /* If allocating memory for the cprop bitmap would take up too much
4943      storage it's better just to disable the optimization.  */
4944   if ((n_basic_blocks
4945        * SBITMAP_SET_SIZE (max_reg_num ())
4946        * sizeof (SBITMAP_ELT_TYPE)) > MAX_GCSE_MEMORY)
4947     {
4948       warning (OPT_Wdisabled_optimization,
4949                "%s: %d basic blocks and %d registers",
4950                pass, n_basic_blocks, max_reg_num ());
4951
4952       return true;
4953     }
4954
4955   return false;
4956 }
4957
4958 \f
4959 /* Main function for the CPROP pass.  */
4960
4961 static int
4962 one_cprop_pass (void)
4963 {
4964   int changed = 0;
4965
4966   /* Return if there's nothing to do, or it is too expensive.  */
4967   if (n_basic_blocks <= NUM_FIXED_BLOCKS + 1
4968       || is_too_expensive (_ ("const/copy propagation disabled")))
4969     return 0;
4970
4971   global_const_prop_count = local_const_prop_count = 0;
4972   global_copy_prop_count = local_copy_prop_count = 0;
4973
4974   bytes_used = 0;
4975   gcc_obstack_init (&gcse_obstack);
4976   alloc_gcse_mem ();
4977
4978   /* Do a local const/copy propagation pass first.  The global pass
4979      only handles global opportunities.
4980      If the local pass changes something, remove any unreachable blocks
4981      because the CPROP global dataflow analysis may get into infinite
4982      loops for CFGs with unreachable blocks.
4983
4984      FIXME: This local pass should not be necessary after CSE (but for
4985             some reason it still is).  It is also (proven) not necessary
4986             to run the local pass right after FWPWOP.
4987
4988      FIXME: The global analysis would not get into infinite loops if it
4989             would use the DF solver (via df_simple_dataflow) instead of
4990             the solver implemented in this file.  */
4991   if (local_cprop_pass ())
4992     {
4993       delete_unreachable_blocks ();
4994       df_analyze ();
4995     }
4996
4997   /* Determine implicit sets.  */
4998   implicit_sets = XCNEWVEC (rtx, last_basic_block);
4999   find_implicit_sets ();
5000
5001   alloc_hash_table (&set_hash_table, 1);
5002   compute_hash_table (&set_hash_table);
5003
5004   /* Free implicit_sets before peak usage.  */
5005   free (implicit_sets);
5006   implicit_sets = NULL;
5007
5008   if (dump_file)
5009     dump_hash_table (dump_file, "SET", &set_hash_table);
5010   if (set_hash_table.n_elems > 0)
5011     {
5012       basic_block bb;
5013       rtx insn;
5014
5015       alloc_cprop_mem (last_basic_block, set_hash_table.n_elems);
5016       compute_cprop_data ();
5017
5018       FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR->next_bb->next_bb, EXIT_BLOCK_PTR, next_bb)
5019         {
5020           /* Reset tables used to keep track of what's still valid [since
5021              the start of the block].  */
5022           reset_opr_set_tables ();
5023
5024           FOR_BB_INSNS (bb, insn)
5025             if (INSN_P (insn))
5026               {
5027                 changed |= cprop_insn (insn);
5028
5029                 /* Keep track of everything modified by this insn.  */
5030                 /* ??? Need to be careful w.r.t. mods done to INSN.
5031                        Don't call mark_oprs_set if we turned the
5032                        insn into a NOTE.  */
5033                 if (! NOTE_P (insn))
5034                   mark_oprs_set (insn);
5035               }
5036         }
5037
5038       changed |= bypass_conditional_jumps ();
5039       free_cprop_mem ();
5040     }
5041
5042   free_hash_table (&set_hash_table);
5043   free_gcse_mem ();
5044   obstack_free (&gcse_obstack, NULL);
5045
5046   if (dump_file)
5047     {
5048       fprintf (dump_file, "CPROP of %s, %d basic blocks, %d bytes needed, ",
5049                current_function_name (), n_basic_blocks, bytes_used);
5050       fprintf (dump_file, "%d local const props, %d local copy props, ",
5051                local_const_prop_count, local_copy_prop_count);
5052       fprintf (dump_file, "%d global const props, %d global copy props\n\n",
5053                global_const_prop_count, global_copy_prop_count);
5054     }
5055
5056   return changed;
5057 }
5058
5059 \f
5060 /* All the passes implemented in this file.  Each pass has its
5061    own gate and execute function, and at the end of the file a
5062    pass definition for passes.c.
5063
5064    We do not construct an accurate cfg in functions which call
5065    setjmp, so none of these passes runs if the function calls
5066    setjmp.
5067    FIXME: Should just handle setjmp via REG_SETJMP notes.  */
5068
5069 static bool
5070 gate_rtl_cprop (void)
5071 {
5072   return optimize > 0 && flag_gcse
5073     && !cfun->calls_setjmp
5074     && dbg_cnt (cprop);
5075 }
5076
5077 static unsigned int
5078 execute_rtl_cprop (void)
5079 {
5080   delete_unreachable_blocks ();
5081   df_set_flags (DF_LR_RUN_DCE);
5082   df_analyze ();
5083   flag_rerun_cse_after_global_opts |= one_cprop_pass ();
5084   return 0;
5085 }
5086
5087 static bool
5088 gate_rtl_pre (void)
5089 {
5090   return optimize > 0 && flag_gcse
5091     && !cfun->calls_setjmp
5092     && optimize_function_for_speed_p (cfun)
5093     && dbg_cnt (pre);
5094 }
5095
5096 static unsigned int
5097 execute_rtl_pre (void)
5098 {
5099   delete_unreachable_blocks ();
5100   df_analyze ();
5101   flag_rerun_cse_after_global_opts |= one_pre_gcse_pass ();
5102   return 0;
5103 }
5104
5105 static bool
5106 gate_rtl_hoist (void)
5107 {
5108   return optimize > 0 && flag_gcse
5109     && !cfun->calls_setjmp
5110     /* It does not make sense to run code hoisting unless we are optimizing
5111        for code size -- it rarely makes programs faster, and can make then
5112        bigger if we did PRE (when optimizing for space, we don't run PRE).  */
5113     && optimize_function_for_size_p (cfun)
5114     && dbg_cnt (hoist);
5115 }
5116
5117 static unsigned int
5118 execute_rtl_hoist (void)
5119 {
5120   delete_unreachable_blocks ();
5121   df_analyze ();
5122   flag_rerun_cse_after_global_opts |= one_code_hoisting_pass ();
5123   return 0;
5124 }
5125
5126 struct rtl_opt_pass pass_rtl_cprop =
5127 {
5128  {
5129   RTL_PASS,
5130   "cprop",                              /* name */
5131   gate_rtl_cprop,                       /* gate */
5132   execute_rtl_cprop,                    /* execute */
5133   NULL,                                 /* sub */
5134   NULL,                                 /* next */
5135   0,                                    /* static_pass_number */
5136   TV_CPROP,                             /* tv_id */
5137   PROP_cfglayout,                       /* properties_required */
5138   0,                                    /* properties_provided */
5139   0,                                    /* properties_destroyed */
5140   0,                                    /* todo_flags_start */
5141   TODO_df_finish | TODO_verify_rtl_sharing |
5142   TODO_dump_func |
5143   TODO_verify_flow | TODO_ggc_collect   /* todo_flags_finish */
5144  }
5145 };
5146
5147 struct rtl_opt_pass pass_rtl_pre =
5148 {
5149  {
5150   RTL_PASS,
5151   "rtl pre",                            /* name */
5152   gate_rtl_pre,                         /* gate */
5153   execute_rtl_pre,                      /* execute */
5154   NULL,                                 /* sub */
5155   NULL,                                 /* next */
5156   0,                                    /* static_pass_number */
5157   TV_PRE,                               /* tv_id */
5158   PROP_cfglayout,                       /* properties_required */
5159   0,                                    /* properties_provided */
5160   0,                                    /* properties_destroyed */
5161   0,                                    /* todo_flags_start */
5162   TODO_df_finish | TODO_verify_rtl_sharing |
5163   TODO_dump_func |
5164   TODO_verify_flow | TODO_ggc_collect   /* todo_flags_finish */
5165  }
5166 };
5167
5168 struct rtl_opt_pass pass_rtl_hoist =
5169 {
5170  {
5171   RTL_PASS,
5172   "hoist",                              /* name */
5173   gate_rtl_hoist,                       /* gate */
5174   execute_rtl_hoist,                    /* execute */
5175   NULL,                                 /* sub */
5176   NULL,                                 /* next */
5177   0,                                    /* static_pass_number */
5178   TV_HOIST,                             /* tv_id */
5179   PROP_cfglayout,                       /* properties_required */
5180   0,                                    /* properties_provided */
5181   0,                                    /* properties_destroyed */
5182   0,                                    /* todo_flags_start */
5183   TODO_df_finish | TODO_verify_rtl_sharing |
5184   TODO_dump_func |
5185   TODO_verify_flow | TODO_ggc_collect   /* todo_flags_finish */
5186  }
5187 };
5188
5189 #include "gt-gcse.h"