gcc/gcse.c

   1 /* Global common subexpression elimination/Partial redundancy elimination
   2    and global constant/copy propagation for GNU compiler.
   3    Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005,
   4    2006, 2007, 2008, 2009 Free Software Foundation, Inc.
   5
   6 This file is part of GCC.
   7
   8 GCC is free software; you can redistribute it and/or modify it under
   9 the terms of the GNU General Public License as published by the Free
  10 Software Foundation; either version 3, or (at your option) any later
  11 version.
  12
  13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  16 for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GCC; see the file COPYING3.  If not see
  20 <http://www.gnu.org/licenses/>.  */
  21
  22 /* TODO
  23    - reordering of memory allocation and freeing to be more space efficient
  24    - do rough calc of how many regs are needed in each block, and a rough
  25      calc of how many regs are available in each class and use that to
  26      throttle back the code in cases where RTX_COST is minimal.
  27    - a store to the same address as a load does not kill the load if the
  28      source of the store is also the destination of the load.  Handling this
  29      allows more load motion, particularly out of loops.
  30
  31 */
  32
  33 /* References searched while implementing this.
  34
  35    Compilers Principles, Techniques and Tools
  36    Aho, Sethi, Ullman
  37    Addison-Wesley, 1988
  38
  39    Global Optimization by Suppression of Partial Redundancies
  40    E. Morel, C. Renvoise
  41    communications of the acm, Vol. 22, Num. 2, Feb. 1979
  42
  43    A Portable Machine-Independent Global Optimizer - Design and Measurements
  44    Frederick Chow
  45    Stanford Ph.D. thesis, Dec. 1983
  46
  47    A Fast Algorithm for Code Movement Optimization
  48    D.M. Dhamdhere
  49    SIGPLAN Notices, Vol. 23, Num. 10, Oct. 1988
  50
  51    A Solution to a Problem with Morel and Renvoise's
  52    Global Optimization by Suppression of Partial Redundancies
  53    K-H Drechsler, M.P. Stadel
  54    ACM TOPLAS, Vol. 10, Num. 4, Oct. 1988
  55
  56    Practical Adaptation of the Global Optimization
  57    Algorithm of Morel and Renvoise
  58    D.M. Dhamdhere
  59    ACM TOPLAS, Vol. 13, Num. 2. Apr. 1991
  60
  61    Efficiently Computing Static Single Assignment Form and the Control
  62    Dependence Graph
  63    R. Cytron, J. Ferrante, B.K. Rosen, M.N. Wegman, and F.K. Zadeck
  64    ACM TOPLAS, Vol. 13, Num. 4, Oct. 1991
  65
  66    Lazy Code Motion
  67    J. Knoop, O. Ruthing, B. Steffen
  68    ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI
  69
  70    What's In a Region?  Or Computing Control Dependence Regions in Near-Linear
  71    Time for Reducible Flow Control
  72    Thomas Ball
  73    ACM Letters on Programming Languages and Systems,
  74    Vol. 2, Num. 1-4, Mar-Dec 1993
  75
  76    An Efficient Representation for Sparse Sets
  77    Preston Briggs, Linda Torczon
  78    ACM Letters on Programming Languages and Systems,
  79    Vol. 2, Num. 1-4, Mar-Dec 1993
  80
  81    A Variation of Knoop, Ruthing, and Steffen's Lazy Code Motion
  82    K-H Drechsler, M.P. Stadel
  83    ACM SIGPLAN Notices, Vol. 28, Num. 5, May 1993
  84
  85    Partial Dead Code Elimination
  86    J. Knoop, O. Ruthing, B. Steffen
  87    ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
  88
  89    Effective Partial Redundancy Elimination
  90    P. Briggs, K.D. Cooper
  91    ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
  92
  93    The Program Structure Tree: Computing Control Regions in Linear Time
  94    R. Johnson, D. Pearson, K. Pingali
  95    ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
  96
  97    Optimal Code Motion: Theory and Practice
  98    J. Knoop, O. Ruthing, B. Steffen
  99    ACM TOPLAS, Vol. 16, Num. 4, Jul. 1994
 100
 101    The power of assignment motion
 102    J. Knoop, O. Ruthing, B. Steffen
 103    ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI
 104
 105    Global code motion / global value numbering
 106    C. Click
 107    ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI
 108
 109    Value Driven Redundancy Elimination
 110    L.T. Simpson
 111    Rice University Ph.D. thesis, Apr. 1996
 112
 113    Value Numbering
 114    L.T. Simpson
 115    Massively Scalar Compiler Project, Rice University, Sep. 1996
 116
 117    High Performance Compilers for Parallel Computing
 118    Michael Wolfe
 119    Addison-Wesley, 1996
 120
 121    Advanced Compiler Design and Implementation
 122    Steven Muchnick
 123    Morgan Kaufmann, 1997
 124
 125    Building an Optimizing Compiler
 126    Robert Morgan
 127    Digital Press, 1998
 128
 129    People wishing to speed up the code here should read:
 130      Elimination Algorithms for Data Flow Analysis
 131      B.G. Ryder, M.C. Paull
 132      ACM Computing Surveys, Vol. 18, Num. 3, Sep. 1986
 133
 134      How to Analyze Large Programs Efficiently and Informatively
 135      D.M. Dhamdhere, B.K. Rosen, F.K. Zadeck
 136      ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI
 137
 138    People wishing to do something different can find various possibilities
 139    in the above papers and elsewhere.
 140 */
 141
 142 #include "config.h"
 143 #include "system.h"
 144 #include "coretypes.h"
 145 #include "tm.h"
 146 #include "toplev.h"
 147
 148 #include "rtl.h"
 149 #include "tree.h"
 150 #include "tm_p.h"
 151 #include "regs.h"
 152 #include "hard-reg-set.h"
 153 #include "flags.h"
 154 #include "real.h"
 155 #include "insn-config.h"
 156 #include "recog.h"
 157 #include "basic-block.h"
 158 #include "output.h"
 159 #include "function.h"
 160 #include "expr.h"
 161 #include "except.h"
 162 #include "ggc.h"
 163 #include "params.h"
 164 #include "cselib.h"
 165 #include "intl.h"
 166 #include "obstack.h"
 167 #include "timevar.h"
 168 #include "tree-pass.h"
 169 #include "hashtab.h"
 170 #include "df.h"
 171 #include "dbgcnt.h"
 172 #include "target.h"
 173
 174 /* Propagate flow information through back edges and thus enable PRE's
 175    moving loop invariant calculations out of loops.
 176
 177    Originally this tended to create worse overall code, but several
 178    improvements during the development of PRE seem to have made following
 179    back edges generally a win.
 180
 181    Note much of the loop invariant code motion done here would normally
 182    be done by loop.c, which has more heuristics for when to move invariants
 183    out of loops.  At some point we might need to move some of those
 184    heuristics into gcse.c.  */
 185
 186 /* We support GCSE via Partial Redundancy Elimination.  PRE optimizations
 187    are a superset of those done by GCSE.
 188
 189    We perform the following steps:
 190
 191    1) Compute table of places where registers are set.
 192
 193    2) Perform copy/constant propagation.
 194
 195    3) Perform global cse using lazy code motion if not optimizing
 196       for size, or code hoisting if we are.
 197
 198    4) Perform another pass of copy/constant propagation.  Try to bypass
 199       conditional jumps if the condition can be computed from a value of
 200       an incoming edge.
 201
 202    5) Perform store motion.
 203
 204    Two passes of copy/constant propagation are done because the first one
 205    enables more GCSE and the second one helps to clean up the copies that
 206    GCSE creates.  This is needed more for PRE than for Classic because Classic
 207    GCSE will try to use an existing register containing the common
 208    subexpression rather than create a new one.  This is harder to do for PRE
 209    because of the code motion (which Classic GCSE doesn't do).
 210
 211    Expressions we are interested in GCSE-ing are of the form
 212    (set (pseudo-reg) (expression)).
 213    Function want_to_gcse_p says what these are.
 214
 215    In addition, expressions in REG_EQUAL notes are candidates for GXSE-ing.
 216    This allows PRE to hoist expressions that are expressed in multiple insns,
 217    such as comprex address calculations (e.g. for PIC code, or loads with a
 218    high part and as lowe part).
 219
 220    PRE handles moving invariant expressions out of loops (by treating them as
 221    partially redundant).
 222
 223    Eventually it would be nice to replace cse.c/gcse.c with SSA (static single
 224    assignment) based GVN (global value numbering).  L. T. Simpson's paper
 225    (Rice University) on value numbering is a useful reference for this.
 226
 227    **********************
 228
 229    We used to support multiple passes but there are diminishing returns in
 230    doing so.  The first pass usually makes 90% of the changes that are doable.
 231    A second pass can make a few more changes made possible by the first pass.
 232    Experiments show any further passes don't make enough changes to justify
 233    the expense.
 234
 235    A study of spec92 using an unlimited number of passes:
 236    [1 pass] = 1208 substitutions, [2] = 577, [3] = 202, [4] = 192, [5] = 83,
 237    [6] = 34, [7] = 17, [8] = 9, [9] = 4, [10] = 4, [11] = 2,
 238    [12] = 2, [13] = 1, [15] = 1, [16] = 2, [41] = 1
 239
 240    It was found doing copy propagation between each pass enables further
 241    substitutions.
 242
 243    This study was done before expressions in REG_EQUAL notes were added as
 244    candidate expressions for optimization, and before the GIMPLE optimizers
 245    were added.  Probably, multiple passes is even less efficient now than
 246    at the time when the study was conducted.
 247
 248    PRE is quite expensive in complicated functions because the DFA can take
 249    a while to converge.  Hence we only perform one pass.
 250
 251    **********************
 252
 253    The steps for PRE are:
 254
 255    1) Build the hash table of expressions we wish to GCSE (expr_hash_table).
 256
 257    2) Perform the data flow analysis for PRE.
 258
 259    3) Delete the redundant instructions
 260
 261    4) Insert the required copies [if any] that make the partially
 262       redundant instructions fully redundant.
 263
 264    5) For other reaching expressions, insert an instruction to copy the value
 265       to a newly created pseudo that will reach the redundant instruction.
 266
 267    The deletion is done first so that when we do insertions we
 268    know which pseudo reg to use.
 269
 270    Various papers have argued that PRE DFA is expensive (O(n^2)) and others
 271    argue it is not.  The number of iterations for the algorithm to converge
 272    is typically 2-4 so I don't view it as that expensive (relatively speaking).
 273
 274    PRE GCSE depends heavily on the second CSE pass to clean up the copies
 275    we create.  To make an expression reach the place where it's redundant,
 276    the result of the expression is copied to a new register, and the redundant
 277    expression is deleted by replacing it with this new register.  Classic GCSE
 278    doesn't have this problem as much as it computes the reaching defs of
 279    each register in each block and thus can try to use an existing
 280    register.  */
 281 \f
 282 /* GCSE global vars.  */
 283
 284 /* Set to non-zero if CSE should run after all GCSE optimizations are done.  */
 285 int flag_rerun_cse_after_global_opts;
 286
 287 /* An obstack for our working variables.  */
 288 static struct obstack gcse_obstack;
 289
 290 struct reg_use {rtx reg_rtx; };
 291
 292 /* Hash table of expressions.  */
 293
 294 struct expr
 295 {
 296   /* The expression (SET_SRC for expressions, PATTERN for assignments).  */
 297   rtx expr;
 298   /* Index in the available expression bitmaps.  */
 299   int bitmap_index;
 300   /* Next entry with the same hash.  */
 301   struct expr *next_same_hash;
 302   /* List of anticipatable occurrences in basic blocks in the function.
 303      An "anticipatable occurrence" is one that is the first occurrence in the
 304      basic block, the operands are not modified in the basic block prior
 305      to the occurrence and the output is not used between the start of
 306      the block and the occurrence.  */
 307   struct occr *antic_occr;
 308   /* List of available occurrence in basic blocks in the function.
 309      An "available occurrence" is one that is the last occurrence in the
 310      basic block and the operands are not modified by following statements in
 311      the basic block [including this insn].  */
 312   struct occr *avail_occr;
 313   /* Non-null if the computation is PRE redundant.
 314      The value is the newly created pseudo-reg to record a copy of the
 315      expression in all the places that reach the redundant copy.  */
 316   rtx reaching_reg;
 317 };
 318
 319 /* Occurrence of an expression.
 320    There is one per basic block.  If a pattern appears more than once the
 321    last appearance is used [or first for anticipatable expressions].  */
 322
 323 struct occr
 324 {
 325   /* Next occurrence of this expression.  */
 326   struct occr *next;
 327   /* The insn that computes the expression.  */
 328   rtx insn;
 329   /* Nonzero if this [anticipatable] occurrence has been deleted.  */
 330   char deleted_p;
 331   /* Nonzero if this [available] occurrence has been copied to
 332      reaching_reg.  */
 333   /* ??? This is mutually exclusive with deleted_p, so they could share
 334      the same byte.  */
 335   char copied_p;
 336 };
 337
 338 /* Expression and copy propagation hash tables.
 339    Each hash table is an array of buckets.
 340    ??? It is known that if it were an array of entries, structure elements
 341    `next_same_hash' and `bitmap_index' wouldn't be necessary.  However, it is
 342    not clear whether in the final analysis a sufficient amount of memory would
 343    be saved as the size of the available expression bitmaps would be larger
 344    [one could build a mapping table without holes afterwards though].
 345    Someday I'll perform the computation and figure it out.  */
 346
 347 struct hash_table_d
 348 {
 349   /* The table itself.
 350      This is an array of `expr_hash_table_size' elements.  */
 351   struct expr **table;
 352
 353   /* Size of the hash table, in elements.  */
 354   unsigned int size;
 355
 356   /* Number of hash table elements.  */
 357   unsigned int n_elems;
 358
 359   /* Whether the table is expression of copy propagation one.  */
 360   int set_p;
 361 };
 362
 363 /* Expression hash table.  */
 364 static struct hash_table_d expr_hash_table;
 365
 366 /* Copy propagation hash table.  */
 367 static struct hash_table_d set_hash_table;
 368
 369 /* This is a list of expressions which are MEMs and will be used by load
 370    or store motion.
 371    Load motion tracks MEMs which aren't killed by
 372    anything except itself. (i.e., loads and stores to a single location).
 373    We can then allow movement of these MEM refs with a little special
 374    allowance. (all stores copy the same value to the reaching reg used
 375    for the loads).  This means all values used to store into memory must have
 376    no side effects so we can re-issue the setter value.
 377    Store Motion uses this structure as an expression table to track stores
 378    which look interesting, and might be moveable towards the exit block.  */
 379
 380 struct ls_expr
 381 {
 382   struct expr * expr;           /* Gcse expression reference for LM.  */
 383   rtx pattern;                  /* Pattern of this mem.  */
 384   rtx pattern_regs;             /* List of registers mentioned by the mem.  */
 385   rtx loads;                    /* INSN list of loads seen.  */
 386   rtx stores;                   /* INSN list of stores seen.  */
 387   struct ls_expr * next;        /* Next in the list.  */
 388   int invalid;                  /* Invalid for some reason.  */
 389   int index;                    /* If it maps to a bitmap index.  */
 390   unsigned int hash_index;      /* Index when in a hash table.  */
 391   rtx reaching_reg;             /* Register to use when re-writing.  */
 392 };
 393
 394 /* Array of implicit set patterns indexed by basic block index.  */
 395 static rtx *implicit_sets;
 396
 397 /* Head of the list of load/store memory refs.  */
 398 static struct ls_expr * pre_ldst_mems = NULL;
 399
 400 /* Hashtable for the load/store memory refs.  */
 401 static htab_t pre_ldst_table = NULL;
 402
 403 /* Bitmap containing one bit for each register in the program.
 404    Used when performing GCSE to track which registers have been set since
 405    the start of the basic block.  */
 406 static regset reg_set_bitmap;
 407
 408 /* Array, indexed by basic block number for a list of insns which modify
 409    memory within that block.  */
 410 static rtx * modify_mem_list;
 411 static bitmap modify_mem_list_set;
 412
 413 /* This array parallels modify_mem_list, but is kept canonicalized.  */
 414 static rtx * canon_modify_mem_list;
 415
 416 /* Bitmap indexed by block numbers to record which blocks contain
 417    function calls.  */
 418 static bitmap blocks_with_calls;
 419
 420 /* Various variables for statistics gathering.  */
 421
 422 /* Memory used in a pass.
 423    This isn't intended to be absolutely precise.  Its intent is only
 424    to keep an eye on memory usage.  */
 425 static int bytes_used;
 426
 427 /* GCSE substitutions made.  */
 428 static int gcse_subst_count;
 429 /* Number of copy instructions created.  */
 430 static int gcse_create_count;
 431 /* Number of local constants propagated.  */
 432 static int local_const_prop_count;
 433 /* Number of local copies propagated.  */
 434 static int local_copy_prop_count;
 435 /* Number of global constants propagated.  */
 436 static int global_const_prop_count;
 437 /* Number of global copies propagated.  */
 438 static int global_copy_prop_count;
 439 \f
 440 /* For available exprs */
 441 static sbitmap *ae_kill;
 442 \f
 443 static void compute_can_copy (void);
 444 static void *gmalloc (size_t) ATTRIBUTE_MALLOC;
 445 static void *gcalloc (size_t, size_t) ATTRIBUTE_MALLOC;
 446 static void *gcse_alloc (unsigned long);
 447 static void alloc_gcse_mem (void);
 448 static void free_gcse_mem (void);
 449 static void hash_scan_insn (rtx, struct hash_table_d *);
 450 static void hash_scan_set (rtx, rtx, struct hash_table_d *);
 451 static void hash_scan_clobber (rtx, rtx, struct hash_table_d *);
 452 static void hash_scan_call (rtx, rtx, struct hash_table_d *);
 453 static int want_to_gcse_p (rtx);
 454 static bool gcse_constant_p (const_rtx);
 455 static int oprs_unchanged_p (const_rtx, const_rtx, int);
 456 static int oprs_anticipatable_p (const_rtx, const_rtx);
 457 static int oprs_available_p (const_rtx, const_rtx);
 458 static void insert_expr_in_table (rtx, enum machine_mode, rtx, int, int,
 459                                   struct hash_table_d *);
 460 static void insert_set_in_table (rtx, rtx, struct hash_table_d *);
 461 static unsigned int hash_expr (const_rtx, enum machine_mode, int *, int);
 462 static unsigned int hash_set (int, int);
 463 static int expr_equiv_p (const_rtx, const_rtx);
 464 static void record_last_reg_set_info (rtx, int);
 465 static void record_last_mem_set_info (rtx);
 466 static void record_last_set_info (rtx, const_rtx, void *);
 467 static void compute_hash_table (struct hash_table_d *);
 468 static void alloc_hash_table (int, struct hash_table_d *, int);
 469 static void free_hash_table (struct hash_table_d *);
 470 static void compute_hash_table_work (struct hash_table_d *);
 471 static void dump_hash_table (FILE *, const char *, struct hash_table_d *);
 472 static struct expr *lookup_set (unsigned int, struct hash_table_d *);
 473 static struct expr *next_set (unsigned int, struct expr *);
 474 static void reset_opr_set_tables (void);
 475 static int oprs_not_set_p (const_rtx, const_rtx);
 476 static void mark_call (rtx);
 477 static void mark_set (rtx, rtx);
 478 static void mark_clobber (rtx, rtx);
 479 static void mark_oprs_set (rtx);
 480 static void alloc_cprop_mem (int, int);
 481 static void free_cprop_mem (void);
 482 static void compute_transp (const_rtx, int, sbitmap *, int);
 483 static void compute_transpout (void);
 484 static void compute_local_properties (sbitmap *, sbitmap *, sbitmap *,
 485                                       struct hash_table_d *);
 486 static void compute_cprop_data (void);
 487 static void find_used_regs (rtx *, void *);
 488 static int try_replace_reg (rtx, rtx, rtx);
 489 static struct expr *find_avail_set (int, rtx);
 490 static int cprop_jump (basic_block, rtx, rtx, rtx, rtx);
 491 static void mems_conflict_for_gcse_p (rtx, const_rtx, void *);
 492 static int load_killed_in_block_p (const_basic_block, int, const_rtx, int);
 493 static void canon_list_insert (rtx, const_rtx, void *);
 494 static int cprop_insn (rtx);
 495 static void find_implicit_sets (void);
 496 static int one_cprop_pass (void);
 497 static bool constprop_register (rtx, rtx, rtx);
 498 static struct expr *find_bypass_set (int, int);
 499 static bool reg_killed_on_edge (const_rtx, const_edge);
 500 static int bypass_block (basic_block, rtx, rtx);
 501 static int bypass_conditional_jumps (void);
 502 static void alloc_pre_mem (int, int);
 503 static void free_pre_mem (void);
 504 static void compute_pre_data (void);
 505 static int pre_expr_reaches_here_p (basic_block, struct expr *,
 506                                     basic_block);
 507 static void insert_insn_end_basic_block (struct expr *, basic_block, int);
 508 static void pre_insert_copy_insn (struct expr *, rtx);
 509 static void pre_insert_copies (void);
 510 static int pre_delete (void);
 511 static int pre_gcse (void);
 512 static int one_pre_gcse_pass (void);
 513 static void add_label_notes (rtx, rtx);
 514 static void alloc_code_hoist_mem (int, int);
 515 static void free_code_hoist_mem (void);
 516 static void compute_code_hoist_vbeinout (void);
 517 static void compute_code_hoist_data (void);
 518 static int hoist_expr_reaches_here_p (basic_block, int, basic_block, char *);
 519 static int hoist_code (void);
 520 static int one_code_hoisting_pass (void);
 521 static rtx process_insert_insn (struct expr *);
 522 static int pre_edge_insert (struct edge_list *, struct expr **);
 523 static int pre_expr_reaches_here_p_work (basic_block, struct expr *,
 524                                          basic_block, char *);
 525 static struct ls_expr * ldst_entry (rtx);
 526 static void free_ldst_entry (struct ls_expr *);
 527 static void free_ldst_mems (void);
 528 static void print_ldst_list (FILE *);
 529 static struct ls_expr * find_rtx_in_ldst (rtx);
 530 static inline struct ls_expr * first_ls_expr (void);
 531 static inline struct ls_expr * next_ls_expr (struct ls_expr *);
 532 static int simple_mem (const_rtx);
 533 static void invalidate_any_buried_refs (rtx);
 534 static void compute_ld_motion_mems (void);
 535 static void trim_ld_motion_mems (void);
 536 static void update_ld_motion_stores (struct expr *);
 537 static void free_insn_expr_list_list (rtx *);
 538 static void clear_modify_mem_tables (void);
 539 static void free_modify_mem_tables (void);
 540 static rtx gcse_emit_move_after (rtx, rtx, rtx);
 541 static void local_cprop_find_used_regs (rtx *, void *);
 542 static bool do_local_cprop (rtx, rtx);
 543 static int local_cprop_pass (void);
 544 static bool is_too_expensive (const char *);
 545
 546 #define GNEW(T)                 ((T *) gmalloc (sizeof (T)))
 547 #define GCNEW(T)                ((T *) gcalloc (1, sizeof (T)))
 548
 549 #define GNEWVEC(T, N)           ((T *) gmalloc (sizeof (T) * (N)))
 550 #define GCNEWVEC(T, N)          ((T *) gcalloc ((N), sizeof (T)))
 551
 552 #define GNEWVAR(T, S)           ((T *) gmalloc ((S)))
 553 #define GCNEWVAR(T, S)          ((T *) gcalloc (1, (S)))
 554
 555 #define GOBNEW(T)               ((T *) gcse_alloc (sizeof (T)))
 556 #define GOBNEWVAR(T, S)         ((T *) gcse_alloc ((S)))
 557 \f
 558 /* Misc. utilities.  */
 559
 560 /* Nonzero for each mode that supports (set (reg) (reg)).
 561    This is trivially true for integer and floating point values.
 562    It may or may not be true for condition codes.  */
 563 static char can_copy[(int) NUM_MACHINE_MODES];
 564
 565 /* Compute which modes support reg/reg copy operations.  */
 566
 567 static void
 568 compute_can_copy (void)
 569 {
 570   int i;
 571 #ifndef AVOID_CCMODE_COPIES
 572   rtx reg, insn;
 573 #endif
 574   memset (can_copy, 0, NUM_MACHINE_MODES);
 575
 576   start_sequence ();
 577   for (i = 0; i < NUM_MACHINE_MODES; i++)
 578     if (GET_MODE_CLASS (i) == MODE_CC)
 579       {
 580 #ifdef AVOID_CCMODE_COPIES
 581         can_copy[i] = 0;
 582 #else
 583         reg = gen_rtx_REG ((enum machine_mode) i, LAST_VIRTUAL_REGISTER + 1);
 584         insn = emit_insn (gen_rtx_SET (VOIDmode, reg, reg));
 585         if (recog (PATTERN (insn), insn, NULL) >= 0)
 586           can_copy[i] = 1;
 587 #endif
 588       }
 589     else
 590       can_copy[i] = 1;
 591
 592   end_sequence ();
 593 }
 594
 595 /* Returns whether the mode supports reg/reg copy operations.  */
 596
 597 bool
 598 can_copy_p (enum machine_mode mode)
 599 {
 600   static bool can_copy_init_p = false;
 601
 602   if (! can_copy_init_p)
 603     {
 604       compute_can_copy ();
 605       can_copy_init_p = true;
 606     }
 607
 608   return can_copy[mode] != 0;
 609 }
 610
 611 \f
 612 /* Cover function to xmalloc to record bytes allocated.  */
 613
 614 static void *
 615 gmalloc (size_t size)
 616 {
 617   bytes_used += size;
 618   return xmalloc (size);
 619 }
 620
 621 /* Cover function to xcalloc to record bytes allocated.  */
 622
 623 static void *
 624 gcalloc (size_t nelem, size_t elsize)
 625 {
 626   bytes_used += nelem * elsize;
 627   return xcalloc (nelem, elsize);
 628 }
 629
 630 /* Cover function to obstack_alloc.  */
 631
 632 static void *
 633 gcse_alloc (unsigned long size)
 634 {
 635   bytes_used += size;
 636   return obstack_alloc (&gcse_obstack, size);
 637 }
 638
 639 /* Allocate memory for the reg/memory set tracking tables.
 640    This is called at the start of each pass.  */
 641
 642 static void
 643 alloc_gcse_mem (void)
 644 {
 645   /* Allocate vars to track sets of regs.  */
 646   reg_set_bitmap = BITMAP_ALLOC (NULL);
 647
 648   /* Allocate array to keep a list of insns which modify memory in each
 649      basic block.  */
 650   modify_mem_list = GCNEWVEC (rtx, last_basic_block);
 651   canon_modify_mem_list = GCNEWVEC (rtx, last_basic_block);
 652   modify_mem_list_set = BITMAP_ALLOC (NULL);
 653   blocks_with_calls = BITMAP_ALLOC (NULL);
 654 }
 655
 656 /* Free memory allocated by alloc_gcse_mem.  */
 657
 658 static void
 659 free_gcse_mem (void)
 660 {
 661   free_modify_mem_tables ();
 662   BITMAP_FREE (modify_mem_list_set);
 663   BITMAP_FREE (blocks_with_calls);
 664 }
 665 \f
 666 /* Compute the local properties of each recorded expression.
 667
 668    Local properties are those that are defined by the block, irrespective of
 669    other blocks.
 670
 671    An expression is transparent in a block if its operands are not modified
 672    in the block.
 673
 674    An expression is computed (locally available) in a block if it is computed
 675    at least once and expression would contain the same value if the
 676    computation was moved to the end of the block.
 677
 678    An expression is locally anticipatable in a block if it is computed at
 679    least once and expression would contain the same value if the computation
 680    was moved to the beginning of the block.
 681
 682    We call this routine for cprop, pre and code hoisting.  They all compute
 683    basically the same information and thus can easily share this code.
 684
 685    TRANSP, COMP, and ANTLOC are destination sbitmaps for recording local
 686    properties.  If NULL, then it is not necessary to compute or record that
 687    particular property.
 688
 689    TABLE controls which hash table to look at.  If it is  set hash table,
 690    additionally, TRANSP is computed as ~TRANSP, since this is really cprop's
 691    ABSALTERED.  */
 692
 693 static void
 694 compute_local_properties (sbitmap *transp, sbitmap *comp, sbitmap *antloc,
 695                           struct hash_table_d *table)
 696 {
 697   unsigned int i;
 698
 699   /* Initialize any bitmaps that were passed in.  */
 700   if (transp)
 701     {
 702       if (table->set_p)
 703         sbitmap_vector_zero (transp, last_basic_block);
 704       else
 705         sbitmap_vector_ones (transp, last_basic_block);
 706     }
 707
 708   if (comp)
 709     sbitmap_vector_zero (comp, last_basic_block);
 710   if (antloc)
 711     sbitmap_vector_zero (antloc, last_basic_block);
 712
 713   for (i = 0; i < table->size; i++)
 714     {
 715       struct expr *expr;
 716
 717       for (expr = table->table[i]; expr != NULL; expr = expr->next_same_hash)
 718         {
 719           int indx = expr->bitmap_index;
 720           struct occr *occr;
 721
 722           /* The expression is transparent in this block if it is not killed.
 723              We start by assuming all are transparent [none are killed], and
 724              then reset the bits for those that are.  */
 725           if (transp)
 726             compute_transp (expr->expr, indx, transp, table->set_p);
 727
 728           /* The occurrences recorded in antic_occr are exactly those that
 729              we want to set to nonzero in ANTLOC.  */
 730           if (antloc)
 731             for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
 732               {
 733                 SET_BIT (antloc[BLOCK_NUM (occr->insn)], indx);
 734
 735                 /* While we're scanning the table, this is a good place to
 736                    initialize this.  */
 737                 occr->deleted_p = 0;
 738               }
 739
 740           /* The occurrences recorded in avail_occr are exactly those that
 741              we want to set to nonzero in COMP.  */
 742           if (comp)
 743             for (occr = expr->avail_occr; occr != NULL; occr = occr->next)
 744               {
 745                 SET_BIT (comp[BLOCK_NUM (occr->insn)], indx);
 746
 747                 /* While we're scanning the table, this is a good place to
 748                    initialize this.  */
 749                 occr->copied_p = 0;
 750               }
 751
 752           /* While we're scanning the table, this is a good place to
 753              initialize this.  */
 754           expr->reaching_reg = 0;
 755         }
 756     }
 757 }
 758 \f
 759 /* Hash table support.  */
 760
 761 struct reg_avail_info
 762 {
 763   basic_block last_bb;
 764   int first_set;
 765   int last_set;
 766 };
 767
 768 static struct reg_avail_info *reg_avail_info;
 769 static basic_block current_bb;
 770
 771
 772 /* See whether X, the source of a set, is something we want to consider for
 773    GCSE.  */
 774
 775 static int
 776 want_to_gcse_p (rtx x)
 777 {
 778 #ifdef STACK_REGS
 779   /* On register stack architectures, don't GCSE constants from the
 780      constant pool, as the benefits are often swamped by the overhead
 781      of shuffling the register stack between basic blocks.  */
 782   if (IS_STACK_MODE (GET_MODE (x)))
 783     x = avoid_constant_pool_reference (x);
 784 #endif
 785
 786   switch (GET_CODE (x))
 787     {
 788     case REG:
 789     case SUBREG:
 790     case CONST_INT:
 791     case CONST_DOUBLE:
 792     case CONST_FIXED:
 793     case CONST_VECTOR:
 794     case CALL:
 795       return 0;
 796
 797     default:
 798       return can_assign_to_reg_without_clobbers_p (x);
 799     }
 800 }
 801
 802 /* Used internally by can_assign_to_reg_without_clobbers_p.  */
 803
 804 static GTY(()) rtx test_insn;
 805
 806 /* Return true if we can assign X to a pseudo register such that the
 807    resulting insn does not result in clobbering a hard register as a
 808    side-effect.
 809
 810    Additionally, if the target requires it, check that the resulting insn
 811    can be copied.  If it cannot, this means that X is special and probably
 812    has hidden side-effects we don't want to mess with.
 813
 814    This function is typically used by code motion passes, to verify
 815    that it is safe to insert an insn without worrying about clobbering
 816    maybe live hard regs.  */
 817
 818 bool
 819 can_assign_to_reg_without_clobbers_p (rtx x)
 820 {
 821   int num_clobbers = 0;
 822   int icode;
 823
 824   /* If this is a valid operand, we are OK.  If it's VOIDmode, we aren't.  */
 825   if (general_operand (x, GET_MODE (x)))
 826     return 1;
 827   else if (GET_MODE (x) == VOIDmode)
 828     return 0;
 829
 830   /* Otherwise, check if we can make a valid insn from it.  First initialize
 831      our test insn if we haven't already.  */
 832   if (test_insn == 0)
 833     {
 834       test_insn
 835         = make_insn_raw (gen_rtx_SET (VOIDmode,
 836                                       gen_rtx_REG (word_mode,
 837                                                    FIRST_PSEUDO_REGISTER * 2),
 838                                       const0_rtx));
 839       NEXT_INSN (test_insn) = PREV_INSN (test_insn) = 0;
 840     }
 841
 842   /* Now make an insn like the one we would make when GCSE'ing and see if
 843      valid.  */
 844   PUT_MODE (SET_DEST (PATTERN (test_insn)), GET_MODE (x));
 845   SET_SRC (PATTERN (test_insn)) = x;
 846
 847   icode = recog (PATTERN (test_insn), test_insn, &num_clobbers);
 848   if (icode < 0)
 849     return false;
 850
 851   if (num_clobbers > 0 && added_clobbers_hard_reg_p (icode))
 852     return false;
 853
 854   if (targetm.cannot_copy_insn_p && targetm.cannot_copy_insn_p (test_insn))
 855     return false;
 856
 857   return true;
 858 }
 859
 860 /* Return nonzero if the operands of expression X are unchanged from the
 861    start of INSN's basic block up to but not including INSN (if AVAIL_P == 0),
 862    or from INSN to the end of INSN's basic block (if AVAIL_P != 0).  */
 863
 864 static int
 865 oprs_unchanged_p (const_rtx x, const_rtx insn, int avail_p)
 866 {
 867   int i, j;
 868   enum rtx_code code;
 869   const char *fmt;
 870
 871   if (x == 0)
 872     return 1;
 873
 874   code = GET_CODE (x);
 875   switch (code)
 876     {
 877     case REG:
 878       {
 879         struct reg_avail_info *info = &reg_avail_info[REGNO (x)];
 880
 881         if (info->last_bb != current_bb)
 882           return 1;
 883         if (avail_p)
 884           return info->last_set < DF_INSN_LUID (insn);
 885         else
 886           return info->first_set >= DF_INSN_LUID (insn);
 887       }
 888
 889     case MEM:
 890       if (load_killed_in_block_p (current_bb, DF_INSN_LUID (insn),
 891                                   x, avail_p))
 892         return 0;
 893       else
 894         return oprs_unchanged_p (XEXP (x, 0), insn, avail_p);
 895
 896     case PRE_DEC:
 897     case PRE_INC:
 898     case POST_DEC:
 899     case POST_INC:
 900     case PRE_MODIFY:
 901     case POST_MODIFY:
 902       return 0;
 903
 904     case PC:
 905     case CC0: /*FIXME*/
 906     case CONST:
 907     case CONST_INT:
 908     case CONST_DOUBLE:
 909     case CONST_FIXED:
 910     case CONST_VECTOR:
 911     case SYMBOL_REF:
 912     case LABEL_REF:
 913     case ADDR_VEC:
 914     case ADDR_DIFF_VEC:
 915       return 1;
 916
 917     default:
 918       break;
 919     }
 920
 921   for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
 922     {
 923       if (fmt[i] == 'e')
 924         {
 925           /* If we are about to do the last recursive call needed at this
 926              level, change it into iteration.  This function is called enough
 927              to be worth it.  */
 928           if (i == 0)
 929             return oprs_unchanged_p (XEXP (x, i), insn, avail_p);
 930
 931           else if (! oprs_unchanged_p (XEXP (x, i), insn, avail_p))
 932             return 0;
 933         }
 934       else if (fmt[i] == 'E')
 935         for (j = 0; j < XVECLEN (x, i); j++)
 936           if (! oprs_unchanged_p (XVECEXP (x, i, j), insn, avail_p))
 937             return 0;
 938     }
 939
 940   return 1;
 941 }
 942
 943 /* Used for communication between mems_conflict_for_gcse_p and
 944    load_killed_in_block_p.  Nonzero if mems_conflict_for_gcse_p finds a
 945    conflict between two memory references.  */
 946 static int gcse_mems_conflict_p;
 947
 948 /* Used for communication between mems_conflict_for_gcse_p and
 949    load_killed_in_block_p.  A memory reference for a load instruction,
 950    mems_conflict_for_gcse_p will see if a memory store conflicts with
 951    this memory load.  */
 952 static const_rtx gcse_mem_operand;
 953
 954 /* DEST is the output of an instruction.  If it is a memory reference, and
 955    possibly conflicts with the load found in gcse_mem_operand, then set
 956    gcse_mems_conflict_p to a nonzero value.  */
 957
 958 static void
 959 mems_conflict_for_gcse_p (rtx dest, const_rtx setter ATTRIBUTE_UNUSED,
 960                           void *data ATTRIBUTE_UNUSED)
 961 {
 962   while (GET_CODE (dest) == SUBREG
 963          || GET_CODE (dest) == ZERO_EXTRACT
 964          || GET_CODE (dest) == STRICT_LOW_PART)
 965     dest = XEXP (dest, 0);
 966
 967   /* If DEST is not a MEM, then it will not conflict with the load.  Note
 968      that function calls are assumed to clobber memory, but are handled
 969      elsewhere.  */
 970   if (! MEM_P (dest))
 971     return;
 972
 973   /* If we are setting a MEM in our list of specially recognized MEMs,
 974      don't mark as killed this time.  */
 975
 976   if (expr_equiv_p (dest, gcse_mem_operand) && pre_ldst_mems != NULL)
 977     {
 978       if (!find_rtx_in_ldst (dest))
 979         gcse_mems_conflict_p = 1;
 980       return;
 981     }
 982
 983   if (true_dependence (dest, GET_MODE (dest), gcse_mem_operand,
 984                        rtx_addr_varies_p))
 985     gcse_mems_conflict_p = 1;
 986 }
 987
 988 /* Return nonzero if the expression in X (a memory reference) is killed
 989    in block BB before or after the insn with the LUID in UID_LIMIT.
 990    AVAIL_P is nonzero for kills after UID_LIMIT, and zero for kills
 991    before UID_LIMIT.
 992
 993    To check the entire block, set UID_LIMIT to max_uid + 1 and
 994    AVAIL_P to 0.  */
 995
 996 static int
 997 load_killed_in_block_p (const_basic_block bb, int uid_limit, const_rtx x, int avail_p)
 998 {
 999   rtx list_entry = modify_mem_list[bb->index];
1000
1001   /* If this is a readonly then we aren't going to be changing it.  */
1002   if (MEM_READONLY_P (x))
1003     return 0;
1004
1005   while (list_entry)
1006     {
1007       rtx setter;
1008       /* Ignore entries in the list that do not apply.  */
1009       if ((avail_p
1010            && DF_INSN_LUID (XEXP (list_entry, 0)) < uid_limit)
1011           || (! avail_p
1012               && DF_INSN_LUID (XEXP (list_entry, 0)) > uid_limit))
1013         {
1014           list_entry = XEXP (list_entry, 1);
1015           continue;
1016         }
1017
1018       setter = XEXP (list_entry, 0);
1019
1020       /* If SETTER is a call everything is clobbered.  Note that calls
1021          to pure functions are never put on the list, so we need not
1022          worry about them.  */
1023       if (CALL_P (setter))
1024         return 1;
1025
1026       /* SETTER must be an INSN of some kind that sets memory.  Call
1027          note_stores to examine each hunk of memory that is modified.
1028
1029          The note_stores interface is pretty limited, so we have to
1030          communicate via global variables.  Yuk.  */
1031       gcse_mem_operand = x;
1032       gcse_mems_conflict_p = 0;
1033       note_stores (PATTERN (setter), mems_conflict_for_gcse_p, NULL);
1034       if (gcse_mems_conflict_p)
1035         return 1;
1036       list_entry = XEXP (list_entry, 1);
1037     }
1038   return 0;
1039 }
1040
1041 /* Return nonzero if the operands of expression X are unchanged from
1042    the start of INSN's basic block up to but not including INSN.  */
1043
1044 static int
1045 oprs_anticipatable_p (const_rtx x, const_rtx insn)
1046 {
1047   return oprs_unchanged_p (x, insn, 0);
1048 }
1049
1050 /* Return nonzero if the operands of expression X are unchanged from
1051    INSN to the end of INSN's basic block.  */
1052
1053 static int
1054 oprs_available_p (const_rtx x, const_rtx insn)
1055 {
1056   return oprs_unchanged_p (x, insn, 1);
1057 }
1058
1059 /* Hash expression X.
1060
1061    MODE is only used if X is a CONST_INT.  DO_NOT_RECORD_P is a boolean
1062    indicating if a volatile operand is found or if the expression contains
1063    something we don't want to insert in the table.  HASH_TABLE_SIZE is
1064    the current size of the hash table to be probed.  */
1065
1066 static unsigned int
1067 hash_expr (const_rtx x, enum machine_mode mode, int *do_not_record_p,
1068            int hash_table_size)
1069 {
1070   unsigned int hash;
1071
1072   *do_not_record_p = 0;
1073
1074   hash = hash_rtx (x, mode, do_not_record_p,
1075                    NULL,  /*have_reg_qty=*/false);
1076   return hash % hash_table_size;
1077 }
1078
1079 /* Hash a set of register REGNO.
1080
1081    Sets are hashed on the register that is set.  This simplifies the PRE copy
1082    propagation code.
1083
1084    ??? May need to make things more elaborate.  Later, as necessary.  */
1085
1086 static unsigned int
1087 hash_set (int regno, int hash_table_size)
1088 {
1089   unsigned int hash;
1090
1091   hash = regno;
1092   return hash % hash_table_size;
1093 }
1094
1095 /* Return nonzero if exp1 is equivalent to exp2.  */
1096
1097 static int
1098 expr_equiv_p (const_rtx x, const_rtx y)
1099 {
1100   return exp_equiv_p (x, y, 0, true);
1101 }
1102
1103 /* Insert expression X in INSN in the hash TABLE.
1104    If it is already present, record it as the last occurrence in INSN's
1105    basic block.
1106
1107    MODE is the mode of the value X is being stored into.
1108    It is only used if X is a CONST_INT.
1109
1110    ANTIC_P is nonzero if X is an anticipatable expression.
1111    AVAIL_P is nonzero if X is an available expression.  */
1112
1113 static void
1114 insert_expr_in_table (rtx x, enum machine_mode mode, rtx insn, int antic_p,
1115                       int avail_p, struct hash_table_d *table)
1116 {
1117   int found, do_not_record_p;
1118   unsigned int hash;
1119   struct expr *cur_expr, *last_expr = NULL;
1120   struct occr *antic_occr, *avail_occr;
1121
1122   hash = hash_expr (x, mode, &do_not_record_p, table->size);
1123
1124   /* Do not insert expression in table if it contains volatile operands,
1125      or if hash_expr determines the expression is something we don't want
1126      to or can't handle.  */
1127   if (do_not_record_p)
1128     return;
1129
1130   cur_expr = table->table[hash];
1131   found = 0;
1132
1133   while (cur_expr && 0 == (found = expr_equiv_p (cur_expr->expr, x)))
1134     {
1135       /* If the expression isn't found, save a pointer to the end of
1136          the list.  */
1137       last_expr = cur_expr;
1138       cur_expr = cur_expr->next_same_hash;
1139     }
1140
1141   if (! found)
1142     {
1143       cur_expr = GOBNEW (struct expr);
1144       bytes_used += sizeof (struct expr);
1145       if (table->table[hash] == NULL)
1146         /* This is the first pattern that hashed to this index.  */
1147         table->table[hash] = cur_expr;
1148       else
1149         /* Add EXPR to end of this hash chain.  */
1150         last_expr->next_same_hash = cur_expr;
1151
1152       /* Set the fields of the expr element.  */
1153       cur_expr->expr = x;
1154       cur_expr->bitmap_index = table->n_elems++;
1155       cur_expr->next_same_hash = NULL;
1156       cur_expr->antic_occr = NULL;
1157       cur_expr->avail_occr = NULL;
1158     }
1159
1160   /* Now record the occurrence(s).  */
1161   if (antic_p)
1162     {
1163       antic_occr = cur_expr->antic_occr;
1164
1165       if (antic_occr && BLOCK_NUM (antic_occr->insn) != BLOCK_NUM (insn))
1166         antic_occr = NULL;
1167
1168       if (antic_occr)
1169         /* Found another instance of the expression in the same basic block.
1170            Prefer the currently recorded one.  We want the first one in the
1171            block and the block is scanned from start to end.  */
1172         ; /* nothing to do */
1173       else
1174         {
1175           /* First occurrence of this expression in this basic block.  */
1176           antic_occr = GOBNEW (struct occr);
1177           bytes_used += sizeof (struct occr);
1178           antic_occr->insn = insn;
1179           antic_occr->next = cur_expr->antic_occr;
1180           antic_occr->deleted_p = 0;
1181           cur_expr->antic_occr = antic_occr;
1182         }
1183     }
1184
1185   if (avail_p)
1186     {
1187       avail_occr = cur_expr->avail_occr;
1188
1189       if (avail_occr && BLOCK_NUM (avail_occr->insn) == BLOCK_NUM (insn))
1190         {
1191           /* Found another instance of the expression in the same basic block.
1192              Prefer this occurrence to the currently recorded one.  We want
1193              the last one in the block and the block is scanned from start
1194              to end.  */
1195           avail_occr->insn = insn;
1196         }
1197       else
1198         {
1199           /* First occurrence of this expression in this basic block.  */
1200           avail_occr = GOBNEW (struct occr);
1201           bytes_used += sizeof (struct occr);
1202           avail_occr->insn = insn;
1203           avail_occr->next = cur_expr->avail_occr;
1204           avail_occr->deleted_p = 0;
1205           cur_expr->avail_occr = avail_occr;
1206         }
1207     }
1208 }
1209
1210 /* Insert pattern X in INSN in the hash table.
1211    X is a SET of a reg to either another reg or a constant.
1212    If it is already present, record it as the last occurrence in INSN's
1213    basic block.  */
1214
1215 static void
1216 insert_set_in_table (rtx x, rtx insn, struct hash_table_d *table)
1217 {
1218   int found;
1219   unsigned int hash;
1220   struct expr *cur_expr, *last_expr = NULL;
1221   struct occr *cur_occr;
1222
1223   gcc_assert (GET_CODE (x) == SET && REG_P (SET_DEST (x)));
1224
1225   hash = hash_set (REGNO (SET_DEST (x)), table->size);
1226
1227   cur_expr = table->table[hash];
1228   found = 0;
1229
1230   while (cur_expr && 0 == (found = expr_equiv_p (cur_expr->expr, x)))
1231     {
1232       /* If the expression isn't found, save a pointer to the end of
1233          the list.  */
1234       last_expr = cur_expr;
1235       cur_expr = cur_expr->next_same_hash;
1236     }
1237
1238   if (! found)
1239     {
1240       cur_expr = GOBNEW (struct expr);
1241       bytes_used += sizeof (struct expr);
1242       if (table->table[hash] == NULL)
1243         /* This is the first pattern that hashed to this index.  */
1244         table->table[hash] = cur_expr;
1245       else
1246         /* Add EXPR to end of this hash chain.  */
1247         last_expr->next_same_hash = cur_expr;
1248
1249       /* Set the fields of the expr element.
1250          We must copy X because it can be modified when copy propagation is
1251          performed on its operands.  */
1252       cur_expr->expr = copy_rtx (x);
1253       cur_expr->bitmap_index = table->n_elems++;
1254       cur_expr->next_same_hash = NULL;
1255       cur_expr->antic_occr = NULL;
1256       cur_expr->avail_occr = NULL;
1257     }
1258
1259   /* Now record the occurrence.  */
1260   cur_occr = cur_expr->avail_occr;
1261
1262   if (cur_occr && BLOCK_NUM (cur_occr->insn) == BLOCK_NUM (insn))
1263     {
1264       /* Found another instance of the expression in the same basic block.
1265          Prefer this occurrence to the currently recorded one.  We want
1266          the last one in the block and the block is scanned from start
1267          to end.  */
1268       cur_occr->insn = insn;
1269     }
1270   else
1271     {
1272       /* First occurrence of this expression in this basic block.  */
1273       cur_occr = GOBNEW (struct occr);
1274       bytes_used += sizeof (struct occr);
1275       cur_occr->insn = insn;
1276       cur_occr->next = cur_expr->avail_occr;
1277       cur_occr->deleted_p = 0;
1278       cur_expr->avail_occr = cur_occr;
1279     }
1280 }
1281
1282 /* Determine whether the rtx X should be treated as a constant for
1283    the purposes of GCSE's constant propagation.  */
1284
1285 static bool
1286 gcse_constant_p (const_rtx x)
1287 {
1288   /* Consider a COMPARE of two integers constant.  */
1289   if (GET_CODE (x) == COMPARE
1290       && CONST_INT_P (XEXP (x, 0))
1291       && CONST_INT_P (XEXP (x, 1)))
1292     return true;
1293
1294   /* Consider a COMPARE of the same registers is a constant
1295      if they are not floating point registers.  */
1296   if (GET_CODE(x) == COMPARE
1297       && REG_P (XEXP (x, 0)) && REG_P (XEXP (x, 1))
1298       && REGNO (XEXP (x, 0)) == REGNO (XEXP (x, 1))
1299       && ! FLOAT_MODE_P (GET_MODE (XEXP (x, 0)))
1300       && ! FLOAT_MODE_P (GET_MODE (XEXP (x, 1))))
1301     return true;
1302
1303   /* Since X might be inserted more than once we have to take care that it
1304      is sharable.  */
1305   return CONSTANT_P (x) && (GET_CODE (x) != CONST || shared_const_p (x));
1306 }
1307
1308 /* Scan pattern PAT of INSN and add an entry to the hash TABLE (set or
1309    expression one).  */
1310
1311 static void
1312 hash_scan_set (rtx pat, rtx insn, struct hash_table_d *table)
1313 {
1314   rtx src = SET_SRC (pat);
1315   rtx dest = SET_DEST (pat);
1316   rtx note;
1317
1318   if (GET_CODE (src) == CALL)
1319     hash_scan_call (src, insn, table);
1320
1321   else if (REG_P (dest))
1322     {
1323       unsigned int regno = REGNO (dest);
1324       rtx tmp;
1325
1326       /* See if a REG_EQUAL note shows this equivalent to a simpler expression.
1327
1328          This allows us to do a single GCSE pass and still eliminate
1329          redundant constants, addresses or other expressions that are
1330          constructed with multiple instructions.
1331
1332          However, keep the original SRC if INSN is a simple reg-reg move.  In
1333          In this case, there will almost always be a REG_EQUAL note on the
1334          insn that sets SRC.  By recording the REG_EQUAL value here as SRC
1335          for INSN, we miss copy propagation opportunities and we perform the
1336          same PRE GCSE operation repeatedly on the same REG_EQUAL value if we
1337          do more than one PRE GCSE pass.
1338
1339          Note that this does not impede profitable constant propagations.  We
1340          "look through" reg-reg sets in lookup_avail_set.  */
1341       note = find_reg_equal_equiv_note (insn);
1342       if (note != 0
1343           && REG_NOTE_KIND (note) == REG_EQUAL
1344           && !REG_P (src)
1345           && (table->set_p
1346               ? gcse_constant_p (XEXP (note, 0))
1347               : want_to_gcse_p (XEXP (note, 0))))
1348         src = XEXP (note, 0), pat = gen_rtx_SET (VOIDmode, dest, src);
1349
1350       /* Only record sets of pseudo-regs in the hash table.  */
1351       if (! table->set_p
1352           && regno >= FIRST_PSEUDO_REGISTER
1353           /* Don't GCSE something if we can't do a reg/reg copy.  */
1354           && can_copy_p (GET_MODE (dest))
1355           /* GCSE commonly inserts instruction after the insn.  We can't
1356              do that easily for EH_REGION notes so disable GCSE on these
1357              for now.  */
1358           && !find_reg_note (insn, REG_EH_REGION, NULL_RTX)
1359           /* Is SET_SRC something we want to gcse?  */
1360           && want_to_gcse_p (src)
1361           /* Don't CSE a nop.  */
1362           && ! set_noop_p (pat)
1363           /* Don't GCSE if it has attached REG_EQUIV note.
1364              At this point this only function parameters should have
1365              REG_EQUIV notes and if the argument slot is used somewhere
1366              explicitly, it means address of parameter has been taken,
1367              so we should not extend the lifetime of the pseudo.  */
1368           && (note == NULL_RTX || ! MEM_P (XEXP (note, 0))))
1369         {
1370           /* An expression is not anticipatable if its operands are
1371              modified before this insn or if this is not the only SET in
1372              this insn.  The latter condition does not have to mean that
1373              SRC itself is not anticipatable, but we just will not be
1374              able to handle code motion of insns with multiple sets.  */
1375           int antic_p = oprs_anticipatable_p (src, insn)
1376                         && !multiple_sets (insn);
1377           /* An expression is not available if its operands are
1378              subsequently modified, including this insn.  It's also not
1379              available if this is a branch, because we can't insert
1380              a set after the branch.  */
1381           int avail_p = (oprs_available_p (src, insn)
1382                          && ! JUMP_P (insn));
1383
1384           insert_expr_in_table (src, GET_MODE (dest), insn, antic_p, avail_p, table);
1385         }
1386
1387       /* Record sets for constant/copy propagation.  */
1388       else if (table->set_p
1389                && regno >= FIRST_PSEUDO_REGISTER
1390                && ((REG_P (src)
1391                     && REGNO (src) >= FIRST_PSEUDO_REGISTER
1392                     && can_copy_p (GET_MODE (dest))
1393                     && REGNO (src) != regno)
1394                    || gcse_constant_p (src))
1395                /* A copy is not available if its src or dest is subsequently
1396                   modified.  Here we want to search from INSN+1 on, but
1397                   oprs_available_p searches from INSN on.  */
1398                && (insn == BB_END (BLOCK_FOR_INSN (insn))
1399                    || (tmp = next_nonnote_insn (insn)) == NULL_RTX
1400                    || BLOCK_FOR_INSN (tmp) != BLOCK_FOR_INSN (insn)
1401                    || oprs_available_p (pat, tmp)))
1402         insert_set_in_table (pat, insn, table);
1403     }
1404   /* In case of store we want to consider the memory value as available in
1405      the REG stored in that memory. This makes it possible to remove
1406      redundant loads from due to stores to the same location.  */
1407   else if (flag_gcse_las && REG_P (src) && MEM_P (dest))
1408       {
1409         unsigned int regno = REGNO (src);
1410
1411         /* Do not do this for constant/copy propagation.  */
1412         if (! table->set_p
1413             /* Only record sets of pseudo-regs in the hash table.  */
1414             && regno >= FIRST_PSEUDO_REGISTER
1415            /* Don't GCSE something if we can't do a reg/reg copy.  */
1416            && can_copy_p (GET_MODE (src))
1417            /* GCSE commonly inserts instruction after the insn.  We can't
1418               do that easily for EH_REGION notes so disable GCSE on these
1419               for now.  */
1420            && ! find_reg_note (insn, REG_EH_REGION, NULL_RTX)
1421            /* Is SET_DEST something we want to gcse?  */
1422            && want_to_gcse_p (dest)
1423            /* Don't CSE a nop.  */
1424            && ! set_noop_p (pat)
1425            /* Don't GCSE if it has attached REG_EQUIV note.
1426               At this point this only function parameters should have
1427               REG_EQUIV notes and if the argument slot is used somewhere
1428               explicitly, it means address of parameter has been taken,
1429               so we should not extend the lifetime of the pseudo.  */
1430            && ((note = find_reg_note (insn, REG_EQUIV, NULL_RTX)) == 0
1431                || ! MEM_P (XEXP (note, 0))))
1432              {
1433                /* Stores are never anticipatable.  */
1434                int antic_p = 0;
1435                /* An expression is not available if its operands are
1436                   subsequently modified, including this insn.  It's also not
1437                   available if this is a branch, because we can't insert
1438                   a set after the branch.  */
1439                int avail_p = oprs_available_p (dest, insn)
1440                              && ! JUMP_P (insn);
1441
1442                /* Record the memory expression (DEST) in the hash table.  */
1443                insert_expr_in_table (dest, GET_MODE (dest), insn,
1444                                      antic_p, avail_p, table);
1445              }
1446       }
1447 }
1448
1449 static void
1450 hash_scan_clobber (rtx x ATTRIBUTE_UNUSED, rtx insn ATTRIBUTE_UNUSED,
1451                    struct hash_table_d *table ATTRIBUTE_UNUSED)
1452 {
1453   /* Currently nothing to do.  */
1454 }
1455
1456 static void
1457 hash_scan_call (rtx x ATTRIBUTE_UNUSED, rtx insn ATTRIBUTE_UNUSED,
1458                 struct hash_table_d *table ATTRIBUTE_UNUSED)
1459 {
1460   /* Currently nothing to do.  */
1461 }
1462
1463 /* Process INSN and add hash table entries as appropriate.
1464
1465    Only available expressions that set a single pseudo-reg are recorded.
1466
1467    Single sets in a PARALLEL could be handled, but it's an extra complication
1468    that isn't dealt with right now.  The trick is handling the CLOBBERs that
1469    are also in the PARALLEL.  Later.
1470
1471    If SET_P is nonzero, this is for the assignment hash table,
1472    otherwise it is for the expression hash table.  */
1473
1474 static void
1475 hash_scan_insn (rtx insn, struct hash_table_d *table)
1476 {
1477   rtx pat = PATTERN (insn);
1478   int i;
1479
1480   /* Pick out the sets of INSN and for other forms of instructions record
1481      what's been modified.  */
1482
1483   if (GET_CODE (pat) == SET)
1484     hash_scan_set (pat, insn, table);
1485   else if (GET_CODE (pat) == PARALLEL)
1486     for (i = 0; i < XVECLEN (pat, 0); i++)
1487       {
1488         rtx x = XVECEXP (pat, 0, i);
1489
1490         if (GET_CODE (x) == SET)
1491           hash_scan_set (x, insn, table);
1492         else if (GET_CODE (x) == CLOBBER)
1493           hash_scan_clobber (x, insn, table);
1494         else if (GET_CODE (x) == CALL)
1495           hash_scan_call (x, insn, table);
1496       }
1497
1498   else if (GET_CODE (pat) == CLOBBER)
1499     hash_scan_clobber (pat, insn, table);
1500   else if (GET_CODE (pat) == CALL)
1501     hash_scan_call (pat, insn, table);
1502 }
1503
1504 static void
1505 dump_hash_table (FILE *file, const char *name, struct hash_table_d *table)
1506 {
1507   int i;
1508   /* Flattened out table, so it's printed in proper order.  */
1509   struct expr **flat_table;
1510   unsigned int *hash_val;
1511   struct expr *expr;
1512
1513   flat_table = XCNEWVEC (struct expr *, table->n_elems);
1514   hash_val = XNEWVEC (unsigned int, table->n_elems);
1515
1516   for (i = 0; i < (int) table->size; i++)
1517     for (expr = table->table[i]; expr != NULL; expr = expr->next_same_hash)
1518       {
1519         flat_table[expr->bitmap_index] = expr;
1520         hash_val[expr->bitmap_index] = i;
1521       }
1522
1523   fprintf (file, "%s hash table (%d buckets, %d entries)\n",
1524            name, table->size, table->n_elems);
1525
1526   for (i = 0; i < (int) table->n_elems; i++)
1527     if (flat_table[i] != 0)
1528       {
1529         expr = flat_table[i];
1530         fprintf (file, "Index %d (hash value %d)\n  ",
1531                  expr->bitmap_index, hash_val[i]);
1532         print_rtl (file, expr->expr);
1533         fprintf (file, "\n");
1534       }
1535
1536   fprintf (file, "\n");
1537
1538   free (flat_table);
1539   free (hash_val);
1540 }
1541
1542 /* Record register first/last/block set information for REGNO in INSN.
1543
1544    first_set records the first place in the block where the register
1545    is set and is used to compute "anticipatability".
1546
1547    last_set records the last place in the block where the register
1548    is set and is used to compute "availability".
1549
1550    last_bb records the block for which first_set and last_set are
1551    valid, as a quick test to invalidate them.  */
1552
1553 static void
1554 record_last_reg_set_info (rtx insn, int regno)
1555 {
1556   struct reg_avail_info *info = &reg_avail_info[regno];
1557   int luid = DF_INSN_LUID (insn);
1558
1559   info->last_set = luid;
1560   if (info->last_bb != current_bb)
1561     {
1562       info->last_bb = current_bb;
1563       info->first_set = luid;
1564     }
1565 }
1566
1567
1568 /* Record all of the canonicalized MEMs of record_last_mem_set_info's insn.
1569    Note we store a pair of elements in the list, so they have to be
1570    taken off pairwise.  */
1571
1572 static void
1573 canon_list_insert (rtx dest ATTRIBUTE_UNUSED, const_rtx unused1 ATTRIBUTE_UNUSED,
1574                    void * v_insn)
1575 {
1576   rtx dest_addr, insn;
1577   int bb;
1578
1579   while (GET_CODE (dest) == SUBREG
1580       || GET_CODE (dest) == ZERO_EXTRACT
1581       || GET_CODE (dest) == STRICT_LOW_PART)
1582     dest = XEXP (dest, 0);
1583
1584   /* If DEST is not a MEM, then it will not conflict with a load.  Note
1585      that function calls are assumed to clobber memory, but are handled
1586      elsewhere.  */
1587
1588   if (! MEM_P (dest))
1589     return;
1590
1591   dest_addr = get_addr (XEXP (dest, 0));
1592   dest_addr = canon_rtx (dest_addr);
1593   insn = (rtx) v_insn;
1594   bb = BLOCK_NUM (insn);
1595
1596   canon_modify_mem_list[bb] =
1597     alloc_EXPR_LIST (VOIDmode, dest_addr, canon_modify_mem_list[bb]);
1598   canon_modify_mem_list[bb] =
1599     alloc_EXPR_LIST (VOIDmode, dest, canon_modify_mem_list[bb]);
1600 }
1601
1602 /* Record memory modification information for INSN.  We do not actually care
1603    about the memory location(s) that are set, or even how they are set (consider
1604    a CALL_INSN).  We merely need to record which insns modify memory.  */
1605
1606 static void
1607 record_last_mem_set_info (rtx insn)
1608 {
1609   int bb = BLOCK_NUM (insn);
1610
1611   /* load_killed_in_block_p will handle the case of calls clobbering
1612      everything.  */
1613   modify_mem_list[bb] = alloc_INSN_LIST (insn, modify_mem_list[bb]);
1614   bitmap_set_bit (modify_mem_list_set, bb);
1615
1616   if (CALL_P (insn))
1617     {
1618       /* Note that traversals of this loop (other than for free-ing)
1619          will break after encountering a CALL_INSN.  So, there's no
1620          need to insert a pair of items, as canon_list_insert does.  */
1621       canon_modify_mem_list[bb] =
1622         alloc_INSN_LIST (insn, canon_modify_mem_list[bb]);
1623       bitmap_set_bit (blocks_with_calls, bb);
1624     }
1625   else
1626     note_stores (PATTERN (insn), canon_list_insert, (void*) insn);
1627 }
1628
1629 /* Called from compute_hash_table via note_stores to handle one
1630    SET or CLOBBER in an insn.  DATA is really the instruction in which
1631    the SET is taking place.  */
1632
1633 static void
1634 record_last_set_info (rtx dest, const_rtx setter ATTRIBUTE_UNUSED, void *data)
1635 {
1636   rtx last_set_insn = (rtx) data;
1637
1638   if (GET_CODE (dest) == SUBREG)
1639     dest = SUBREG_REG (dest);
1640
1641   if (REG_P (dest))
1642     record_last_reg_set_info (last_set_insn, REGNO (dest));
1643   else if (MEM_P (dest)
1644            /* Ignore pushes, they clobber nothing.  */
1645            && ! push_operand (dest, GET_MODE (dest)))
1646     record_last_mem_set_info (last_set_insn);
1647 }
1648
1649 /* Top level function to create an expression or assignment hash table.
1650
1651    Expression entries are placed in the hash table if
1652    - they are of the form (set (pseudo-reg) src),
1653    - src is something we want to perform GCSE on,
1654    - none of the operands are subsequently modified in the block
1655
1656    Assignment entries are placed in the hash table if
1657    - they are of the form (set (pseudo-reg) src),
1658    - src is something we want to perform const/copy propagation on,
1659    - none of the operands or target are subsequently modified in the block
1660
1661    Currently src must be a pseudo-reg or a const_int.
1662
1663    TABLE is the table computed.  */
1664
1665 static void
1666 compute_hash_table_work (struct hash_table_d *table)
1667 {
1668   int i;
1669
1670   /* re-Cache any INSN_LIST nodes we have allocated.  */
1671   clear_modify_mem_tables ();
1672   /* Some working arrays used to track first and last set in each block.  */
1673   reg_avail_info = GNEWVEC (struct reg_avail_info, max_reg_num ());
1674
1675   for (i = 0; i < max_reg_num (); ++i)
1676     reg_avail_info[i].last_bb = NULL;
1677
1678   FOR_EACH_BB (current_bb)
1679     {
1680       rtx insn;
1681       unsigned int regno;
1682
1683       /* First pass over the instructions records information used to
1684          determine when registers and memory are first and last set.  */
1685       FOR_BB_INSNS (current_bb, insn)
1686         {
1687           if (! INSN_P (insn))
1688             continue;
1689
1690           if (CALL_P (insn))
1691             {
1692               for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
1693                 if (TEST_HARD_REG_BIT (regs_invalidated_by_call, regno))
1694                   record_last_reg_set_info (insn, regno);
1695
1696               mark_call (insn);
1697             }
1698
1699           note_stores (PATTERN (insn), record_last_set_info, insn);
1700         }
1701
1702       /* Insert implicit sets in the hash table.  */
1703       if (table->set_p
1704           && implicit_sets[current_bb->index] != NULL_RTX)
1705         hash_scan_set (implicit_sets[current_bb->index],
1706                        BB_HEAD (current_bb), table);
1707
1708       /* The next pass builds the hash table.  */
1709       FOR_BB_INSNS (current_bb, insn)
1710         if (INSN_P (insn))
1711           hash_scan_insn (insn, table);
1712     }
1713
1714   free (reg_avail_info);
1715   reg_avail_info = NULL;
1716 }
1717
1718 /* Allocate space for the set/expr hash TABLE.
1719    N_INSNS is the number of instructions in the function.
1720    It is used to determine the number of buckets to use.
1721    SET_P determines whether set or expression table will
1722    be created.  */
1723
1724 static void
1725 alloc_hash_table (int n_insns, struct hash_table_d *table, int set_p)
1726 {
1727   int n;
1728
1729   table->size = n_insns / 4;
1730   if (table->size < 11)
1731     table->size = 11;
1732
1733   /* Attempt to maintain efficient use of hash table.
1734      Making it an odd number is simplest for now.
1735      ??? Later take some measurements.  */
1736   table->size |= 1;
1737   n = table->size * sizeof (struct expr *);
1738   table->table = GNEWVAR (struct expr *, n);
1739   table->set_p = set_p;
1740 }
1741
1742 /* Free things allocated by alloc_hash_table.  */
1743
1744 static void
1745 free_hash_table (struct hash_table_d *table)
1746 {
1747   free (table->table);
1748 }
1749
1750 /* Compute the hash TABLE for doing copy/const propagation or
1751    expression hash table.  */
1752
1753 static void
1754 compute_hash_table (struct hash_table_d *table)
1755 {
1756   /* Initialize count of number of entries in hash table.  */
1757   table->n_elems = 0;
1758   memset (table->table, 0, table->size * sizeof (struct expr *));
1759
1760   compute_hash_table_work (table);
1761 }
1762 \f
1763 /* Expression tracking support.  */
1764
1765 /* Lookup REGNO in the set TABLE.  The result is a pointer to the
1766    table entry, or NULL if not found.  */
1767
1768 static struct expr *
1769 lookup_set (unsigned int regno, struct hash_table_d *table)
1770 {
1771   unsigned int hash = hash_set (regno, table->size);
1772   struct expr *expr;
1773
1774   expr = table->table[hash];
1775
1776   while (expr && REGNO (SET_DEST (expr->expr)) != regno)
1777     expr = expr->next_same_hash;
1778
1779   return expr;
1780 }
1781
1782 /* Return the next entry for REGNO in list EXPR.  */
1783
1784 static struct expr *
1785 next_set (unsigned int regno, struct expr *expr)
1786 {
1787   do
1788     expr = expr->next_same_hash;
1789   while (expr && REGNO (SET_DEST (expr->expr)) != regno);
1790
1791   return expr;
1792 }
1793
1794 /* Like free_INSN_LIST_list or free_EXPR_LIST_list, except that the node
1795    types may be mixed.  */
1796
1797 static void
1798 free_insn_expr_list_list (rtx *listp)
1799 {
1800   rtx list, next;
1801
1802   for (list = *listp; list ; list = next)
1803     {
1804       next = XEXP (list, 1);
1805       if (GET_CODE (list) == EXPR_LIST)
1806         free_EXPR_LIST_node (list);
1807       else
1808         free_INSN_LIST_node (list);
1809     }
1810
1811   *listp = NULL;
1812 }
1813
1814 /* Clear canon_modify_mem_list and modify_mem_list tables.  */
1815 static void
1816 clear_modify_mem_tables (void)
1817 {
1818   unsigned i;
1819   bitmap_iterator bi;
1820
1821   EXECUTE_IF_SET_IN_BITMAP (modify_mem_list_set, 0, i, bi)
1822     {
1823       free_INSN_LIST_list (modify_mem_list + i);
1824       free_insn_expr_list_list (canon_modify_mem_list + i);
1825     }
1826   bitmap_clear (modify_mem_list_set);
1827   bitmap_clear (blocks_with_calls);
1828 }
1829
1830 /* Release memory used by modify_mem_list_set.  */
1831
1832 static void
1833 free_modify_mem_tables (void)
1834 {
1835   clear_modify_mem_tables ();
1836   free (modify_mem_list);
1837   free (canon_modify_mem_list);
1838   modify_mem_list = 0;
1839   canon_modify_mem_list = 0;
1840 }
1841
1842 /* Reset tables used to keep track of what's still available [since the
1843    start of the block].  */
1844
1845 static void
1846 reset_opr_set_tables (void)
1847 {
1848   /* Maintain a bitmap of which regs have been set since beginning of
1849      the block.  */
1850   CLEAR_REG_SET (reg_set_bitmap);
1851
1852   /* Also keep a record of the last instruction to modify memory.
1853      For now this is very trivial, we only record whether any memory
1854      location has been modified.  */
1855   clear_modify_mem_tables ();
1856 }
1857
1858 /* Return nonzero if the operands of X are not set before INSN in
1859    INSN's basic block.  */
1860
1861 static int
1862 oprs_not_set_p (const_rtx x, const_rtx insn)
1863 {
1864   int i, j;
1865   enum rtx_code code;
1866   const char *fmt;
1867
1868   if (x == 0)
1869     return 1;
1870
1871   code = GET_CODE (x);
1872   switch (code)
1873     {
1874     case PC:
1875     case CC0:
1876     case CONST:
1877     case CONST_INT:
1878     case CONST_DOUBLE:
1879     case CONST_FIXED:
1880     case CONST_VECTOR:
1881     case SYMBOL_REF:
1882     case LABEL_REF:
1883     case ADDR_VEC:
1884     case ADDR_DIFF_VEC:
1885       return 1;
1886
1887     case MEM:
1888       if (load_killed_in_block_p (BLOCK_FOR_INSN (insn),
1889                                   DF_INSN_LUID (insn), x, 0))
1890         return 0;
1891       else
1892         return oprs_not_set_p (XEXP (x, 0), insn);
1893
1894     case REG:
1895       return ! REGNO_REG_SET_P (reg_set_bitmap, REGNO (x));
1896
1897     default:
1898       break;
1899     }
1900
1901   for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
1902     {
1903       if (fmt[i] == 'e')
1904         {
1905           /* If we are about to do the last recursive call
1906              needed at this level, change it into iteration.
1907              This function is called enough to be worth it.  */
1908           if (i == 0)
1909             return oprs_not_set_p (XEXP (x, i), insn);
1910
1911           if (! oprs_not_set_p (XEXP (x, i), insn))
1912             return 0;
1913         }
1914       else if (fmt[i] == 'E')
1915         for (j = 0; j < XVECLEN (x, i); j++)
1916           if (! oprs_not_set_p (XVECEXP (x, i, j), insn))
1917             return 0;
1918     }
1919
1920   return 1;
1921 }
1922
1923 /* Mark things set by a CALL.  */
1924
1925 static void
1926 mark_call (rtx insn)
1927 {
1928   if (! RTL_CONST_OR_PURE_CALL_P (insn))
1929     record_last_mem_set_info (insn);
1930 }
1931
1932 /* Mark things set by a SET.  */
1933
1934 static void
1935 mark_set (rtx pat, rtx insn)
1936 {
1937   rtx dest = SET_DEST (pat);
1938
1939   while (GET_CODE (dest) == SUBREG
1940          || GET_CODE (dest) == ZERO_EXTRACT
1941          || GET_CODE (dest) == STRICT_LOW_PART)
1942     dest = XEXP (dest, 0);
1943
1944   if (REG_P (dest))
1945     SET_REGNO_REG_SET (reg_set_bitmap, REGNO (dest));
1946   else if (MEM_P (dest))
1947     record_last_mem_set_info (insn);
1948
1949   if (GET_CODE (SET_SRC (pat)) == CALL)
1950     mark_call (insn);
1951 }
1952
1953 /* Record things set by a CLOBBER.  */
1954
1955 static void
1956 mark_clobber (rtx pat, rtx insn)
1957 {
1958   rtx clob = XEXP (pat, 0);
1959
1960   while (GET_CODE (clob) == SUBREG || GET_CODE (clob) == STRICT_LOW_PART)
1961     clob = XEXP (clob, 0);
1962
1963   if (REG_P (clob))
1964     SET_REGNO_REG_SET (reg_set_bitmap, REGNO (clob));
1965   else
1966     record_last_mem_set_info (insn);
1967 }
1968
1969 /* Record things set by INSN.
1970    This data is used by oprs_not_set_p.  */
1971
1972 static void
1973 mark_oprs_set (rtx insn)
1974 {
1975   rtx pat = PATTERN (insn);
1976   int i;
1977
1978   if (GET_CODE (pat) == SET)
1979     mark_set (pat, insn);
1980   else if (GET_CODE (pat) == PARALLEL)
1981     for (i = 0; i < XVECLEN (pat, 0); i++)
1982       {
1983         rtx x = XVECEXP (pat, 0, i);
1984
1985         if (GET_CODE (x) == SET)
1986           mark_set (x, insn);
1987         else if (GET_CODE (x) == CLOBBER)
1988           mark_clobber (x, insn);
1989         else if (GET_CODE (x) == CALL)
1990           mark_call (insn);
1991       }
1992
1993   else if (GET_CODE (pat) == CLOBBER)
1994     mark_clobber (pat, insn);
1995   else if (GET_CODE (pat) == CALL)
1996     mark_call (insn);
1997 }
1998
1999 \f
2000 /* Compute copy/constant propagation working variables.  */
2001
2002 /* Local properties of assignments.  */
2003 static sbitmap *cprop_pavloc;
2004 static sbitmap *cprop_absaltered;
2005
2006 /* Global properties of assignments (computed from the local properties).  */
2007 static sbitmap *cprop_avin;
2008 static sbitmap *cprop_avout;
2009
2010 /* Allocate vars used for copy/const propagation.  N_BLOCKS is the number of
2011    basic blocks.  N_SETS is the number of sets.  */
2012
2013 static void
2014 alloc_cprop_mem (int n_blocks, int n_sets)
2015 {
2016   cprop_pavloc = sbitmap_vector_alloc (n_blocks, n_sets);
2017   cprop_absaltered = sbitmap_vector_alloc (n_blocks, n_sets);
2018
2019   cprop_avin = sbitmap_vector_alloc (n_blocks, n_sets);
2020   cprop_avout = sbitmap_vector_alloc (n_blocks, n_sets);
2021 }
2022
2023 /* Free vars used by copy/const propagation.  */
2024
2025 static void
2026 free_cprop_mem (void)
2027 {
2028   sbitmap_vector_free (cprop_pavloc);
2029   sbitmap_vector_free (cprop_absaltered);
2030   sbitmap_vector_free (cprop_avin);
2031   sbitmap_vector_free (cprop_avout);
2032 }
2033
2034 /* For each block, compute whether X is transparent.  X is either an
2035    expression or an assignment [though we don't care which, for this context
2036    an assignment is treated as an expression].  For each block where an
2037    element of X is modified, set (SET_P == 1) or reset (SET_P == 0) the INDX
2038    bit in BMAP.  */
2039
2040 static void
2041 compute_transp (const_rtx x, int indx, sbitmap *bmap, int set_p)
2042 {
2043   int i, j;
2044   enum rtx_code code;
2045   const char *fmt;
2046
2047   /* repeat is used to turn tail-recursion into iteration since GCC
2048      can't do it when there's no return value.  */
2049  repeat:
2050
2051   if (x == 0)
2052     return;
2053
2054   code = GET_CODE (x);
2055   switch (code)
2056     {
2057     case REG:
2058       if (set_p)
2059         {
2060           df_ref def;
2061           for (def = DF_REG_DEF_CHAIN (REGNO (x));
2062                def;
2063                def = DF_REF_NEXT_REG (def))
2064             SET_BIT (bmap[DF_REF_BB (def)->index], indx);
2065         }
2066       else
2067         {
2068           df_ref def;
2069           for (def = DF_REG_DEF_CHAIN (REGNO (x));
2070                def;
2071                def = DF_REF_NEXT_REG (def))
2072             RESET_BIT (bmap[DF_REF_BB (def)->index], indx);
2073         }
2074
2075       return;
2076
2077     case MEM:
2078       if (! MEM_READONLY_P (x))
2079         {
2080           bitmap_iterator bi;
2081           unsigned bb_index;
2082
2083           /* First handle all the blocks with calls.  We don't need to
2084              do any list walking for them.  */
2085           EXECUTE_IF_SET_IN_BITMAP (blocks_with_calls, 0, bb_index, bi)
2086             {
2087               if (set_p)
2088                 SET_BIT (bmap[bb_index], indx);
2089               else
2090                 RESET_BIT (bmap[bb_index], indx);
2091             }
2092
2093             /* Now iterate over the blocks which have memory modifications
2094                but which do not have any calls.  */
2095             EXECUTE_IF_AND_COMPL_IN_BITMAP (modify_mem_list_set,
2096                                             blocks_with_calls,
2097                                             0, bb_index, bi)
2098               {
2099                 rtx list_entry = canon_modify_mem_list[bb_index];
2100
2101                 while (list_entry)
2102                   {
2103                     rtx dest, dest_addr;
2104
2105                     /* LIST_ENTRY must be an INSN of some kind that sets memory.
2106                        Examine each hunk of memory that is modified.  */
2107
2108                     dest = XEXP (list_entry, 0);
2109                     list_entry = XEXP (list_entry, 1);
2110                     dest_addr = XEXP (list_entry, 0);
2111
2112                     if (canon_true_dependence (dest, GET_MODE (dest), dest_addr,
2113                                                x, NULL_RTX, rtx_addr_varies_p))
2114                       {
2115                         if (set_p)
2116                           SET_BIT (bmap[bb_index], indx);
2117                         else
2118                           RESET_BIT (bmap[bb_index], indx);
2119                         break;
2120                       }
2121                     list_entry = XEXP (list_entry, 1);
2122                   }
2123               }
2124         }
2125
2126       x = XEXP (x, 0);
2127       goto repeat;
2128
2129     case PC:
2130     case CC0: /*FIXME*/
2131     case CONST:
2132     case CONST_INT:
2133     case CONST_DOUBLE:
2134     case CONST_FIXED:
2135     case CONST_VECTOR:
2136     case SYMBOL_REF:
2137     case LABEL_REF:
2138     case ADDR_VEC:
2139     case ADDR_DIFF_VEC:
2140       return;
2141
2142     default:
2143       break;
2144     }
2145
2146   for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
2147     {
2148       if (fmt[i] == 'e')
2149         {
2150           /* If we are about to do the last recursive call
2151              needed at this level, change it into iteration.
2152              This function is called enough to be worth it.  */
2153           if (i == 0)
2154             {
2155               x = XEXP (x, i);
2156               goto repeat;
2157             }
2158
2159           compute_transp (XEXP (x, i), indx, bmap, set_p);
2160         }
2161       else if (fmt[i] == 'E')
2162         for (j = 0; j < XVECLEN (x, i); j++)
2163           compute_transp (XVECEXP (x, i, j), indx, bmap, set_p);
2164     }
2165 }
2166
2167 /* Top level routine to do the dataflow analysis needed by copy/const
2168    propagation.  */
2169
2170 static void
2171 compute_cprop_data (void)
2172 {
2173   compute_local_properties (cprop_absaltered, cprop_pavloc, NULL, &set_hash_table);
2174   compute_available (cprop_pavloc, cprop_absaltered,
2175                      cprop_avout, cprop_avin);
2176 }
2177 \f
2178 /* Copy/constant propagation.  */
2179
2180 /* Maximum number of register uses in an insn that we handle.  */
2181 #define MAX_USES 8
2182
2183 /* Table of uses found in an insn.
2184    Allocated statically to avoid alloc/free complexity and overhead.  */
2185 static struct reg_use reg_use_table[MAX_USES];
2186
2187 /* Index into `reg_use_table' while building it.  */
2188 static int reg_use_count;
2189
2190 /* Set up a list of register numbers used in INSN.  The found uses are stored
2191    in `reg_use_table'.  `reg_use_count' is initialized to zero before entry,
2192    and contains the number of uses in the table upon exit.
2193
2194    ??? If a register appears multiple times we will record it multiple times.
2195    This doesn't hurt anything but it will slow things down.  */
2196
2197 static void
2198 find_used_regs (rtx *xptr, void *data ATTRIBUTE_UNUSED)
2199 {
2200   int i, j;
2201   enum rtx_code code;
2202   const char *fmt;
2203   rtx x = *xptr;
2204
2205   /* repeat is used to turn tail-recursion into iteration since GCC
2206      can't do it when there's no return value.  */
2207  repeat:
2208   if (x == 0)
2209     return;
2210
2211   code = GET_CODE (x);
2212   if (REG_P (x))
2213     {
2214       if (reg_use_count == MAX_USES)
2215         return;
2216
2217       reg_use_table[reg_use_count].reg_rtx = x;
2218       reg_use_count++;
2219     }
2220
2221   /* Recursively scan the operands of this expression.  */
2222
2223   for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
2224     {
2225       if (fmt[i] == 'e')
2226         {
2227           /* If we are about to do the last recursive call
2228              needed at this level, change it into iteration.
2229              This function is called enough to be worth it.  */
2230           if (i == 0)
2231             {
2232               x = XEXP (x, 0);
2233               goto repeat;
2234             }
2235
2236           find_used_regs (&XEXP (x, i), data);
2237         }
2238       else if (fmt[i] == 'E')
2239         for (j = 0; j < XVECLEN (x, i); j++)
2240           find_used_regs (&XVECEXP (x, i, j), data);
2241     }
2242 }
2243
2244 /* Try to replace all non-SET_DEST occurrences of FROM in INSN with TO.
2245    Returns nonzero is successful.  */
2246
2247 static int
2248 try_replace_reg (rtx from, rtx to, rtx insn)
2249 {
2250   rtx note = find_reg_equal_equiv_note (insn);
2251   rtx src = 0;
2252   int success = 0;
2253   rtx set = single_set (insn);
2254
2255   /* Usually we substitute easy stuff, so we won't copy everything.
2256      We however need to take care to not duplicate non-trivial CONST
2257      expressions.  */
2258   to = copy_rtx (to);
2259
2260   validate_replace_src_group (from, to, insn);
2261   if (num_changes_pending () && apply_change_group ())
2262     success = 1;
2263
2264   /* Try to simplify SET_SRC if we have substituted a constant.  */
2265   if (success && set && CONSTANT_P (to))
2266     {
2267       src = simplify_rtx (SET_SRC (set));
2268
2269       if (src)
2270         validate_change (insn, &SET_SRC (set), src, 0);
2271     }
2272
2273   /* If there is already a REG_EQUAL note, update the expression in it
2274      with our replacement.  */
2275   if (note != 0 && REG_NOTE_KIND (note) == REG_EQUAL)
2276     set_unique_reg_note (insn, REG_EQUAL,
2277                          simplify_replace_rtx (XEXP (note, 0), from,
2278                          copy_rtx (to)));
2279   if (!success && set && reg_mentioned_p (from, SET_SRC (set)))
2280     {
2281       /* If above failed and this is a single set, try to simplify the source of
2282          the set given our substitution.  We could perhaps try this for multiple
2283          SETs, but it probably won't buy us anything.  */
2284       src = simplify_replace_rtx (SET_SRC (set), from, to);
2285
2286       if (!rtx_equal_p (src, SET_SRC (set))
2287           && validate_change (insn, &SET_SRC (set), src, 0))
2288         success = 1;
2289
2290       /* If we've failed to do replacement, have a single SET, don't already
2291          have a note, and have no special SET, add a REG_EQUAL note to not
2292          lose information.  */
2293       if (!success && note == 0 && set != 0
2294           && GET_CODE (SET_DEST (set)) != ZERO_EXTRACT
2295           && GET_CODE (SET_DEST (set)) != STRICT_LOW_PART)
2296         note = set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
2297     }
2298
2299   /* REG_EQUAL may get simplified into register.
2300      We don't allow that. Remove that note. This code ought
2301      not to happen, because previous code ought to synthesize
2302      reg-reg move, but be on the safe side.  */
2303   if (note && REG_NOTE_KIND (note) == REG_EQUAL && REG_P (XEXP (note, 0)))
2304     remove_note (insn, note);
2305
2306   return success;
2307 }
2308
2309 /* Find a set of REGNOs that are available on entry to INSN's block.  Returns
2310    NULL no such set is found.  */
2311
2312 static struct expr *
2313 find_avail_set (int regno, rtx insn)
2314 {
2315   /* SET1 contains the last set found that can be returned to the caller for
2316      use in a substitution.  */
2317   struct expr *set1 = 0;
2318
2319   /* Loops are not possible here.  To get a loop we would need two sets
2320      available at the start of the block containing INSN.  i.e. we would
2321      need two sets like this available at the start of the block:
2322
2323        (set (reg X) (reg Y))
2324        (set (reg Y) (reg X))
2325
2326      This can not happen since the set of (reg Y) would have killed the
2327      set of (reg X) making it unavailable at the start of this block.  */
2328   while (1)
2329     {
2330       rtx src;
2331       struct expr *set = lookup_set (regno, &set_hash_table);
2332
2333       /* Find a set that is available at the start of the block
2334          which contains INSN.  */
2335       while (set)
2336         {
2337           if (TEST_BIT (cprop_avin[BLOCK_NUM (insn)], set->bitmap_index))
2338             break;
2339           set = next_set (regno, set);
2340         }
2341
2342       /* If no available set was found we've reached the end of the
2343          (possibly empty) copy chain.  */
2344       if (set == 0)
2345         break;
2346
2347       gcc_assert (GET_CODE (set->expr) == SET);
2348
2349       src = SET_SRC (set->expr);
2350
2351       /* We know the set is available.
2352          Now check that SRC is ANTLOC (i.e. none of the source operands
2353          have changed since the start of the block).
2354
2355          If the source operand changed, we may still use it for the next
2356          iteration of this loop, but we may not use it for substitutions.  */
2357
2358       if (gcse_constant_p (src) || oprs_not_set_p (src, insn))
2359         set1 = set;
2360
2361       /* If the source of the set is anything except a register, then
2362          we have reached the end of the copy chain.  */
2363       if (! REG_P (src))
2364         break;
2365
2366       /* Follow the copy chain, i.e. start another iteration of the loop
2367          and see if we have an available copy into SRC.  */
2368       regno = REGNO (src);
2369     }
2370
2371   /* SET1 holds the last set that was available and anticipatable at
2372      INSN.  */
2373   return set1;
2374 }
2375
2376 /* Subroutine of cprop_insn that tries to propagate constants into
2377    JUMP_INSNS.  JUMP must be a conditional jump.  If SETCC is non-NULL
2378    it is the instruction that immediately precedes JUMP, and must be a
2379    single SET of a register.  FROM is what we will try to replace,
2380    SRC is the constant we will try to substitute for it.  Returns nonzero
2381    if a change was made.  */
2382
2383 static int
2384 cprop_jump (basic_block bb, rtx setcc, rtx jump, rtx from, rtx src)
2385 {
2386   rtx new_rtx, set_src, note_src;
2387   rtx set = pc_set (jump);
2388   rtx note = find_reg_equal_equiv_note (jump);
2389
2390   if (note)
2391     {
2392       note_src = XEXP (note, 0);
2393       if (GET_CODE (note_src) == EXPR_LIST)
2394         note_src = NULL_RTX;
2395     }
2396   else note_src = NULL_RTX;
2397
2398   /* Prefer REG_EQUAL notes except those containing EXPR_LISTs.  */
2399   set_src = note_src ? note_src : SET_SRC (set);
2400
2401   /* First substitute the SETCC condition into the JUMP instruction,
2402      then substitute that given values into this expanded JUMP.  */
2403   if (setcc != NULL_RTX
2404       && !modified_between_p (from, setcc, jump)
2405       && !modified_between_p (src, setcc, jump))
2406     {
2407       rtx setcc_src;
2408       rtx setcc_set = single_set (setcc);
2409       rtx setcc_note = find_reg_equal_equiv_note (setcc);
2410       setcc_src = (setcc_note && GET_CODE (XEXP (setcc_note, 0)) != EXPR_LIST)
2411                 ? XEXP (setcc_note, 0) : SET_SRC (setcc_set);
2412       set_src = simplify_replace_rtx (set_src, SET_DEST (setcc_set),
2413                                       setcc_src);
2414     }
2415   else
2416     setcc = NULL_RTX;
2417
2418   new_rtx = simplify_replace_rtx (set_src, from, src);
2419
2420   /* If no simplification can be made, then try the next register.  */
2421   if (rtx_equal_p (new_rtx, SET_SRC (set)))
2422     return 0;
2423
2424   /* If this is now a no-op delete it, otherwise this must be a valid insn.  */
2425   if (new_rtx == pc_rtx)
2426     delete_insn (jump);
2427   else
2428     {
2429       /* Ensure the value computed inside the jump insn to be equivalent
2430          to one computed by setcc.  */
2431       if (setcc && modified_in_p (new_rtx, setcc))
2432         return 0;
2433       if (! validate_unshare_change (jump, &SET_SRC (set), new_rtx, 0))
2434         {
2435           /* When (some) constants are not valid in a comparison, and there
2436              are two registers to be replaced by constants before the entire
2437              comparison can be folded into a constant, we need to keep
2438              intermediate information in REG_EQUAL notes.  For targets with
2439              separate compare insns, such notes are added by try_replace_reg.
2440              When we have a combined compare-and-branch instruction, however,
2441              we need to attach a note to the branch itself to make this
2442              optimization work.  */
2443
2444           if (!rtx_equal_p (new_rtx, note_src))
2445             set_unique_reg_note (jump, REG_EQUAL, copy_rtx (new_rtx));
2446           return 0;
2447         }
2448
2449       /* Remove REG_EQUAL note after simplification.  */
2450       if (note_src)
2451         remove_note (jump, note);
2452      }
2453
2454 #ifdef HAVE_cc0
2455   /* Delete the cc0 setter.  */
2456   if (setcc != NULL && CC0_P (SET_DEST (single_set (setcc))))
2457     delete_insn (setcc);
2458 #endif
2459
2460   global_const_prop_count++;
2461   if (dump_file != NULL)
2462     {
2463       fprintf (dump_file,
2464                "GLOBAL CONST-PROP: Replacing reg %d in jump_insn %d with constant ",
2465                REGNO (from), INSN_UID (jump));
2466       print_rtl (dump_file, src);
2467       fprintf (dump_file, "\n");
2468     }
2469   purge_dead_edges (bb);
2470
2471   /* If a conditional jump has been changed into unconditional jump, remove
2472      the jump and make the edge fallthru - this is always called in
2473      cfglayout mode.  */
2474   if (new_rtx != pc_rtx && simplejump_p (jump))
2475     {
2476       edge e;
2477       edge_iterator ei;
2478
2479       for (ei = ei_start (bb->succs); (e = ei_safe_edge (ei)); ei_next (&ei))
2480         if (e->dest != EXIT_BLOCK_PTR
2481             && BB_HEAD (e->dest) == JUMP_LABEL (jump))
2482           {
2483             e->flags |= EDGE_FALLTHRU;
2484             break;
2485           }
2486       delete_insn (jump);
2487     }
2488
2489   return 1;
2490 }
2491
2492 static bool
2493 constprop_register (rtx insn, rtx from, rtx to)
2494 {
2495   rtx sset;
2496
2497   /* Check for reg or cc0 setting instructions followed by
2498      conditional branch instructions first.  */
2499   if ((sset = single_set (insn)) != NULL
2500       && NEXT_INSN (insn)
2501       && any_condjump_p (NEXT_INSN (insn)) && onlyjump_p (NEXT_INSN (insn)))
2502     {
2503       rtx dest = SET_DEST (sset);
2504       if ((REG_P (dest) || CC0_P (dest))
2505           && cprop_jump (BLOCK_FOR_INSN (insn), insn, NEXT_INSN (insn), from, to))
2506         return 1;
2507     }
2508
2509   /* Handle normal insns next.  */
2510   if (NONJUMP_INSN_P (insn)
2511       && try_replace_reg (from, to, insn))
2512     return 1;
2513
2514   /* Try to propagate a CONST_INT into a conditional jump.
2515      We're pretty specific about what we will handle in this
2516      code, we can extend this as necessary over time.
2517
2518      Right now the insn in question must look like
2519      (set (pc) (if_then_else ...))  */
2520   else if (any_condjump_p (insn) && onlyjump_p (insn))
2521     return cprop_jump (BLOCK_FOR_INSN (insn), NULL, insn, from, to);
2522   return 0;
2523 }
2524
2525 /* Perform constant and copy propagation on INSN.
2526    The result is nonzero if a change was made.  */
2527
2528 static int
2529 cprop_insn (rtx insn)
2530 {
2531   struct reg_use *reg_used;
2532   int changed = 0;
2533   rtx note;
2534
2535   if (!INSN_P (insn))
2536     return 0;
2537
2538   reg_use_count = 0;
2539   note_uses (&PATTERN (insn), find_used_regs, NULL);
2540
2541   note = find_reg_equal_equiv_note (insn);
2542
2543   /* We may win even when propagating constants into notes.  */
2544   if (note)
2545     find_used_regs (&XEXP (note, 0), NULL);
2546
2547   for (reg_used = &reg_use_table[0]; reg_use_count > 0;
2548        reg_used++, reg_use_count--)
2549     {
2550       unsigned int regno = REGNO (reg_used->reg_rtx);
2551       rtx pat, src;
2552       struct expr *set;
2553
2554       /* If the register has already been set in this block, there's
2555          nothing we can do.  */
2556       if (! oprs_not_set_p (reg_used->reg_rtx, insn))
2557         continue;
2558
2559       /* Find an assignment that sets reg_used and is available
2560          at the start of the block.  */
2561       set = find_avail_set (regno, insn);
2562       if (! set)
2563         continue;
2564
2565       pat = set->expr;
2566       /* ??? We might be able to handle PARALLELs.  Later.  */
2567       gcc_assert (GET_CODE (pat) == SET);
2568
2569       src = SET_SRC (pat);
2570
2571       /* Constant propagation.  */
2572       if (gcse_constant_p (src))
2573         {
2574           if (constprop_register (insn, reg_used->reg_rtx, src))
2575             {
2576               changed = 1;
2577               global_const_prop_count++;
2578               if (dump_file != NULL)
2579                 {
2580                   fprintf (dump_file, "GLOBAL CONST-PROP: Replacing reg %d in ", regno);
2581                   fprintf (dump_file, "insn %d with constant ", INSN_UID (insn));
2582                   print_rtl (dump_file, src);
2583                   fprintf (dump_file, "\n");
2584                 }
2585               if (INSN_DELETED_P (insn))
2586                 return 1;
2587             }
2588         }
2589       else if (REG_P (src)
2590                && REGNO (src) >= FIRST_PSEUDO_REGISTER
2591                && REGNO (src) != regno)
2592         {
2593           if (try_replace_reg (reg_used->reg_rtx, src, insn))
2594             {
2595               changed = 1;
2596               global_copy_prop_count++;
2597               if (dump_file != NULL)
2598                 {
2599                   fprintf (dump_file, "GLOBAL COPY-PROP: Replacing reg %d in insn %d",
2600                            regno, INSN_UID (insn));
2601                   fprintf (dump_file, " with reg %d\n", REGNO (src));
2602                 }
2603
2604               /* The original insn setting reg_used may or may not now be
2605                  deletable.  We leave the deletion to flow.  */
2606               /* FIXME: If it turns out that the insn isn't deletable,
2607                  then we may have unnecessarily extended register lifetimes
2608                  and made things worse.  */
2609             }
2610         }
2611     }
2612
2613   return changed;
2614 }
2615
2616 /* Like find_used_regs, but avoid recording uses that appear in
2617    input-output contexts such as zero_extract or pre_dec.  This
2618    restricts the cases we consider to those for which local cprop
2619    can legitimately make replacements.  */
2620
2621 static void
2622 local_cprop_find_used_regs (rtx *xptr, void *data)
2623 {
2624   rtx x = *xptr;
2625
2626   if (x == 0)
2627     return;
2628
2629   switch (GET_CODE (x))
2630     {
2631     case ZERO_EXTRACT:
2632     case SIGN_EXTRACT:
2633     case STRICT_LOW_PART:
2634       return;
2635
2636     case PRE_DEC:
2637     case PRE_INC:
2638     case POST_DEC:
2639     case POST_INC:
2640     case PRE_MODIFY:
2641     case POST_MODIFY:
2642       /* Can only legitimately appear this early in the context of
2643          stack pushes for function arguments, but handle all of the
2644          codes nonetheless.  */
2645       return;
2646
2647     case SUBREG:
2648       /* Setting a subreg of a register larger than word_mode leaves
2649          the non-written words unchanged.  */
2650       if (GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (x))) > BITS_PER_WORD)
2651         return;
2652       break;
2653
2654     default:
2655       break;
2656     }
2657
2658   find_used_regs (xptr, data);
2659 }
2660
2661 /* Try to perform local const/copy propagation on X in INSN.  */
2662
2663 static bool
2664 do_local_cprop (rtx x, rtx insn)
2665 {
2666   rtx newreg = NULL, newcnst = NULL;
2667
2668   /* Rule out USE instructions and ASM statements as we don't want to
2669      change the hard registers mentioned.  */
2670   if (REG_P (x)
2671       && (REGNO (x) >= FIRST_PSEUDO_REGISTER
2672           || (GET_CODE (PATTERN (insn)) != USE
2673               && asm_noperands (PATTERN (insn)) < 0)))
2674     {
2675       cselib_val *val = cselib_lookup (x, GET_MODE (x), 0);
2676       struct elt_loc_list *l;
2677
2678       if (!val)
2679         return false;
2680       for (l = val->locs; l; l = l->next)
2681         {
2682           rtx this_rtx = l->loc;
2683           rtx note;
2684
2685           if (gcse_constant_p (this_rtx))
2686             newcnst = this_rtx;
2687           if (REG_P (this_rtx) && REGNO (this_rtx) >= FIRST_PSEUDO_REGISTER
2688               /* Don't copy propagate if it has attached REG_EQUIV note.
2689                  At this point this only function parameters should have
2690                  REG_EQUIV notes and if the argument slot is used somewhere
2691                  explicitly, it means address of parameter has been taken,
2692                  so we should not extend the lifetime of the pseudo.  */
2693               && (!(note = find_reg_note (l->setting_insn, REG_EQUIV, NULL_RTX))
2694                   || ! MEM_P (XEXP (note, 0))))
2695             newreg = this_rtx;
2696         }
2697       if (newcnst && constprop_register (insn, x, newcnst))
2698         {
2699           if (dump_file != NULL)
2700             {
2701               fprintf (dump_file, "LOCAL CONST-PROP: Replacing reg %d in ",
2702                        REGNO (x));
2703               fprintf (dump_file, "insn %d with constant ",
2704                        INSN_UID (insn));
2705               print_rtl (dump_file, newcnst);
2706               fprintf (dump_file, "\n");
2707             }
2708           local_const_prop_count++;
2709           return true;
2710         }
2711       else if (newreg && newreg != x && try_replace_reg (x, newreg, insn))
2712         {
2713           if (dump_file != NULL)
2714             {
2715               fprintf (dump_file,
2716                        "LOCAL COPY-PROP: Replacing reg %d in insn %d",
2717                        REGNO (x), INSN_UID (insn));
2718               fprintf (dump_file, " with reg %d\n", REGNO (newreg));
2719             }
2720           local_copy_prop_count++;
2721           return true;
2722         }
2723     }
2724   return false;
2725 }
2726
2727 /* Do local const/copy propagation (i.e. within each basic block).  */
2728
2729 static int
2730 local_cprop_pass (void)
2731 {
2732   basic_block bb;
2733   rtx insn;
2734   struct reg_use *reg_used;
2735   bool changed = false;
2736
2737   cselib_init (false);
2738   FOR_EACH_BB (bb)
2739     {
2740       FOR_BB_INSNS (bb, insn)
2741         {
2742           if (INSN_P (insn))
2743             {
2744               rtx note = find_reg_equal_equiv_note (insn);
2745               do
2746                 {
2747                   reg_use_count = 0;
2748                   note_uses (&PATTERN (insn), local_cprop_find_used_regs,
2749                              NULL);
2750                   if (note)
2751                     local_cprop_find_used_regs (&XEXP (note, 0), NULL);
2752
2753                   for (reg_used = &reg_use_table[0]; reg_use_count > 0;
2754                        reg_used++, reg_use_count--)
2755                     {
2756                       if (do_local_cprop (reg_used->reg_rtx, insn))
2757                         {
2758                           changed = true;
2759                           break;
2760                         }
2761                     }
2762                   if (INSN_DELETED_P (insn))
2763                     break;
2764                 }
2765               while (reg_use_count);
2766             }
2767           cselib_process_insn (insn);
2768         }
2769
2770       /* Forget everything at the end of a basic block.  */
2771       cselib_clear_table ();
2772     }
2773
2774   cselib_finish ();
2775
2776   return changed;
2777 }
2778
2779 /* Similar to get_condition, only the resulting condition must be
2780    valid at JUMP, instead of at EARLIEST.
2781
2782    This differs from noce_get_condition in ifcvt.c in that we prefer not to
2783    settle for the condition variable in the jump instruction being integral.
2784    We prefer to be able to record the value of a user variable, rather than
2785    the value of a temporary used in a condition.  This could be solved by
2786    recording the value of *every* register scanned by canonicalize_condition,
2787    but this would require some code reorganization.  */
2788
2789 rtx
2790 fis_get_condition (rtx jump)
2791 {
2792   return get_condition (jump, NULL, false, true);
2793 }
2794
2795 /* Check the comparison COND to see if we can safely form an implicit set from
2796    it.  COND is either an EQ or NE comparison.  */
2797
2798 static bool
2799 implicit_set_cond_p (const_rtx cond)
2800 {
2801   const enum machine_mode mode = GET_MODE (XEXP (cond, 0));
2802   const_rtx cst = XEXP (cond, 1);
2803
2804   /* We can't perform this optimization if either operand might be or might
2805      contain a signed zero.  */
2806   if (HONOR_SIGNED_ZEROS (mode))
2807     {
2808       /* It is sufficient to check if CST is or contains a zero.  We must
2809          handle float, complex, and vector.  If any subpart is a zero, then
2810          the optimization can't be performed.  */
2811       /* ??? The complex and vector checks are not implemented yet.  We just
2812          always return zero for them.  */
2813       if (GET_CODE (cst) == CONST_DOUBLE)
2814         {
2815           REAL_VALUE_TYPE d;
2816           REAL_VALUE_FROM_CONST_DOUBLE (d, cst);
2817           if (REAL_VALUES_EQUAL (d, dconst0))
2818             return 0;
2819         }
2820       else
2821         return 0;
2822     }
2823
2824   return gcse_constant_p (cst);
2825 }
2826
2827 /* Find the implicit sets of a function.  An "implicit set" is a constraint
2828    on the value of a variable, implied by a conditional jump.  For example,
2829    following "if (x == 2)", the then branch may be optimized as though the
2830    conditional performed an "explicit set", in this example, "x = 2".  This
2831    function records the set patterns that are implicit at the start of each
2832    basic block.
2833
2834    FIXME: This would be more effective if critical edges are pre-split.  As
2835           it is now, we can't record implicit sets for blocks that have
2836           critical successor edges.  This results in missed optimizations
2837           and in more (unnecessary) work in cfgcleanup.c:thread_jump().  */
2838
2839 static void
2840 find_implicit_sets (void)
2841 {
2842   basic_block bb, dest;
2843   unsigned int count;
2844   rtx cond, new_rtx;
2845
2846   count = 0;
2847   FOR_EACH_BB (bb)
2848     /* Check for more than one successor.  */
2849     if (EDGE_COUNT (bb->succs) > 1)
2850       {
2851         cond = fis_get_condition (BB_END (bb));
2852
2853         if (cond
2854             && (GET_CODE (cond) == EQ || GET_CODE (cond) == NE)
2855             && REG_P (XEXP (cond, 0))
2856             && REGNO (XEXP (cond, 0)) >= FIRST_PSEUDO_REGISTER
2857             && implicit_set_cond_p (cond))
2858           {
2859             dest = GET_CODE (cond) == EQ ? BRANCH_EDGE (bb)->dest
2860                                          : FALLTHRU_EDGE (bb)->dest;
2861
2862             if (dest
2863                 /* Record nothing for a critical edge.  */
2864                 && single_pred_p (dest)
2865                 && dest != EXIT_BLOCK_PTR)
2866               {
2867                 new_rtx = gen_rtx_SET (VOIDmode, XEXP (cond, 0),
2868                                              XEXP (cond, 1));
2869                 implicit_sets[dest->index] = new_rtx;
2870                 if (dump_file)
2871                   {
2872                     fprintf(dump_file, "Implicit set of reg %d in ",
2873                             REGNO (XEXP (cond, 0)));
2874                     fprintf(dump_file, "basic block %d\n", dest->index);
2875                   }
2876                 count++;
2877               }
2878           }
2879       }
2880
2881   if (dump_file)
2882     fprintf (dump_file, "Found %d implicit sets\n", count);
2883 }
2884
2885 /* Bypass conditional jumps.  */
2886
2887 /* The value of last_basic_block at the beginning of the jump_bypass
2888    pass.  The use of redirect_edge_and_branch_force may introduce new
2889    basic blocks, but the data flow analysis is only valid for basic
2890    block indices less than bypass_last_basic_block.  */
2891
2892 static int bypass_last_basic_block;
2893
2894 /* Find a set of REGNO to a constant that is available at the end of basic
2895    block BB.  Returns NULL if no such set is found.  Based heavily upon
2896    find_avail_set.  */
2897
2898 static struct expr *
2899 find_bypass_set (int regno, int bb)
2900 {
2901   struct expr *result = 0;
2902
2903   for (;;)
2904     {
2905       rtx src;
2906       struct expr *set = lookup_set (regno, &set_hash_table);
2907
2908       while (set)
2909         {
2910           if (TEST_BIT (cprop_avout[bb], set->bitmap_index))
2911             break;
2912           set = next_set (regno, set);
2913         }
2914
2915       if (set == 0)
2916         break;
2917
2918       gcc_assert (GET_CODE (set->expr) == SET);
2919
2920       src = SET_SRC (set->expr);
2921       if (gcse_constant_p (src))
2922         result = set;
2923
2924       if (! REG_P (src))
2925         break;
2926
2927       regno = REGNO (src);
2928     }
2929   return result;
2930 }
2931
2932
2933 /* Subroutine of bypass_block that checks whether a pseudo is killed by
2934    any of the instructions inserted on an edge.  Jump bypassing places
2935    condition code setters on CFG edges using insert_insn_on_edge.  This
2936    function is required to check that our data flow analysis is still
2937    valid prior to commit_edge_insertions.  */
2938
2939 static bool
2940 reg_killed_on_edge (const_rtx reg, const_edge e)
2941 {
2942   rtx insn;
2943
2944   for (insn = e->insns.r; insn; insn = NEXT_INSN (insn))
2945     if (INSN_P (insn) && reg_set_p (reg, insn))
2946       return true;
2947
2948   return false;
2949 }
2950
2951 /* Subroutine of bypass_conditional_jumps that attempts to bypass the given
2952    basic block BB which has more than one predecessor.  If not NULL, SETCC
2953    is the first instruction of BB, which is immediately followed by JUMP_INSN
2954    JUMP.  Otherwise, SETCC is NULL, and JUMP is the first insn of BB.
2955    Returns nonzero if a change was made.
2956
2957    During the jump bypassing pass, we may place copies of SETCC instructions
2958    on CFG edges.  The following routine must be careful to pay attention to
2959    these inserted insns when performing its transformations.  */
2960
2961 static int
2962 bypass_block (basic_block bb, rtx setcc, rtx jump)
2963 {
2964   rtx insn, note;
2965   edge e, edest;
2966   int i, change;
2967   int may_be_loop_header;
2968   unsigned removed_p;
2969   edge_iterator ei;
2970
2971   insn = (setcc != NULL) ? setcc : jump;
2972
2973   /* Determine set of register uses in INSN.  */
2974   reg_use_count = 0;
2975   note_uses (&PATTERN (insn), find_used_regs, NULL);
2976   note = find_reg_equal_equiv_note (insn);
2977   if (note)
2978     find_used_regs (&XEXP (note, 0), NULL);
2979
2980   may_be_loop_header = false;
2981   FOR_EACH_EDGE (e, ei, bb->preds)
2982     if (e->flags & EDGE_DFS_BACK)
2983       {
2984         may_be_loop_header = true;
2985         break;
2986       }
2987
2988   change = 0;
2989   for (ei = ei_start (bb->preds); (e = ei_safe_edge (ei)); )
2990     {
2991       removed_p = 0;
2992
2993       if (e->flags & EDGE_COMPLEX)
2994         {
2995           ei_next (&ei);
2996           continue;
2997         }
2998
2999       /* We can't redirect edges from new basic blocks.  */
3000       if (e->src->index >= bypass_last_basic_block)
3001         {
3002           ei_next (&ei);
3003           continue;
3004         }
3005
3006       /* The irreducible loops created by redirecting of edges entering the
3007          loop from outside would decrease effectiveness of some of the following
3008          optimizations, so prevent this.  */
3009       if (may_be_loop_header
3010           && !(e->flags & EDGE_DFS_BACK))
3011         {
3012           ei_next (&ei);
3013           continue;
3014         }
3015
3016       for (i = 0; i < reg_use_count; i++)
3017         {
3018           struct reg_use *reg_used = &reg_use_table[i];
3019           unsigned int regno = REGNO (reg_used->reg_rtx);
3020           basic_block dest, old_dest;
3021           struct expr *set;
3022           rtx src, new_rtx;
3023
3024           set = find_bypass_set (regno, e->src->index);
3025
3026           if (! set)
3027             continue;
3028
3029           /* Check the data flow is valid after edge insertions.  */
3030           if (e->insns.r && reg_killed_on_edge (reg_used->reg_rtx, e))
3031             continue;
3032
3033           src = SET_SRC (pc_set (jump));
3034
3035           if (setcc != NULL)
3036               src = simplify_replace_rtx (src,
3037                                           SET_DEST (PATTERN (setcc)),
3038                                           SET_SRC (PATTERN (setcc)));
3039
3040           new_rtx = simplify_replace_rtx (src, reg_used->reg_rtx,
3041                                       SET_SRC (set->expr));
3042
3043           /* Jump bypassing may have already placed instructions on
3044              edges of the CFG.  We can't bypass an outgoing edge that
3045              has instructions associated with it, as these insns won't
3046              get executed if the incoming edge is redirected.  */
3047
3048           if (new_rtx == pc_rtx)
3049             {
3050               edest = FALLTHRU_EDGE (bb);
3051               dest = edest->insns.r ? NULL : edest->dest;
3052             }
3053           else if (GET_CODE (new_rtx) == LABEL_REF)
3054             {
3055               dest = BLOCK_FOR_INSN (XEXP (new_rtx, 0));
3056               /* Don't bypass edges containing instructions.  */
3057               edest = find_edge (bb, dest);
3058               if (edest && edest->insns.r)
3059                 dest = NULL;
3060             }
3061           else
3062             dest = NULL;
3063
3064           /* Avoid unification of the edge with other edges from original
3065              branch.  We would end up emitting the instruction on "both"
3066              edges.  */
3067
3068           if (dest && setcc && !CC0_P (SET_DEST (PATTERN (setcc)))
3069               && find_edge (e->src, dest))
3070             dest = NULL;
3071
3072           old_dest = e->dest;
3073           if (dest != NULL
3074               && dest != old_dest
3075               && dest != EXIT_BLOCK_PTR)
3076             {
3077               redirect_edge_and_branch_force (e, dest);
3078
3079               /* Copy the register setter to the redirected edge.
3080                  Don't copy CC0 setters, as CC0 is dead after jump.  */
3081               if (setcc)
3082                 {
3083                   rtx pat = PATTERN (setcc);
3084                   if (!CC0_P (SET_DEST (pat)))
3085                     insert_insn_on_edge (copy_insn (pat), e);
3086                 }
3087
3088               if (dump_file != NULL)
3089                 {
3090                   fprintf (dump_file, "JUMP-BYPASS: Proved reg %d "
3091                                       "in jump_insn %d equals constant ",
3092                            regno, INSN_UID (jump));
3093                   print_rtl (dump_file, SET_SRC (set->expr));
3094                   fprintf (dump_file, "\nBypass edge from %d->%d to %d\n",
3095                            e->src->index, old_dest->index, dest->index);
3096                 }
3097               change = 1;
3098               removed_p = 1;
3099               break;
3100             }
3101         }
3102       if (!removed_p)
3103         ei_next (&ei);
3104     }
3105   return change;
3106 }
3107
3108 /* Find basic blocks with more than one predecessor that only contain a
3109    single conditional jump.  If the result of the comparison is known at
3110    compile-time from any incoming edge, redirect that edge to the
3111    appropriate target.  Returns nonzero if a change was made.
3112
3113    This function is now mis-named, because we also handle indirect jumps.  */
3114
3115 static int
3116 bypass_conditional_jumps (void)
3117 {
3118   basic_block bb;
3119   int changed;
3120   rtx setcc;
3121   rtx insn;
3122   rtx dest;
3123
3124   /* Note we start at block 1.  */
3125   if (ENTRY_BLOCK_PTR->next_bb == EXIT_BLOCK_PTR)
3126     return 0;
3127
3128   bypass_last_basic_block = last_basic_block;
3129   mark_dfs_back_edges ();
3130
3131   changed = 0;
3132   FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR->next_bb->next_bb,
3133                   EXIT_BLOCK_PTR, next_bb)
3134     {
3135       /* Check for more than one predecessor.  */
3136       if (!single_pred_p (bb))
3137         {
3138           setcc = NULL_RTX;
3139           FOR_BB_INSNS (bb, insn)
3140             if (NONJUMP_INSN_P (insn))
3141               {
3142                 if (setcc)
3143                   break;
3144                 if (GET_CODE (PATTERN (insn)) != SET)
3145                   break;
3146
3147                 dest = SET_DEST (PATTERN (insn));
3148                 if (REG_P (dest) || CC0_P (dest))
3149                   setcc = insn;
3150                 else
3151                   break;
3152               }
3153             else if (JUMP_P (insn))
3154               {
3155                 if ((any_condjump_p (insn) || computed_jump_p (insn))
3156                     && onlyjump_p (insn))
3157                   changed |= bypass_block (bb, setcc, insn);
3158                 break;
3159               }
3160             else if (INSN_P (insn))
3161               break;
3162         }
3163     }
3164
3165   /* If we bypassed any register setting insns, we inserted a
3166      copy on the redirected edge.  These need to be committed.  */
3167   if (changed)
3168     commit_edge_insertions ();
3169
3170   return changed;
3171 }
3172 \f
3173 /* Compute PRE+LCM working variables.  */
3174
3175 /* Local properties of expressions.  */
3176 /* Nonzero for expressions that are transparent in the block.  */
3177 static sbitmap *transp;
3178
3179 /* Nonzero for expressions that are transparent at the end of the block.
3180    This is only zero for expressions killed by abnormal critical edge
3181    created by a calls.  */
3182 static sbitmap *transpout;
3183
3184 /* Nonzero for expressions that are computed (available) in the block.  */
3185 static sbitmap *comp;
3186
3187 /* Nonzero for expressions that are locally anticipatable in the block.  */
3188 static sbitmap *antloc;
3189
3190 /* Nonzero for expressions where this block is an optimal computation
3191    point.  */
3192 static sbitmap *pre_optimal;
3193
3194 /* Nonzero for expressions which are redundant in a particular block.  */
3195 static sbitmap *pre_redundant;
3196
3197 /* Nonzero for expressions which should be inserted on a specific edge.  */
3198 static sbitmap *pre_insert_map;
3199
3200 /* Nonzero for expressions which should be deleted in a specific block.  */
3201 static sbitmap *pre_delete_map;
3202
3203 /* Contains the edge_list returned by pre_edge_lcm.  */
3204 static struct edge_list *edge_list;
3205
3206 /* Allocate vars used for PRE analysis.  */
3207
3208 static void
3209 alloc_pre_mem (int n_blocks, int n_exprs)
3210 {
3211   transp = sbitmap_vector_alloc (n_blocks, n_exprs);
3212   comp = sbitmap_vector_alloc (n_blocks, n_exprs);
3213   antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
3214
3215   pre_optimal = NULL;
3216   pre_redundant = NULL;
3217   pre_insert_map = NULL;
3218   pre_delete_map = NULL;
3219   ae_kill = sbitmap_vector_alloc (n_blocks, n_exprs);
3220
3221   /* pre_insert and pre_delete are allocated later.  */
3222 }
3223
3224 /* Free vars used for PRE analysis.  */
3225
3226 static void
3227 free_pre_mem (void)
3228 {
3229   sbitmap_vector_free (transp);
3230   sbitmap_vector_free (comp);
3231
3232   /* ANTLOC and AE_KILL are freed just after pre_lcm finishes.  */
3233
3234   if (pre_optimal)
3235     sbitmap_vector_free (pre_optimal);
3236   if (pre_redundant)
3237     sbitmap_vector_free (pre_redundant);
3238   if (pre_insert_map)
3239     sbitmap_vector_free (pre_insert_map);
3240   if (pre_delete_map)
3241     sbitmap_vector_free (pre_delete_map);
3242
3243   transp = comp = NULL;
3244   pre_optimal = pre_redundant = pre_insert_map = pre_delete_map = NULL;
3245 }
3246
3247 /* Top level routine to do the dataflow analysis needed by PRE.  */
3248
3249 static void
3250 compute_pre_data (void)
3251 {
3252   sbitmap trapping_expr;
3253   basic_block bb;
3254   unsigned int ui;
3255
3256   compute_local_properties (transp, comp, antloc, &expr_hash_table);
3257   sbitmap_vector_zero (ae_kill, last_basic_block);
3258
3259   /* Collect expressions which might trap.  */
3260   trapping_expr = sbitmap_alloc (expr_hash_table.n_elems);
3261   sbitmap_zero (trapping_expr);
3262   for (ui = 0; ui < expr_hash_table.size; ui++)
3263     {
3264       struct expr *e;
3265       for (e = expr_hash_table.table[ui]; e != NULL; e = e->next_same_hash)
3266         if (may_trap_p (e->expr))
3267           SET_BIT (trapping_expr, e->bitmap_index);
3268     }
3269
3270   /* Compute ae_kill for each basic block using:
3271
3272      ~(TRANSP | COMP)
3273   */
3274
3275   FOR_EACH_BB (bb)
3276     {
3277       edge e;
3278       edge_iterator ei;
3279
3280       /* If the current block is the destination of an abnormal edge, we
3281          kill all trapping expressions because we won't be able to properly
3282          place the instruction on the edge.  So make them neither
3283          anticipatable nor transparent.  This is fairly conservative.  */
3284       FOR_EACH_EDGE (e, ei, bb->preds)
3285         if (e->flags & EDGE_ABNORMAL)
3286           {
3287             sbitmap_difference (antloc[bb->index], antloc[bb->index], trapping_expr);
3288             sbitmap_difference (transp[bb->index], transp[bb->index], trapping_expr);
3289             break;
3290           }
3291
3292       sbitmap_a_or_b (ae_kill[bb->index], transp[bb->index], comp[bb->index]);
3293       sbitmap_not (ae_kill[bb->index], ae_kill[bb->index]);
3294     }
3295
3296   edge_list = pre_edge_lcm (expr_hash_table.n_elems, transp, comp, antloc,
3297                             ae_kill, &pre_insert_map, &pre_delete_map);
3298   sbitmap_vector_free (antloc);
3299   antloc = NULL;
3300   sbitmap_vector_free (ae_kill);
3301   ae_kill = NULL;
3302   sbitmap_free (trapping_expr);
3303 }
3304 \f
3305 /* PRE utilities */
3306
3307 /* Return nonzero if an occurrence of expression EXPR in OCCR_BB would reach
3308    block BB.
3309
3310    VISITED is a pointer to a working buffer for tracking which BB's have
3311    been visited.  It is NULL for the top-level call.
3312
3313    We treat reaching expressions that go through blocks containing the same
3314    reaching expression as "not reaching".  E.g. if EXPR is generated in blocks
3315    2 and 3, INSN is in block 4, and 2->3->4, we treat the expression in block
3316    2 as not reaching.  The intent is to improve the probability of finding
3317    only one reaching expression and to reduce register lifetimes by picking
3318    the closest such expression.  */
3319
3320 static int
3321 pre_expr_reaches_here_p_work (basic_block occr_bb, struct expr *expr, basic_block bb, char *visited)
3322 {
3323   edge pred;
3324   edge_iterator ei;
3325
3326   FOR_EACH_EDGE (pred, ei, bb->preds)
3327     {
3328       basic_block pred_bb = pred->src;
3329
3330       if (pred->src == ENTRY_BLOCK_PTR
3331           /* Has predecessor has already been visited?  */
3332           || visited[pred_bb->index])
3333         ;/* Nothing to do.  */
3334
3335       /* Does this predecessor generate this expression?  */
3336       else if (TEST_BIT (comp[pred_bb->index], expr->bitmap_index))
3337         {
3338           /* Is this the occurrence we're looking for?
3339              Note that there's only one generating occurrence per block
3340              so we just need to check the block number.  */
3341           if (occr_bb == pred_bb)
3342             return 1;
3343
3344           visited[pred_bb->index] = 1;
3345         }
3346       /* Ignore this predecessor if it kills the expression.  */
3347       else if (! TEST_BIT (transp[pred_bb->index], expr->bitmap_index))
3348         visited[pred_bb->index] = 1;
3349
3350       /* Neither gen nor kill.  */
3351       else
3352         {
3353           visited[pred_bb->index] = 1;
3354           if (pre_expr_reaches_here_p_work (occr_bb, expr, pred_bb, visited))
3355             return 1;
3356         }
3357     }
3358
3359   /* All paths have been checked.  */
3360   return 0;
3361 }
3362
3363 /* The wrapper for pre_expr_reaches_here_work that ensures that any
3364    memory allocated for that function is returned.  */
3365
3366 static int
3367 pre_expr_reaches_here_p (basic_block occr_bb, struct expr *expr, basic_block bb)
3368 {
3369   int rval;
3370   char *visited = XCNEWVEC (char, last_basic_block);
3371
3372   rval = pre_expr_reaches_here_p_work (occr_bb, expr, bb, visited);
3373
3374   free (visited);
3375   return rval;
3376 }
3377 \f
3378
3379 /* Given an expr, generate RTL which we can insert at the end of a BB,
3380    or on an edge.  Set the block number of any insns generated to
3381    the value of BB.  */
3382
3383 static rtx
3384 process_insert_insn (struct expr *expr)
3385 {
3386   rtx reg = expr->reaching_reg;
3387   rtx exp = copy_rtx (expr->expr);
3388   rtx pat;
3389
3390   start_sequence ();
3391
3392   /* If the expression is something that's an operand, like a constant,
3393      just copy it to a register.  */
3394   if (general_operand (exp, GET_MODE (reg)))
3395     emit_move_insn (reg, exp);
3396
3397   /* Otherwise, make a new insn to compute this expression and make sure the
3398      insn will be recognized (this also adds any needed CLOBBERs).  Copy the
3399      expression to make sure we don't have any sharing issues.  */
3400   else
3401     {
3402       rtx insn = emit_insn (gen_rtx_SET (VOIDmode, reg, exp));
3403
3404       if (insn_invalid_p (insn))
3405         gcc_unreachable ();
3406     }
3407
3408
3409   pat = get_insns ();
3410   end_sequence ();
3411
3412   return pat;
3413 }
3414
3415 /* Add EXPR to the end of basic block BB.
3416
3417    This is used by both the PRE and code hoisting.
3418
3419    For PRE, we want to verify that the expr is either transparent
3420    or locally anticipatable in the target block.  This check makes
3421    no sense for code hoisting.  */
3422
3423 static void
3424 insert_insn_end_basic_block (struct expr *expr, basic_block bb, int pre)
3425 {
3426   rtx insn = BB_END (bb);
3427   rtx new_insn;
3428   rtx reg = expr->reaching_reg;
3429   int regno = REGNO (reg);
3430   rtx pat, pat_end;
3431
3432   pat = process_insert_insn (expr);
3433   gcc_assert (pat && INSN_P (pat));
3434
3435   pat_end = pat;
3436   while (NEXT_INSN (pat_end) != NULL_RTX)
3437     pat_end = NEXT_INSN (pat_end);
3438
3439   /* If the last insn is a jump, insert EXPR in front [taking care to
3440      handle cc0, etc. properly].  Similarly we need to care trapping
3441      instructions in presence of non-call exceptions.  */
3442
3443   if (JUMP_P (insn)
3444       || (NONJUMP_INSN_P (insn)
3445           && (!single_succ_p (bb)
3446               || single_succ_edge (bb)->flags & EDGE_ABNORMAL)))
3447     {
3448 #ifdef HAVE_cc0
3449       rtx note;
3450 #endif
3451       /* It should always be the case that we can put these instructions
3452          anywhere in the basic block with performing PRE optimizations.
3453          Check this.  */
3454       gcc_assert (!NONJUMP_INSN_P (insn) || !pre
3455                   || TEST_BIT (antloc[bb->index], expr->bitmap_index)
3456                   || TEST_BIT (transp[bb->index], expr->bitmap_index));
3457
3458       /* If this is a jump table, then we can't insert stuff here.  Since
3459          we know the previous real insn must be the tablejump, we insert
3460          the new instruction just before the tablejump.  */
3461       if (GET_CODE (PATTERN (insn)) == ADDR_VEC
3462           || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
3463         insn = prev_real_insn (insn);
3464
3465 #ifdef HAVE_cc0
3466       /* FIXME: 'twould be nice to call prev_cc0_setter here but it aborts
3467          if cc0 isn't set.  */
3468       note = find_reg_note (insn, REG_CC_SETTER, NULL_RTX);
3469       if (note)
3470         insn = XEXP (note, 0);
3471       else
3472         {
3473           rtx maybe_cc0_setter = prev_nonnote_insn (insn);
3474           if (maybe_cc0_setter
3475               && INSN_P (maybe_cc0_setter)
3476               && sets_cc0_p (PATTERN (maybe_cc0_setter)))
3477             insn = maybe_cc0_setter;
3478         }
3479 #endif
3480       /* FIXME: What if something in cc0/jump uses value set in new insn?  */
3481       new_insn = emit_insn_before_noloc (pat, insn, bb);
3482     }
3483
3484   /* Likewise if the last insn is a call, as will happen in the presence
3485      of exception handling.  */
3486   else if (CALL_P (insn)
3487            && (!single_succ_p (bb)
3488                || single_succ_edge (bb)->flags & EDGE_ABNORMAL))
3489     {
3490       /* Keeping in mind SMALL_REGISTER_CLASSES and parameters in registers,
3491          we search backward and place the instructions before the first
3492          parameter is loaded.  Do this for everyone for consistency and a
3493          presumption that we'll get better code elsewhere as well.
3494
3495          It should always be the case that we can put these instructions
3496          anywhere in the basic block with performing PRE optimizations.
3497          Check this.  */
3498
3499       gcc_assert (!pre
3500                   || TEST_BIT (antloc[bb->index], expr->bitmap_index)
3501                   || TEST_BIT (transp[bb->index], expr->bitmap_index));
3502
3503       /* Since different machines initialize their parameter registers
3504          in different orders, assume nothing.  Collect the set of all
3505          parameter registers.  */
3506       insn = find_first_parameter_load (insn, BB_HEAD (bb));
3507
3508       /* If we found all the parameter loads, then we want to insert
3509          before the first parameter load.
3510
3511          If we did not find all the parameter loads, then we might have
3512          stopped on the head of the block, which could be a CODE_LABEL.
3513          If we inserted before the CODE_LABEL, then we would be putting
3514          the insn in the wrong basic block.  In that case, put the insn
3515          after the CODE_LABEL.  Also, respect NOTE_INSN_BASIC_BLOCK.  */
3516       while (LABEL_P (insn)
3517              || NOTE_INSN_BASIC_BLOCK_P (insn))
3518         insn = NEXT_INSN (insn);
3519
3520       new_insn = emit_insn_before_noloc (pat, insn, bb);
3521     }
3522   else
3523     new_insn = emit_insn_after_noloc (pat, insn, bb);
3524
3525   while (1)
3526     {
3527       if (INSN_P (pat))
3528         add_label_notes (PATTERN (pat), new_insn);
3529       if (pat == pat_end)
3530         break;
3531       pat = NEXT_INSN (pat);
3532     }
3533
3534   gcse_create_count++;
3535
3536   if (dump_file)
3537     {
3538       fprintf (dump_file, "PRE/HOIST: end of bb %d, insn %d, ",
3539                bb->index, INSN_UID (new_insn));
3540       fprintf (dump_file, "copying expression %d to reg %d\n",
3541                expr->bitmap_index, regno);
3542     }
3543 }
3544
3545 /* Insert partially redundant expressions on edges in the CFG to make
3546    the expressions fully redundant.  */
3547
3548 static int
3549 pre_edge_insert (struct edge_list *edge_list, struct expr **index_map)
3550 {
3551   int e, i, j, num_edges, set_size, did_insert = 0;
3552   sbitmap *inserted;
3553
3554   /* Where PRE_INSERT_MAP is nonzero, we add the expression on that edge
3555      if it reaches any of the deleted expressions.  */
3556
3557   set_size = pre_insert_map[0]->size;
3558   num_edges = NUM_EDGES (edge_list);
3559   inserted = sbitmap_vector_alloc (num_edges, expr_hash_table.n_elems);
3560   sbitmap_vector_zero (inserted, num_edges);
3561
3562   for (e = 0; e < num_edges; e++)
3563     {
3564       int indx;
3565       basic_block bb = INDEX_EDGE_PRED_BB (edge_list, e);
3566
3567       for (i = indx = 0; i < set_size; i++, indx += SBITMAP_ELT_BITS)
3568         {
3569           SBITMAP_ELT_TYPE insert = pre_insert_map[e]->elms[i];
3570
3571           for (j = indx; insert && j < (int) expr_hash_table.n_elems; j++, insert >>= 1)
3572             if ((insert & 1) != 0 && index_map[j]->reaching_reg != NULL_RTX)
3573               {
3574                 struct expr *expr = index_map[j];
3575                 struct occr *occr;
3576
3577                 /* Now look at each deleted occurrence of this expression.  */
3578                 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
3579                   {
3580                     if (! occr->deleted_p)
3581                       continue;
3582
3583                     /* Insert this expression on this edge if it would
3584                        reach the deleted occurrence in BB.  */
3585                     if (!TEST_BIT (inserted[e], j))
3586                       {
3587                         rtx insn;
3588                         edge eg = INDEX_EDGE (edge_list, e);
3589
3590                         /* We can't insert anything on an abnormal and
3591                            critical edge, so we insert the insn at the end of
3592                            the previous block. There are several alternatives
3593                            detailed in Morgans book P277 (sec 10.5) for
3594                            handling this situation.  This one is easiest for
3595                            now.  */
3596
3597                         if (eg->flags & EDGE_ABNORMAL)
3598                           insert_insn_end_basic_block (index_map[j], bb, 0);
3599                         else
3600                           {
3601                             insn = process_insert_insn (index_map[j]);
3602                             insert_insn_on_edge (insn, eg);
3603                           }
3604
3605                         if (dump_file)
3606                           {
3607                             fprintf (dump_file, "PRE: edge (%d,%d), ",
3608                                      bb->index,
3609                                      INDEX_EDGE_SUCC_BB (edge_list, e)->index);
3610                             fprintf (dump_file, "copy expression %d\n",
3611                                      expr->bitmap_index);
3612                           }
3613
3614                         update_ld_motion_stores (expr);
3615                         SET_BIT (inserted[e], j);
3616                         did_insert = 1;
3617                         gcse_create_count++;
3618                       }
3619                   }
3620               }
3621         }
3622     }
3623
3624   sbitmap_vector_free (inserted);
3625   return did_insert;
3626 }
3627
3628 /* Copy the result of EXPR->EXPR generated by INSN to EXPR->REACHING_REG.
3629    Given "old_reg <- expr" (INSN), instead of adding after it
3630      reaching_reg <- old_reg
3631    it's better to do the following:
3632      reaching_reg <- expr
3633      old_reg      <- reaching_reg
3634    because this way copy propagation can discover additional PRE
3635    opportunities.  But if this fails, we try the old way.
3636    When "expr" is a store, i.e.
3637    given "MEM <- old_reg", instead of adding after it
3638      reaching_reg <- old_reg
3639    it's better to add it before as follows:
3640      reaching_reg <- old_reg
3641      MEM          <- reaching_reg.  */
3642
3643 static void
3644 pre_insert_copy_insn (struct expr *expr, rtx insn)
3645 {
3646   rtx reg = expr->reaching_reg;
3647   int regno = REGNO (reg);
3648   int indx = expr->bitmap_index;
3649   rtx pat = PATTERN (insn);
3650   rtx set, first_set, new_insn;
3651   rtx old_reg;
3652   int i;
3653
3654   /* This block matches the logic in hash_scan_insn.  */
3655   switch (GET_CODE (pat))
3656     {
3657     case SET:
3658       set = pat;
3659       break;
3660
3661     case PARALLEL:
3662       /* Search through the parallel looking for the set whose
3663          source was the expression that we're interested in.  */
3664       first_set = NULL_RTX;
3665       set = NULL_RTX;
3666       for (i = 0; i < XVECLEN (pat, 0); i++)
3667         {
3668           rtx x = XVECEXP (pat, 0, i);
3669           if (GET_CODE (x) == SET)
3670             {
3671               /* If the source was a REG_EQUAL or REG_EQUIV note, we
3672                  may not find an equivalent expression, but in this
3673                  case the PARALLEL will have a single set.  */
3674               if (first_set == NULL_RTX)
3675                 first_set = x;
3676               if (expr_equiv_p (SET_SRC (x), expr->expr))
3677                 {
3678                   set = x;
3679                   break;
3680                 }
3681             }
3682         }
3683
3684       gcc_assert (first_set);
3685       if (set == NULL_RTX)
3686         set = first_set;
3687       break;
3688
3689     default:
3690       gcc_unreachable ();
3691     }
3692
3693   if (REG_P (SET_DEST (set)))
3694     {
3695       old_reg = SET_DEST (set);
3696       /* Check if we can modify the set destination in the original insn.  */
3697       if (validate_change (insn, &SET_DEST (set), reg, 0))
3698         {
3699           new_insn = gen_move_insn (old_reg, reg);
3700           new_insn = emit_insn_after (new_insn, insn);
3701         }
3702       else
3703         {
3704           new_insn = gen_move_insn (reg, old_reg);
3705           new_insn = emit_insn_after (new_insn, insn);
3706         }
3707     }
3708   else /* This is possible only in case of a store to memory.  */
3709     {
3710       old_reg = SET_SRC (set);
3711       new_insn = gen_move_insn (reg, old_reg);
3712
3713       /* Check if we can modify the set source in the original insn.  */
3714       if (validate_change (insn, &SET_SRC (set), reg, 0))
3715         new_insn = emit_insn_before (new_insn, insn);
3716       else
3717         new_insn = emit_insn_after (new_insn, insn);
3718     }
3719
3720   gcse_create_count++;
3721
3722   if (dump_file)
3723     fprintf (dump_file,
3724              "PRE: bb %d, insn %d, copy expression %d in insn %d to reg %d\n",
3725               BLOCK_NUM (insn), INSN_UID (new_insn), indx,
3726               INSN_UID (insn), regno);
3727 }
3728
3729 /* Copy available expressions that reach the redundant expression
3730    to `reaching_reg'.  */
3731
3732 static void
3733 pre_insert_copies (void)
3734 {
3735   unsigned int i, added_copy;
3736   struct expr *expr;
3737   struct occr *occr;
3738   struct occr *avail;
3739
3740   /* For each available expression in the table, copy the result to
3741      `reaching_reg' if the expression reaches a deleted one.
3742
3743      ??? The current algorithm is rather brute force.
3744      Need to do some profiling.  */
3745
3746   for (i = 0; i < expr_hash_table.size; i++)
3747     for (expr = expr_hash_table.table[i]; expr != NULL; expr = expr->next_same_hash)
3748       {
3749         /* If the basic block isn't reachable, PPOUT will be TRUE.  However,
3750            we don't want to insert a copy here because the expression may not
3751            really be redundant.  So only insert an insn if the expression was
3752            deleted.  This test also avoids further processing if the
3753            expression wasn't deleted anywhere.  */
3754         if (expr->reaching_reg == NULL)
3755           continue;
3756
3757         /* Set when we add a copy for that expression.  */
3758         added_copy = 0;
3759
3760         for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
3761           {
3762             if (! occr->deleted_p)
3763               continue;
3764
3765             for (avail = expr->avail_occr; avail != NULL; avail = avail->next)
3766               {
3767                 rtx insn = avail->insn;
3768
3769                 /* No need to handle this one if handled already.  */
3770                 if (avail->copied_p)
3771                   continue;
3772
3773                 /* Don't handle this one if it's a redundant one.  */
3774                 if (INSN_DELETED_P (insn))
3775                   continue;
3776
3777                 /* Or if the expression doesn't reach the deleted one.  */
3778                 if (! pre_expr_reaches_here_p (BLOCK_FOR_INSN (avail->insn),
3779                                                expr,
3780                                                BLOCK_FOR_INSN (occr->insn)))
3781                   continue;
3782
3783                 added_copy = 1;
3784
3785                 /* Copy the result of avail to reaching_reg.  */
3786                 pre_insert_copy_insn (expr, insn);
3787                 avail->copied_p = 1;
3788               }
3789           }
3790
3791           if (added_copy)
3792             update_ld_motion_stores (expr);
3793       }
3794 }
3795
3796 /* Emit move from SRC to DEST noting the equivalence with expression computed
3797    in INSN.  */
3798 static rtx
3799 gcse_emit_move_after (rtx src, rtx dest, rtx insn)
3800 {
3801   rtx new_rtx;
3802   rtx set = single_set (insn), set2;
3803   rtx note;
3804   rtx eqv;
3805
3806   /* This should never fail since we're creating a reg->reg copy
3807      we've verified to be valid.  */
3808
3809   new_rtx = emit_insn_after (gen_move_insn (dest, src), insn);
3810
3811   /* Note the equivalence for local CSE pass.  */
3812   set2 = single_set (new_rtx);
3813   if (!set2 || !rtx_equal_p (SET_DEST (set2), dest))
3814     return new_rtx;
3815   if ((note = find_reg_equal_equiv_note (insn)))
3816     eqv = XEXP (note, 0);
3817   else
3818     eqv = SET_SRC (set);
3819
3820   set_unique_reg_note (new_rtx, REG_EQUAL, copy_insn_1 (eqv));
3821
3822   return new_rtx;
3823 }
3824
3825 /* Delete redundant computations.
3826    Deletion is done by changing the insn to copy the `reaching_reg' of
3827    the expression into the result of the SET.  It is left to later passes
3828    (cprop, cse2, flow, combine, regmove) to propagate the copy or eliminate it.
3829
3830    Returns nonzero if a change is made.  */
3831
3832 static int
3833 pre_delete (void)
3834 {
3835   unsigned int i;
3836   int changed;
3837   struct expr *expr;
3838   struct occr *occr;
3839
3840   changed = 0;
3841   for (i = 0; i < expr_hash_table.size; i++)
3842     for (expr = expr_hash_table.table[i];
3843          expr != NULL;
3844          expr = expr->next_same_hash)
3845       {
3846         int indx = expr->bitmap_index;
3847
3848         /* We only need to search antic_occr since we require
3849            ANTLOC != 0.  */
3850
3851         for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
3852           {
3853             rtx insn = occr->insn;
3854             rtx set;
3855             basic_block bb = BLOCK_FOR_INSN (insn);
3856
3857             /* We only delete insns that have a single_set.  */
3858             if (TEST_BIT (pre_delete_map[bb->index], indx)
3859                 && (set = single_set (insn)) != 0
3860                 && dbg_cnt (pre_insn))
3861               {
3862                 /* Create a pseudo-reg to store the result of reaching
3863                    expressions into.  Get the mode for the new pseudo from
3864                    the mode of the original destination pseudo.  */
3865                 if (expr->reaching_reg == NULL)
3866                   expr->reaching_reg = gen_reg_rtx_and_attrs (SET_DEST (set));
3867
3868                 gcse_emit_move_after (expr->reaching_reg, SET_DEST (set), insn);
3869                 delete_insn (insn);
3870                 occr->deleted_p = 1;
3871                 changed = 1;
3872                 gcse_subst_count++;
3873
3874                 if (dump_file)
3875                   {
3876                     fprintf (dump_file,
3877                              "PRE: redundant insn %d (expression %d) in ",
3878                                INSN_UID (insn), indx);
3879                     fprintf (dump_file, "bb %d, reaching reg is %d\n",
3880                              bb->index, REGNO (expr->reaching_reg));
3881                   }
3882               }
3883           }
3884       }
3885
3886   return changed;
3887 }
3888
3889 /* Perform GCSE optimizations using PRE.
3890    This is called by one_pre_gcse_pass after all the dataflow analysis
3891    has been done.
3892
3893    This is based on the original Morel-Renvoise paper Fred Chow's thesis, and
3894    lazy code motion from Knoop, Ruthing and Steffen as described in Advanced
3895    Compiler Design and Implementation.
3896
3897    ??? A new pseudo reg is created to hold the reaching expression.  The nice
3898    thing about the classical approach is that it would try to use an existing
3899    reg.  If the register can't be adequately optimized [i.e. we introduce
3900    reload problems], one could add a pass here to propagate the new register
3901    through the block.
3902
3903    ??? We don't handle single sets in PARALLELs because we're [currently] not
3904    able to copy the rest of the parallel when we insert copies to create full
3905    redundancies from partial redundancies.  However, there's no reason why we
3906    can't handle PARALLELs in the cases where there are no partial
3907    redundancies.  */
3908
3909 static int
3910 pre_gcse (void)
3911 {
3912   unsigned int i;
3913   int did_insert, changed;
3914   struct expr **index_map;
3915   struct expr *expr;
3916
3917   /* Compute a mapping from expression number (`bitmap_index') to
3918      hash table entry.  */
3919
3920   index_map = XCNEWVEC (struct expr *, expr_hash_table.n_elems);
3921   for (i = 0; i < expr_hash_table.size; i++)
3922     for (expr = expr_hash_table.table[i]; expr != NULL; expr = expr->next_same_hash)
3923       index_map[expr->bitmap_index] = expr;
3924
3925   /* Delete the redundant insns first so that
3926      - we know what register to use for the new insns and for the other
3927        ones with reaching expressions
3928      - we know which insns are redundant when we go to create copies  */
3929
3930   changed = pre_delete ();
3931   did_insert = pre_edge_insert (edge_list, index_map);
3932
3933   /* In other places with reaching expressions, copy the expression to the
3934      specially allocated pseudo-reg that reaches the redundant expr.  */
3935   pre_insert_copies ();
3936   if (did_insert)
3937     {
3938       commit_edge_insertions ();
3939       changed = 1;
3940     }
3941
3942   free (index_map);
3943   return changed;
3944 }
3945
3946 /* Top level routine to perform one PRE GCSE pass.
3947
3948    Return nonzero if a change was made.  */
3949
3950 static int
3951 one_pre_gcse_pass (void)
3952 {
3953   int changed = 0;
3954
3955   gcse_subst_count = 0;
3956   gcse_create_count = 0;
3957
3958   /* Return if there's nothing to do, or it is too expensive.  */
3959   if (n_basic_blocks <= NUM_FIXED_BLOCKS + 1
3960       || is_too_expensive (_("PRE disabled")))
3961     return 0;
3962
3963   /* We need alias.  */
3964   init_alias_analysis ();
3965
3966   bytes_used = 0;
3967   gcc_obstack_init (&gcse_obstack);
3968   alloc_gcse_mem ();
3969
3970   alloc_hash_table (get_max_uid (), &expr_hash_table, 0);
3971   add_noreturn_fake_exit_edges ();
3972   if (flag_gcse_lm)
3973     compute_ld_motion_mems ();
3974
3975   compute_hash_table (&expr_hash_table);
3976   trim_ld_motion_mems ();
3977   if (dump_file)
3978     dump_hash_table (dump_file, "Expression", &expr_hash_table);
3979
3980   if (expr_hash_table.n_elems > 0)
3981     {
3982       alloc_pre_mem (last_basic_block, expr_hash_table.n_elems);
3983       compute_pre_data ();
3984       changed |= pre_gcse ();
3985       free_edge_list (edge_list);
3986       free_pre_mem ();
3987     }
3988
3989   free_ldst_mems ();
3990   remove_fake_exit_edges ();
3991   free_hash_table (&expr_hash_table);
3992
3993   free_gcse_mem ();
3994   obstack_free (&gcse_obstack, NULL);
3995
3996   /* We are finished with alias.  */
3997   end_alias_analysis ();
3998
3999   if (dump_file)
4000     {
4001       fprintf (dump_file, "PRE GCSE of %s, %d basic blocks, %d bytes needed, ",
4002                current_function_name (), n_basic_blocks, bytes_used);
4003       fprintf (dump_file, "%d substs, %d insns created\n",
4004                gcse_subst_count, gcse_create_count);
4005     }
4006
4007   return changed;
4008 }
4009 \f
4010 /* If X contains any LABEL_REF's, add REG_LABEL_OPERAND notes for them
4011    to INSN.  If such notes are added to an insn which references a
4012    CODE_LABEL, the LABEL_NUSES count is incremented.  We have to add
4013    that note, because the following loop optimization pass requires
4014    them.  */
4015
4016 /* ??? If there was a jump optimization pass after gcse and before loop,
4017    then we would not need to do this here, because jump would add the
4018    necessary REG_LABEL_OPERAND and REG_LABEL_TARGET notes.  */
4019
4020 static void
4021 add_label_notes (rtx x, rtx insn)
4022 {
4023   enum rtx_code code = GET_CODE (x);
4024   int i, j;
4025   const char *fmt;
4026
4027   if (code == LABEL_REF && !LABEL_REF_NONLOCAL_P (x))
4028     {
4029       /* This code used to ignore labels that referred to dispatch tables to
4030          avoid flow generating (slightly) worse code.
4031
4032          We no longer ignore such label references (see LABEL_REF handling in
4033          mark_jump_label for additional information).  */
4034
4035       /* There's no reason for current users to emit jump-insns with
4036          such a LABEL_REF, so we don't have to handle REG_LABEL_TARGET
4037          notes.  */
4038       gcc_assert (!JUMP_P (insn));
4039       add_reg_note (insn, REG_LABEL_OPERAND, XEXP (x, 0));
4040
4041       if (LABEL_P (XEXP (x, 0)))
4042         LABEL_NUSES (XEXP (x, 0))++;
4043
4044       return;
4045     }
4046
4047   for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
4048     {
4049       if (fmt[i] == 'e')
4050         add_label_notes (XEXP (x, i), insn);
4051       else if (fmt[i] == 'E')
4052         for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4053           add_label_notes (XVECEXP (x, i, j), insn);
4054     }
4055 }
4056
4057 /* Compute transparent outgoing information for each block.
4058
4059    An expression is transparent to an edge unless it is killed by
4060    the edge itself.  This can only happen with abnormal control flow,
4061    when the edge is traversed through a call.  This happens with
4062    non-local labels and exceptions.
4063
4064    This would not be necessary if we split the edge.  While this is
4065    normally impossible for abnormal critical edges, with some effort
4066    it should be possible with exception handling, since we still have
4067    control over which handler should be invoked.  But due to increased
4068    EH table sizes, this may not be worthwhile.  */
4069
4070 static void
4071 compute_transpout (void)
4072 {
4073   basic_block bb;
4074   unsigned int i;
4075   struct expr *expr;
4076
4077   sbitmap_vector_ones (transpout, last_basic_block);
4078
4079   FOR_EACH_BB (bb)
4080     {
4081       /* Note that flow inserted a nop at the end of basic blocks that
4082          end in call instructions for reasons other than abnormal
4083          control flow.  */
4084       if (! CALL_P (BB_END (bb)))
4085         continue;
4086
4087       for (i = 0; i < expr_hash_table.size; i++)
4088         for (expr = expr_hash_table.table[i]; expr ; expr = expr->next_same_hash)
4089           if (MEM_P (expr->expr))
4090             {
4091               if (GET_CODE (XEXP (expr->expr, 0)) == SYMBOL_REF
4092                   && CONSTANT_POOL_ADDRESS_P (XEXP (expr->expr, 0)))
4093                 continue;
4094
4095               /* ??? Optimally, we would use interprocedural alias
4096                  analysis to determine if this mem is actually killed
4097                  by this call.  */
4098               RESET_BIT (transpout[bb->index], expr->bitmap_index);
4099             }
4100     }
4101 }
4102
4103 /* Code Hoisting variables and subroutines.  */
4104
4105 /* Very busy expressions.  */
4106 static sbitmap *hoist_vbein;
4107 static sbitmap *hoist_vbeout;
4108
4109 /* Hoistable expressions.  */
4110 static sbitmap *hoist_exprs;
4111
4112 /* ??? We could compute post dominators and run this algorithm in
4113    reverse to perform tail merging, doing so would probably be
4114    more effective than the tail merging code in jump.c.
4115
4116    It's unclear if tail merging could be run in parallel with
4117    code hoisting.  It would be nice.  */
4118
4119 /* Allocate vars used for code hoisting analysis.  */
4120
4121 static void
4122 alloc_code_hoist_mem (int n_blocks, int n_exprs)
4123 {
4124   antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
4125   transp = sbitmap_vector_alloc (n_blocks, n_exprs);
4126   comp = sbitmap_vector_alloc (n_blocks, n_exprs);
4127
4128   hoist_vbein = sbitmap_vector_alloc (n_blocks, n_exprs);
4129   hoist_vbeout = sbitmap_vector_alloc (n_blocks, n_exprs);
4130   hoist_exprs = sbitmap_vector_alloc (n_blocks, n_exprs);
4131   transpout = sbitmap_vector_alloc (n_blocks, n_exprs);
4132 }
4133
4134 /* Free vars used for code hoisting analysis.  */
4135
4136 static void
4137 free_code_hoist_mem (void)
4138 {
4139   sbitmap_vector_free (antloc);
4140   sbitmap_vector_free (transp);
4141   sbitmap_vector_free (comp);
4142
4143   sbitmap_vector_free (hoist_vbein);
4144   sbitmap_vector_free (hoist_vbeout);
4145   sbitmap_vector_free (hoist_exprs);
4146   sbitmap_vector_free (transpout);
4147
4148   free_dominance_info (CDI_DOMINATORS);
4149 }
4150
4151 /* Compute the very busy expressions at entry/exit from each block.
4152
4153    An expression is very busy if all paths from a given point
4154    compute the expression.  */
4155
4156 static void
4157 compute_code_hoist_vbeinout (void)
4158 {
4159   int changed, passes;
4160   basic_block bb;
4161
4162   sbitmap_vector_zero (hoist_vbeout, last_basic_block);
4163   sbitmap_vector_zero (hoist_vbein, last_basic_block);
4164
4165   passes = 0;
4166   changed = 1;
4167
4168   while (changed)
4169     {
4170       changed = 0;
4171
4172       /* We scan the blocks in the reverse order to speed up
4173          the convergence.  */
4174       FOR_EACH_BB_REVERSE (bb)
4175         {
4176           if (bb->next_bb != EXIT_BLOCK_PTR)
4177             sbitmap_intersection_of_succs (hoist_vbeout[bb->index],
4178                                            hoist_vbein, bb->index);
4179
4180           changed |= sbitmap_a_or_b_and_c_cg (hoist_vbein[bb->index],
4181                                               antloc[bb->index],
4182                                               hoist_vbeout[bb->index],
4183                                               transp[bb->index]);
4184         }
4185
4186       passes++;
4187     }
4188
4189   if (dump_file)
4190     fprintf (dump_file, "hoisting vbeinout computation: %d passes\n", passes);
4191 }
4192
4193 /* Top level routine to do the dataflow analysis needed by code hoisting.  */
4194
4195 static void
4196 compute_code_hoist_data (void)
4197 {
4198   compute_local_properties (transp, comp, antloc, &expr_hash_table);
4199   compute_transpout ();
4200   compute_code_hoist_vbeinout ();
4201   calculate_dominance_info (CDI_DOMINATORS);
4202   if (dump_file)
4203     fprintf (dump_file, "\n");
4204 }
4205
4206 /* Determine if the expression identified by EXPR_INDEX would
4207    reach BB unimpared if it was placed at the end of EXPR_BB.
4208
4209    It's unclear exactly what Muchnick meant by "unimpared".  It seems
4210    to me that the expression must either be computed or transparent in
4211    *every* block in the path(s) from EXPR_BB to BB.  Any other definition
4212    would allow the expression to be hoisted out of loops, even if
4213    the expression wasn't a loop invariant.
4214
4215    Contrast this to reachability for PRE where an expression is
4216    considered reachable if *any* path reaches instead of *all*
4217    paths.  */
4218
4219 static int
4220 hoist_expr_reaches_here_p (basic_block expr_bb, int expr_index, basic_block bb, char *visited)
4221 {
4222   edge pred;
4223   edge_iterator ei;
4224   int visited_allocated_locally = 0;
4225
4226
4227   if (visited == NULL)
4228     {
4229       visited_allocated_locally = 1;
4230       visited = XCNEWVEC (char, last_basic_block);
4231     }
4232
4233   FOR_EACH_EDGE (pred, ei, bb->preds)
4234     {
4235       basic_block pred_bb = pred->src;
4236
4237       if (pred->src == ENTRY_BLOCK_PTR)
4238         break;
4239       else if (pred_bb == expr_bb)
4240         continue;
4241       else if (visited[pred_bb->index])
4242         continue;
4243
4244       /* Does this predecessor generate this expression?  */
4245       else if (TEST_BIT (comp[pred_bb->index], expr_index))
4246         break;
4247       else if (! TEST_BIT (transp[pred_bb->index], expr_index))
4248         break;
4249
4250       /* Not killed.  */
4251       else
4252         {
4253           visited[pred_bb->index] = 1;
4254           if (! hoist_expr_reaches_here_p (expr_bb, expr_index,
4255                                            pred_bb, visited))
4256             break;
4257         }
4258     }
4259   if (visited_allocated_locally)
4260     free (visited);
4261
4262   return (pred == NULL);
4263 }
4264 \f
4265 /* Actually perform code hoisting.  */
4266
4267 static int
4268 hoist_code (void)
4269 {
4270   basic_block bb, dominated;
4271   VEC (basic_block, heap) *domby;
4272   unsigned int i,j;
4273   struct expr **index_map;
4274   struct expr *expr;
4275   int changed = 0;
4276
4277   sbitmap_vector_zero (hoist_exprs, last_basic_block);
4278
4279   /* Compute a mapping from expression number (`bitmap_index') to
4280      hash table entry.  */
4281
4282   index_map = XCNEWVEC (struct expr *, expr_hash_table.n_elems);
4283   for (i = 0; i < expr_hash_table.size; i++)
4284     for (expr = expr_hash_table.table[i]; expr != NULL; expr = expr->next_same_hash)
4285       index_map[expr->bitmap_index] = expr;
4286
4287   /* Walk over each basic block looking for potentially hoistable
4288      expressions, nothing gets hoisted from the entry block.  */
4289   FOR_EACH_BB (bb)
4290     {
4291       int found = 0;
4292       int insn_inserted_p;
4293
4294       domby = get_dominated_by (CDI_DOMINATORS, bb);
4295       /* Examine each expression that is very busy at the exit of this
4296          block.  These are the potentially hoistable expressions.  */
4297       for (i = 0; i < hoist_vbeout[bb->index]->n_bits; i++)
4298         {
4299           int hoistable = 0;
4300
4301           if (TEST_BIT (hoist_vbeout[bb->index], i)
4302               && TEST_BIT (transpout[bb->index], i))
4303             {
4304               /* We've found a potentially hoistable expression, now
4305                  we look at every block BB dominates to see if it
4306                  computes the expression.  */
4307               for (j = 0; VEC_iterate (basic_block, domby, j, dominated); j++)
4308                 {
4309                   /* Ignore self dominance.  */
4310                   if (bb == dominated)
4311                     continue;
4312                   /* We've found a dominated block, now see if it computes
4313                      the busy expression and whether or not moving that
4314                      expression to the "beginning" of that block is safe.  */
4315                   if (!TEST_BIT (antloc[dominated->index], i))
4316                     continue;
4317
4318                   /* Note if the expression would reach the dominated block
4319                      unimpared if it was placed at the end of BB.
4320
4321                      Keep track of how many times this expression is hoistable
4322                      from a dominated block into BB.  */
4323                   if (hoist_expr_reaches_here_p (bb, i, dominated, NULL))
4324                     hoistable++;
4325                 }
4326
4327               /* If we found more than one hoistable occurrence of this
4328                  expression, then note it in the bitmap of expressions to
4329                  hoist.  It makes no sense to hoist things which are computed
4330                  in only one BB, and doing so tends to pessimize register
4331                  allocation.  One could increase this value to try harder
4332                  to avoid any possible code expansion due to register
4333                  allocation issues; however experiments have shown that
4334                  the vast majority of hoistable expressions are only movable
4335                  from two successors, so raising this threshold is likely
4336                  to nullify any benefit we get from code hoisting.  */
4337               if (hoistable > 1)
4338                 {
4339                   SET_BIT (hoist_exprs[bb->index], i);
4340                   found = 1;
4341                 }
4342             }
4343         }
4344       /* If we found nothing to hoist, then quit now.  */
4345       if (! found)
4346         {
4347           VEC_free (basic_block, heap, domby);
4348           continue;
4349         }
4350
4351       /* Loop over all the hoistable expressions.  */
4352       for (i = 0; i < hoist_exprs[bb->index]->n_bits; i++)
4353         {
4354           /* We want to insert the expression into BB only once, so
4355              note when we've inserted it.  */
4356           insn_inserted_p = 0;
4357
4358           /* These tests should be the same as the tests above.  */
4359           if (TEST_BIT (hoist_exprs[bb->index], i))
4360             {
4361               /* We've found a potentially hoistable expression, now
4362                  we look at every block BB dominates to see if it
4363                  computes the expression.  */
4364               for (j = 0; VEC_iterate (basic_block, domby, j, dominated); j++)
4365                 {
4366                   /* Ignore self dominance.  */
4367                   if (bb == dominated)
4368                     continue;
4369
4370                   /* We've found a dominated block, now see if it computes
4371                      the busy expression and whether or not moving that
4372                      expression to the "beginning" of that block is safe.  */
4373                   if (!TEST_BIT (antloc[dominated->index], i))
4374                     continue;
4375
4376                   /* The expression is computed in the dominated block and
4377                      it would be safe to compute it at the start of the
4378                      dominated block.  Now we have to determine if the
4379                      expression would reach the dominated block if it was
4380                      placed at the end of BB.  */
4381                   if (hoist_expr_reaches_here_p (bb, i, dominated, NULL))
4382                     {
4383                       struct expr *expr = index_map[i];
4384                       struct occr *occr = expr->antic_occr;
4385                       rtx insn;
4386                       rtx set;
4387
4388                       /* Find the right occurrence of this expression.  */
4389                       while (BLOCK_FOR_INSN (occr->insn) != dominated && occr)
4390                         occr = occr->next;
4391
4392                       gcc_assert (occr);
4393                       insn = occr->insn;
4394                       set = single_set (insn);
4395                       gcc_assert (set);
4396
4397                       /* Create a pseudo-reg to store the result of reaching
4398                          expressions into.  Get the mode for the new pseudo
4399                          from the mode of the original destination pseudo.  */
4400                       if (expr->reaching_reg == NULL)
4401                         expr->reaching_reg
4402                           = gen_reg_rtx_and_attrs (SET_DEST (set));
4403
4404                       gcse_emit_move_after (expr->reaching_reg, SET_DEST (set), insn);
4405                       delete_insn (insn);
4406                       occr->deleted_p = 1;
4407                       changed = 1;
4408                       gcse_subst_count++;
4409
4410                       if (!insn_inserted_p)
4411                         {
4412                           insert_insn_end_basic_block (index_map[i], bb, 0);
4413                           insn_inserted_p = 1;
4414                         }
4415                     }
4416                 }
4417             }
4418         }
4419       VEC_free (basic_block, heap, domby);
4420     }
4421
4422   free (index_map);
4423
4424   return changed;
4425 }
4426
4427 /* Top level routine to perform one code hoisting (aka unification) pass
4428
4429    Return nonzero if a change was made.  */
4430
4431 static int
4432 one_code_hoisting_pass (void)
4433 {
4434   int changed = 0;
4435
4436   gcse_subst_count = 0;
4437   gcse_create_count = 0;
4438
4439   /* Return if there's nothing to do, or it is too expensive.  */
4440   if (n_basic_blocks <= NUM_FIXED_BLOCKS + 1
4441       || is_too_expensive (_("GCSE disabled")))
4442     return 0;
4443
4444   /* We need alias.  */
4445   init_alias_analysis ();
4446
4447   bytes_used = 0;
4448   gcc_obstack_init (&gcse_obstack);
4449   alloc_gcse_mem ();
4450
4451   alloc_hash_table (get_max_uid (), &expr_hash_table, 0);
4452   compute_hash_table (&expr_hash_table);
4453   if (dump_file)
4454     dump_hash_table (dump_file, "Code Hosting Expressions", &expr_hash_table);
4455
4456   if (expr_hash_table.n_elems > 0)
4457     {
4458       alloc_code_hoist_mem (last_basic_block, expr_hash_table.n_elems);
4459       compute_code_hoist_data ();
4460       changed = hoist_code ();
4461       free_code_hoist_mem ();
4462     }
4463
4464   free_hash_table (&expr_hash_table);
4465   free_gcse_mem ();
4466   obstack_free (&gcse_obstack, NULL);
4467
4468   /* We are finished with alias.  */
4469   end_alias_analysis ();
4470
4471   if (dump_file)
4472     {
4473       fprintf (dump_file, "HOIST of %s, %d basic blocks, %d bytes needed, ",
4474                current_function_name (), n_basic_blocks, bytes_used);
4475       fprintf (dump_file, "%d substs, %d insns created\n",
4476                gcse_subst_count, gcse_create_count);
4477     }
4478
4479   return changed;
4480 }
4481 \f
4482 /*  Here we provide the things required to do store motion towards
4483     the exit. In order for this to be effective, gcse also needed to
4484     be taught how to move a load when it is kill only by a store to itself.
4485
4486             int i;
4487             float a[10];
4488
4489             void foo(float scale)
4490             {
4491               for (i=0; i<10; i++)
4492                 a[i] *= scale;
4493             }
4494
4495     'i' is both loaded and stored to in the loop. Normally, gcse cannot move
4496     the load out since its live around the loop, and stored at the bottom
4497     of the loop.
4498
4499       The 'Load Motion' referred to and implemented in this file is
4500     an enhancement to gcse which when using edge based lcm, recognizes
4501     this situation and allows gcse to move the load out of the loop.
4502
4503       Once gcse has hoisted the load, store motion can then push this
4504     load towards the exit, and we end up with no loads or stores of 'i'
4505     in the loop.  */
4506
4507 static hashval_t
4508 pre_ldst_expr_hash (const void *p)
4509 {
4510   int do_not_record_p = 0;
4511   const struct ls_expr *const x = (const struct ls_expr *) p;
4512   return hash_rtx (x->pattern, GET_MODE (x->pattern), &do_not_record_p, NULL, false);
4513 }
4514
4515 static int
4516 pre_ldst_expr_eq (const void *p1, const void *p2)
4517 {
4518   const struct ls_expr *const ptr1 = (const struct ls_expr *) p1,
4519     *const ptr2 = (const struct ls_expr *) p2;
4520   return expr_equiv_p (ptr1->pattern, ptr2->pattern);
4521 }
4522
4523 /* This will search the ldst list for a matching expression. If it
4524    doesn't find one, we create one and initialize it.  */
4525
4526 static struct ls_expr *
4527 ldst_entry (rtx x)
4528 {
4529   int do_not_record_p = 0;
4530   struct ls_expr * ptr;
4531   unsigned int hash;
4532   void **slot;
4533   struct ls_expr e;
4534
4535   hash = hash_rtx (x, GET_MODE (x), &do_not_record_p,
4536                    NULL,  /*have_reg_qty=*/false);
4537
4538   e.pattern = x;
4539   slot = htab_find_slot_with_hash (pre_ldst_table, &e, hash, INSERT);
4540   if (*slot)
4541     return (struct ls_expr *)*slot;
4542
4543   ptr = XNEW (struct ls_expr);
4544
4545   ptr->next         = pre_ldst_mems;
4546   ptr->expr         = NULL;
4547   ptr->pattern      = x;
4548   ptr->pattern_regs = NULL_RTX;
4549   ptr->loads        = NULL_RTX;
4550   ptr->stores       = NULL_RTX;
4551   ptr->reaching_reg = NULL_RTX;
4552   ptr->invalid      = 0;
4553   ptr->index        = 0;
4554   ptr->hash_index   = hash;
4555   pre_ldst_mems     = ptr;
4556   *slot = ptr;
4557
4558   return ptr;
4559 }
4560
4561 /* Free up an individual ldst entry.  */
4562
4563 static void
4564 free_ldst_entry (struct ls_expr * ptr)
4565 {
4566   free_INSN_LIST_list (& ptr->loads);
4567   free_INSN_LIST_list (& ptr->stores);
4568
4569   free (ptr);
4570 }
4571
4572 /* Free up all memory associated with the ldst list.  */
4573
4574 static void
4575 free_ldst_mems (void)
4576 {
4577   if (pre_ldst_table)
4578     htab_delete (pre_ldst_table);
4579   pre_ldst_table = NULL;
4580
4581   while (pre_ldst_mems)
4582     {
4583       struct ls_expr * tmp = pre_ldst_mems;
4584
4585       pre_ldst_mems = pre_ldst_mems->next;
4586
4587       free_ldst_entry (tmp);
4588     }
4589
4590   pre_ldst_mems = NULL;
4591 }
4592
4593 /* Dump debugging info about the ldst list.  */
4594
4595 static void
4596 print_ldst_list (FILE * file)
4597 {
4598   struct ls_expr * ptr;
4599
4600   fprintf (file, "LDST list: \n");
4601
4602   for (ptr = first_ls_expr (); ptr != NULL; ptr = next_ls_expr (ptr))
4603     {
4604       fprintf (file, "  Pattern (%3d): ", ptr->index);
4605
4606       print_rtl (file, ptr->pattern);
4607
4608       fprintf (file, "\n         Loads : ");
4609
4610       if (ptr->loads)
4611         print_rtl (file, ptr->loads);
4612       else
4613         fprintf (file, "(nil)");
4614
4615       fprintf (file, "\n        Stores : ");
4616
4617       if (ptr->stores)
4618         print_rtl (file, ptr->stores);
4619       else
4620         fprintf (file, "(nil)");
4621
4622       fprintf (file, "\n\n");
4623     }
4624
4625   fprintf (file, "\n");
4626 }
4627
4628 /* Returns 1 if X is in the list of ldst only expressions.  */
4629
4630 static struct ls_expr *
4631 find_rtx_in_ldst (rtx x)
4632 {
4633   struct ls_expr e;
4634   void **slot;
4635   if (!pre_ldst_table)
4636     return NULL;
4637   e.pattern = x;
4638   slot = htab_find_slot (pre_ldst_table, &e, NO_INSERT);
4639   if (!slot || ((struct ls_expr *)*slot)->invalid)
4640     return NULL;
4641   return (struct ls_expr *) *slot;
4642 }
4643
4644 /* Return first item in the list.  */
4645
4646 static inline struct ls_expr *
4647 first_ls_expr (void)
4648 {
4649   return pre_ldst_mems;
4650 }
4651
4652 /* Return the next item in the list after the specified one.  */
4653
4654 static inline struct ls_expr *
4655 next_ls_expr (struct ls_expr * ptr)
4656 {
4657   return ptr->next;
4658 }
4659 \f
4660 /* Load Motion for loads which only kill themselves.  */
4661
4662 /* Return true if x is a simple MEM operation, with no registers or
4663    side effects. These are the types of loads we consider for the
4664    ld_motion list, otherwise we let the usual aliasing take care of it.  */
4665
4666 static int
4667 simple_mem (const_rtx x)
4668 {
4669   if (! MEM_P (x))
4670     return 0;
4671
4672   if (MEM_VOLATILE_P (x))
4673     return 0;
4674
4675   if (GET_MODE (x) == BLKmode)
4676     return 0;
4677
4678   /* If we are handling exceptions, we must be careful with memory references
4679      that may trap. If we are not, the behavior is undefined, so we may just
4680      continue.  */
4681   if (flag_non_call_exceptions && may_trap_p (x))
4682     return 0;
4683
4684   if (side_effects_p (x))
4685     return 0;
4686
4687   /* Do not consider function arguments passed on stack.  */
4688   if (reg_mentioned_p (stack_pointer_rtx, x))
4689     return 0;
4690
4691   if (flag_float_store && FLOAT_MODE_P (GET_MODE (x)))
4692     return 0;
4693
4694   return 1;
4695 }
4696
4697 /* Make sure there isn't a buried reference in this pattern anywhere.
4698    If there is, invalidate the entry for it since we're not capable
4699    of fixing it up just yet.. We have to be sure we know about ALL
4700    loads since the aliasing code will allow all entries in the
4701    ld_motion list to not-alias itself.  If we miss a load, we will get
4702    the wrong value since gcse might common it and we won't know to
4703    fix it up.  */
4704
4705 static void
4706 invalidate_any_buried_refs (rtx x)
4707 {
4708   const char * fmt;
4709   int i, j;
4710   struct ls_expr * ptr;
4711
4712   /* Invalidate it in the list.  */
4713   if (MEM_P (x) && simple_mem (x))
4714     {
4715       ptr = ldst_entry (x);
4716       ptr->invalid = 1;
4717     }
4718
4719   /* Recursively process the insn.  */
4720   fmt = GET_RTX_FORMAT (GET_CODE (x));
4721
4722   for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
4723     {
4724       if (fmt[i] == 'e')
4725         invalidate_any_buried_refs (XEXP (x, i));
4726       else if (fmt[i] == 'E')
4727         for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4728           invalidate_any_buried_refs (XVECEXP (x, i, j));
4729     }
4730 }
4731
4732 /* Find all the 'simple' MEMs which are used in LOADs and STORES.  Simple
4733    being defined as MEM loads and stores to symbols, with no side effects
4734    and no registers in the expression.  For a MEM destination, we also
4735    check that the insn is still valid if we replace the destination with a
4736    REG, as is done in update_ld_motion_stores.  If there are any uses/defs
4737    which don't match this criteria, they are invalidated and trimmed out
4738    later.  */
4739
4740 static void
4741 compute_ld_motion_mems (void)
4742 {
4743   struct ls_expr * ptr;
4744   basic_block bb;
4745   rtx insn;
4746
4747   pre_ldst_mems = NULL;
4748   pre_ldst_table = htab_create (13, pre_ldst_expr_hash,
4749                                 pre_ldst_expr_eq, NULL);
4750
4751   FOR_EACH_BB (bb)
4752     {
4753       FOR_BB_INSNS (bb, insn)
4754         {
4755           if (INSN_P (insn))
4756             {
4757               if (GET_CODE (PATTERN (insn)) == SET)
4758                 {
4759                   rtx src = SET_SRC (PATTERN (insn));
4760                   rtx dest = SET_DEST (PATTERN (insn));
4761
4762                   /* Check for a simple LOAD...  */
4763                   if (MEM_P (src) && simple_mem (src))
4764                     {
4765                       ptr = ldst_entry (src);
4766                       if (REG_P (dest))
4767                         ptr->loads = alloc_INSN_LIST (insn, ptr->loads);
4768                       else
4769                         ptr->invalid = 1;
4770                     }
4771                   else
4772                     {
4773                       /* Make sure there isn't a buried load somewhere.  */
4774                       invalidate_any_buried_refs (src);
4775                     }
4776
4777                   /* Check for stores. Don't worry about aliased ones, they
4778                      will block any movement we might do later. We only care
4779                      about this exact pattern since those are the only
4780                      circumstance that we will ignore the aliasing info.  */
4781                   if (MEM_P (dest) && simple_mem (dest))
4782                     {
4783                       ptr = ldst_entry (dest);
4784
4785                       if (! MEM_P (src)
4786                           && GET_CODE (src) != ASM_OPERANDS
4787                           /* Check for REG manually since want_to_gcse_p
4788                              returns 0 for all REGs.  */
4789                           && can_assign_to_reg_without_clobbers_p (src))
4790                         ptr->stores = alloc_INSN_LIST (insn, ptr->stores);
4791                       else
4792                         ptr->invalid = 1;
4793                     }
4794                 }
4795               else
4796                 invalidate_any_buried_refs (PATTERN (insn));
4797             }
4798         }
4799     }
4800 }
4801
4802 /* Remove any references that have been either invalidated or are not in the
4803    expression list for pre gcse.  */
4804
4805 static void
4806 trim_ld_motion_mems (void)
4807 {
4808   struct ls_expr * * last = & pre_ldst_mems;
4809   struct ls_expr * ptr = pre_ldst_mems;
4810
4811   while (ptr != NULL)
4812     {
4813       struct expr * expr;
4814
4815       /* Delete if entry has been made invalid.  */
4816       if (! ptr->invalid)
4817         {
4818           /* Delete if we cannot find this mem in the expression list.  */
4819           unsigned int hash = ptr->hash_index % expr_hash_table.size;
4820
4821           for (expr = expr_hash_table.table[hash];
4822                expr != NULL;
4823                expr = expr->next_same_hash)
4824             if (expr_equiv_p (expr->expr, ptr->pattern))
4825               break;
4826         }
4827       else
4828         expr = (struct expr *) 0;
4829
4830       if (expr)
4831         {
4832           /* Set the expression field if we are keeping it.  */
4833           ptr->expr = expr;
4834           last = & ptr->next;
4835           ptr = ptr->next;
4836         }
4837       else
4838         {
4839           *last = ptr->next;
4840           htab_remove_elt_with_hash (pre_ldst_table, ptr, ptr->hash_index);
4841           free_ldst_entry (ptr);
4842           ptr = * last;
4843         }
4844     }
4845
4846   /* Show the world what we've found.  */
4847   if (dump_file && pre_ldst_mems != NULL)
4848     print_ldst_list (dump_file);
4849 }
4850
4851 /* This routine will take an expression which we are replacing with
4852    a reaching register, and update any stores that are needed if
4853    that expression is in the ld_motion list.  Stores are updated by
4854    copying their SRC to the reaching register, and then storing
4855    the reaching register into the store location. These keeps the
4856    correct value in the reaching register for the loads.  */
4857
4858 static void
4859 update_ld_motion_stores (struct expr * expr)
4860 {
4861   struct ls_expr * mem_ptr;
4862
4863   if ((mem_ptr = find_rtx_in_ldst (expr->expr)))
4864     {
4865       /* We can try to find just the REACHED stores, but is shouldn't
4866          matter to set the reaching reg everywhere...  some might be
4867          dead and should be eliminated later.  */
4868
4869       /* We replace (set mem expr) with (set reg expr) (set mem reg)
4870          where reg is the reaching reg used in the load.  We checked in
4871          compute_ld_motion_mems that we can replace (set mem expr) with
4872          (set reg expr) in that insn.  */
4873       rtx list = mem_ptr->stores;
4874
4875       for ( ; list != NULL_RTX; list = XEXP (list, 1))
4876         {
4877           rtx insn = XEXP (list, 0);
4878           rtx pat = PATTERN (insn);
4879           rtx src = SET_SRC (pat);
4880           rtx reg = expr->reaching_reg;
4881           rtx copy, new_rtx;
4882
4883           /* If we've already copied it, continue.  */
4884           if (expr->reaching_reg == src)
4885             continue;
4886
4887           if (dump_file)
4888             {
4889               fprintf (dump_file, "PRE:  store updated with reaching reg ");
4890               print_rtl (dump_file, expr->reaching_reg);
4891               fprintf (dump_file, ":\n  ");
4892               print_inline_rtx (dump_file, insn, 8);
4893               fprintf (dump_file, "\n");
4894             }
4895
4896           copy = gen_move_insn (reg, copy_rtx (SET_SRC (pat)));
4897           new_rtx = emit_insn_before (copy, insn);
4898           SET_SRC (pat) = reg;
4899           df_insn_rescan (insn);
4900
4901           /* un-recognize this pattern since it's probably different now.  */
4902           INSN_CODE (insn) = -1;
4903           gcse_create_count++;
4904         }
4905     }
4906 }
4907 \f
4908 /* Return true if the graph is too expensive to optimize. PASS is the
4909    optimization about to be performed.  */
4910
4911 static bool
4912 is_too_expensive (const char *pass)
4913 {
4914   /* Trying to perform global optimizations on flow graphs which have
4915      a high connectivity will take a long time and is unlikely to be
4916      particularly useful.
4917
4918      In normal circumstances a cfg should have about twice as many
4919      edges as blocks.  But we do not want to punish small functions
4920      which have a couple switch statements.  Rather than simply
4921      threshold the number of blocks, uses something with a more
4922      graceful degradation.  */
4923   if (n_edges > 20000 + n_basic_blocks * 4)
4924     {
4925       warning (OPT_Wdisabled_optimization,
4926                "%s: %d basic blocks and %d edges/basic block",
4927                pass, n_basic_blocks, n_edges / n_basic_blocks);
4928
4929       return true;
4930     }
4931
4932   /* If allocating memory for the cprop bitmap would take up too much
4933      storage it's better just to disable the optimization.  */
4934   if ((n_basic_blocks
4935        * SBITMAP_SET_SIZE (max_reg_num ())
4936        * sizeof (SBITMAP_ELT_TYPE)) > MAX_GCSE_MEMORY)
4937     {
4938       warning (OPT_Wdisabled_optimization,
4939                "%s: %d basic blocks and %d registers",
4940                pass, n_basic_blocks, max_reg_num ());
4941
4942       return true;
4943     }
4944
4945   return false;
4946 }
4947
4948 \f
4949 /* Main function for the CPROP pass.  */
4950
4951 static int
4952 one_cprop_pass (void)
4953 {
4954   int changed = 0;
4955
4956   /* Return if there's nothing to do, or it is too expensive.  */
4957   if (n_basic_blocks <= NUM_FIXED_BLOCKS + 1
4958       || is_too_expensive (_ ("const/copy propagation disabled")))
4959     return 0;
4960
4961   global_const_prop_count = local_const_prop_count = 0;
4962   global_copy_prop_count = local_copy_prop_count = 0;
4963
4964   bytes_used = 0;
4965   gcc_obstack_init (&gcse_obstack);
4966   alloc_gcse_mem ();
4967
4968   /* Do a local const/copy propagation pass first.  The global pass
4969      only handles global opportunities.
4970      If the local pass changes something, remove any unreachable blocks
4971      because the CPROP global dataflow analysis may get into infinite
4972      loops for CFGs with unreachable blocks.
4973
4974      FIXME: This local pass should not be necessary after CSE (but for
4975             some reason it still is).  It is also (proven) not necessary
4976             to run the local pass right after FWPWOP.
4977
4978      FIXME: The global analysis would not get into infinite loops if it
4979             would use the DF solver (via df_simple_dataflow) instead of
4980             the solver implemented in this file.  */
4981   if (local_cprop_pass ())
4982     {
4983       delete_unreachable_blocks ();
4984       df_analyze ();
4985     }
4986
4987   /* Determine implicit sets.  */
4988   implicit_sets = XCNEWVEC (rtx, last_basic_block);
4989   find_implicit_sets ();
4990
4991   alloc_hash_table (get_max_uid (), &set_hash_table, 1);
4992   compute_hash_table (&set_hash_table);
4993
4994   /* Free implicit_sets before peak usage.  */
4995   free (implicit_sets);
4996   implicit_sets = NULL;
4997
4998   if (dump_file)
4999     dump_hash_table (dump_file, "SET", &set_hash_table);
5000   if (set_hash_table.n_elems > 0)
5001     {
5002       basic_block bb;
5003       rtx insn;
5004
5005       alloc_cprop_mem (last_basic_block, set_hash_table.n_elems);
5006       compute_cprop_data ();
5007
5008       FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR->next_bb->next_bb, EXIT_BLOCK_PTR, next_bb)
5009         {
5010           /* Reset tables used to keep track of what's still valid [since
5011              the start of the block].  */
5012           reset_opr_set_tables ();
5013
5014           FOR_BB_INSNS (bb, insn)
5015             if (INSN_P (insn))
5016               {
5017                 changed |= cprop_insn (insn);
5018
5019                 /* Keep track of everything modified by this insn.  */
5020                 /* ??? Need to be careful w.r.t. mods done to INSN.
5021                        Don't call mark_oprs_set if we turned the
5022                        insn into a NOTE.  */
5023                 if (! NOTE_P (insn))
5024                   mark_oprs_set (insn);
5025               }
5026         }
5027
5028       changed |= bypass_conditional_jumps ();
5029       free_cprop_mem ();
5030     }
5031
5032   free_hash_table (&set_hash_table);
5033   free_gcse_mem ();
5034   obstack_free (&gcse_obstack, NULL);
5035
5036   if (dump_file)
5037     {
5038       fprintf (dump_file, "CPROP of %s, %d basic blocks, %d bytes needed, ",
5039                current_function_name (), n_basic_blocks, bytes_used);
5040       fprintf (dump_file, "%d local const props, %d local copy props, ",
5041                local_const_prop_count, local_copy_prop_count);
5042       fprintf (dump_file, "%d global const props, %d global copy props\n\n",
5043                global_const_prop_count, global_copy_prop_count);
5044     }
5045
5046   return changed;
5047 }
5048
5049 \f
5050 /* All the passes implemented in this file.  Each pass has its
5051    own gate and execute function, and at the end of the file a
5052    pass definition for passes.c.
5053
5054    We do not construct an accurate cfg in functions which call
5055    setjmp, so none of these passes runs if the function calls
5056    setjmp.
5057    FIXME: Should just handle setjmp via REG_SETJMP notes.  */
5058
5059 static bool
5060 gate_rtl_cprop (void)
5061 {
5062   return optimize > 0 && flag_gcse
5063     && !cfun->calls_setjmp
5064     && dbg_cnt (cprop);
5065 }
5066
5067 static unsigned int
5068 execute_rtl_cprop (void)
5069 {
5070   delete_unreachable_blocks ();
5071   df_note_add_problem ();
5072   df_set_flags (DF_LR_RUN_DCE);
5073   df_analyze ();
5074   flag_rerun_cse_after_global_opts |= one_cprop_pass ();
5075   return 0;
5076 }
5077
5078 static bool
5079 gate_rtl_pre (void)
5080 {
5081   return optimize > 0 && flag_gcse
5082     && !cfun->calls_setjmp
5083     && optimize_function_for_speed_p (cfun)
5084     && dbg_cnt (pre);
5085 }
5086
5087 static unsigned int
5088 execute_rtl_pre (void)
5089 {
5090   delete_unreachable_blocks ();
5091   df_note_add_problem ();
5092   df_analyze ();
5093   flag_rerun_cse_after_global_opts |= one_pre_gcse_pass ();
5094   return 0;
5095 }
5096
5097 static bool
5098 gate_rtl_hoist (void)
5099 {
5100   return optimize > 0 && flag_gcse
5101     && !cfun->calls_setjmp
5102     /* It does not make sense to run code hoisting unless we are optimizing
5103        for code size -- it rarely makes programs faster, and can make then
5104        bigger if we did PRE (when optimizing for space, we don't run PRE).  */
5105     && optimize_function_for_size_p (cfun)
5106     && dbg_cnt (hoist);
5107 }
5108
5109 static unsigned int
5110 execute_rtl_hoist (void)
5111 {
5112   delete_unreachable_blocks ();
5113   df_note_add_problem ();
5114   df_analyze ();
5115   flag_rerun_cse_after_global_opts |= one_code_hoisting_pass ();
5116   return 0;
5117 }
5118
5119 struct rtl_opt_pass pass_rtl_cprop =
5120 {
5121  {
5122   RTL_PASS,
5123   "cprop",                              /* name */
5124   gate_rtl_cprop,                       /* gate */
5125   execute_rtl_cprop,                    /* execute */
5126   NULL,                                 /* sub */
5127   NULL,                                 /* next */
5128   0,                                    /* static_pass_number */
5129   TV_CPROP,                             /* tv_id */
5130   PROP_cfglayout,                       /* properties_required */
5131   0,                                    /* properties_provided */
5132   0,                                    /* properties_destroyed */
5133   0,                                    /* todo_flags_start */
5134   TODO_df_finish | TODO_verify_rtl_sharing |
5135   TODO_dump_func |
5136   TODO_verify_flow | TODO_ggc_collect   /* todo_flags_finish */
5137  }
5138 };
5139
5140 struct rtl_opt_pass pass_rtl_pre =
5141 {
5142  {
5143   RTL_PASS,
5144   "pre",                                /* name */
5145   gate_rtl_pre,                         /* gate */
5146   execute_rtl_pre,                      /* execute */
5147   NULL,                                 /* sub */
5148   NULL,                                 /* next */
5149   0,                                    /* static_pass_number */
5150   TV_PRE,                               /* tv_id */
5151   PROP_cfglayout,                       /* properties_required */
5152   0,                                    /* properties_provided */
5153   0,                                    /* properties_destroyed */
5154   0,                                    /* todo_flags_start */
5155   TODO_df_finish | TODO_verify_rtl_sharing |
5156   TODO_dump_func |
5157   TODO_verify_flow | TODO_ggc_collect   /* todo_flags_finish */
5158  }
5159 };
5160
5161 struct rtl_opt_pass pass_rtl_hoist =
5162 {
5163  {
5164   RTL_PASS,
5165   "hoist",                              /* name */
5166   gate_rtl_hoist,                       /* gate */
5167   execute_rtl_hoist,                    /* execute */
5168   NULL,                                 /* sub */
5169   NULL,                                 /* next */
5170   0,                                    /* static_pass_number */
5171   TV_HOIST,                             /* tv_id */
5172   PROP_cfglayout,                       /* properties_required */
5173   0,                                    /* properties_provided */
5174   0,                                    /* properties_destroyed */
5175   0,                                    /* todo_flags_start */
5176   TODO_df_finish | TODO_verify_rtl_sharing |
5177   TODO_dump_func |
5178   TODO_verify_flow | TODO_ggc_collect   /* todo_flags_finish */
5179  }
5180 };
5181
5182 #include "gt-gcse.h"