gcc/gcse.c

   1 /* Global common subexpression elimination/Partial redundancy elimination
   2    and global constant/copy propagation for GNU compiler.
   3    Copyright (C) 1997, 1998, 1999, 2000, 2001 Free Software Foundation, Inc.
   4
   5 This file is part of GNU CC.
   6
   7 GNU CC is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 2, or (at your option)
  10 any later version.
  11
  12 GNU CC is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GNU CC; see the file COPYING.  If not, write to
  19 the Free Software Foundation, 59 Temple Place - Suite 330,
  20 Boston, MA 02111-1307, USA.  */
  21
  22 /* TODO
  23    - reordering of memory allocation and freeing to be more space efficient
  24    - do rough calc of how many regs are needed in each block, and a rough
  25      calc of how many regs are available in each class and use that to
  26      throttle back the code in cases where RTX_COST is minimal.
  27    - dead store elimination
  28    - a store to the same address as a load does not kill the load if the
  29      source of the store is also the destination of the load.  Handling this
  30      allows more load motion, particularly out of loops.
  31    - ability to realloc sbitmap vectors would allow one initial computation
  32      of reg_set_in_block with only subsequent additions, rather than
  33      recomputing it for each pass
  34
  35 */
  36
  37 /* References searched while implementing this.
  38
  39    Compilers Principles, Techniques and Tools
  40    Aho, Sethi, Ullman
  41    Addison-Wesley, 1988
  42
  43    Global Optimization by Suppression of Partial Redundancies
  44    E. Morel, C. Renvoise
  45    communications of the acm, Vol. 22, Num. 2, Feb. 1979
  46
  47    A Portable Machine-Independent Global Optimizer - Design and Measurements
  48    Frederick Chow
  49    Stanford Ph.D. thesis, Dec. 1983
  50
  51    A Fast Algorithm for Code Movement Optimization
  52    D.M. Dhamdhere
  53    SIGPLAN Notices, Vol. 23, Num. 10, Oct. 1988
  54
  55    A Solution to a Problem with Morel and Renvoise's
  56    Global Optimization by Suppression of Partial Redundancies
  57    K-H Drechsler, M.P. Stadel
  58    ACM TOPLAS, Vol. 10, Num. 4, Oct. 1988
  59
  60    Practical Adaptation of the Global Optimization
  61    Algorithm of Morel and Renvoise
  62    D.M. Dhamdhere
  63    ACM TOPLAS, Vol. 13, Num. 2. Apr. 1991
  64
  65    Efficiently Computing Static Single Assignment Form and the Control
  66    Dependence Graph
  67    R. Cytron, J. Ferrante, B.K. Rosen, M.N. Wegman, and F.K. Zadeck
  68    ACM TOPLAS, Vol. 13, Num. 4, Oct. 1991
  69
  70    Lazy Code Motion
  71    J. Knoop, O. Ruthing, B. Steffen
  72    ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI
  73
  74    What's In a Region?  Or Computing Control Dependence Regions in Near-Linear
  75    Time for Reducible Flow Control
  76    Thomas Ball
  77    ACM Letters on Programming Languages and Systems,
  78    Vol. 2, Num. 1-4, Mar-Dec 1993
  79
  80    An Efficient Representation for Sparse Sets
  81    Preston Briggs, Linda Torczon
  82    ACM Letters on Programming Languages and Systems,
  83    Vol. 2, Num. 1-4, Mar-Dec 1993
  84
  85    A Variation of Knoop, Ruthing, and Steffen's Lazy Code Motion
  86    K-H Drechsler, M.P. Stadel
  87    ACM SIGPLAN Notices, Vol. 28, Num. 5, May 1993
  88
  89    Partial Dead Code Elimination
  90    J. Knoop, O. Ruthing, B. Steffen
  91    ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
  92
  93    Effective Partial Redundancy Elimination
  94    P. Briggs, K.D. Cooper
  95    ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
  96
  97    The Program Structure Tree: Computing Control Regions in Linear Time
  98    R. Johnson, D. Pearson, K. Pingali
  99    ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
 100
 101    Optimal Code Motion: Theory and Practice
 102    J. Knoop, O. Ruthing, B. Steffen
 103    ACM TOPLAS, Vol. 16, Num. 4, Jul. 1994
 104
 105    The power of assignment motion
 106    J. Knoop, O. Ruthing, B. Steffen
 107    ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI
 108
 109    Global code motion / global value numbering
 110    C. Click
 111    ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI
 112
 113    Value Driven Redundancy Elimination
 114    L.T. Simpson
 115    Rice University Ph.D. thesis, Apr. 1996
 116
 117    Value Numbering
 118    L.T. Simpson
 119    Massively Scalar Compiler Project, Rice University, Sep. 1996
 120
 121    High Performance Compilers for Parallel Computing
 122    Michael Wolfe
 123    Addison-Wesley, 1996
 124
 125    Advanced Compiler Design and Implementation
 126    Steven Muchnick
 127    Morgan Kaufmann, 1997
 128
 129    Building an Optimizing Compiler
 130    Robert Morgan
 131    Digital Press, 1998
 132
 133    People wishing to speed up the code here should read:
 134      Elimination Algorithms for Data Flow Analysis
 135      B.G. Ryder, M.C. Paull
 136      ACM Computing Surveys, Vol. 18, Num. 3, Sep. 1986
 137
 138      How to Analyze Large Programs Efficiently and Informatively
 139      D.M. Dhamdhere, B.K. Rosen, F.K. Zadeck
 140      ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI
 141
 142    People wishing to do something different can find various possibilities
 143    in the above papers and elsewhere.
 144 */
 145
 146 #include "config.h"
 147 #include "system.h"
 148 #include "toplev.h"
 149
 150 #include "rtl.h"
 151 #include "tm_p.h"
 152 #include "regs.h"
 153 #include "hard-reg-set.h"
 154 #include "flags.h"
 155 #include "real.h"
 156 #include "insn-config.h"
 157 #include "recog.h"
 158 #include "basic-block.h"
 159 #include "output.h"
 160 #include "function.h"
 161 #include "expr.h"
 162 #include "ggc.h"
 163
 164 #include "obstack.h"
 165 #define obstack_chunk_alloc gmalloc
 166 #define obstack_chunk_free free
 167
 168 /* Maximum number of passes to perform.  */
 169 #define MAX_PASSES 1
 170
 171 /* Propagate flow information through back edges and thus enable PRE's
 172    moving loop invariant calculations out of loops.
 173
 174    Originally this tended to create worse overall code, but several
 175    improvements during the development of PRE seem to have made following
 176    back edges generally a win.
 177
 178    Note much of the loop invariant code motion done here would normally
 179    be done by loop.c, which has more heuristics for when to move invariants
 180    out of loops.  At some point we might need to move some of those
 181    heuristics into gcse.c.  */
 182 #define FOLLOW_BACK_EDGES 1
 183
 184 /* We support GCSE via Partial Redundancy Elimination.  PRE optimizations
 185    are a superset of those done by GCSE.
 186
 187    We perform the following steps:
 188
 189    1) Compute basic block information.
 190
 191    2) Compute table of places where registers are set.
 192
 193    3) Perform copy/constant propagation.
 194
 195    4) Perform global cse.
 196
 197    5) Perform another pass of copy/constant propagation.
 198
 199    Two passes of copy/constant propagation are done because the first one
 200    enables more GCSE and the second one helps to clean up the copies that
 201    GCSE creates.  This is needed more for PRE than for Classic because Classic
 202    GCSE will try to use an existing register containing the common
 203    subexpression rather than create a new one.  This is harder to do for PRE
 204    because of the code motion (which Classic GCSE doesn't do).
 205
 206    Expressions we are interested in GCSE-ing are of the form
 207    (set (pseudo-reg) (expression)).
 208    Function want_to_gcse_p says what these are.
 209
 210    PRE handles moving invariant expressions out of loops (by treating them as
 211    partially redundant).
 212
 213    Eventually it would be nice to replace cse.c/gcse.c with SSA (static single
 214    assignment) based GVN (global value numbering).  L. T. Simpson's paper
 215    (Rice University) on value numbering is a useful reference for this.
 216
 217    **********************
 218
 219    We used to support multiple passes but there are diminishing returns in
 220    doing so.  The first pass usually makes 90% of the changes that are doable.
 221    A second pass can make a few more changes made possible by the first pass.
 222    Experiments show any further passes don't make enough changes to justify
 223    the expense.
 224
 225    A study of spec92 using an unlimited number of passes:
 226    [1 pass] = 1208 substitutions, [2] = 577, [3] = 202, [4] = 192, [5] = 83,
 227    [6] = 34, [7] = 17, [8] = 9, [9] = 4, [10] = 4, [11] = 2,
 228    [12] = 2, [13] = 1, [15] = 1, [16] = 2, [41] = 1
 229
 230    It was found doing copy propagation between each pass enables further
 231    substitutions.
 232
 233    PRE is quite expensive in complicated functions because the DFA can take
 234    awhile to converge.  Hence we only perform one pass.  Macro MAX_PASSES can
 235    be modified if one wants to experiment.
 236
 237    **********************
 238
 239    The steps for PRE are:
 240
 241    1) Build the hash table of expressions we wish to GCSE (expr_hash_table).
 242
 243    2) Perform the data flow analysis for PRE.
 244
 245    3) Delete the redundant instructions
 246
 247    4) Insert the required copies [if any] that make the partially
 248       redundant instructions fully redundant.
 249
 250    5) For other reaching expressions, insert an instruction to copy the value
 251       to a newly created pseudo that will reach the redundant instruction.
 252
 253    The deletion is done first so that when we do insertions we
 254    know which pseudo reg to use.
 255
 256    Various papers have argued that PRE DFA is expensive (O(n^2)) and others
 257    argue it is not.  The number of iterations for the algorithm to converge
 258    is typically 2-4 so I don't view it as that expensive (relatively speaking).
 259
 260    PRE GCSE depends heavily on the second CSE pass to clean up the copies
 261    we create.  To make an expression reach the place where it's redundant,
 262    the result of the expression is copied to a new register, and the redundant
 263    expression is deleted by replacing it with this new register.  Classic GCSE
 264    doesn't have this problem as much as it computes the reaching defs of
 265    each register in each block and thus can try to use an existing register.
 266
 267    **********************
 268
 269    A fair bit of simplicity is created by creating small functions for simple
 270    tasks, even when the function is only called in one place.  This may
 271    measurably slow things down [or may not] by creating more function call
 272    overhead than is necessary.  The source is laid out so that it's trivial
 273    to make the affected functions inline so that one can measure what speed
 274    up, if any, can be achieved, and maybe later when things settle things can
 275    be rearranged.
 276
 277    Help stamp out big monolithic functions!  */
 278 \f
 279 /* GCSE global vars.  */
 280
 281 /* -dG dump file.  */
 282 static FILE *gcse_file;
 283
 284 /* Note whether or not we should run jump optimization after gcse.  We
 285    want to do this for two cases.
 286
 287     * If we changed any jumps via cprop.
 288
 289     * If we added any labels via edge splitting.  */
 290
 291 static int run_jump_opt_after_gcse;
 292
 293 /* Bitmaps are normally not included in debugging dumps.
 294    However it's useful to be able to print them from GDB.
 295    We could create special functions for this, but it's simpler to
 296    just allow passing stderr to the dump_foo fns.  Since stderr can
 297    be a macro, we store a copy here.  */
 298 static FILE *debug_stderr;
 299
 300 /* An obstack for our working variables.  */
 301 static struct obstack gcse_obstack;
 302
 303 /* Non-zero for each mode that supports (set (reg) (reg)).
 304    This is trivially true for integer and floating point values.
 305    It may or may not be true for condition codes.  */
 306 static char can_copy_p[(int) NUM_MACHINE_MODES];
 307
 308 /* Non-zero if can_copy_p has been initialized.  */
 309 static int can_copy_init_p;
 310
 311 struct reg_use {rtx reg_rtx; };
 312
 313 /* Hash table of expressions.  */
 314
 315 struct expr
 316 {
 317   /* The expression (SET_SRC for expressions, PATTERN for assignments).  */
 318   rtx expr;
 319   /* Index in the available expression bitmaps.  */
 320   int bitmap_index;
 321   /* Next entry with the same hash.  */
 322   struct expr *next_same_hash;
 323   /* List of anticipatable occurrences in basic blocks in the function.
 324      An "anticipatable occurrence" is one that is the first occurrence in the
 325      basic block, the operands are not modified in the basic block prior
 326      to the occurrence and the output is not used between the start of
 327      the block and the occurrence.  */
 328   struct occr *antic_occr;
 329   /* List of available occurrence in basic blocks in the function.
 330      An "available occurrence" is one that is the last occurrence in the
 331      basic block and the operands are not modified by following statements in
 332      the basic block [including this insn].  */
 333   struct occr *avail_occr;
 334   /* Non-null if the computation is PRE redundant.
 335      The value is the newly created pseudo-reg to record a copy of the
 336      expression in all the places that reach the redundant copy.  */
 337   rtx reaching_reg;
 338 };
 339
 340 /* Occurrence of an expression.
 341    There is one per basic block.  If a pattern appears more than once the
 342    last appearance is used [or first for anticipatable expressions].  */
 343
 344 struct occr
 345 {
 346   /* Next occurrence of this expression.  */
 347   struct occr *next;
 348   /* The insn that computes the expression.  */
 349   rtx insn;
 350   /* Non-zero if this [anticipatable] occurrence has been deleted.  */
 351   char deleted_p;
 352   /* Non-zero if this [available] occurrence has been copied to
 353      reaching_reg.  */
 354   /* ??? This is mutually exclusive with deleted_p, so they could share
 355      the same byte.  */
 356   char copied_p;
 357 };
 358
 359 /* Expression and copy propagation hash tables.
 360    Each hash table is an array of buckets.
 361    ??? It is known that if it were an array of entries, structure elements
 362    `next_same_hash' and `bitmap_index' wouldn't be necessary.  However, it is
 363    not clear whether in the final analysis a sufficient amount of memory would
 364    be saved as the size of the available expression bitmaps would be larger
 365    [one could build a mapping table without holes afterwards though].
 366    Someday I'll perform the computation and figure it out.  */
 367
 368 /* Total size of the expression hash table, in elements.  */
 369 static unsigned int expr_hash_table_size;
 370
 371 /* The table itself.
 372    This is an array of `expr_hash_table_size' elements.  */
 373 static struct expr **expr_hash_table;
 374
 375 /* Total size of the copy propagation hash table, in elements.  */
 376 static unsigned int set_hash_table_size;
 377
 378 /* The table itself.
 379    This is an array of `set_hash_table_size' elements.  */
 380 static struct expr **set_hash_table;
 381
 382 /* Mapping of uids to cuids.
 383    Only real insns get cuids.  */
 384 static int *uid_cuid;
 385
 386 /* Highest UID in UID_CUID.  */
 387 static int max_uid;
 388
 389 /* Get the cuid of an insn.  */
 390 #ifdef ENABLE_CHECKING
 391 #define INSN_CUID(INSN) (INSN_UID (INSN) > max_uid ? (abort (), 0) : uid_cuid[INSN_UID (INSN)])
 392 #else
 393 #define INSN_CUID(INSN) (uid_cuid[INSN_UID (INSN)])
 394 #endif
 395
 396 /* Number of cuids.  */
 397 static int max_cuid;
 398
 399 /* Mapping of cuids to insns.  */
 400 static rtx *cuid_insn;
 401
 402 /* Get insn from cuid.  */
 403 #define CUID_INSN(CUID) (cuid_insn[CUID])
 404
 405 /* Maximum register number in function prior to doing gcse + 1.
 406    Registers created during this pass have regno >= max_gcse_regno.
 407    This is named with "gcse" to not collide with global of same name.  */
 408 static unsigned int max_gcse_regno;
 409
 410 /* Maximum number of cse-able expressions found.  */
 411 static int n_exprs;
 412
 413 /* Maximum number of assignments for copy propagation found.  */
 414 static int n_sets;
 415
 416 /* Table of registers that are modified.
 417
 418    For each register, each element is a list of places where the pseudo-reg
 419    is set.
 420
 421    For simplicity, GCSE is done on sets of pseudo-regs only.  PRE GCSE only
 422    requires knowledge of which blocks kill which regs [and thus could use
 423    a bitmap instead of the lists `reg_set_table' uses].
 424
 425    `reg_set_table' and could be turned into an array of bitmaps (num-bbs x
 426    num-regs) [however perhaps it may be useful to keep the data as is].  One
 427    advantage of recording things this way is that `reg_set_table' is fairly
 428    sparse with respect to pseudo regs but for hard regs could be fairly dense
 429    [relatively speaking].  And recording sets of pseudo-regs in lists speeds
 430    up functions like compute_transp since in the case of pseudo-regs we only
 431    need to iterate over the number of times a pseudo-reg is set, not over the
 432    number of basic blocks [clearly there is a bit of a slow down in the cases
 433    where a pseudo is set more than once in a block, however it is believed
 434    that the net effect is to speed things up].  This isn't done for hard-regs
 435    because recording call-clobbered hard-regs in `reg_set_table' at each
 436    function call can consume a fair bit of memory, and iterating over
 437    hard-regs stored this way in compute_transp will be more expensive.  */
 438
 439 typedef struct reg_set
 440 {
 441   /* The next setting of this register.  */
 442   struct reg_set *next;
 443   /* The insn where it was set.  */
 444   rtx insn;
 445 } reg_set;
 446
 447 static reg_set **reg_set_table;
 448
 449 /* Size of `reg_set_table'.
 450    The table starts out at max_gcse_regno + slop, and is enlarged as
 451    necessary.  */
 452 static int reg_set_table_size;
 453
 454 /* Amount to grow `reg_set_table' by when it's full.  */
 455 #define REG_SET_TABLE_SLOP 100
 456
 457 /* Bitmap containing one bit for each register in the program.
 458    Used when performing GCSE to track which registers have been set since
 459    the start of the basic block.  */
 460 static sbitmap reg_set_bitmap;
 461
 462 /* For each block, a bitmap of registers set in the block.
 463    This is used by expr_killed_p and compute_transp.
 464    It is computed during hash table computation and not by compute_sets
 465    as it includes registers added since the last pass (or between cprop and
 466    gcse) and it's currently not easy to realloc sbitmap vectors.  */
 467 static sbitmap *reg_set_in_block;
 468
 469 /* For each block, non-zero if memory is set in that block.
 470    This is computed during hash table computation and is used by
 471    expr_killed_p and compute_transp.
 472    ??? Handling of memory is very simple, we don't make any attempt
 473    to optimize things (later).
 474    ??? This can be computed by compute_sets since the information
 475    doesn't change.  */
 476 static char *mem_set_in_block;
 477
 478 /* Various variables for statistics gathering.  */
 479
 480 /* Memory used in a pass.
 481    This isn't intended to be absolutely precise.  Its intent is only
 482    to keep an eye on memory usage.  */
 483 static int bytes_used;
 484
 485 /* GCSE substitutions made.  */
 486 static int gcse_subst_count;
 487 /* Number of copy instructions created.  */
 488 static int gcse_create_count;
 489 /* Number of constants propagated.  */
 490 static int const_prop_count;
 491 /* Number of copys propagated.  */
 492 static int copy_prop_count;
 493 \f
 494 /* These variables are used by classic GCSE.
 495    Normally they'd be defined a bit later, but `rd_gen' needs to
 496    be declared sooner.  */
 497
 498 /* Each block has a bitmap of each type.
 499    The length of each blocks bitmap is:
 500
 501        max_cuid  - for reaching definitions
 502        n_exprs - for available expressions
 503
 504    Thus we view the bitmaps as 2 dimensional arrays.  i.e.
 505    rd_kill[block_num][cuid_num]
 506    ae_kill[block_num][expr_num]                  */
 507
 508 /* For reaching defs */
 509 static sbitmap *rd_kill, *rd_gen, *reaching_defs, *rd_out;
 510
 511 /* for available exprs */
 512 static sbitmap *ae_kill, *ae_gen, *ae_in, *ae_out;
 513
 514 /* Objects of this type are passed around by the null-pointer check
 515    removal routines.  */
 516 struct null_pointer_info
 517 {
 518   /* The basic block being processed.  */
 519   int current_block;
 520   /* The first register to be handled in this pass.  */
 521   unsigned int min_reg;
 522   /* One greater than the last register to be handled in this pass.  */
 523   unsigned int max_reg;
 524   sbitmap *nonnull_local;
 525   sbitmap *nonnull_killed;
 526 };
 527 \f
 528 static void compute_can_copy    PARAMS ((void));
 529 static char *gmalloc            PARAMS ((unsigned int));
 530 static char *grealloc           PARAMS ((char *, unsigned int));
 531 static char *gcse_alloc         PARAMS ((unsigned long));
 532 static void alloc_gcse_mem      PARAMS ((rtx));
 533 static void free_gcse_mem       PARAMS ((void));
 534 static void alloc_reg_set_mem   PARAMS ((int));
 535 static void free_reg_set_mem    PARAMS ((void));
 536 static int get_bitmap_width     PARAMS ((int, int, int));
 537 static void record_one_set      PARAMS ((int, rtx));
 538 static void record_set_info     PARAMS ((rtx, rtx, void *));
 539 static void compute_sets        PARAMS ((rtx));
 540 static void hash_scan_insn      PARAMS ((rtx, int, int));
 541 static void hash_scan_set       PARAMS ((rtx, rtx, int));
 542 static void hash_scan_clobber   PARAMS ((rtx, rtx));
 543 static void hash_scan_call      PARAMS ((rtx, rtx));
 544 static int want_to_gcse_p       PARAMS ((rtx));
 545 static int oprs_unchanged_p     PARAMS ((rtx, rtx, int));
 546 static int oprs_anticipatable_p PARAMS ((rtx, rtx));
 547 static int oprs_available_p     PARAMS ((rtx, rtx));
 548 static void insert_expr_in_table PARAMS ((rtx, enum machine_mode, rtx,
 549                                           int, int));
 550 static void insert_set_in_table PARAMS ((rtx, rtx));
 551 static unsigned int hash_expr   PARAMS ((rtx, enum machine_mode, int *, int));
 552 static unsigned int hash_expr_1 PARAMS ((rtx, enum machine_mode, int *));
 553 static unsigned int hash_string_1 PARAMS ((const char *));
 554 static unsigned int hash_set    PARAMS ((int, int));
 555 static int expr_equiv_p         PARAMS ((rtx, rtx));
 556 static void record_last_reg_set_info PARAMS ((rtx, int));
 557 static void record_last_mem_set_info PARAMS ((rtx));
 558 static void record_last_set_info PARAMS ((rtx, rtx, void *));
 559 static void compute_hash_table  PARAMS ((int));
 560 static void alloc_set_hash_table PARAMS ((int));
 561 static void free_set_hash_table PARAMS ((void));
 562 static void compute_set_hash_table PARAMS ((void));
 563 static void alloc_expr_hash_table PARAMS ((unsigned int));
 564 static void free_expr_hash_table PARAMS ((void));
 565 static void compute_expr_hash_table PARAMS ((void));
 566 static void dump_hash_table     PARAMS ((FILE *, const char *, struct expr **,
 567                                          int, int));
 568 static struct expr *lookup_expr PARAMS ((rtx));
 569 static struct expr *lookup_set  PARAMS ((unsigned int, rtx));
 570 static struct expr *next_set    PARAMS ((unsigned int, struct expr *));
 571 static void reset_opr_set_tables PARAMS ((void));
 572 static int oprs_not_set_p       PARAMS ((rtx, rtx));
 573 static void mark_call           PARAMS ((rtx));
 574 static void mark_set            PARAMS ((rtx, rtx));
 575 static void mark_clobber        PARAMS ((rtx, rtx));
 576 static void mark_oprs_set       PARAMS ((rtx));
 577 static void alloc_cprop_mem     PARAMS ((int, int));
 578 static void free_cprop_mem      PARAMS ((void));
 579 static void compute_transp      PARAMS ((rtx, int, sbitmap *, int));
 580 static void compute_transpout   PARAMS ((void));
 581 static void compute_local_properties PARAMS ((sbitmap *, sbitmap *, sbitmap *,
 582                                               int));
 583 static void compute_cprop_data  PARAMS ((void));
 584 static void find_used_regs      PARAMS ((rtx));
 585 static int try_replace_reg      PARAMS ((rtx, rtx, rtx));
 586 static struct expr *find_avail_set PARAMS ((int, rtx));
 587 static int cprop_jump           PARAMS ((rtx, rtx, rtx));
 588 #ifdef HAVE_cc0
 589 static int cprop_cc0_jump       PARAMS ((rtx, struct reg_use *, rtx));
 590 #endif
 591 static int cprop_insn           PARAMS ((rtx, int));
 592 static int cprop                PARAMS ((int));
 593 static int one_cprop_pass       PARAMS ((int, int));
 594 static void alloc_pre_mem       PARAMS ((int, int));
 595 static void free_pre_mem        PARAMS ((void));
 596 static void compute_pre_data    PARAMS ((void));
 597 static int pre_expr_reaches_here_p PARAMS ((int, struct expr *, int));
 598 static void insert_insn_end_bb  PARAMS ((struct expr *, int, int));
 599 static void pre_insert_copy_insn PARAMS ((struct expr *, rtx));
 600 static void pre_insert_copies   PARAMS ((void));
 601 static int pre_delete           PARAMS ((void));
 602 static int pre_gcse             PARAMS ((void));
 603 static int one_pre_gcse_pass    PARAMS ((int));
 604 static void add_label_notes     PARAMS ((rtx, rtx));
 605 static void alloc_code_hoist_mem PARAMS ((int, int));
 606 static void free_code_hoist_mem PARAMS ((void));
 607 static void compute_code_hoist_vbeinout PARAMS ((void));
 608 static void compute_code_hoist_data PARAMS ((void));
 609 static int hoist_expr_reaches_here_p PARAMS ((int, int, int, char *));
 610 static void hoist_code          PARAMS ((void));
 611 static int one_code_hoisting_pass PARAMS ((void));
 612 static void alloc_rd_mem        PARAMS ((int, int));
 613 static void free_rd_mem         PARAMS ((void));
 614 static void handle_rd_kill_set  PARAMS ((rtx, int, int));
 615 static void compute_kill_rd     PARAMS ((void));
 616 static void compute_rd          PARAMS ((void));
 617 static void alloc_avail_expr_mem PARAMS ((int, int));
 618 static void free_avail_expr_mem PARAMS ((void));
 619 static void compute_ae_gen      PARAMS ((void));
 620 static int expr_killed_p        PARAMS ((rtx, int));
 621 static void compute_ae_kill     PARAMS ((sbitmap *, sbitmap *));
 622 static int expr_reaches_here_p  PARAMS ((struct occr *, struct expr *,
 623                                          int, int));
 624 static rtx computing_insn       PARAMS ((struct expr *, rtx));
 625 static int def_reaches_here_p   PARAMS ((rtx, rtx));
 626 static int can_disregard_other_sets PARAMS ((struct reg_set **, rtx, int));
 627 static int handle_avail_expr    PARAMS ((rtx, struct expr *));
 628 static int classic_gcse         PARAMS ((void));
 629 static int one_classic_gcse_pass PARAMS ((int));
 630 static void invalidate_nonnull_info PARAMS ((rtx, rtx, void *));
 631 static void delete_null_pointer_checks_1 PARAMS ((unsigned int *, sbitmap *,
 632                                                   sbitmap *,
 633                                                   struct null_pointer_info *));
 634 static rtx process_insert_insn  PARAMS ((struct expr *));
 635 static int pre_edge_insert      PARAMS ((struct edge_list *, struct expr **));
 636 static int expr_reaches_here_p_work PARAMS ((struct occr *, struct expr *,
 637                                              int, int, char *));
 638 static int pre_expr_reaches_here_p_work PARAMS ((int, struct expr *,
 639                                                  int, char *));
 640 \f
 641 /* Entry point for global common subexpression elimination.
 642    F is the first instruction in the function.  */
 643
 644 int
 645 gcse_main (f, file)
 646      rtx f;
 647      FILE *file;
 648 {
 649   int changed, pass;
 650   /* Bytes used at start of pass.  */
 651   int initial_bytes_used;
 652   /* Maximum number of bytes used by a pass.  */
 653   int max_pass_bytes;
 654   /* Point to release obstack data from for each pass.  */
 655   char *gcse_obstack_bottom;
 656
 657   /* We do not construct an accurate cfg in functions which call
 658      setjmp, so just punt to be safe.  */
 659   if (current_function_calls_setjmp)
 660     return 0;
 661
 662   /* Assume that we do not need to run jump optimizations after gcse.  */
 663   run_jump_opt_after_gcse = 0;
 664
 665   /* For calling dump_foo fns from gdb.  */
 666   debug_stderr = stderr;
 667   gcse_file = file;
 668
 669   /* Identify the basic block information for this function, including
 670      successors and predecessors.  */
 671   max_gcse_regno = max_reg_num ();
 672
 673   if (file)
 674     dump_flow_info (file);
 675
 676   /* Return if there's nothing to do.  */
 677   if (n_basic_blocks <= 1)
 678     return 0;
 679
 680   /* Trying to perform global optimizations on flow graphs which have
 681      a high connectivity will take a long time and is unlikely to be
 682      particularly useful.
 683
 684      In normal circumstances a cfg should have about twice has many edges
 685      as blocks.  But we do not want to punish small functions which have
 686      a couple switch statements.  So we require a relatively large number
 687      of basic blocks and the ratio of edges to blocks to be high.  */
 688   if (n_basic_blocks > 1000 && n_edges / n_basic_blocks >= 20)
 689     {
 690       if (warn_disabled_optimization)
 691       warning ("GCSE disabled: %d > 1000 basic blocks and %d >= 20 edges/basic block",
 692                n_basic_blocks, n_edges / n_basic_blocks);
 693       return 0;
 694     }
 695
 696   /* See what modes support reg/reg copy operations.  */
 697   if (! can_copy_init_p)
 698     {
 699       compute_can_copy ();
 700       can_copy_init_p = 1;
 701     }
 702
 703   gcc_obstack_init (&gcse_obstack);
 704   bytes_used = 0;
 705
 706   /* Record where pseudo-registers are set.  This data is kept accurate
 707      during each pass.  ??? We could also record hard-reg information here
 708      [since it's unchanging], however it is currently done during hash table
 709      computation.
 710
 711      It may be tempting to compute MEM set information here too, but MEM sets
 712      will be subject to code motion one day and thus we need to compute
 713      information about memory sets when we build the hash tables.  */
 714
 715   alloc_reg_set_mem (max_gcse_regno);
 716   compute_sets (f);
 717
 718   pass = 0;
 719   initial_bytes_used = bytes_used;
 720   max_pass_bytes = 0;
 721   gcse_obstack_bottom = gcse_alloc (1);
 722   changed = 1;
 723   while (changed && pass < MAX_PASSES)
 724     {
 725       changed = 0;
 726       if (file)
 727         fprintf (file, "GCSE pass %d\n\n", pass + 1);
 728
 729       /* Initialize bytes_used to the space for the pred/succ lists,
 730          and the reg_set_table data.  */
 731       bytes_used = initial_bytes_used;
 732
 733       /* Each pass may create new registers, so recalculate each time.  */
 734       max_gcse_regno = max_reg_num ();
 735
 736       alloc_gcse_mem (f);
 737
 738       /* Don't allow constant propagation to modify jumps
 739          during this pass.  */
 740       changed = one_cprop_pass (pass + 1, 0);
 741
 742       if (optimize_size)
 743         changed |= one_classic_gcse_pass (pass + 1);
 744       else
 745         {
 746           changed |= one_pre_gcse_pass (pass + 1);
 747           free_reg_set_mem ();
 748           alloc_reg_set_mem (max_reg_num ());
 749           compute_sets (f);
 750           run_jump_opt_after_gcse = 1;
 751         }
 752
 753       if (max_pass_bytes < bytes_used)
 754         max_pass_bytes = bytes_used;
 755
 756       /* Free up memory, then reallocate for code hoisting.  We can
 757          not re-use the existing allocated memory because the tables
 758          will not have info for the insns or registers created by
 759          partial redundancy elimination.  */
 760       free_gcse_mem ();
 761
 762       /* It does not make sense to run code hoisting unless we optimizing
 763          for code size -- it rarely makes programs faster, and can make
 764          them bigger if we did partial redundancy elimination (when optimizing
 765          for space, we use a classic gcse algorithm instead of partial
 766          redundancy algorithms).  */
 767       if (optimize_size)
 768         {
 769           max_gcse_regno = max_reg_num ();
 770           alloc_gcse_mem (f);
 771           changed |= one_code_hoisting_pass ();
 772           free_gcse_mem ();
 773
 774           if (max_pass_bytes < bytes_used)
 775             max_pass_bytes = bytes_used;
 776         }
 777
 778       if (file)
 779         {
 780           fprintf (file, "\n");
 781           fflush (file);
 782         }
 783
 784       obstack_free (&gcse_obstack, gcse_obstack_bottom);
 785       pass++;
 786     }
 787
 788   /* Do one last pass of copy propagation, including cprop into
 789      conditional jumps.  */
 790
 791   max_gcse_regno = max_reg_num ();
 792   alloc_gcse_mem (f);
 793   /* This time, go ahead and allow cprop to alter jumps.  */
 794   one_cprop_pass (pass + 1, 1);
 795   free_gcse_mem ();
 796
 797   if (file)
 798     {
 799       fprintf (file, "GCSE of %s: %d basic blocks, ",
 800                current_function_name, n_basic_blocks);
 801       fprintf (file, "%d pass%s, %d bytes\n\n",
 802                pass, pass > 1 ? "es" : "", max_pass_bytes);
 803     }
 804
 805   obstack_free (&gcse_obstack, NULL_PTR);
 806   free_reg_set_mem ();
 807   return run_jump_opt_after_gcse;
 808 }
 809 \f
 810 /* Misc. utilities.  */
 811
 812 /* Compute which modes support reg/reg copy operations.  */
 813
 814 static void
 815 compute_can_copy ()
 816 {
 817   int i;
 818 #ifndef AVOID_CCMODE_COPIES
 819   rtx reg,insn;
 820 #endif
 821   memset (can_copy_p, 0, NUM_MACHINE_MODES);
 822
 823   start_sequence ();
 824   for (i = 0; i < NUM_MACHINE_MODES; i++)
 825     if (GET_MODE_CLASS (i) == MODE_CC)
 826       {
 827 #ifdef AVOID_CCMODE_COPIES
 828         can_copy_p[i] = 0;
 829 #else
 830         reg = gen_rtx_REG ((enum machine_mode) i, LAST_VIRTUAL_REGISTER + 1);
 831         insn = emit_insn (gen_rtx_SET (VOIDmode, reg, reg));
 832         if (recog (PATTERN (insn), insn, NULL_PTR) >= 0)
 833           can_copy_p[i] = 1;
 834 #endif
 835       }
 836     else
 837       can_copy_p[i] = 1;
 838
 839   end_sequence ();
 840 }
 841 \f
 842 /* Cover function to xmalloc to record bytes allocated.  */
 843
 844 static char *
 845 gmalloc (size)
 846      unsigned int size;
 847 {
 848   bytes_used += size;
 849   return xmalloc (size);
 850 }
 851
 852 /* Cover function to xrealloc.
 853    We don't record the additional size since we don't know it.
 854    It won't affect memory usage stats much anyway.  */
 855
 856 static char *
 857 grealloc (ptr, size)
 858      char *ptr;
 859      unsigned int size;
 860 {
 861   return xrealloc (ptr, size);
 862 }
 863
 864 /* Cover function to obstack_alloc.
 865    We don't need to record the bytes allocated here since
 866    obstack_chunk_alloc is set to gmalloc.  */
 867
 868 static char *
 869 gcse_alloc (size)
 870      unsigned long size;
 871 {
 872   return (char *) obstack_alloc (&gcse_obstack, size);
 873 }
 874
 875 /* Allocate memory for the cuid mapping array,
 876    and reg/memory set tracking tables.
 877
 878    This is called at the start of each pass.  */
 879
 880 static void
 881 alloc_gcse_mem (f)
 882      rtx f;
 883 {
 884   int i,n;
 885   rtx insn;
 886
 887   /* Find the largest UID and create a mapping from UIDs to CUIDs.
 888      CUIDs are like UIDs except they increase monotonically, have no gaps,
 889      and only apply to real insns.  */
 890
 891   max_uid = get_max_uid ();
 892   n = (max_uid + 1) * sizeof (int);
 893   uid_cuid = (int *) gmalloc (n);
 894   memset ((char *) uid_cuid, 0, n);
 895   for (insn = f, i = 0; insn; insn = NEXT_INSN (insn))
 896     {
 897       if (INSN_P (insn))
 898         uid_cuid[INSN_UID (insn)] = i++;
 899       else
 900         uid_cuid[INSN_UID (insn)] = i;
 901     }
 902
 903   /* Create a table mapping cuids to insns.  */
 904
 905   max_cuid = i;
 906   n = (max_cuid + 1) * sizeof (rtx);
 907   cuid_insn = (rtx *) gmalloc (n);
 908   memset ((char *) cuid_insn, 0, n);
 909   for (insn = f, i = 0; insn; insn = NEXT_INSN (insn))
 910     if (INSN_P (insn))
 911       CUID_INSN (i++) = insn;
 912
 913   /* Allocate vars to track sets of regs.  */
 914   reg_set_bitmap = (sbitmap) sbitmap_alloc (max_gcse_regno);
 915
 916   /* Allocate vars to track sets of regs, memory per block.  */
 917   reg_set_in_block = (sbitmap *) sbitmap_vector_alloc (n_basic_blocks,
 918                                                        max_gcse_regno);
 919   mem_set_in_block = (char *) gmalloc (n_basic_blocks);
 920 }
 921
 922 /* Free memory allocated by alloc_gcse_mem.  */
 923
 924 static void
 925 free_gcse_mem ()
 926 {
 927   free (uid_cuid);
 928   free (cuid_insn);
 929
 930   free (reg_set_bitmap);
 931
 932   free (reg_set_in_block);
 933   free (mem_set_in_block);
 934 }
 935
 936 /* Many of the global optimization algorithms work by solving dataflow
 937    equations for various expressions.  Initially, some local value is
 938    computed for each expression in each block.  Then, the values across the
 939    various blocks are combined (by following flow graph edges) to arrive at
 940    global values.  Conceptually, each set of equations is independent.  We
 941    may therefore solve all the equations in parallel, solve them one at a
 942    time, or pick any intermediate approach.
 943
 944    When you're going to need N two-dimensional bitmaps, each X (say, the
 945    number of blocks) by Y (say, the number of expressions), call this
 946    function.  It's not important what X and Y represent; only that Y
 947    correspond to the things that can be done in parallel.  This function will
 948    return an appropriate chunking factor C; you should solve C sets of
 949    equations in parallel.  By going through this function, we can easily
 950    trade space against time; by solving fewer equations in parallel we use
 951    less space.  */
 952
 953 static int
 954 get_bitmap_width (n, x, y)
 955      int n;
 956      int x;
 957      int y;
 958 {
 959   /* It's not really worth figuring out *exactly* how much memory will
 960      be used by a particular choice.  The important thing is to get
 961      something approximately right.  */
 962   size_t max_bitmap_memory = 10 * 1024 * 1024;
 963
 964   /* The number of bytes we'd use for a single column of minimum
 965      width.  */
 966   size_t column_size = n * x * sizeof (SBITMAP_ELT_TYPE);
 967
 968   /* Often, it's reasonable just to solve all the equations in
 969      parallel.  */
 970   if (column_size * SBITMAP_SET_SIZE (y) <= max_bitmap_memory)
 971     return y;
 972
 973   /* Otherwise, pick the largest width we can, without going over the
 974      limit.  */
 975   return SBITMAP_ELT_BITS * ((max_bitmap_memory + column_size - 1)
 976                              / column_size);
 977 }
 978 \f
 979 /* Compute the local properties of each recorded expression.
 980
 981    Local properties are those that are defined by the block, irrespective of
 982    other blocks.
 983
 984    An expression is transparent in a block if its operands are not modified
 985    in the block.
 986
 987    An expression is computed (locally available) in a block if it is computed
 988    at least once and expression would contain the same value if the
 989    computation was moved to the end of the block.
 990
 991    An expression is locally anticipatable in a block if it is computed at
 992    least once and expression would contain the same value if the computation
 993    was moved to the beginning of the block.
 994
 995    We call this routine for cprop, pre and code hoisting.  They all compute
 996    basically the same information and thus can easily share this code.
 997
 998    TRANSP, COMP, and ANTLOC are destination sbitmaps for recording local
 999    properties.  If NULL, then it is not necessary to compute or record that
1000    particular property.
1001
1002    SETP controls which hash table to look at.  If zero, this routine looks at
1003    the expr hash table; if nonzero this routine looks at the set hash table.
1004    Additionally, TRANSP is computed as ~TRANSP, since this is really cprop's
1005    ABSALTERED.  */
1006
1007 static void
1008 compute_local_properties (transp, comp, antloc, setp)
1009      sbitmap *transp;
1010      sbitmap *comp;
1011      sbitmap *antloc;
1012      int setp;
1013 {
1014   unsigned int i, hash_table_size;
1015   struct expr **hash_table;
1016
1017   /* Initialize any bitmaps that were passed in.  */
1018   if (transp)
1019     {
1020       if (setp)
1021         sbitmap_vector_zero (transp, n_basic_blocks);
1022       else
1023         sbitmap_vector_ones (transp, n_basic_blocks);
1024     }
1025
1026   if (comp)
1027     sbitmap_vector_zero (comp, n_basic_blocks);
1028   if (antloc)
1029     sbitmap_vector_zero (antloc, n_basic_blocks);
1030
1031   /* We use the same code for cprop, pre and hoisting.  For cprop
1032      we care about the set hash table, for pre and hoisting we
1033      care about the expr hash table.  */
1034   hash_table_size = setp ? set_hash_table_size : expr_hash_table_size;
1035   hash_table = setp ? set_hash_table : expr_hash_table;
1036
1037   for (i = 0; i < hash_table_size; i++)
1038     {
1039       struct expr *expr;
1040
1041       for (expr = hash_table[i]; expr != NULL; expr = expr->next_same_hash)
1042         {
1043           int indx = expr->bitmap_index;
1044           struct occr *occr;
1045
1046           /* The expression is transparent in this block if it is not killed.
1047              We start by assuming all are transparent [none are killed], and
1048              then reset the bits for those that are.  */
1049           if (transp)
1050             compute_transp (expr->expr, indx, transp, setp);
1051
1052           /* The occurrences recorded in antic_occr are exactly those that
1053              we want to set to non-zero in ANTLOC.  */
1054           if (antloc)
1055             for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
1056               {
1057                 SET_BIT (antloc[BLOCK_NUM (occr->insn)], indx);
1058
1059                 /* While we're scanning the table, this is a good place to
1060                    initialize this.  */
1061                 occr->deleted_p = 0;
1062               }
1063
1064           /* The occurrences recorded in avail_occr are exactly those that
1065              we want to set to non-zero in COMP.  */
1066           if (comp)
1067             for (occr = expr->avail_occr; occr != NULL; occr = occr->next)
1068               {
1069                 SET_BIT (comp[BLOCK_NUM (occr->insn)], indx);
1070
1071                 /* While we're scanning the table, this is a good place to
1072                    initialize this.  */
1073                 occr->copied_p = 0;
1074               }
1075
1076           /* While we're scanning the table, this is a good place to
1077              initialize this.  */
1078           expr->reaching_reg = 0;
1079         }
1080     }
1081 }
1082 \f
1083 /* Register set information.
1084
1085    `reg_set_table' records where each register is set or otherwise
1086    modified.  */
1087
1088 static struct obstack reg_set_obstack;
1089
1090 static void
1091 alloc_reg_set_mem (n_regs)
1092      int n_regs;
1093 {
1094   unsigned int n;
1095
1096   reg_set_table_size = n_regs + REG_SET_TABLE_SLOP;
1097   n = reg_set_table_size * sizeof (struct reg_set *);
1098   reg_set_table = (struct reg_set **) gmalloc (n);
1099   memset ((char *) reg_set_table, 0, n);
1100
1101   gcc_obstack_init (&reg_set_obstack);
1102 }
1103
1104 static void
1105 free_reg_set_mem ()
1106 {
1107   free (reg_set_table);
1108   obstack_free (&reg_set_obstack, NULL_PTR);
1109 }
1110
1111 /* Record REGNO in the reg_set table.  */
1112
1113 static void
1114 record_one_set (regno, insn)
1115      int regno;
1116      rtx insn;
1117 {
1118   /* Allocate a new reg_set element and link it onto the list.  */
1119   struct reg_set *new_reg_info;
1120
1121   /* If the table isn't big enough, enlarge it.  */
1122   if (regno >= reg_set_table_size)
1123     {
1124       int new_size = regno + REG_SET_TABLE_SLOP;
1125
1126       reg_set_table
1127         = (struct reg_set **) grealloc ((char *) reg_set_table,
1128                                         new_size * sizeof (struct reg_set *));
1129       memset ((char *) (reg_set_table + reg_set_table_size), 0,
1130              (new_size - reg_set_table_size) * sizeof (struct reg_set *));
1131       reg_set_table_size = new_size;
1132     }
1133
1134   new_reg_info = (struct reg_set *) obstack_alloc (&reg_set_obstack,
1135                                                    sizeof (struct reg_set));
1136   bytes_used += sizeof (struct reg_set);
1137   new_reg_info->insn = insn;
1138   new_reg_info->next = reg_set_table[regno];
1139   reg_set_table[regno] = new_reg_info;
1140 }
1141
1142 /* Called from compute_sets via note_stores to handle one SET or CLOBBER in
1143    an insn.  The DATA is really the instruction in which the SET is
1144    occurring.  */
1145
1146 static void
1147 record_set_info (dest, setter, data)
1148      rtx dest, setter ATTRIBUTE_UNUSED;
1149      void *data;
1150 {
1151   rtx record_set_insn = (rtx) data;
1152
1153   if (GET_CODE (dest) == REG && REGNO (dest) >= FIRST_PSEUDO_REGISTER)
1154     record_one_set (REGNO (dest), record_set_insn);
1155 }
1156
1157 /* Scan the function and record each set of each pseudo-register.
1158
1159    This is called once, at the start of the gcse pass.  See the comments for
1160    `reg_set_table' for further documenation.  */
1161
1162 static void
1163 compute_sets (f)
1164      rtx f;
1165 {
1166   rtx insn;
1167
1168   for (insn = f; insn != 0; insn = NEXT_INSN (insn))
1169     if (INSN_P (insn))
1170       note_stores (PATTERN (insn), record_set_info, insn);
1171 }
1172 \f
1173 /* Hash table support.  */
1174
1175 /* For each register, the cuid of the first/last insn in the block to set it,
1176    or -1 if not set.  */
1177 #define NEVER_SET -1
1178 static int *reg_first_set;
1179 static int *reg_last_set;
1180
1181 /* While computing "first/last set" info, this is the CUID of first/last insn
1182    to set memory or -1 if not set.  `mem_last_set' is also used when
1183    performing GCSE to record whether memory has been set since the beginning
1184    of the block.
1185
1186    Note that handling of memory is very simple, we don't make any attempt
1187    to optimize things (later).  */
1188 static int mem_first_set;
1189 static int mem_last_set;
1190
1191 /* See whether X, the source of a set, is something we want to consider for
1192    GCSE.  */
1193
1194 static int
1195 want_to_gcse_p (x)
1196      rtx x;
1197 {
1198   static rtx test_insn = 0;
1199   int num_clobbers = 0;
1200   int icode;
1201
1202   switch (GET_CODE (x))
1203     {
1204     case REG:
1205     case SUBREG:
1206     case CONST_INT:
1207     case CONST_DOUBLE:
1208     case CALL:
1209       return 0;
1210
1211     default:
1212       break;
1213     }
1214
1215   /* If this is a valid operand, we are OK.  If it's VOIDmode, we aren't.  */
1216   if (general_operand (x, GET_MODE (x)))
1217     return 1;
1218   else if (GET_MODE (x) == VOIDmode)
1219     return 0;
1220
1221   /* Otherwise, check if we can make a valid insn from it.  First initialize
1222      our test insn if we haven't already.  */
1223   if (test_insn == 0)
1224     {
1225       test_insn
1226         = make_insn_raw (gen_rtx_SET (VOIDmode,
1227                                       gen_rtx_REG (word_mode,
1228                                                    FIRST_PSEUDO_REGISTER * 2),
1229                                       const0_rtx));
1230       NEXT_INSN (test_insn) = PREV_INSN (test_insn) = 0;
1231       ggc_add_rtx_root (&test_insn, 1);
1232     }
1233
1234   /* Now make an insn like the one we would make when GCSE'ing and see if
1235      valid.  */
1236   PUT_MODE (SET_DEST (PATTERN (test_insn)), GET_MODE (x));
1237   SET_SRC (PATTERN (test_insn)) = x;
1238   return ((icode = recog (PATTERN (test_insn), test_insn, &num_clobbers)) >= 0
1239           && (num_clobbers == 0 || ! added_clobbers_hard_reg_p (icode)));
1240 }
1241
1242 /* Return non-zero if the operands of expression X are unchanged from the
1243    start of INSN's basic block up to but not including INSN (if AVAIL_P == 0),
1244    or from INSN to the end of INSN's basic block (if AVAIL_P != 0).  */
1245
1246 static int
1247 oprs_unchanged_p (x, insn, avail_p)
1248      rtx x, insn;
1249      int avail_p;
1250 {
1251   int i, j;
1252   enum rtx_code code;
1253   const char *fmt;
1254
1255   if (x == 0)
1256     return 1;
1257
1258   code = GET_CODE (x);
1259   switch (code)
1260     {
1261     case REG:
1262       if (avail_p)
1263         return (reg_last_set[REGNO (x)] == NEVER_SET
1264                 || reg_last_set[REGNO (x)] < INSN_CUID (insn));
1265       else
1266         return (reg_first_set[REGNO (x)] == NEVER_SET
1267                 || reg_first_set[REGNO (x)] >= INSN_CUID (insn));
1268
1269     case MEM:
1270       if (avail_p && mem_last_set != NEVER_SET
1271           && mem_last_set >= INSN_CUID (insn))
1272         return 0;
1273       else if (! avail_p && mem_first_set != NEVER_SET
1274                && mem_first_set < INSN_CUID (insn))
1275         return 0;
1276       else
1277         return oprs_unchanged_p (XEXP (x, 0), insn, avail_p);
1278
1279     case PRE_DEC:
1280     case PRE_INC:
1281     case POST_DEC:
1282     case POST_INC:
1283     case PRE_MODIFY:
1284     case POST_MODIFY:
1285       return 0;
1286
1287     case PC:
1288     case CC0: /*FIXME*/
1289     case CONST:
1290     case CONST_INT:
1291     case CONST_DOUBLE:
1292     case SYMBOL_REF:
1293     case LABEL_REF:
1294     case ADDR_VEC:
1295     case ADDR_DIFF_VEC:
1296       return 1;
1297
1298     default:
1299       break;
1300     }
1301
1302   for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
1303     {
1304       if (fmt[i] == 'e')
1305         {
1306           /* If we are about to do the last recursive call needed at this
1307              level, change it into iteration.  This function is called enough
1308              to be worth it.  */
1309           if (i == 0)
1310             return oprs_unchanged_p (XEXP (x, i), insn, avail_p);
1311
1312           else if (! oprs_unchanged_p (XEXP (x, i), insn, avail_p))
1313             return 0;
1314         }
1315       else if (fmt[i] == 'E')
1316         for (j = 0; j < XVECLEN (x, i); j++)
1317           if (! oprs_unchanged_p (XVECEXP (x, i, j), insn, avail_p))
1318             return 0;
1319     }
1320
1321   return 1;
1322 }
1323
1324 /* Return non-zero if the operands of expression X are unchanged from
1325    the start of INSN's basic block up to but not including INSN.  */
1326
1327 static int
1328 oprs_anticipatable_p (x, insn)
1329      rtx x, insn;
1330 {
1331   return oprs_unchanged_p (x, insn, 0);
1332 }
1333
1334 /* Return non-zero if the operands of expression X are unchanged from
1335    INSN to the end of INSN's basic block.  */
1336
1337 static int
1338 oprs_available_p (x, insn)
1339      rtx x, insn;
1340 {
1341   return oprs_unchanged_p (x, insn, 1);
1342 }
1343
1344 /* Hash expression X.
1345
1346    MODE is only used if X is a CONST_INT.  DO_NOT_RECORD_P is a boolean
1347    indicating if a volatile operand is found or if the expression contains
1348    something we don't want to insert in the table.
1349
1350    ??? One might want to merge this with canon_hash.  Later.  */
1351
1352 static unsigned int
1353 hash_expr (x, mode, do_not_record_p, hash_table_size)
1354      rtx x;
1355      enum machine_mode mode;
1356      int *do_not_record_p;
1357      int hash_table_size;
1358 {
1359   unsigned int hash;
1360
1361   *do_not_record_p = 0;
1362
1363   hash = hash_expr_1 (x, mode, do_not_record_p);
1364   return hash % hash_table_size;
1365 }
1366
1367 /* Hash a string.  Just add its bytes up.  */
1368
1369 static inline unsigned
1370 hash_string_1 (ps)
1371      const char *ps;
1372 {
1373   unsigned hash = 0;
1374   const unsigned char *p = (const unsigned char *)ps;
1375
1376   if (p)
1377     while (*p)
1378       hash += *p++;
1379
1380   return hash;
1381 }
1382
1383 /* Subroutine of hash_expr to do the actual work.  */
1384
1385 static unsigned int
1386 hash_expr_1 (x, mode, do_not_record_p)
1387      rtx x;
1388      enum machine_mode mode;
1389      int *do_not_record_p;
1390 {
1391   int i, j;
1392   unsigned hash = 0;
1393   enum rtx_code code;
1394   const char *fmt;
1395
1396   /* Used to turn recursion into iteration.  We can't rely on GCC's
1397      tail-recursion eliminatio since we need to keep accumulating values
1398      in HASH.  */
1399
1400   if (x == 0)
1401     return hash;
1402
1403  repeat:
1404   code = GET_CODE (x);
1405   switch (code)
1406     {
1407     case REG:
1408       hash += ((unsigned int) REG << 7) + REGNO (x);
1409       return hash;
1410
1411     case CONST_INT:
1412       hash += (((unsigned int) CONST_INT << 7) + (unsigned int) mode
1413                + (unsigned int) INTVAL (x));
1414       return hash;
1415
1416     case CONST_DOUBLE:
1417       /* This is like the general case, except that it only counts
1418          the integers representing the constant.  */
1419       hash += (unsigned int) code + (unsigned int) GET_MODE (x);
1420       if (GET_MODE (x) != VOIDmode)
1421         for (i = 2; i < GET_RTX_LENGTH (CONST_DOUBLE); i++)
1422           hash += (unsigned int) XWINT (x, i);
1423       else
1424         hash += ((unsigned int) CONST_DOUBLE_LOW (x)
1425                  + (unsigned int) CONST_DOUBLE_HIGH (x));
1426       return hash;
1427
1428       /* Assume there is only one rtx object for any given label.  */
1429     case LABEL_REF:
1430       /* We don't hash on the address of the CODE_LABEL to avoid bootstrap
1431          differences and differences between each stage's debugging dumps.  */
1432       hash += (((unsigned int) LABEL_REF << 7)
1433                + CODE_LABEL_NUMBER (XEXP (x, 0)));
1434       return hash;
1435
1436     case SYMBOL_REF:
1437       {
1438         /* Don't hash on the symbol's address to avoid bootstrap differences.
1439            Different hash values may cause expressions to be recorded in
1440            different orders and thus different registers to be used in the
1441            final assembler.  This also avoids differences in the dump files
1442            between various stages.  */
1443         unsigned int h = 0;
1444         const unsigned char *p = (const unsigned char *) XSTR (x, 0);
1445
1446         while (*p)
1447           h += (h << 7) + *p++; /* ??? revisit */
1448
1449         hash += ((unsigned int) SYMBOL_REF << 7) + h;
1450         return hash;
1451       }
1452
1453     case MEM:
1454       if (MEM_VOLATILE_P (x))
1455         {
1456           *do_not_record_p = 1;
1457           return 0;
1458         }
1459
1460       hash += (unsigned int) MEM;
1461       hash += MEM_ALIAS_SET (x);
1462       x = XEXP (x, 0);
1463       goto repeat;
1464
1465     case PRE_DEC:
1466     case PRE_INC:
1467     case POST_DEC:
1468     case POST_INC:
1469     case PC:
1470     case CC0:
1471     case CALL:
1472     case UNSPEC_VOLATILE:
1473       *do_not_record_p = 1;
1474       return 0;
1475
1476     case ASM_OPERANDS:
1477       if (MEM_VOLATILE_P (x))
1478         {
1479           *do_not_record_p = 1;
1480           return 0;
1481         }
1482       else
1483         {
1484           /* We don't want to take the filename and line into account.  */
1485           hash += (unsigned) code + (unsigned) GET_MODE (x)
1486             + hash_string_1 (ASM_OPERANDS_TEMPLATE (x))
1487             + hash_string_1 (ASM_OPERANDS_OUTPUT_CONSTRAINT (x))
1488             + (unsigned) ASM_OPERANDS_OUTPUT_IDX (x);
1489
1490           if (ASM_OPERANDS_INPUT_LENGTH (x))
1491             {
1492               for (i = 1; i < ASM_OPERANDS_INPUT_LENGTH (x); i++)
1493                 {
1494                   hash += (hash_expr_1 (ASM_OPERANDS_INPUT (x, i),
1495                                         GET_MODE (ASM_OPERANDS_INPUT (x, i)),
1496                                         do_not_record_p)
1497                            + hash_string_1 (ASM_OPERANDS_INPUT_CONSTRAINT
1498                                             (x, i)));
1499                 }
1500
1501               hash += hash_string_1 (ASM_OPERANDS_INPUT_CONSTRAINT (x, 0));
1502               x = ASM_OPERANDS_INPUT (x, 0);
1503               mode = GET_MODE (x);
1504               goto repeat;
1505             }
1506           return hash;
1507         }
1508
1509     default:
1510       break;
1511     }
1512
1513   hash += (unsigned) code + (unsigned) GET_MODE (x);
1514   for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
1515     {
1516       if (fmt[i] == 'e')
1517         {
1518           /* If we are about to do the last recursive call
1519              needed at this level, change it into iteration.
1520              This function is called enough to be worth it.  */
1521           if (i == 0)
1522             {
1523               x = XEXP (x, i);
1524               goto repeat;
1525             }
1526
1527           hash += hash_expr_1 (XEXP (x, i), 0, do_not_record_p);
1528           if (*do_not_record_p)
1529             return 0;
1530         }
1531
1532       else if (fmt[i] == 'E')
1533         for (j = 0; j < XVECLEN (x, i); j++)
1534           {
1535             hash += hash_expr_1 (XVECEXP (x, i, j), 0, do_not_record_p);
1536             if (*do_not_record_p)
1537               return 0;
1538           }
1539
1540       else if (fmt[i] == 's')
1541         hash += hash_string_1 (XSTR (x, i));
1542       else if (fmt[i] == 'i')
1543         hash += (unsigned int) XINT (x, i);
1544       else
1545         abort ();
1546     }
1547
1548   return hash;
1549 }
1550
1551 /* Hash a set of register REGNO.
1552
1553    Sets are hashed on the register that is set.  This simplifies the PRE copy
1554    propagation code.
1555
1556    ??? May need to make things more elaborate.  Later, as necessary.  */
1557
1558 static unsigned int
1559 hash_set (regno, hash_table_size)
1560      int regno;
1561      int hash_table_size;
1562 {
1563   unsigned int hash;
1564
1565   hash = regno;
1566   return hash % hash_table_size;
1567 }
1568
1569 /* Return non-zero if exp1 is equivalent to exp2.
1570    ??? Borrowed from cse.c.  Might want to remerge with cse.c.  Later.  */
1571
1572 static int
1573 expr_equiv_p (x, y)
1574      rtx x, y;
1575 {
1576   register int i, j;
1577   register enum rtx_code code;
1578   register const char *fmt;
1579
1580   if (x == y)
1581     return 1;
1582
1583   if (x == 0 || y == 0)
1584     return x == y;
1585
1586   code = GET_CODE (x);
1587   if (code != GET_CODE (y))
1588     return 0;
1589
1590   /* (MULT:SI x y) and (MULT:HI x y) are NOT equivalent.  */
1591   if (GET_MODE (x) != GET_MODE (y))
1592     return 0;
1593
1594   switch (code)
1595     {
1596     case PC:
1597     case CC0:
1598       return x == y;
1599
1600     case CONST_INT:
1601       return INTVAL (x) == INTVAL (y);
1602
1603     case LABEL_REF:
1604       return XEXP (x, 0) == XEXP (y, 0);
1605
1606     case SYMBOL_REF:
1607       return XSTR (x, 0) == XSTR (y, 0);
1608
1609     case REG:
1610       return REGNO (x) == REGNO (y);
1611
1612     case MEM:
1613       /* Can't merge two expressions in different alias sets, since we can
1614          decide that the expression is transparent in a block when it isn't,
1615          due to it being set with the different alias set.  */
1616       if (MEM_ALIAS_SET (x) != MEM_ALIAS_SET (y))
1617         return 0;
1618       break;
1619
1620     /*  For commutative operations, check both orders.  */
1621     case PLUS:
1622     case MULT:
1623     case AND:
1624     case IOR:
1625     case XOR:
1626     case NE:
1627     case EQ:
1628       return ((expr_equiv_p (XEXP (x, 0), XEXP (y, 0))
1629                && expr_equiv_p (XEXP (x, 1), XEXP (y, 1)))
1630               || (expr_equiv_p (XEXP (x, 0), XEXP (y, 1))
1631                   && expr_equiv_p (XEXP (x, 1), XEXP (y, 0))));
1632
1633     case ASM_OPERANDS:
1634       /* We don't use the generic code below because we want to
1635          disregard filename and line numbers.  */
1636
1637       /* A volatile asm isn't equivalent to any other.  */
1638       if (MEM_VOLATILE_P (x) || MEM_VOLATILE_P (y))
1639         return 0;
1640
1641       if (GET_MODE (x) != GET_MODE (y)
1642           || strcmp (ASM_OPERANDS_TEMPLATE (x), ASM_OPERANDS_TEMPLATE (y))
1643           || strcmp (ASM_OPERANDS_OUTPUT_CONSTRAINT (x),
1644                      ASM_OPERANDS_OUTPUT_CONSTRAINT (y))
1645           || ASM_OPERANDS_OUTPUT_IDX (x) != ASM_OPERANDS_OUTPUT_IDX (y)
1646           || ASM_OPERANDS_INPUT_LENGTH (x) != ASM_OPERANDS_INPUT_LENGTH (y))
1647         return 0;
1648
1649       if (ASM_OPERANDS_INPUT_LENGTH (x))
1650         {
1651           for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; i--)
1652             if (! expr_equiv_p (ASM_OPERANDS_INPUT (x, i),
1653                                 ASM_OPERANDS_INPUT (y, i))
1654                 || strcmp (ASM_OPERANDS_INPUT_CONSTRAINT (x, i),
1655                            ASM_OPERANDS_INPUT_CONSTRAINT (y, i)))
1656               return 0;
1657         }
1658
1659       return 1;
1660
1661     default:
1662       break;
1663     }
1664
1665   /* Compare the elements.  If any pair of corresponding elements
1666      fail to match, return 0 for the whole thing.  */
1667
1668   fmt = GET_RTX_FORMAT (code);
1669   for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
1670     {
1671       switch (fmt[i])
1672         {
1673         case 'e':
1674           if (! expr_equiv_p (XEXP (x, i), XEXP (y, i)))
1675             return 0;
1676           break;
1677
1678         case 'E':
1679           if (XVECLEN (x, i) != XVECLEN (y, i))
1680             return 0;
1681           for (j = 0; j < XVECLEN (x, i); j++)
1682             if (! expr_equiv_p (XVECEXP (x, i, j), XVECEXP (y, i, j)))
1683               return 0;
1684           break;
1685
1686         case 's':
1687           if (strcmp (XSTR (x, i), XSTR (y, i)))
1688             return 0;
1689           break;
1690
1691         case 'i':
1692           if (XINT (x, i) != XINT (y, i))
1693             return 0;
1694           break;
1695
1696         case 'w':
1697           if (XWINT (x, i) != XWINT (y, i))
1698             return 0;
1699         break;
1700
1701         case '0':
1702           break;
1703
1704         default:
1705           abort ();
1706         }
1707       }
1708
1709   return 1;
1710 }
1711
1712 /* Insert expression X in INSN in the hash table.
1713    If it is already present, record it as the last occurrence in INSN's
1714    basic block.
1715
1716    MODE is the mode of the value X is being stored into.
1717    It is only used if X is a CONST_INT.
1718
1719    ANTIC_P is non-zero if X is an anticipatable expression.
1720    AVAIL_P is non-zero if X is an available expression.  */
1721
1722 static void
1723 insert_expr_in_table (x, mode, insn, antic_p, avail_p)
1724      rtx x;
1725      enum machine_mode mode;
1726      rtx insn;
1727      int antic_p, avail_p;
1728 {
1729   int found, do_not_record_p;
1730   unsigned int hash;
1731   struct expr *cur_expr, *last_expr = NULL;
1732   struct occr *antic_occr, *avail_occr;
1733   struct occr *last_occr = NULL;
1734
1735   hash = hash_expr (x, mode, &do_not_record_p, expr_hash_table_size);
1736
1737   /* Do not insert expression in table if it contains volatile operands,
1738      or if hash_expr determines the expression is something we don't want
1739      to or can't handle.  */
1740   if (do_not_record_p)
1741     return;
1742
1743   cur_expr = expr_hash_table[hash];
1744   found = 0;
1745
1746   while (cur_expr && 0 == (found = expr_equiv_p (cur_expr->expr, x)))
1747     {
1748       /* If the expression isn't found, save a pointer to the end of
1749          the list.  */
1750       last_expr = cur_expr;
1751       cur_expr = cur_expr->next_same_hash;
1752     }
1753
1754   if (! found)
1755     {
1756       cur_expr = (struct expr *) gcse_alloc (sizeof (struct expr));
1757       bytes_used += sizeof (struct expr);
1758       if (expr_hash_table[hash] == NULL)
1759         /* This is the first pattern that hashed to this index.  */
1760         expr_hash_table[hash] = cur_expr;
1761       else
1762         /* Add EXPR to end of this hash chain.  */
1763         last_expr->next_same_hash = cur_expr;
1764
1765       /* Set the fields of the expr element.  */
1766       cur_expr->expr = x;
1767       cur_expr->bitmap_index = n_exprs++;
1768       cur_expr->next_same_hash = NULL;
1769       cur_expr->antic_occr = NULL;
1770       cur_expr->avail_occr = NULL;
1771     }
1772
1773   /* Now record the occurrence(s).  */
1774   if (antic_p)
1775     {
1776       antic_occr = cur_expr->antic_occr;
1777
1778       /* Search for another occurrence in the same basic block.  */
1779       while (antic_occr && BLOCK_NUM (antic_occr->insn) != BLOCK_NUM (insn))
1780         {
1781           /* If an occurrence isn't found, save a pointer to the end of
1782              the list.  */
1783           last_occr = antic_occr;
1784           antic_occr = antic_occr->next;
1785         }
1786
1787       if (antic_occr)
1788         /* Found another instance of the expression in the same basic block.
1789            Prefer the currently recorded one.  We want the first one in the
1790            block and the block is scanned from start to end.  */
1791         ; /* nothing to do */
1792       else
1793         {
1794           /* First occurrence of this expression in this basic block.  */
1795           antic_occr = (struct occr *) gcse_alloc (sizeof (struct occr));
1796           bytes_used += sizeof (struct occr);
1797           /* First occurrence of this expression in any block?  */
1798           if (cur_expr->antic_occr == NULL)
1799             cur_expr->antic_occr = antic_occr;
1800           else
1801             last_occr->next = antic_occr;
1802
1803           antic_occr->insn = insn;
1804           antic_occr->next = NULL;
1805         }
1806     }
1807
1808   if (avail_p)
1809     {
1810       avail_occr = cur_expr->avail_occr;
1811
1812       /* Search for another occurrence in the same basic block.  */
1813       while (avail_occr && BLOCK_NUM (avail_occr->insn) != BLOCK_NUM (insn))
1814         {
1815           /* If an occurrence isn't found, save a pointer to the end of
1816              the list.  */
1817           last_occr = avail_occr;
1818           avail_occr = avail_occr->next;
1819         }
1820
1821       if (avail_occr)
1822         /* Found another instance of the expression in the same basic block.
1823            Prefer this occurrence to the currently recorded one.  We want
1824            the last one in the block and the block is scanned from start
1825            to end.  */
1826         avail_occr->insn = insn;
1827       else
1828         {
1829           /* First occurrence of this expression in this basic block.  */
1830           avail_occr = (struct occr *) gcse_alloc (sizeof (struct occr));
1831           bytes_used += sizeof (struct occr);
1832
1833           /* First occurrence of this expression in any block?  */
1834           if (cur_expr->avail_occr == NULL)
1835             cur_expr->avail_occr = avail_occr;
1836           else
1837             last_occr->next = avail_occr;
1838
1839           avail_occr->insn = insn;
1840           avail_occr->next = NULL;
1841         }
1842     }
1843 }
1844
1845 /* Insert pattern X in INSN in the hash table.
1846    X is a SET of a reg to either another reg or a constant.
1847    If it is already present, record it as the last occurrence in INSN's
1848    basic block.  */
1849
1850 static void
1851 insert_set_in_table (x, insn)
1852      rtx x;
1853      rtx insn;
1854 {
1855   int found;
1856   unsigned int hash;
1857   struct expr *cur_expr, *last_expr = NULL;
1858   struct occr *cur_occr, *last_occr = NULL;
1859
1860   if (GET_CODE (x) != SET
1861       || GET_CODE (SET_DEST (x)) != REG)
1862     abort ();
1863
1864   hash = hash_set (REGNO (SET_DEST (x)), set_hash_table_size);
1865
1866   cur_expr = set_hash_table[hash];
1867   found = 0;
1868
1869   while (cur_expr && 0 == (found = expr_equiv_p (cur_expr->expr, x)))
1870     {
1871       /* If the expression isn't found, save a pointer to the end of
1872          the list.  */
1873       last_expr = cur_expr;
1874       cur_expr = cur_expr->next_same_hash;
1875     }
1876
1877   if (! found)
1878     {
1879       cur_expr = (struct expr *) gcse_alloc (sizeof (struct expr));
1880       bytes_used += sizeof (struct expr);
1881       if (set_hash_table[hash] == NULL)
1882         /* This is the first pattern that hashed to this index.  */
1883         set_hash_table[hash] = cur_expr;
1884       else
1885         /* Add EXPR to end of this hash chain.  */
1886         last_expr->next_same_hash = cur_expr;
1887
1888       /* Set the fields of the expr element.
1889          We must copy X because it can be modified when copy propagation is
1890          performed on its operands.  */
1891       cur_expr->expr = copy_rtx (x);
1892       cur_expr->bitmap_index = n_sets++;
1893       cur_expr->next_same_hash = NULL;
1894       cur_expr->antic_occr = NULL;
1895       cur_expr->avail_occr = NULL;
1896     }
1897
1898   /* Now record the occurrence.  */
1899   cur_occr = cur_expr->avail_occr;
1900
1901   /* Search for another occurrence in the same basic block.  */
1902   while (cur_occr && BLOCK_NUM (cur_occr->insn) != BLOCK_NUM (insn))
1903     {
1904       /* If an occurrence isn't found, save a pointer to the end of
1905          the list.  */
1906       last_occr = cur_occr;
1907       cur_occr = cur_occr->next;
1908     }
1909
1910   if (cur_occr)
1911     /* Found another instance of the expression in the same basic block.
1912        Prefer this occurrence to the currently recorded one.  We want the
1913        last one in the block and the block is scanned from start to end.  */
1914     cur_occr->insn = insn;
1915   else
1916     {
1917       /* First occurrence of this expression in this basic block.  */
1918       cur_occr = (struct occr *) gcse_alloc (sizeof (struct occr));
1919       bytes_used += sizeof (struct occr);
1920
1921       /* First occurrence of this expression in any block?  */
1922       if (cur_expr->avail_occr == NULL)
1923         cur_expr->avail_occr = cur_occr;
1924       else
1925         last_occr->next = cur_occr;
1926
1927       cur_occr->insn = insn;
1928       cur_occr->next = NULL;
1929     }
1930 }
1931
1932 /* Scan pattern PAT of INSN and add an entry to the hash table.  If SET_P is
1933    non-zero, this is for the assignment hash table, otherwise it is for the
1934    expression hash table.  */
1935
1936 static void
1937 hash_scan_set (pat, insn, set_p)
1938      rtx pat, insn;
1939      int set_p;
1940 {
1941   rtx src = SET_SRC (pat);
1942   rtx dest = SET_DEST (pat);
1943   rtx note;
1944
1945   if (GET_CODE (src) == CALL)
1946     hash_scan_call (src, insn);
1947
1948   else if (GET_CODE (dest) == REG)
1949     {
1950       unsigned int regno = REGNO (dest);
1951       rtx tmp;
1952
1953       /* If this is a single set and we are doing constant propagation,
1954          see if a REG_NOTE shows this equivalent to a constant.  */
1955       if (set_p && (note = find_reg_equal_equiv_note (insn)) != 0
1956           && CONSTANT_P (XEXP (note, 0)))
1957         src = XEXP (note, 0), pat = gen_rtx_SET (VOIDmode, dest, src);
1958
1959       /* Only record sets of pseudo-regs in the hash table.  */
1960       if (! set_p
1961           && regno >= FIRST_PSEUDO_REGISTER
1962           /* Don't GCSE something if we can't do a reg/reg copy.  */
1963           && can_copy_p [GET_MODE (dest)]
1964           /* Is SET_SRC something we want to gcse?  */
1965           && want_to_gcse_p (src)
1966           /* Don't CSE a nop.  */
1967           && src != dest)
1968         {
1969           /* An expression is not anticipatable if its operands are
1970              modified before this insn.  */
1971           int antic_p = oprs_anticipatable_p (src, insn);
1972           /* An expression is not available if its operands are
1973              subsequently modified, including this insn.  */
1974           int avail_p = oprs_available_p (src, insn);
1975
1976           insert_expr_in_table (src, GET_MODE (dest), insn, antic_p, avail_p);
1977         }
1978
1979       /* Record sets for constant/copy propagation.  */
1980       else if (set_p
1981                && regno >= FIRST_PSEUDO_REGISTER
1982                && ((GET_CODE (src) == REG
1983                     && REGNO (src) >= FIRST_PSEUDO_REGISTER
1984                     && can_copy_p [GET_MODE (dest)]
1985                     && REGNO (src) != regno)
1986                    || GET_CODE (src) == CONST_INT
1987                    || GET_CODE (src) == SYMBOL_REF
1988                    || GET_CODE (src) == CONST_DOUBLE)
1989                /* A copy is not available if its src or dest is subsequently
1990                   modified.  Here we want to search from INSN+1 on, but
1991                   oprs_available_p searches from INSN on.  */
1992                && (insn == BLOCK_END (BLOCK_NUM (insn))
1993                    || ((tmp = next_nonnote_insn (insn)) != NULL_RTX
1994                        && oprs_available_p (pat, tmp))))
1995         insert_set_in_table (pat, insn);
1996     }
1997 }
1998
1999 static void
2000 hash_scan_clobber (x, insn)
2001      rtx x ATTRIBUTE_UNUSED, insn ATTRIBUTE_UNUSED;
2002 {
2003   /* Currently nothing to do.  */
2004 }
2005
2006 static void
2007 hash_scan_call (x, insn)
2008      rtx x ATTRIBUTE_UNUSED, insn ATTRIBUTE_UNUSED;
2009 {
2010   /* Currently nothing to do.  */
2011 }
2012
2013 /* Process INSN and add hash table entries as appropriate.
2014
2015    Only available expressions that set a single pseudo-reg are recorded.
2016
2017    Single sets in a PARALLEL could be handled, but it's an extra complication
2018    that isn't dealt with right now.  The trick is handling the CLOBBERs that
2019    are also in the PARALLEL.  Later.
2020
2021    If SET_P is non-zero, this is for the assignment hash table,
2022    otherwise it is for the expression hash table.
2023    If IN_LIBCALL_BLOCK nonzero, we are in a libcall block, and should
2024    not record any expressions.  */
2025
2026 static void
2027 hash_scan_insn (insn, set_p, in_libcall_block)
2028      rtx insn;
2029      int set_p;
2030      int in_libcall_block;
2031 {
2032   rtx pat = PATTERN (insn);
2033   int i;
2034
2035   if (in_libcall_block)
2036     return;
2037
2038   /* Pick out the sets of INSN and for other forms of instructions record
2039      what's been modified.  */
2040
2041   if (GET_CODE (pat) == SET)
2042     hash_scan_set (pat, insn, set_p);
2043   else if (GET_CODE (pat) == PARALLEL)
2044     for (i = 0; i < XVECLEN (pat, 0); i++)
2045       {
2046         rtx x = XVECEXP (pat, 0, i);
2047
2048         if (GET_CODE (x) == SET)
2049           hash_scan_set (x, insn, set_p);
2050         else if (GET_CODE (x) == CLOBBER)
2051           hash_scan_clobber (x, insn);
2052         else if (GET_CODE (x) == CALL)
2053           hash_scan_call (x, insn);
2054       }
2055
2056   else if (GET_CODE (pat) == CLOBBER)
2057     hash_scan_clobber (pat, insn);
2058   else if (GET_CODE (pat) == CALL)
2059     hash_scan_call (pat, insn);
2060 }
2061
2062 static void
2063 dump_hash_table (file, name, table, table_size, total_size)
2064      FILE *file;
2065      const char *name;
2066      struct expr **table;
2067      int table_size, total_size;
2068 {
2069   int i;
2070   /* Flattened out table, so it's printed in proper order.  */
2071   struct expr **flat_table;
2072   unsigned int *hash_val;
2073   struct expr *expr;
2074
2075   flat_table
2076     = (struct expr **) xcalloc (total_size, sizeof (struct expr *));
2077   hash_val = (unsigned int *) xmalloc (total_size * sizeof (unsigned int));
2078
2079   for (i = 0; i < table_size; i++)
2080     for (expr = table[i]; expr != NULL; expr = expr->next_same_hash)
2081       {
2082         flat_table[expr->bitmap_index] = expr;
2083         hash_val[expr->bitmap_index] = i;
2084       }
2085
2086   fprintf (file, "%s hash table (%d buckets, %d entries)\n",
2087            name, table_size, total_size);
2088
2089   for (i = 0; i < total_size; i++)
2090     if (flat_table[i] != 0)
2091       {
2092         expr = flat_table[i];
2093         fprintf (file, "Index %d (hash value %d)\n  ",
2094                  expr->bitmap_index, hash_val[i]);
2095         print_rtl (file, expr->expr);
2096         fprintf (file, "\n");
2097       }
2098
2099   fprintf (file, "\n");
2100
2101   free (flat_table);
2102   free (hash_val);
2103 }
2104
2105 /* Record register first/last/block set information for REGNO in INSN.
2106
2107    reg_first_set records the first place in the block where the register
2108    is set and is used to compute "anticipatability".
2109
2110    reg_last_set records the last place in the block where the register
2111    is set and is used to compute "availability".
2112
2113    reg_set_in_block records whether the register is set in the block
2114    and is used to compute "transparency".  */
2115
2116 static void
2117 record_last_reg_set_info (insn, regno)
2118      rtx insn;
2119      int regno;
2120 {
2121   if (reg_first_set[regno] == NEVER_SET)
2122     reg_first_set[regno] = INSN_CUID (insn);
2123
2124   reg_last_set[regno] = INSN_CUID (insn);
2125   SET_BIT (reg_set_in_block[BLOCK_NUM (insn)], regno);
2126 }
2127
2128 /* Record memory first/last/block set information for INSN.  */
2129
2130 static void
2131 record_last_mem_set_info (insn)
2132      rtx insn;
2133 {
2134   if (mem_first_set == NEVER_SET)
2135     mem_first_set = INSN_CUID (insn);
2136
2137   mem_last_set = INSN_CUID (insn);
2138   mem_set_in_block[BLOCK_NUM (insn)] = 1;
2139 }
2140
2141 /* Called from compute_hash_table via note_stores to handle one
2142    SET or CLOBBER in an insn.  DATA is really the instruction in which
2143    the SET is taking place.  */
2144
2145 static void
2146 record_last_set_info (dest, setter, data)
2147      rtx dest, setter ATTRIBUTE_UNUSED;
2148      void *data;
2149 {
2150   rtx last_set_insn = (rtx) data;
2151
2152   if (GET_CODE (dest) == SUBREG)
2153     dest = SUBREG_REG (dest);
2154
2155   if (GET_CODE (dest) == REG)
2156     record_last_reg_set_info (last_set_insn, REGNO (dest));
2157   else if (GET_CODE (dest) == MEM
2158            /* Ignore pushes, they clobber nothing.  */
2159            && ! push_operand (dest, GET_MODE (dest)))
2160     record_last_mem_set_info (last_set_insn);
2161 }
2162
2163 /* Top level function to create an expression or assignment hash table.
2164
2165    Expression entries are placed in the hash table if
2166    - they are of the form (set (pseudo-reg) src),
2167    - src is something we want to perform GCSE on,
2168    - none of the operands are subsequently modified in the block
2169
2170    Assignment entries are placed in the hash table if
2171    - they are of the form (set (pseudo-reg) src),
2172    - src is something we want to perform const/copy propagation on,
2173    - none of the operands or target are subsequently modified in the block
2174
2175    Currently src must be a pseudo-reg or a const_int.
2176
2177    F is the first insn.
2178    SET_P is non-zero for computing the assignment hash table.  */
2179
2180 static void
2181 compute_hash_table (set_p)
2182      int set_p;
2183 {
2184   int bb;
2185
2186   /* While we compute the hash table we also compute a bit array of which
2187      registers are set in which blocks.
2188      We also compute which blocks set memory, in the absence of aliasing
2189      support [which is TODO].
2190      ??? This isn't needed during const/copy propagation, but it's cheap to
2191      compute.  Later.  */
2192   sbitmap_vector_zero (reg_set_in_block, n_basic_blocks);
2193   memset ((char *) mem_set_in_block, 0, n_basic_blocks);
2194
2195   /* Some working arrays used to track first and last set in each block.  */
2196   /* ??? One could use alloca here, but at some size a threshold is crossed
2197      beyond which one should use malloc.  Are we at that threshold here?  */
2198   reg_first_set = (int *) gmalloc (max_gcse_regno * sizeof (int));
2199   reg_last_set = (int *) gmalloc (max_gcse_regno * sizeof (int));
2200
2201   for (bb = 0; bb < n_basic_blocks; bb++)
2202     {
2203       rtx insn;
2204       unsigned int regno;
2205       int in_libcall_block;
2206       unsigned int i;
2207
2208       /* First pass over the instructions records information used to
2209          determine when registers and memory are first and last set.
2210          ??? The mem_set_in_block and hard-reg reg_set_in_block computation
2211          could be moved to compute_sets since they currently don't change.  */
2212
2213       for (i = 0; i < max_gcse_regno; i++)
2214         reg_first_set[i] = reg_last_set[i] = NEVER_SET;
2215
2216       mem_first_set = NEVER_SET;
2217       mem_last_set = NEVER_SET;
2218
2219       for (insn = BLOCK_HEAD (bb);
2220            insn && insn != NEXT_INSN (BLOCK_END (bb));
2221            insn = NEXT_INSN (insn))
2222         {
2223 #ifdef NON_SAVING_SETJMP
2224           if (NON_SAVING_SETJMP && GET_CODE (insn) == NOTE
2225               && NOTE_LINE_NUMBER (insn) == NOTE_INSN_SETJMP)
2226             {
2227               for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2228                 record_last_reg_set_info (insn, regno);
2229               continue;
2230             }
2231 #endif
2232
2233           if (! INSN_P (insn))
2234             continue;
2235
2236           if (GET_CODE (insn) == CALL_INSN)
2237             {
2238               for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2239                 if ((call_used_regs[regno]
2240                      && regno != STACK_POINTER_REGNUM
2241 #if HARD_FRAME_POINTER_REGNUM != FRAME_POINTER_REGNUM
2242                      && regno != HARD_FRAME_POINTER_REGNUM
2243 #endif
2244 #if ARG_POINTER_REGNUM != FRAME_POINTER_REGNUM
2245                      && ! (regno == ARG_POINTER_REGNUM && fixed_regs[regno])
2246 #endif
2247 #if !defined (PIC_OFFSET_TABLE_REG_CALL_CLOBBERED)
2248                      && ! (regno == PIC_OFFSET_TABLE_REGNUM && flag_pic)
2249 #endif
2250
2251                      && regno != FRAME_POINTER_REGNUM)
2252                     || global_regs[regno])
2253                   record_last_reg_set_info (insn, regno);
2254
2255               if (! CONST_CALL_P (insn))
2256                 record_last_mem_set_info (insn);
2257             }
2258
2259           note_stores (PATTERN (insn), record_last_set_info, insn);
2260         }
2261
2262       /* The next pass builds the hash table.  */
2263
2264       for (insn = BLOCK_HEAD (bb), in_libcall_block = 0;
2265            insn && insn != NEXT_INSN (BLOCK_END (bb));
2266            insn = NEXT_INSN (insn))
2267         if (INSN_P (insn))
2268           {
2269             if (find_reg_note (insn, REG_LIBCALL, NULL_RTX))
2270               in_libcall_block = 1;
2271             else if (find_reg_note (insn, REG_RETVAL, NULL_RTX))
2272               in_libcall_block = 0;
2273             hash_scan_insn (insn, set_p, in_libcall_block);
2274         }
2275     }
2276
2277   free (reg_first_set);
2278   free (reg_last_set);
2279
2280   /* Catch bugs early.  */
2281   reg_first_set = reg_last_set = 0;
2282 }
2283
2284 /* Allocate space for the set hash table.
2285    N_INSNS is the number of instructions in the function.
2286    It is used to determine the number of buckets to use.  */
2287
2288 static void
2289 alloc_set_hash_table (n_insns)
2290      int n_insns;
2291 {
2292   int n;
2293
2294   set_hash_table_size = n_insns / 4;
2295   if (set_hash_table_size < 11)
2296     set_hash_table_size = 11;
2297
2298   /* Attempt to maintain efficient use of hash table.
2299      Making it an odd number is simplest for now.
2300      ??? Later take some measurements.  */
2301   set_hash_table_size |= 1;
2302   n = set_hash_table_size * sizeof (struct expr *);
2303   set_hash_table = (struct expr **) gmalloc (n);
2304 }
2305
2306 /* Free things allocated by alloc_set_hash_table.  */
2307
2308 static void
2309 free_set_hash_table ()
2310 {
2311   free (set_hash_table);
2312 }
2313
2314 /* Compute the hash table for doing copy/const propagation.  */
2315
2316 static void
2317 compute_set_hash_table ()
2318 {
2319   /* Initialize count of number of entries in hash table.  */
2320   n_sets = 0;
2321   memset ((char *) set_hash_table, 0,
2322          set_hash_table_size * sizeof (struct expr *));
2323
2324   compute_hash_table (1);
2325 }
2326
2327 /* Allocate space for the expression hash table.
2328    N_INSNS is the number of instructions in the function.
2329    It is used to determine the number of buckets to use.  */
2330
2331 static void
2332 alloc_expr_hash_table (n_insns)
2333      unsigned int n_insns;
2334 {
2335   int n;
2336
2337   expr_hash_table_size = n_insns / 2;
2338   /* Make sure the amount is usable.  */
2339   if (expr_hash_table_size < 11)
2340     expr_hash_table_size = 11;
2341
2342   /* Attempt to maintain efficient use of hash table.
2343      Making it an odd number is simplest for now.
2344      ??? Later take some measurements.  */
2345   expr_hash_table_size |= 1;
2346   n = expr_hash_table_size * sizeof (struct expr *);
2347   expr_hash_table = (struct expr **) gmalloc (n);
2348 }
2349
2350 /* Free things allocated by alloc_expr_hash_table.  */
2351
2352 static void
2353 free_expr_hash_table ()
2354 {
2355   free (expr_hash_table);
2356 }
2357
2358 /* Compute the hash table for doing GCSE.  */
2359
2360 static void
2361 compute_expr_hash_table ()
2362 {
2363   /* Initialize count of number of entries in hash table.  */
2364   n_exprs = 0;
2365   memset ((char *) expr_hash_table, 0,
2366          expr_hash_table_size * sizeof (struct expr *));
2367
2368   compute_hash_table (0);
2369 }
2370 \f
2371 /* Expression tracking support.  */
2372
2373 /* Lookup pattern PAT in the expression table.
2374    The result is a pointer to the table entry, or NULL if not found.  */
2375
2376 static struct expr *
2377 lookup_expr (pat)
2378      rtx pat;
2379 {
2380   int do_not_record_p;
2381   unsigned int hash = hash_expr (pat, GET_MODE (pat), &do_not_record_p,
2382                                  expr_hash_table_size);
2383   struct expr *expr;
2384
2385   if (do_not_record_p)
2386     return NULL;
2387
2388   expr = expr_hash_table[hash];
2389
2390   while (expr && ! expr_equiv_p (expr->expr, pat))
2391     expr = expr->next_same_hash;
2392
2393   return expr;
2394 }
2395
2396 /* Lookup REGNO in the set table.  If PAT is non-NULL look for the entry that
2397    matches it, otherwise return the first entry for REGNO.  The result is a
2398    pointer to the table entry, or NULL if not found.  */
2399
2400 static struct expr *
2401 lookup_set (regno, pat)
2402      unsigned int regno;
2403      rtx pat;
2404 {
2405   unsigned int hash = hash_set (regno, set_hash_table_size);
2406   struct expr *expr;
2407
2408   expr = set_hash_table[hash];
2409
2410   if (pat)
2411     {
2412       while (expr && ! expr_equiv_p (expr->expr, pat))
2413         expr = expr->next_same_hash;
2414     }
2415   else
2416     {
2417       while (expr && REGNO (SET_DEST (expr->expr)) != regno)
2418         expr = expr->next_same_hash;
2419     }
2420
2421   return expr;
2422 }
2423
2424 /* Return the next entry for REGNO in list EXPR.  */
2425
2426 static struct expr *
2427 next_set (regno, expr)
2428      unsigned int regno;
2429      struct expr *expr;
2430 {
2431   do
2432     expr = expr->next_same_hash;
2433   while (expr && REGNO (SET_DEST (expr->expr)) != regno);
2434
2435   return expr;
2436 }
2437
2438 /* Reset tables used to keep track of what's still available [since the
2439    start of the block].  */
2440
2441 static void
2442 reset_opr_set_tables ()
2443 {
2444   /* Maintain a bitmap of which regs have been set since beginning of
2445      the block.  */
2446   sbitmap_zero (reg_set_bitmap);
2447
2448   /* Also keep a record of the last instruction to modify memory.
2449      For now this is very trivial, we only record whether any memory
2450      location has been modified.  */
2451   mem_last_set = 0;
2452 }
2453
2454 /* Return non-zero if the operands of X are not set before INSN in
2455    INSN's basic block.  */
2456
2457 static int
2458 oprs_not_set_p (x, insn)
2459      rtx x, insn;
2460 {
2461   int i, j;
2462   enum rtx_code code;
2463   const char *fmt;
2464
2465   if (x == 0)
2466     return 1;
2467
2468   code = GET_CODE (x);
2469   switch (code)
2470     {
2471     case PC:
2472     case CC0:
2473     case CONST:
2474     case CONST_INT:
2475     case CONST_DOUBLE:
2476     case SYMBOL_REF:
2477     case LABEL_REF:
2478     case ADDR_VEC:
2479     case ADDR_DIFF_VEC:
2480       return 1;
2481
2482     case MEM:
2483       if (mem_last_set != 0)
2484         return 0;
2485       else
2486         return oprs_not_set_p (XEXP (x, 0), insn);
2487
2488     case REG:
2489       return ! TEST_BIT (reg_set_bitmap, REGNO (x));
2490
2491     default:
2492       break;
2493     }
2494
2495   for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
2496     {
2497       if (fmt[i] == 'e')
2498         {
2499           /* If we are about to do the last recursive call
2500              needed at this level, change it into iteration.
2501              This function is called enough to be worth it.  */
2502           if (i == 0)
2503             return oprs_not_set_p (XEXP (x, i), insn);
2504
2505           if (! oprs_not_set_p (XEXP (x, i), insn))
2506             return 0;
2507         }
2508       else if (fmt[i] == 'E')
2509         for (j = 0; j < XVECLEN (x, i); j++)
2510           if (! oprs_not_set_p (XVECEXP (x, i, j), insn))
2511             return 0;
2512     }
2513
2514   return 1;
2515 }
2516
2517 /* Mark things set by a CALL.  */
2518
2519 static void
2520 mark_call (insn)
2521      rtx insn;
2522 {
2523   mem_last_set = INSN_CUID (insn);
2524 }
2525
2526 /* Mark things set by a SET.  */
2527
2528 static void
2529 mark_set (pat, insn)
2530      rtx pat, insn;
2531 {
2532   rtx dest = SET_DEST (pat);
2533
2534   while (GET_CODE (dest) == SUBREG
2535          || GET_CODE (dest) == ZERO_EXTRACT
2536          || GET_CODE (dest) == SIGN_EXTRACT
2537          || GET_CODE (dest) == STRICT_LOW_PART)
2538     dest = XEXP (dest, 0);
2539
2540   if (GET_CODE (dest) == REG)
2541     SET_BIT (reg_set_bitmap, REGNO (dest));
2542   else if (GET_CODE (dest) == MEM)
2543     mem_last_set = INSN_CUID (insn);
2544
2545   if (GET_CODE (SET_SRC (pat)) == CALL)
2546     mark_call (insn);
2547 }
2548
2549 /* Record things set by a CLOBBER.  */
2550
2551 static void
2552 mark_clobber (pat, insn)
2553      rtx pat, insn;
2554 {
2555   rtx clob = XEXP (pat, 0);
2556
2557   while (GET_CODE (clob) == SUBREG || GET_CODE (clob) == STRICT_LOW_PART)
2558     clob = XEXP (clob, 0);
2559
2560   if (GET_CODE (clob) == REG)
2561     SET_BIT (reg_set_bitmap, REGNO (clob));
2562   else
2563     mem_last_set = INSN_CUID (insn);
2564 }
2565
2566 /* Record things set by INSN.
2567    This data is used by oprs_not_set_p.  */
2568
2569 static void
2570 mark_oprs_set (insn)
2571      rtx insn;
2572 {
2573   rtx pat = PATTERN (insn);
2574   int i;
2575
2576   if (GET_CODE (pat) == SET)
2577     mark_set (pat, insn);
2578   else if (GET_CODE (pat) == PARALLEL)
2579     for (i = 0; i < XVECLEN (pat, 0); i++)
2580       {
2581         rtx x = XVECEXP (pat, 0, i);
2582
2583         if (GET_CODE (x) == SET)
2584           mark_set (x, insn);
2585         else if (GET_CODE (x) == CLOBBER)
2586           mark_clobber (x, insn);
2587         else if (GET_CODE (x) == CALL)
2588           mark_call (insn);
2589       }
2590
2591   else if (GET_CODE (pat) == CLOBBER)
2592     mark_clobber (pat, insn);
2593   else if (GET_CODE (pat) == CALL)
2594     mark_call (insn);
2595 }
2596
2597 \f
2598 /* Classic GCSE reaching definition support.  */
2599
2600 /* Allocate reaching def variables.  */
2601
2602 static void
2603 alloc_rd_mem (n_blocks, n_insns)
2604      int n_blocks, n_insns;
2605 {
2606   rd_kill = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2607   sbitmap_vector_zero (rd_kill, n_basic_blocks);
2608
2609   rd_gen = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2610   sbitmap_vector_zero (rd_gen, n_basic_blocks);
2611
2612   reaching_defs = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2613   sbitmap_vector_zero (reaching_defs, n_basic_blocks);
2614
2615   rd_out = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2616   sbitmap_vector_zero (rd_out, n_basic_blocks);
2617 }
2618
2619 /* Free reaching def variables.  */
2620
2621 static void
2622 free_rd_mem ()
2623 {
2624   free (rd_kill);
2625   free (rd_gen);
2626   free (reaching_defs);
2627   free (rd_out);
2628 }
2629
2630 /* Add INSN to the kills of BB.  REGNO, set in BB, is killed by INSN.  */
2631
2632 static void
2633 handle_rd_kill_set (insn, regno, bb)
2634      rtx insn;
2635      int regno, bb;
2636 {
2637   struct reg_set *this_reg;
2638
2639   for (this_reg = reg_set_table[regno]; this_reg; this_reg = this_reg ->next)
2640     if (BLOCK_NUM (this_reg->insn) != BLOCK_NUM (insn))
2641       SET_BIT (rd_kill[bb], INSN_CUID (this_reg->insn));
2642 }
2643
2644 /* Compute the set of kill's for reaching definitions.  */
2645
2646 static void
2647 compute_kill_rd ()
2648 {
2649   int bb, cuid;
2650   unsigned int regno;
2651   int i;
2652
2653   /* For each block
2654        For each set bit in `gen' of the block (i.e each insn which
2655            generates a definition in the block)
2656          Call the reg set by the insn corresponding to that bit regx
2657          Look at the linked list starting at reg_set_table[regx]
2658          For each setting of regx in the linked list, which is not in
2659              this block
2660            Set the bit in `kill' corresponding to that insn.   */
2661   for (bb = 0; bb < n_basic_blocks; bb++)
2662     for (cuid = 0; cuid < max_cuid; cuid++)
2663       if (TEST_BIT (rd_gen[bb], cuid))
2664         {
2665           rtx insn = CUID_INSN (cuid);
2666           rtx pat = PATTERN (insn);
2667
2668           if (GET_CODE (insn) == CALL_INSN)
2669             {
2670               for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2671                 {
2672                   if ((call_used_regs[regno]
2673                        && regno != STACK_POINTER_REGNUM
2674 #if HARD_FRAME_POINTER_REGNUM != FRAME_POINTER_REGNUM
2675                        && regno != HARD_FRAME_POINTER_REGNUM
2676 #endif
2677 #if ARG_POINTER_REGNUM != FRAME_POINTER_REGNUM
2678                        && ! (regno == ARG_POINTER_REGNUM
2679                              && fixed_regs[regno])
2680 #endif
2681 #if !defined (PIC_OFFSET_TABLE_REG_CALL_CLOBBERED)
2682                        && ! (regno == PIC_OFFSET_TABLE_REGNUM && flag_pic)
2683 #endif
2684                        && regno != FRAME_POINTER_REGNUM)
2685                       || global_regs[regno])
2686                     handle_rd_kill_set (insn, regno, bb);
2687                 }
2688             }
2689
2690           if (GET_CODE (pat) == PARALLEL)
2691             {
2692               for (i = XVECLEN (pat, 0) - 1; i >= 0; i--)
2693                 {
2694                   enum rtx_code code = GET_CODE (XVECEXP (pat, 0, i));
2695
2696                   if ((code == SET || code == CLOBBER)
2697                       && GET_CODE (XEXP (XVECEXP (pat, 0, i), 0)) == REG)
2698                     handle_rd_kill_set (insn,
2699                                         REGNO (XEXP (XVECEXP (pat, 0, i), 0)),
2700                                         bb);
2701                 }
2702             }
2703           else if (GET_CODE (pat) == SET && GET_CODE (SET_DEST (pat)) == REG)
2704             /* Each setting of this register outside of this block
2705                must be marked in the set of kills in this block.  */
2706             handle_rd_kill_set (insn, REGNO (SET_DEST (pat)), bb);
2707         }
2708 }
2709
2710 /* Compute the reaching definitions as in
2711    Compilers Principles, Techniques, and Tools. Aho, Sethi, Ullman,
2712    Chapter 10.  It is the same algorithm as used for computing available
2713    expressions but applied to the gens and kills of reaching definitions.  */
2714
2715 static void
2716 compute_rd ()
2717 {
2718   int bb, changed, passes;
2719
2720   for (bb = 0; bb < n_basic_blocks; bb++)
2721     sbitmap_copy (rd_out[bb] /*dst*/, rd_gen[bb] /*src*/);
2722
2723   passes = 0;
2724   changed = 1;
2725   while (changed)
2726     {
2727       changed = 0;
2728       for (bb = 0; bb < n_basic_blocks; bb++)
2729         {
2730           sbitmap_union_of_preds (reaching_defs[bb], rd_out, bb);
2731           changed |= sbitmap_union_of_diff (rd_out[bb], rd_gen[bb],
2732                                             reaching_defs[bb], rd_kill[bb]);
2733         }
2734       passes++;
2735     }
2736
2737   if (gcse_file)
2738     fprintf (gcse_file, "reaching def computation: %d passes\n", passes);
2739 }
2740 \f
2741 /* Classic GCSE available expression support.  */
2742
2743 /* Allocate memory for available expression computation.  */
2744
2745 static void
2746 alloc_avail_expr_mem (n_blocks, n_exprs)
2747      int n_blocks, n_exprs;
2748 {
2749   ae_kill = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2750   sbitmap_vector_zero (ae_kill, n_basic_blocks);
2751
2752   ae_gen = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2753   sbitmap_vector_zero (ae_gen, n_basic_blocks);
2754
2755   ae_in = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2756   sbitmap_vector_zero (ae_in, n_basic_blocks);
2757
2758   ae_out = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2759   sbitmap_vector_zero (ae_out, n_basic_blocks);
2760 }
2761
2762 static void
2763 free_avail_expr_mem ()
2764 {
2765   free (ae_kill);
2766   free (ae_gen);
2767   free (ae_in);
2768   free (ae_out);
2769 }
2770
2771 /* Compute the set of available expressions generated in each basic block.  */
2772
2773 static void
2774 compute_ae_gen ()
2775 {
2776   unsigned int i;
2777   struct expr *expr;
2778   struct occr *occr;
2779
2780   /* For each recorded occurrence of each expression, set ae_gen[bb][expr].
2781      This is all we have to do because an expression is not recorded if it
2782      is not available, and the only expressions we want to work with are the
2783      ones that are recorded.  */
2784   for (i = 0; i < expr_hash_table_size; i++)
2785     for (expr = expr_hash_table[i]; expr != 0; expr = expr->next_same_hash)
2786       for (occr = expr->avail_occr; occr != 0; occr = occr->next)
2787         SET_BIT (ae_gen[BLOCK_NUM (occr->insn)], expr->bitmap_index);
2788 }
2789
2790 /* Return non-zero if expression X is killed in BB.  */
2791
2792 static int
2793 expr_killed_p (x, bb)
2794      rtx x;
2795      int bb;
2796 {
2797   int i, j;
2798   enum rtx_code code;
2799   const char *fmt;
2800
2801   if (x == 0)
2802     return 1;
2803
2804   code = GET_CODE (x);
2805   switch (code)
2806     {
2807     case REG:
2808       return TEST_BIT (reg_set_in_block[bb], REGNO (x));
2809
2810     case MEM:
2811       if (mem_set_in_block[bb])
2812         return 1;
2813       else
2814         return expr_killed_p (XEXP (x, 0), bb);
2815
2816     case PC:
2817     case CC0: /*FIXME*/
2818     case CONST:
2819     case CONST_INT:
2820     case CONST_DOUBLE:
2821     case SYMBOL_REF:
2822     case LABEL_REF:
2823     case ADDR_VEC:
2824     case ADDR_DIFF_VEC:
2825       return 0;
2826
2827     default:
2828       break;
2829     }
2830
2831   for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
2832     {
2833       if (fmt[i] == 'e')
2834         {
2835           /* If we are about to do the last recursive call
2836              needed at this level, change it into iteration.
2837              This function is called enough to be worth it.  */
2838           if (i == 0)
2839             return expr_killed_p (XEXP (x, i), bb);
2840           else if (expr_killed_p (XEXP (x, i), bb))
2841             return 1;
2842         }
2843       else if (fmt[i] == 'E')
2844         for (j = 0; j < XVECLEN (x, i); j++)
2845           if (expr_killed_p (XVECEXP (x, i, j), bb))
2846             return 1;
2847     }
2848
2849   return 0;
2850 }
2851
2852 /* Compute the set of available expressions killed in each basic block.  */
2853
2854 static void
2855 compute_ae_kill (ae_gen, ae_kill)
2856      sbitmap *ae_gen, *ae_kill;
2857 {
2858   int bb;
2859   unsigned int i;
2860   struct expr *expr;
2861
2862   for (bb = 0; bb < n_basic_blocks; bb++)
2863     for (i = 0; i < expr_hash_table_size; i++)
2864       for (expr = expr_hash_table[i]; expr; expr = expr->next_same_hash)
2865         {
2866           /* Skip EXPR if generated in this block.  */
2867           if (TEST_BIT (ae_gen[bb], expr->bitmap_index))
2868             continue;
2869
2870           if (expr_killed_p (expr->expr, bb))
2871             SET_BIT (ae_kill[bb], expr->bitmap_index);
2872         }
2873 }
2874 \f
2875 /* Actually perform the Classic GCSE optimizations.  */
2876
2877 /* Return non-zero if occurrence OCCR of expression EXPR reaches block BB.
2878
2879    CHECK_SELF_LOOP is non-zero if we should consider a block reaching itself
2880    as a positive reach.  We want to do this when there are two computations
2881    of the expression in the block.
2882
2883    VISITED is a pointer to a working buffer for tracking which BB's have
2884    been visited.  It is NULL for the top-level call.
2885
2886    We treat reaching expressions that go through blocks containing the same
2887    reaching expression as "not reaching".  E.g. if EXPR is generated in blocks
2888    2 and 3, INSN is in block 4, and 2->3->4, we treat the expression in block
2889    2 as not reaching.  The intent is to improve the probability of finding
2890    only one reaching expression and to reduce register lifetimes by picking
2891    the closest such expression.  */
2892
2893 static int
2894 expr_reaches_here_p_work (occr, expr, bb, check_self_loop, visited)
2895      struct occr *occr;
2896      struct expr *expr;
2897      int bb;
2898      int check_self_loop;
2899      char *visited;
2900 {
2901   edge pred;
2902
2903   for (pred = BASIC_BLOCK(bb)->pred; pred != NULL; pred = pred->pred_next)
2904     {
2905       int pred_bb = pred->src->index;
2906
2907       if (visited[pred_bb])
2908         /* This predecessor has already been visited. Nothing to do.  */
2909           ;
2910       else if (pred_bb == bb)
2911         {
2912           /* BB loops on itself.  */
2913           if (check_self_loop
2914               && TEST_BIT (ae_gen[pred_bb], expr->bitmap_index)
2915               && BLOCK_NUM (occr->insn) == pred_bb)
2916             return 1;
2917
2918           visited[pred_bb] = 1;
2919         }
2920
2921       /* Ignore this predecessor if it kills the expression.  */
2922       else if (TEST_BIT (ae_kill[pred_bb], expr->bitmap_index))
2923         visited[pred_bb] = 1;
2924
2925       /* Does this predecessor generate this expression?  */
2926       else if (TEST_BIT (ae_gen[pred_bb], expr->bitmap_index))
2927         {
2928           /* Is this the occurrence we're looking for?
2929              Note that there's only one generating occurrence per block
2930              so we just need to check the block number.  */
2931           if (BLOCK_NUM (occr->insn) == pred_bb)
2932             return 1;
2933
2934           visited[pred_bb] = 1;
2935         }
2936
2937       /* Neither gen nor kill.  */
2938       else
2939         {
2940           visited[pred_bb] = 1;
2941           if (expr_reaches_here_p_work (occr, expr, pred_bb, check_self_loop,
2942               visited))
2943
2944             return 1;
2945         }
2946     }
2947
2948   /* All paths have been checked.  */
2949   return 0;
2950 }
2951
2952 /* This wrapper for expr_reaches_here_p_work() is to ensure that any
2953    memory allocated for that function is returned. */
2954
2955 static int
2956 expr_reaches_here_p (occr, expr, bb, check_self_loop)
2957      struct occr *occr;
2958      struct expr *expr;
2959      int bb;
2960      int check_self_loop;
2961 {
2962   int rval;
2963   char *visited = (char *) xcalloc (n_basic_blocks, 1);
2964
2965   rval = expr_reaches_here_p_work (occr, expr, bb, check_self_loop, visited);
2966
2967   free (visited);
2968   return rval;
2969 }
2970
2971 /* Return the instruction that computes EXPR that reaches INSN's basic block.
2972    If there is more than one such instruction, return NULL.
2973
2974    Called only by handle_avail_expr.  */
2975
2976 static rtx
2977 computing_insn (expr, insn)
2978      struct expr *expr;
2979      rtx insn;
2980 {
2981   int bb = BLOCK_NUM (insn);
2982
2983   if (expr->avail_occr->next == NULL)
2984     {
2985       if (BLOCK_NUM (expr->avail_occr->insn) == bb)
2986         /* The available expression is actually itself
2987            (i.e. a loop in the flow graph) so do nothing.  */
2988         return NULL;
2989
2990       /* (FIXME) Case that we found a pattern that was created by
2991          a substitution that took place.  */
2992       return expr->avail_occr->insn;
2993     }
2994   else
2995     {
2996       /* Pattern is computed more than once.
2997          Search backwards from this insn to see how many of these
2998          computations actually reach this insn.  */
2999       struct occr *occr;
3000       rtx insn_computes_expr = NULL;
3001       int can_reach = 0;
3002
3003       for (occr = expr->avail_occr; occr != NULL; occr = occr->next)
3004         {
3005           if (BLOCK_NUM (occr->insn) == bb)
3006             {
3007               /* The expression is generated in this block.
3008                  The only time we care about this is when the expression
3009                  is generated later in the block [and thus there's a loop].
3010                  We let the normal cse pass handle the other cases.  */
3011               if (INSN_CUID (insn) < INSN_CUID (occr->insn)
3012                   && expr_reaches_here_p (occr, expr, bb, 1))
3013                 {
3014                   can_reach++;
3015                   if (can_reach > 1)
3016                     return NULL;
3017
3018                   insn_computes_expr = occr->insn;
3019                 }
3020             }
3021           else if (expr_reaches_here_p (occr, expr, bb, 0))
3022             {
3023               can_reach++;
3024               if (can_reach > 1)
3025                 return NULL;
3026
3027               insn_computes_expr = occr->insn;
3028             }
3029         }
3030
3031       if (insn_computes_expr == NULL)
3032         abort ();
3033
3034       return insn_computes_expr;
3035     }
3036 }
3037
3038 /* Return non-zero if the definition in DEF_INSN can reach INSN.
3039    Only called by can_disregard_other_sets.  */
3040
3041 static int
3042 def_reaches_here_p (insn, def_insn)
3043      rtx insn, def_insn;
3044 {
3045   rtx reg;
3046
3047   if (TEST_BIT (reaching_defs[BLOCK_NUM (insn)], INSN_CUID (def_insn)))
3048     return 1;
3049
3050   if (BLOCK_NUM (insn) == BLOCK_NUM (def_insn))
3051     {
3052       if (INSN_CUID (def_insn) < INSN_CUID (insn))
3053         {
3054           if (GET_CODE (PATTERN (def_insn)) == PARALLEL)
3055             return 1;
3056           else if (GET_CODE (PATTERN (def_insn)) == CLOBBER)
3057             reg = XEXP (PATTERN (def_insn), 0);
3058           else if (GET_CODE (PATTERN (def_insn)) == SET)
3059             reg = SET_DEST (PATTERN (def_insn));
3060           else
3061             abort ();
3062
3063           return ! reg_set_between_p (reg, NEXT_INSN (def_insn), insn);
3064         }
3065       else
3066         return 0;
3067     }
3068
3069   return 0;
3070 }
3071
3072 /* Return non-zero if *ADDR_THIS_REG can only have one value at INSN.  The
3073    value returned is the number of definitions that reach INSN.  Returning a
3074    value of zero means that [maybe] more than one definition reaches INSN and
3075    the caller can't perform whatever optimization it is trying.  i.e. it is
3076    always safe to return zero.  */
3077
3078 static int
3079 can_disregard_other_sets (addr_this_reg, insn, for_combine)
3080      struct reg_set **addr_this_reg;
3081      rtx insn;
3082      int for_combine;
3083 {
3084   int number_of_reaching_defs = 0;
3085   struct reg_set *this_reg;
3086
3087   for (this_reg = *addr_this_reg; this_reg != 0; this_reg = this_reg->next)
3088     if (def_reaches_here_p (insn, this_reg->insn))
3089       {
3090         number_of_reaching_defs++;
3091         /* Ignore parallels for now.  */
3092         if (GET_CODE (PATTERN (this_reg->insn)) == PARALLEL)
3093           return 0;
3094
3095         if (!for_combine
3096             && (GET_CODE (PATTERN (this_reg->insn)) == CLOBBER
3097                 || ! rtx_equal_p (SET_SRC (PATTERN (this_reg->insn)),
3098                                   SET_SRC (PATTERN (insn)))))
3099           /* A setting of the reg to a different value reaches INSN.  */
3100           return 0;
3101
3102         if (number_of_reaching_defs > 1)
3103           {
3104             /* If in this setting the value the register is being set to is
3105                equal to the previous value the register was set to and this
3106                setting reaches the insn we are trying to do the substitution
3107                on then we are ok.  */
3108             if (GET_CODE (PATTERN (this_reg->insn)) == CLOBBER)
3109               return 0;
3110             else if (! rtx_equal_p (SET_SRC (PATTERN (this_reg->insn)),
3111                                     SET_SRC (PATTERN (insn))))
3112               return 0;
3113           }
3114
3115         *addr_this_reg = this_reg;
3116       }
3117
3118   return number_of_reaching_defs;
3119 }
3120
3121 /* Expression computed by insn is available and the substitution is legal,
3122    so try to perform the substitution.
3123
3124    The result is non-zero if any changes were made.  */
3125
3126 static int
3127 handle_avail_expr (insn, expr)
3128      rtx insn;
3129      struct expr *expr;
3130 {
3131   rtx pat, insn_computes_expr;
3132   rtx to;
3133   struct reg_set *this_reg;
3134   int found_setting, use_src;
3135   int changed = 0;
3136
3137   /* We only handle the case where one computation of the expression
3138      reaches this instruction.  */
3139   insn_computes_expr = computing_insn (expr, insn);
3140   if (insn_computes_expr == NULL)
3141     return 0;
3142
3143   found_setting = 0;
3144   use_src = 0;
3145
3146   /* At this point we know only one computation of EXPR outside of this
3147      block reaches this insn.  Now try to find a register that the
3148      expression is computed into.  */
3149   if (GET_CODE (SET_SRC (PATTERN (insn_computes_expr))) == REG)
3150     {
3151       /* This is the case when the available expression that reaches
3152          here has already been handled as an available expression.  */
3153       unsigned int regnum_for_replacing
3154         = REGNO (SET_SRC (PATTERN (insn_computes_expr)));
3155
3156       /* If the register was created by GCSE we can't use `reg_set_table',
3157          however we know it's set only once.  */
3158       if (regnum_for_replacing >= max_gcse_regno
3159           /* If the register the expression is computed into is set only once,
3160              or only one set reaches this insn, we can use it.  */
3161           || (((this_reg = reg_set_table[regnum_for_replacing]),
3162                this_reg->next == NULL)
3163               || can_disregard_other_sets (&this_reg, insn, 0)))
3164        {
3165          use_src = 1;
3166          found_setting = 1;
3167        }
3168     }
3169
3170   if (!found_setting)
3171     {
3172       unsigned int regnum_for_replacing
3173         = REGNO (SET_DEST (PATTERN (insn_computes_expr)));
3174
3175       /* This shouldn't happen.  */
3176       if (regnum_for_replacing >= max_gcse_regno)
3177         abort ();
3178
3179       this_reg = reg_set_table[regnum_for_replacing];
3180
3181       /* If the register the expression is computed into is set only once,
3182          or only one set reaches this insn, use it.  */
3183       if (this_reg->next == NULL
3184           || can_disregard_other_sets (&this_reg, insn, 0))
3185         found_setting = 1;
3186     }
3187
3188   if (found_setting)
3189     {
3190       pat = PATTERN (insn);
3191       if (use_src)
3192         to = SET_SRC (PATTERN (insn_computes_expr));
3193       else
3194         to = SET_DEST (PATTERN (insn_computes_expr));
3195       changed = validate_change (insn, &SET_SRC (pat), to, 0);
3196
3197       /* We should be able to ignore the return code from validate_change but
3198          to play it safe we check.  */
3199       if (changed)
3200         {
3201           gcse_subst_count++;
3202           if (gcse_file != NULL)
3203             {
3204               fprintf (gcse_file, "GCSE: Replacing the source in insn %d with",
3205                        INSN_UID (insn));
3206               fprintf (gcse_file, " reg %d %s insn %d\n",
3207                        REGNO (to), use_src ? "from" : "set in",
3208                        INSN_UID (insn_computes_expr));
3209             }
3210         }
3211     }
3212
3213   /* The register that the expr is computed into is set more than once.  */
3214   else if (1 /*expensive_op(this_pattrn->op) && do_expensive_gcse)*/)
3215     {
3216       /* Insert an insn after insnx that copies the reg set in insnx
3217          into a new pseudo register call this new register REGN.
3218          From insnb until end of basic block or until REGB is set
3219          replace all uses of REGB with REGN.  */
3220       rtx new_insn;
3221
3222       to = gen_reg_rtx (GET_MODE (SET_DEST (PATTERN (insn_computes_expr))));
3223
3224       /* Generate the new insn.  */
3225       /* ??? If the change fails, we return 0, even though we created
3226          an insn.  I think this is ok.  */
3227       new_insn
3228         = emit_insn_after (gen_rtx_SET (VOIDmode, to,
3229                                         SET_DEST (PATTERN
3230                                                   (insn_computes_expr))),
3231                            insn_computes_expr);
3232
3233       /* Keep block number table up to date.  */
3234       set_block_num (new_insn, BLOCK_NUM (insn_computes_expr));
3235
3236       /* Keep register set table up to date.  */
3237       record_one_set (REGNO (to), new_insn);
3238
3239       gcse_create_count++;
3240       if (gcse_file != NULL)
3241         {
3242           fprintf (gcse_file, "GCSE: Creating insn %d to copy value of reg %d",
3243                    INSN_UID (NEXT_INSN (insn_computes_expr)),
3244                    REGNO (SET_SRC (PATTERN (NEXT_INSN (insn_computes_expr)))));
3245           fprintf (gcse_file, ", computed in insn %d,\n",
3246                    INSN_UID (insn_computes_expr));
3247           fprintf (gcse_file, "      into newly allocated reg %d\n",
3248                    REGNO (to));
3249         }
3250
3251       pat = PATTERN (insn);
3252
3253       /* Do register replacement for INSN.  */
3254       changed = validate_change (insn, &SET_SRC (pat),
3255                                  SET_DEST (PATTERN
3256                                            (NEXT_INSN (insn_computes_expr))),
3257                                  0);
3258
3259       /* We should be able to ignore the return code from validate_change but
3260          to play it safe we check.  */
3261       if (changed)
3262         {
3263           gcse_subst_count++;
3264           if (gcse_file != NULL)
3265             {
3266               fprintf (gcse_file,
3267                        "GCSE: Replacing the source in insn %d with reg %d ",
3268                        INSN_UID (insn),
3269                        REGNO (SET_DEST (PATTERN (NEXT_INSN
3270                                                  (insn_computes_expr)))));
3271               fprintf (gcse_file, "set in insn %d\n",
3272                        INSN_UID (insn_computes_expr));
3273             }
3274         }
3275     }
3276
3277   return changed;
3278 }
3279
3280 /* Perform classic GCSE.  This is called by one_classic_gcse_pass after all
3281    the dataflow analysis has been done.
3282
3283    The result is non-zero if a change was made.  */
3284
3285 static int
3286 classic_gcse ()
3287 {
3288   int bb, changed;
3289   rtx insn;
3290
3291   /* Note we start at block 1.  */
3292
3293   changed = 0;
3294   for (bb = 1; bb < n_basic_blocks; bb++)
3295     {
3296       /* Reset tables used to keep track of what's still valid [since the
3297          start of the block].  */
3298       reset_opr_set_tables ();
3299
3300       for (insn = BLOCK_HEAD (bb);
3301            insn != NULL && insn != NEXT_INSN (BLOCK_END (bb));
3302            insn = NEXT_INSN (insn))
3303         {
3304           /* Is insn of form (set (pseudo-reg) ...)?  */
3305           if (GET_CODE (insn) == INSN
3306               && GET_CODE (PATTERN (insn)) == SET
3307               && GET_CODE (SET_DEST (PATTERN (insn))) == REG
3308               && REGNO (SET_DEST (PATTERN (insn))) >= FIRST_PSEUDO_REGISTER)
3309             {
3310               rtx pat = PATTERN (insn);
3311               rtx src = SET_SRC (pat);
3312               struct expr *expr;
3313
3314               if (want_to_gcse_p (src)
3315                   /* Is the expression recorded?  */
3316                   && ((expr = lookup_expr (src)) != NULL)
3317                   /* Is the expression available [at the start of the
3318                      block]?  */
3319                   && TEST_BIT (ae_in[bb], expr->bitmap_index)
3320                   /* Are the operands unchanged since the start of the
3321                      block?  */
3322                   && oprs_not_set_p (src, insn))
3323                 changed |= handle_avail_expr (insn, expr);
3324             }
3325
3326           /* Keep track of everything modified by this insn.  */
3327           /* ??? Need to be careful w.r.t. mods done to INSN.  */
3328           if (INSN_P (insn))
3329             mark_oprs_set (insn);
3330         }
3331     }
3332
3333   return changed;
3334 }
3335
3336 /* Top level routine to perform one classic GCSE pass.
3337
3338    Return non-zero if a change was made.  */
3339
3340 static int
3341 one_classic_gcse_pass (pass)
3342      int pass;
3343 {
3344   int changed = 0;
3345
3346   gcse_subst_count = 0;
3347   gcse_create_count = 0;
3348
3349   alloc_expr_hash_table (max_cuid);
3350   alloc_rd_mem (n_basic_blocks, max_cuid);
3351   compute_expr_hash_table ();
3352   if (gcse_file)
3353     dump_hash_table (gcse_file, "Expression", expr_hash_table,
3354                      expr_hash_table_size, n_exprs);
3355
3356   if (n_exprs > 0)
3357     {
3358       compute_kill_rd ();
3359       compute_rd ();
3360       alloc_avail_expr_mem (n_basic_blocks, n_exprs);
3361       compute_ae_gen ();
3362       compute_ae_kill (ae_gen, ae_kill);
3363       compute_available (ae_gen, ae_kill, ae_out, ae_in);
3364       changed = classic_gcse ();
3365       free_avail_expr_mem ();
3366     }
3367
3368   free_rd_mem ();
3369   free_expr_hash_table ();
3370
3371   if (gcse_file)
3372     {
3373       fprintf (gcse_file, "\n");
3374       fprintf (gcse_file, "GCSE of %s, pass %d: %d bytes needed, %d substs,",
3375                current_function_name, pass, bytes_used, gcse_subst_count);
3376       fprintf (gcse_file, "%d insns created\n", gcse_create_count);
3377     }
3378
3379   return changed;
3380 }
3381 \f
3382 /* Compute copy/constant propagation working variables.  */
3383
3384 /* Local properties of assignments.  */
3385 static sbitmap *cprop_pavloc;
3386 static sbitmap *cprop_absaltered;
3387
3388 /* Global properties of assignments (computed from the local properties).  */
3389 static sbitmap *cprop_avin;
3390 static sbitmap *cprop_avout;
3391
3392 /* Allocate vars used for copy/const propagation.  N_BLOCKS is the number of
3393    basic blocks.  N_SETS is the number of sets.  */
3394
3395 static void
3396 alloc_cprop_mem (n_blocks, n_sets)
3397      int n_blocks, n_sets;
3398 {
3399   cprop_pavloc = sbitmap_vector_alloc (n_blocks, n_sets);
3400   cprop_absaltered = sbitmap_vector_alloc (n_blocks, n_sets);
3401
3402   cprop_avin = sbitmap_vector_alloc (n_blocks, n_sets);
3403   cprop_avout = sbitmap_vector_alloc (n_blocks, n_sets);
3404 }
3405
3406 /* Free vars used by copy/const propagation.  */
3407
3408 static void
3409 free_cprop_mem ()
3410 {
3411   free (cprop_pavloc);
3412   free (cprop_absaltered);
3413   free (cprop_avin);
3414   free (cprop_avout);
3415 }
3416
3417 /* For each block, compute whether X is transparent.  X is either an
3418    expression or an assignment [though we don't care which, for this context
3419    an assignment is treated as an expression].  For each block where an
3420    element of X is modified, set (SET_P == 1) or reset (SET_P == 0) the INDX
3421    bit in BMAP.  */
3422
3423 static void
3424 compute_transp (x, indx, bmap, set_p)
3425      rtx x;
3426      int indx;
3427      sbitmap *bmap;
3428      int set_p;
3429 {
3430   int bb, i, j;
3431   enum rtx_code code;
3432   reg_set *r;
3433   const char *fmt;
3434
3435   /* repeat is used to turn tail-recursion into iteration since GCC
3436      can't do it when there's no return value.  */
3437  repeat:
3438
3439   if (x == 0)
3440     return;
3441
3442   code = GET_CODE (x);
3443   switch (code)
3444     {
3445     case REG:
3446       if (set_p)
3447         {
3448           if (REGNO (x) < FIRST_PSEUDO_REGISTER)
3449             {
3450               for (bb = 0; bb < n_basic_blocks; bb++)
3451                 if (TEST_BIT (reg_set_in_block[bb], REGNO (x)))
3452                   SET_BIT (bmap[bb], indx);
3453             }
3454           else
3455             {
3456               for (r = reg_set_table[REGNO (x)]; r != NULL; r = r->next)
3457                 SET_BIT (bmap[BLOCK_NUM (r->insn)], indx);
3458             }
3459         }
3460       else
3461         {
3462           if (REGNO (x) < FIRST_PSEUDO_REGISTER)
3463             {
3464               for (bb = 0; bb < n_basic_blocks; bb++)
3465                 if (TEST_BIT (reg_set_in_block[bb], REGNO (x)))
3466                   RESET_BIT (bmap[bb], indx);
3467             }
3468           else
3469             {
3470               for (r = reg_set_table[REGNO (x)]; r != NULL; r = r->next)
3471                 RESET_BIT (bmap[BLOCK_NUM (r->insn)], indx);
3472             }
3473         }
3474
3475       return;
3476
3477     case MEM:
3478       if (set_p)
3479         {
3480           for (bb = 0; bb < n_basic_blocks; bb++)
3481             if (mem_set_in_block[bb])
3482               SET_BIT (bmap[bb], indx);
3483         }
3484       else
3485         {
3486           for (bb = 0; bb < n_basic_blocks; bb++)
3487             if (mem_set_in_block[bb])
3488               RESET_BIT (bmap[bb], indx);
3489         }
3490
3491       x = XEXP (x, 0);
3492       goto repeat;
3493
3494     case PC:
3495     case CC0: /*FIXME*/
3496     case CONST:
3497     case CONST_INT:
3498     case CONST_DOUBLE:
3499     case SYMBOL_REF:
3500     case LABEL_REF:
3501     case ADDR_VEC:
3502     case ADDR_DIFF_VEC:
3503       return;
3504
3505     default:
3506       break;
3507     }
3508
3509   for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
3510     {
3511       if (fmt[i] == 'e')
3512         {
3513           /* If we are about to do the last recursive call
3514              needed at this level, change it into iteration.
3515              This function is called enough to be worth it.  */
3516           if (i == 0)
3517             {
3518               x = XEXP (x, i);
3519               goto repeat;
3520             }
3521
3522           compute_transp (XEXP (x, i), indx, bmap, set_p);
3523         }
3524       else if (fmt[i] == 'E')
3525         for (j = 0; j < XVECLEN (x, i); j++)
3526           compute_transp (XVECEXP (x, i, j), indx, bmap, set_p);
3527     }
3528 }
3529
3530 /* Top level routine to do the dataflow analysis needed by copy/const
3531    propagation.  */
3532
3533 static void
3534 compute_cprop_data ()
3535 {
3536   compute_local_properties (cprop_absaltered, cprop_pavloc, NULL, 1);
3537   compute_available (cprop_pavloc, cprop_absaltered,
3538                      cprop_avout, cprop_avin);
3539 }
3540 \f
3541 /* Copy/constant propagation.  */
3542
3543 /* Maximum number of register uses in an insn that we handle.  */
3544 #define MAX_USES 8
3545
3546 /* Table of uses found in an insn.
3547    Allocated statically to avoid alloc/free complexity and overhead.  */
3548 static struct reg_use reg_use_table[MAX_USES];
3549
3550 /* Index into `reg_use_table' while building it.  */
3551 static int reg_use_count;
3552
3553 /* Set up a list of register numbers used in INSN.  The found uses are stored
3554    in `reg_use_table'.  `reg_use_count' is initialized to zero before entry,
3555    and contains the number of uses in the table upon exit.
3556
3557    ??? If a register appears multiple times we will record it multiple times.
3558    This doesn't hurt anything but it will slow things down.  */
3559
3560 static void
3561 find_used_regs (x)
3562      rtx x;
3563 {
3564   int i, j;
3565   enum rtx_code code;
3566   const char *fmt;
3567
3568   /* repeat is used to turn tail-recursion into iteration since GCC
3569      can't do it when there's no return value.  */
3570  repeat:
3571
3572   if (x == 0)
3573     return;
3574
3575   code = GET_CODE (x);
3576   switch (code)
3577     {
3578     case REG:
3579       if (reg_use_count == MAX_USES)
3580         return;
3581
3582       reg_use_table[reg_use_count].reg_rtx = x;
3583       reg_use_count++;
3584       return;
3585
3586     case MEM:
3587       x = XEXP (x, 0);
3588       goto repeat;
3589
3590     case PC:
3591     case CC0:
3592     case CONST:
3593     case CONST_INT:
3594     case CONST_DOUBLE:
3595     case SYMBOL_REF:
3596     case LABEL_REF:
3597     case CLOBBER:
3598     case ADDR_VEC:
3599     case ADDR_DIFF_VEC:
3600     case ASM_INPUT: /*FIXME*/
3601       return;
3602
3603     case SET:
3604       if (GET_CODE (SET_DEST (x)) == MEM)
3605         find_used_regs (SET_DEST (x));
3606       x = SET_SRC (x);
3607       goto repeat;
3608
3609     default:
3610       break;
3611     }
3612
3613   /* Recursively scan the operands of this expression.  */
3614
3615   for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
3616     {
3617       if (fmt[i] == 'e')
3618         {
3619           /* If we are about to do the last recursive call
3620              needed at this level, change it into iteration.
3621              This function is called enough to be worth it.  */
3622           if (i == 0)
3623             {
3624               x = XEXP (x, 0);
3625               goto repeat;
3626             }
3627
3628           find_used_regs (XEXP (x, i));
3629         }
3630       else if (fmt[i] == 'E')
3631         for (j = 0; j < XVECLEN (x, i); j++)
3632           find_used_regs (XVECEXP (x, i, j));
3633     }
3634 }
3635
3636 /* Try to replace all non-SET_DEST occurrences of FROM in INSN with TO.
3637    Returns non-zero is successful.  */
3638
3639 static int
3640 try_replace_reg (from, to, insn)
3641      rtx from, to, insn;
3642 {
3643   rtx note = find_reg_equal_equiv_note (insn);
3644   rtx src = 0;
3645   int success = 0;
3646   rtx set = single_set (insn);
3647
3648   /* If this is a single set, try to simplify the source of the set given
3649      our substitution.  We could perhaps try this for multiple SETs, but
3650      it probably won't buy us anything.  */
3651   if (set != 0)
3652     {
3653       src = simplify_replace_rtx (SET_SRC (set), from, to);
3654
3655       /* Try this two ways: first just replace SET_SRC.  If that doesn't
3656          work and this is a PARALLEL, try to replace the whole pattern
3657          with a new SET.  */
3658       if (validate_change (insn, &SET_SRC (set), src, 0))
3659         success = 1;
3660       else if (GET_CODE (PATTERN (insn)) == PARALLEL
3661                && validate_change (insn, &PATTERN (insn),
3662                                    gen_rtx_SET (VOIDmode, SET_DEST (set),
3663                                                 src),
3664                                    0))
3665         success = 1;
3666     }
3667
3668   /* Otherwise, try to do a global replacement within the insn.  */
3669   if (!success)
3670     success = validate_replace_src (from, to, insn);
3671
3672   /* If we've failed to do replacement, have a single SET, and don't already
3673      have a note, add a REG_EQUAL note to not lose information.  */
3674   if (!success && note == 0 && set != 0)
3675     note = REG_NOTES (insn)
3676       = gen_rtx_EXPR_LIST (REG_EQUAL, src, REG_NOTES (insn));
3677
3678   /* If there is already a NOTE, update the expression in it with our
3679      replacement.  */
3680   else if (note != 0)
3681     XEXP (note, 0) = simplify_replace_rtx (XEXP (note, 0), from, to);
3682
3683   /* REG_EQUAL may get simplified into register.
3684      We don't allow that. Remove that note. This code ought
3685      not to hapen, because previous code ought to syntetize
3686      reg-reg move, but be on the safe side.  */
3687   if (note && REG_P (XEXP (note, 0)))
3688     remove_note (insn, note);
3689
3690   return success;
3691 }
3692
3693 /* Find a set of REGNOs that are available on entry to INSN's block.  Returns
3694    NULL no such set is found.  */
3695
3696 static struct expr *
3697 find_avail_set (regno, insn)
3698      int regno;
3699      rtx insn;
3700 {
3701   /* SET1 contains the last set found that can be returned to the caller for
3702      use in a substitution.  */
3703   struct expr *set1 = 0;
3704
3705   /* Loops are not possible here.  To get a loop we would need two sets
3706      available at the start of the block containing INSN.  ie we would
3707      need two sets like this available at the start of the block:
3708
3709        (set (reg X) (reg Y))
3710        (set (reg Y) (reg X))
3711
3712      This can not happen since the set of (reg Y) would have killed the
3713      set of (reg X) making it unavailable at the start of this block.  */
3714   while (1)
3715      {
3716       rtx src;
3717       struct expr *set = lookup_set (regno, NULL_RTX);
3718
3719       /* Find a set that is available at the start of the block
3720          which contains INSN.  */
3721       while (set)
3722         {
3723           if (TEST_BIT (cprop_avin[BLOCK_NUM (insn)], set->bitmap_index))
3724             break;
3725           set = next_set (regno, set);
3726         }
3727
3728       /* If no available set was found we've reached the end of the
3729          (possibly empty) copy chain.  */
3730       if (set == 0)
3731         break;
3732
3733       if (GET_CODE (set->expr) != SET)
3734         abort ();
3735
3736       src = SET_SRC (set->expr);
3737
3738       /* We know the set is available.
3739          Now check that SRC is ANTLOC (i.e. none of the source operands
3740          have changed since the start of the block).
3741
3742          If the source operand changed, we may still use it for the next
3743          iteration of this loop, but we may not use it for substitutions.  */
3744
3745       if (CONSTANT_P (src) || oprs_not_set_p (src, insn))
3746         set1 = set;
3747
3748       /* If the source of the set is anything except a register, then
3749          we have reached the end of the copy chain.  */
3750       if (GET_CODE (src) != REG)
3751         break;
3752
3753       /* Follow the copy chain, ie start another iteration of the loop
3754          and see if we have an available copy into SRC.  */
3755       regno = REGNO (src);
3756      }
3757
3758   /* SET1 holds the last set that was available and anticipatable at
3759      INSN.  */
3760   return set1;
3761 }
3762
3763 /* Subroutine of cprop_insn that tries to propagate constants into
3764    JUMP_INSNS.  INSN must be a conditional jump.  FROM is what we will try to
3765    replace, SRC is the constant we will try to substitute for it.  Returns
3766    nonzero if a change was made.  We know INSN has just a SET.  */
3767
3768 static int
3769 cprop_jump (insn, from, src)
3770      rtx insn;
3771      rtx from;
3772      rtx src;
3773 {
3774   rtx set = PATTERN (insn);
3775   rtx new = simplify_replace_rtx (SET_SRC (set), from, src);
3776
3777   /* If no simplification can be made, then try the next
3778      register.  */
3779   if (rtx_equal_p (new, SET_SRC (set)))
3780     return 0;
3781
3782   /* If this is now a no-op leave it that way, but update LABEL_NUSED if
3783      necessary.  */
3784   if (new == pc_rtx)
3785     {
3786       SET_SRC (set) = new;
3787
3788       if (JUMP_LABEL (insn) != 0)
3789         --LABEL_NUSES (JUMP_LABEL (insn));
3790     }
3791
3792   /* Otherwise, this must be a valid instruction.  */
3793   else if (! validate_change (insn, &SET_SRC (set), new, 0))
3794     return 0;
3795
3796   /* If this has turned into an unconditional jump,
3797      then put a barrier after it so that the unreachable
3798      code will be deleted.  */
3799   if (GET_CODE (SET_SRC (set)) == LABEL_REF)
3800     emit_barrier_after (insn);
3801
3802   run_jump_opt_after_gcse = 1;
3803
3804   const_prop_count++;
3805   if (gcse_file != NULL)
3806     {
3807       fprintf (gcse_file,
3808                "CONST-PROP: Replacing reg %d in insn %d with constant ",
3809                REGNO (from), INSN_UID (insn));
3810       print_rtl (gcse_file, src);
3811       fprintf (gcse_file, "\n");
3812     }
3813
3814   return 1;
3815 }
3816
3817 #ifdef HAVE_cc0
3818
3819 /* Subroutine of cprop_insn that tries to propagate constants into JUMP_INSNS
3820    for machines that have CC0.  INSN is a single set that stores into CC0;
3821    the insn following it is a conditional jump.  REG_USED is the use we will
3822    try to replace, SRC is the constant we will try to substitute for it.
3823    Returns nonzero if a change was made.  */
3824
3825 static int
3826 cprop_cc0_jump (insn, reg_used, src)
3827      rtx insn;
3828      struct reg_use *reg_used;
3829      rtx src;
3830 {
3831   /* First substitute in the SET_SRC of INSN, then substitute that for
3832      CC0 in JUMP.  */
3833   rtx jump = NEXT_INSN (insn);
3834   rtx new_src = simplify_replace_rtx (SET_SRC (PATTERN (insn)),
3835                                       reg_used->reg_rtx, src);
3836
3837   if (! cprop_jump (jump, cc0_rtx, new_src))
3838     return 0;
3839
3840   /* If we succeeded, delete the cc0 setter.  */
3841   PUT_CODE (insn, NOTE);
3842   NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
3843   NOTE_SOURCE_FILE (insn) = 0;
3844
3845   return 1;
3846  }
3847 #endif
3848
3849 /* Perform constant and copy propagation on INSN.
3850    The result is non-zero if a change was made.  */
3851
3852 static int
3853 cprop_insn (insn, alter_jumps)
3854      rtx insn;
3855      int alter_jumps;
3856 {
3857   struct reg_use *reg_used;
3858   int changed = 0;
3859   rtx note;
3860
3861   /* Only propagate into SETs.  Note that a conditional jump is a
3862      SET with pc_rtx as the destination.  */
3863   if (GET_CODE (insn) != INSN && GET_CODE (insn) != JUMP_INSN)
3864     return 0;
3865
3866   reg_use_count = 0;
3867   find_used_regs (PATTERN (insn));
3868
3869   note = find_reg_equal_equiv_note (insn);
3870
3871   /* We may win even when propagating constants into notes. */
3872   if (note)
3873     find_used_regs (XEXP (note, 0));
3874
3875   for (reg_used = &reg_use_table[0]; reg_use_count > 0;
3876        reg_used++, reg_use_count--)
3877     {
3878       unsigned int regno = REGNO (reg_used->reg_rtx);
3879       rtx pat, src;
3880       struct expr *set;
3881
3882       /* Ignore registers created by GCSE.
3883          We do this because ... */
3884       if (regno >= max_gcse_regno)
3885         continue;
3886
3887       /* If the register has already been set in this block, there's
3888          nothing we can do.  */
3889       if (! oprs_not_set_p (reg_used->reg_rtx, insn))
3890         continue;
3891
3892       /* Find an assignment that sets reg_used and is available
3893          at the start of the block.  */
3894       set = find_avail_set (regno, insn);
3895       if (! set)
3896         continue;
3897
3898       pat = set->expr;
3899       /* ??? We might be able to handle PARALLELs.  Later.  */
3900       if (GET_CODE (pat) != SET)
3901         abort ();
3902
3903       src = SET_SRC (pat);
3904
3905       /* Constant propagation.  */
3906       if (GET_CODE (src) == CONST_INT || GET_CODE (src) == CONST_DOUBLE
3907           || GET_CODE (src) == SYMBOL_REF)
3908         {
3909           /* Handle normal insns first.  */
3910           if (GET_CODE (insn) == INSN
3911               && try_replace_reg (reg_used->reg_rtx, src, insn))
3912             {
3913               changed = 1;
3914               const_prop_count++;
3915               if (gcse_file != NULL)
3916                 {
3917                   fprintf (gcse_file, "CONST-PROP: Replacing reg %d in ",
3918                            regno);
3919                   fprintf (gcse_file, "insn %d with constant ",
3920                            INSN_UID (insn));
3921                   print_rtl (gcse_file, src);
3922                   fprintf (gcse_file, "\n");
3923                 }
3924
3925               /* The original insn setting reg_used may or may not now be
3926                  deletable.  We leave the deletion to flow.  */
3927             }
3928
3929           /* Try to propagate a CONST_INT into a conditional jump.
3930              We're pretty specific about what we will handle in this
3931              code, we can extend this as necessary over time.
3932
3933              Right now the insn in question must look like
3934              (set (pc) (if_then_else ...))  */
3935           else if (alter_jumps
3936                    && GET_CODE (insn) == JUMP_INSN
3937                    && condjump_p (insn)
3938                    && ! simplejump_p (insn))
3939             changed |= cprop_jump (insn, reg_used->reg_rtx, src);
3940
3941 #ifdef HAVE_cc0
3942           /* Similar code for machines that use a pair of CC0 setter and
3943              conditional jump insn.  */
3944           else if (alter_jumps
3945                    && GET_CODE (PATTERN (insn)) == SET
3946                    && SET_DEST (PATTERN (insn)) == cc0_rtx
3947                    && GET_CODE (NEXT_INSN (insn)) == JUMP_INSN
3948                    && condjump_p (NEXT_INSN (insn))
3949                    && ! simplejump_p (NEXT_INSN (insn))
3950                    && cprop_cc0_jump (insn, reg_used, src))
3951             {
3952               changed = 1;
3953               break;
3954             }
3955 #endif
3956         }
3957       else if (GET_CODE (src) == REG
3958                && REGNO (src) >= FIRST_PSEUDO_REGISTER
3959                && REGNO (src) != regno)
3960         {
3961           if (try_replace_reg (reg_used->reg_rtx, src, insn))
3962             {
3963               changed = 1;
3964               copy_prop_count++;
3965               if (gcse_file != NULL)
3966                 {
3967                   fprintf (gcse_file, "COPY-PROP: Replacing reg %d in insn %d",
3968                            regno, INSN_UID (insn));
3969                   fprintf (gcse_file, " with reg %d\n", REGNO (src));
3970                 }
3971
3972               /* The original insn setting reg_used may or may not now be
3973                  deletable.  We leave the deletion to flow.  */
3974               /* FIXME: If it turns out that the insn isn't deletable,
3975                  then we may have unnecessarily extended register lifetimes
3976                  and made things worse.  */
3977             }
3978         }
3979     }
3980
3981   return changed;
3982 }
3983
3984 /* Forward propagate copies.  This includes copies and constants.  Return
3985    non-zero if a change was made.  */
3986
3987 static int
3988 cprop (alter_jumps)
3989      int alter_jumps;
3990 {
3991   int bb, changed;
3992   rtx insn;
3993
3994   /* Note we start at block 1.  */
3995
3996   changed = 0;
3997   for (bb = 1; bb < n_basic_blocks; bb++)
3998     {
3999       /* Reset tables used to keep track of what's still valid [since the
4000          start of the block].  */
4001       reset_opr_set_tables ();
4002
4003       for (insn = BLOCK_HEAD (bb);
4004            insn != NULL && insn != NEXT_INSN (BLOCK_END (bb));
4005            insn = NEXT_INSN (insn))
4006         if (INSN_P (insn))
4007           {
4008             changed |= cprop_insn (insn, alter_jumps);
4009
4010             /* Keep track of everything modified by this insn.  */
4011             /* ??? Need to be careful w.r.t. mods done to INSN.  Don't
4012                call mark_oprs_set if we turned the insn into a NOTE.  */
4013             if (GET_CODE (insn) != NOTE)
4014               mark_oprs_set (insn);
4015         }
4016     }
4017
4018   if (gcse_file != NULL)
4019     fprintf (gcse_file, "\n");
4020
4021   return changed;
4022 }
4023
4024 /* Perform one copy/constant propagation pass.
4025    F is the first insn in the function.
4026    PASS is the pass count.  */
4027
4028 static int
4029 one_cprop_pass (pass, alter_jumps)
4030      int pass;
4031      int alter_jumps;
4032 {
4033   int changed = 0;
4034
4035   const_prop_count = 0;
4036   copy_prop_count = 0;
4037
4038   alloc_set_hash_table (max_cuid);
4039   compute_set_hash_table ();
4040   if (gcse_file)
4041     dump_hash_table (gcse_file, "SET", set_hash_table, set_hash_table_size,
4042                      n_sets);
4043   if (n_sets > 0)
4044     {
4045       alloc_cprop_mem (n_basic_blocks, n_sets);
4046       compute_cprop_data ();
4047       changed = cprop (alter_jumps);
4048       free_cprop_mem ();
4049     }
4050
4051   free_set_hash_table ();
4052
4053   if (gcse_file)
4054     {
4055       fprintf (gcse_file, "CPROP of %s, pass %d: %d bytes needed, ",
4056                current_function_name, pass, bytes_used);
4057       fprintf (gcse_file, "%d const props, %d copy props\n\n",
4058                const_prop_count, copy_prop_count);
4059     }
4060
4061   return changed;
4062 }
4063 \f
4064 /* Compute PRE+LCM working variables.  */
4065
4066 /* Local properties of expressions.  */
4067 /* Nonzero for expressions that are transparent in the block.  */
4068 static sbitmap *transp;
4069
4070 /* Nonzero for expressions that are transparent at the end of the block.
4071    This is only zero for expressions killed by abnormal critical edge
4072    created by a calls.  */
4073 static sbitmap *transpout;
4074
4075 /* Nonzero for expressions that are computed (available) in the block.  */
4076 static sbitmap *comp;
4077
4078 /* Nonzero for expressions that are locally anticipatable in the block.  */
4079 static sbitmap *antloc;
4080
4081 /* Nonzero for expressions where this block is an optimal computation
4082    point.  */
4083 static sbitmap *pre_optimal;
4084
4085 /* Nonzero for expressions which are redundant in a particular block.  */
4086 static sbitmap *pre_redundant;
4087
4088 /* Nonzero for expressions which should be inserted on a specific edge.  */
4089 static sbitmap *pre_insert_map;
4090
4091 /* Nonzero for expressions which should be deleted in a specific block.  */
4092 static sbitmap *pre_delete_map;
4093
4094 /* Contains the edge_list returned by pre_edge_lcm.  */
4095 static struct edge_list *edge_list;
4096
4097 /* Redundant insns.  */
4098 static sbitmap pre_redundant_insns;
4099
4100 /* Allocate vars used for PRE analysis.  */
4101
4102 static void
4103 alloc_pre_mem (n_blocks, n_exprs)
4104      int n_blocks, n_exprs;
4105 {
4106   transp = sbitmap_vector_alloc (n_blocks, n_exprs);
4107   comp = sbitmap_vector_alloc (n_blocks, n_exprs);
4108   antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
4109
4110   pre_optimal = NULL;
4111   pre_redundant = NULL;
4112   pre_insert_map = NULL;
4113   pre_delete_map = NULL;
4114   ae_in = NULL;
4115   ae_out = NULL;
4116   ae_kill = sbitmap_vector_alloc (n_blocks, n_exprs);
4117
4118   /* pre_insert and pre_delete are allocated later.  */
4119 }
4120
4121 /* Free vars used for PRE analysis.  */
4122
4123 static void
4124 free_pre_mem ()
4125 {
4126   free (transp);
4127   free (comp);
4128
4129   /* ANTLOC and AE_KILL are freed just after pre_lcm finishes.  */
4130
4131   if (pre_optimal)
4132     free (pre_optimal);
4133   if (pre_redundant)
4134     free (pre_redundant);
4135   if (pre_insert_map)
4136     free (pre_insert_map);
4137   if (pre_delete_map)
4138     free (pre_delete_map);
4139
4140   if (ae_in)
4141     free (ae_in);
4142   if (ae_out)
4143     free (ae_out);
4144
4145   transp = comp = NULL;
4146   pre_optimal = pre_redundant = pre_insert_map = pre_delete_map = NULL;
4147   ae_in = ae_out = NULL;
4148 }
4149
4150 /* Top level routine to do the dataflow analysis needed by PRE.  */
4151
4152 static void
4153 compute_pre_data ()
4154 {
4155   sbitmap trapping_expr;
4156   int i;
4157   unsigned int ui;
4158
4159   compute_local_properties (transp, comp, antloc, 0);
4160   sbitmap_vector_zero (ae_kill, n_basic_blocks);
4161
4162   /* Collect expressions which might trap.  */
4163   trapping_expr = sbitmap_alloc (n_exprs);
4164   sbitmap_zero (trapping_expr);
4165   for (ui = 0; ui < expr_hash_table_size; ui++)
4166     {
4167       struct expr *e;
4168       for (e = expr_hash_table[ui]; e != NULL; e = e->next_same_hash)
4169         if (may_trap_p (e->expr))
4170           SET_BIT (trapping_expr, e->bitmap_index);
4171     }
4172
4173   /* Compute ae_kill for each basic block using:
4174
4175      ~(TRANSP | COMP)
4176
4177      This is significantly faster than compute_ae_kill.  */
4178
4179   for (i = 0; i < n_basic_blocks; i++)
4180     {
4181       edge e;
4182
4183       /* If the current block is the destination of an abnormal edge, we
4184          kill all trapping expressions because we won't be able to properly
4185          place the instruction on the edge.  So make them neither
4186          anticipatable nor transparent.  This is fairly conservative.  */
4187       for (e = BASIC_BLOCK (i)->pred; e ; e = e->pred_next)
4188         if (e->flags & EDGE_ABNORMAL)
4189           {
4190             sbitmap_difference (antloc[i], antloc[i], trapping_expr);
4191             sbitmap_difference (transp[i], transp[i], trapping_expr);
4192             break;
4193           }
4194
4195       sbitmap_a_or_b (ae_kill[i], transp[i], comp[i]);
4196       sbitmap_not (ae_kill[i], ae_kill[i]);
4197     }
4198
4199   edge_list = pre_edge_lcm (gcse_file, n_exprs, transp, comp, antloc,
4200                             ae_kill, &pre_insert_map, &pre_delete_map);
4201   free (antloc);
4202   antloc = NULL;
4203   free (ae_kill);
4204   ae_kill = NULL;
4205   free (trapping_expr);
4206 }
4207 \f
4208 /* PRE utilities */
4209
4210 /* Return non-zero if an occurrence of expression EXPR in OCCR_BB would reach
4211    block BB.
4212
4213    VISITED is a pointer to a working buffer for tracking which BB's have
4214    been visited.  It is NULL for the top-level call.
4215
4216    We treat reaching expressions that go through blocks containing the same
4217    reaching expression as "not reaching".  E.g. if EXPR is generated in blocks
4218    2 and 3, INSN is in block 4, and 2->3->4, we treat the expression in block
4219    2 as not reaching.  The intent is to improve the probability of finding
4220    only one reaching expression and to reduce register lifetimes by picking
4221    the closest such expression.  */
4222
4223 static int
4224 pre_expr_reaches_here_p_work (occr_bb, expr, bb, visited)
4225      int occr_bb;
4226      struct expr *expr;
4227      int bb;
4228      char *visited;
4229 {
4230   edge pred;
4231
4232   for (pred = BASIC_BLOCK (bb)->pred; pred != NULL; pred = pred->pred_next)
4233     {
4234       int pred_bb = pred->src->index;
4235
4236       if (pred->src == ENTRY_BLOCK_PTR
4237           /* Has predecessor has already been visited?  */
4238           || visited[pred_bb])
4239         ;/* Nothing to do.  */
4240
4241       /* Does this predecessor generate this expression?  */
4242       else if (TEST_BIT (comp[pred_bb], expr->bitmap_index))
4243         {
4244           /* Is this the occurrence we're looking for?
4245              Note that there's only one generating occurrence per block
4246              so we just need to check the block number.  */
4247           if (occr_bb == pred_bb)
4248             return 1;
4249
4250           visited[pred_bb] = 1;
4251         }
4252       /* Ignore this predecessor if it kills the expression.  */
4253       else if (! TEST_BIT (transp[pred_bb], expr->bitmap_index))
4254         visited[pred_bb] = 1;
4255
4256       /* Neither gen nor kill.  */
4257       else
4258         {
4259           visited[pred_bb] = 1;
4260           if (pre_expr_reaches_here_p_work (occr_bb, expr, pred_bb, visited))
4261             return 1;
4262         }
4263     }
4264
4265   /* All paths have been checked.  */
4266   return 0;
4267 }
4268
4269 /* The wrapper for pre_expr_reaches_here_work that ensures that any
4270    memory allocated for that function is returned. */
4271
4272 static int
4273 pre_expr_reaches_here_p (occr_bb, expr, bb)
4274      int occr_bb;
4275      struct expr *expr;
4276      int bb;
4277 {
4278   int rval;
4279   char *visited = (char *) xcalloc (n_basic_blocks, 1);
4280
4281   rval = pre_expr_reaches_here_p_work(occr_bb, expr, bb, visited);
4282
4283   free (visited);
4284   return rval;
4285 }
4286 \f
4287
4288 /* Given an expr, generate RTL which we can insert at the end of a BB,
4289    or on an edge.  Set the block number of any insns generated to
4290    the value of BB.  */
4291
4292 static rtx
4293 process_insert_insn (expr)
4294      struct expr *expr;
4295 {
4296   rtx reg = expr->reaching_reg;
4297   rtx exp = copy_rtx (expr->expr);
4298   rtx pat;
4299
4300   start_sequence ();
4301
4302   /* If the expression is something that's an operand, like a constant,
4303      just copy it to a register.  */
4304   if (general_operand (exp, GET_MODE (reg)))
4305     emit_move_insn (reg, exp);
4306
4307   /* Otherwise, make a new insn to compute this expression and make sure the
4308      insn will be recognized (this also adds any needed CLOBBERs).  Copy the
4309      expression to make sure we don't have any sharing issues.  */
4310   else if (insn_invalid_p (emit_insn (gen_rtx_SET (VOIDmode, reg, exp))))
4311     abort ();
4312
4313   pat = gen_sequence ();
4314   end_sequence ();
4315
4316   return pat;
4317 }
4318
4319 /* Add EXPR to the end of basic block BB.
4320
4321    This is used by both the PRE and code hoisting.
4322
4323    For PRE, we want to verify that the expr is either transparent
4324    or locally anticipatable in the target block.  This check makes
4325    no sense for code hoisting.  */
4326
4327 static void
4328 insert_insn_end_bb (expr, bb, pre)
4329      struct expr *expr;
4330      int bb;
4331      int pre;
4332 {
4333   rtx insn = BLOCK_END (bb);
4334   rtx new_insn;
4335   rtx reg = expr->reaching_reg;
4336   int regno = REGNO (reg);
4337   rtx pat;
4338   int i;
4339
4340   pat = process_insert_insn (expr);
4341
4342   /* If the last insn is a jump, insert EXPR in front [taking care to
4343      handle cc0, etc. properly].  */
4344
4345   if (GET_CODE (insn) == JUMP_INSN)
4346     {
4347 #ifdef HAVE_cc0
4348       rtx note;
4349 #endif
4350
4351       /* If this is a jump table, then we can't insert stuff here.  Since
4352          we know the previous real insn must be the tablejump, we insert
4353          the new instruction just before the tablejump.  */
4354       if (GET_CODE (PATTERN (insn)) == ADDR_VEC
4355           || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
4356         insn = prev_real_insn (insn);
4357
4358 #ifdef HAVE_cc0
4359       /* FIXME: 'twould be nice to call prev_cc0_setter here but it aborts
4360          if cc0 isn't set.  */
4361       note = find_reg_note (insn, REG_CC_SETTER, NULL_RTX);
4362       if (note)
4363         insn = XEXP (note, 0);
4364       else
4365         {
4366           rtx maybe_cc0_setter = prev_nonnote_insn (insn);
4367           if (maybe_cc0_setter
4368               && INSN_P (maybe_cc0_setter)
4369               && sets_cc0_p (PATTERN (maybe_cc0_setter)))
4370             insn = maybe_cc0_setter;
4371         }
4372 #endif
4373       /* FIXME: What if something in cc0/jump uses value set in new insn?  */
4374       new_insn = emit_block_insn_before (pat, insn, BASIC_BLOCK (bb));
4375     }
4376
4377   /* Likewise if the last insn is a call, as will happen in the presence
4378      of exception handling.  */
4379   else if (GET_CODE (insn) == CALL_INSN)
4380     {
4381       HARD_REG_SET parm_regs;
4382       int nparm_regs;
4383       rtx p;
4384
4385       /* Keeping in mind SMALL_REGISTER_CLASSES and parameters in registers,
4386          we search backward and place the instructions before the first
4387          parameter is loaded.  Do this for everyone for consistency and a
4388          presumtion that we'll get better code elsewhere as well.
4389
4390          It should always be the case that we can put these instructions
4391          anywhere in the basic block with performing PRE optimizations.
4392          Check this.  */
4393
4394       if (pre
4395           && !TEST_BIT (antloc[bb], expr->bitmap_index)
4396           && !TEST_BIT (transp[bb], expr->bitmap_index))
4397         abort ();
4398
4399       /* Since different machines initialize their parameter registers
4400          in different orders, assume nothing.  Collect the set of all
4401          parameter registers.  */
4402       CLEAR_HARD_REG_SET (parm_regs);
4403       nparm_regs = 0;
4404       for (p = CALL_INSN_FUNCTION_USAGE (insn); p ; p = XEXP (p, 1))
4405         if (GET_CODE (XEXP (p, 0)) == USE
4406             && GET_CODE (XEXP (XEXP (p, 0), 0)) == REG)
4407           {
4408             if (REGNO (XEXP (XEXP (p, 0), 0)) >= FIRST_PSEUDO_REGISTER)
4409               abort ();
4410
4411             SET_HARD_REG_BIT (parm_regs, REGNO (XEXP (XEXP (p, 0), 0)));
4412             nparm_regs++;
4413           }
4414
4415       /* Search backward for the first set of a register in this set.  */
4416       while (nparm_regs && BLOCK_HEAD (bb) != insn)
4417         {
4418           insn = PREV_INSN (insn);
4419           p = single_set (insn);
4420           if (p && GET_CODE (SET_DEST (p)) == REG
4421               && REGNO (SET_DEST (p)) < FIRST_PSEUDO_REGISTER
4422               && TEST_HARD_REG_BIT (parm_regs, REGNO (SET_DEST (p))))
4423             {
4424               CLEAR_HARD_REG_BIT (parm_regs, REGNO (SET_DEST (p)));
4425               nparm_regs--;
4426             }
4427         }
4428
4429       /* If we found all the parameter loads, then we want to insert
4430          before the first parameter load.
4431
4432          If we did not find all the parameter loads, then we might have
4433          stopped on the head of the block, which could be a CODE_LABEL.
4434          If we inserted before the CODE_LABEL, then we would be putting
4435          the insn in the wrong basic block.  In that case, put the insn
4436          after the CODE_LABEL.  Also, respect NOTE_INSN_BASIC_BLOCK.  */
4437       while (GET_CODE (insn) == CODE_LABEL
4438              || NOTE_INSN_BASIC_BLOCK_P (insn))
4439         insn = NEXT_INSN (insn);
4440
4441       new_insn = emit_block_insn_before (pat, insn, BASIC_BLOCK (bb));
4442     }
4443   else
4444     {
4445       new_insn = emit_insn_after (pat, insn);
4446       BLOCK_END (bb) = new_insn;
4447     }
4448
4449   /* Keep block number table up to date.
4450      Note, PAT could be a multiple insn sequence, we have to make
4451      sure that each insn in the sequence is handled.  */
4452   if (GET_CODE (pat) == SEQUENCE)
4453     {
4454       for (i = 0; i < XVECLEN (pat, 0); i++)
4455         {
4456           rtx insn = XVECEXP (pat, 0, i);
4457
4458           set_block_num (insn, bb);
4459           if (INSN_P (insn))
4460             add_label_notes (PATTERN (insn), new_insn);
4461
4462           note_stores (PATTERN (insn), record_set_info, insn);
4463         }
4464     }
4465   else
4466     {
4467       add_label_notes (SET_SRC (pat), new_insn);
4468       set_block_num (new_insn, bb);
4469
4470       /* Keep register set table up to date.  */
4471       record_one_set (regno, new_insn);
4472     }
4473
4474   gcse_create_count++;
4475
4476   if (gcse_file)
4477     {
4478       fprintf (gcse_file, "PRE/HOIST: end of bb %d, insn %d, ",
4479                bb, INSN_UID (new_insn));
4480       fprintf (gcse_file, "copying expression %d to reg %d\n",
4481                expr->bitmap_index, regno);
4482     }
4483 }
4484
4485 /* Insert partially redundant expressions on edges in the CFG to make
4486    the expressions fully redundant.  */
4487
4488 static int
4489 pre_edge_insert (edge_list, index_map)
4490      struct edge_list *edge_list;
4491      struct expr **index_map;
4492 {
4493   int e, i, j, num_edges, set_size, did_insert = 0;
4494   sbitmap *inserted;
4495
4496   /* Where PRE_INSERT_MAP is nonzero, we add the expression on that edge
4497      if it reaches any of the deleted expressions.  */
4498
4499   set_size = pre_insert_map[0]->size;
4500   num_edges = NUM_EDGES (edge_list);
4501   inserted = sbitmap_vector_alloc (num_edges, n_exprs);
4502   sbitmap_vector_zero (inserted, num_edges);
4503
4504   for (e = 0; e < num_edges; e++)
4505     {
4506       int indx;
4507       basic_block pred = INDEX_EDGE_PRED_BB (edge_list, e);
4508       int bb = pred->index;
4509
4510       for (i = indx = 0; i < set_size; i++, indx += SBITMAP_ELT_BITS)
4511         {
4512           SBITMAP_ELT_TYPE insert = pre_insert_map[e]->elms[i];
4513
4514           for (j = indx; insert && j < n_exprs; j++, insert >>= 1)
4515             if ((insert & 1) != 0 && index_map[j]->reaching_reg != NULL_RTX)
4516               {
4517                 struct expr *expr = index_map[j];
4518                 struct occr *occr;
4519
4520                 /* Now look at each deleted occurence of this expression.  */
4521                 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
4522                   {
4523                     if (! occr->deleted_p)
4524                       continue;
4525
4526                     /* Insert this expression on this edge if if it would
4527                        reach the deleted occurence in BB.  */
4528                     if (!TEST_BIT (inserted[e], j))
4529                       {
4530                         rtx insn;
4531                         edge eg = INDEX_EDGE (edge_list, e);
4532
4533                         /* We can't insert anything on an abnormal and
4534                            critical edge, so we insert the insn at the end of
4535                            the previous block. There are several alternatives
4536                            detailed in Morgans book P277 (sec 10.5) for
4537                            handling this situation.  This one is easiest for
4538                            now.  */
4539
4540                         if ((eg->flags & EDGE_ABNORMAL) == EDGE_ABNORMAL)
4541                           insert_insn_end_bb (index_map[j], bb, 0);
4542                         else
4543                           {
4544                             insn = process_insert_insn (index_map[j]);
4545                             insert_insn_on_edge (insn, eg);
4546                           }
4547
4548                         if (gcse_file)
4549                           {
4550                             fprintf (gcse_file, "PRE/HOIST: edge (%d,%d), ",
4551                                      bb,
4552                                      INDEX_EDGE_SUCC_BB (edge_list, e)->index);
4553                             fprintf (gcse_file, "copy expression %d\n",
4554                                      expr->bitmap_index);
4555                           }
4556
4557                         SET_BIT (inserted[e], j);
4558                         did_insert = 1;
4559                         gcse_create_count++;
4560                       }
4561                   }
4562               }
4563         }
4564     }
4565
4566   free (inserted);
4567   return did_insert;
4568 }
4569
4570 /* Copy the result of INSN to REG.  INDX is the expression number.  */
4571
4572 static void
4573 pre_insert_copy_insn (expr, insn)
4574      struct expr *expr;
4575      rtx insn;
4576 {
4577   rtx reg = expr->reaching_reg;
4578   int regno = REGNO (reg);
4579   int indx = expr->bitmap_index;
4580   rtx set = single_set (insn);
4581   rtx new_insn;
4582   int bb = BLOCK_NUM (insn);
4583
4584   if (!set)
4585     abort ();
4586
4587   new_insn = emit_insn_after (gen_rtx_SET (VOIDmode, reg, SET_DEST (set)),
4588                               insn);
4589
4590   /* Keep block number table up to date.  */
4591   set_block_num (new_insn, bb);
4592
4593   /* Keep register set table up to date.  */
4594   record_one_set (regno, new_insn);
4595   if (insn == BLOCK_END (bb))
4596     BLOCK_END (bb) = new_insn;
4597
4598   gcse_create_count++;
4599
4600   if (gcse_file)
4601     fprintf (gcse_file,
4602              "PRE: bb %d, insn %d, copy expression %d in insn %d to reg %d\n",
4603               BLOCK_NUM (insn), INSN_UID (new_insn), indx,
4604               INSN_UID (insn), regno);
4605 }
4606
4607 /* Copy available expressions that reach the redundant expression
4608    to `reaching_reg'.  */
4609
4610 static void
4611 pre_insert_copies ()
4612 {
4613   unsigned int i;
4614   struct expr *expr;
4615   struct occr *occr;
4616   struct occr *avail;
4617
4618   /* For each available expression in the table, copy the result to
4619      `reaching_reg' if the expression reaches a deleted one.
4620
4621      ??? The current algorithm is rather brute force.
4622      Need to do some profiling.  */
4623
4624   for (i = 0; i < expr_hash_table_size; i++)
4625     for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
4626       {
4627         /* If the basic block isn't reachable, PPOUT will be TRUE.  However,
4628            we don't want to insert a copy here because the expression may not
4629            really be redundant.  So only insert an insn if the expression was
4630            deleted.  This test also avoids further processing if the
4631            expression wasn't deleted anywhere.  */
4632         if (expr->reaching_reg == NULL)
4633           continue;
4634
4635         for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
4636           {
4637             if (! occr->deleted_p)
4638               continue;
4639
4640             for (avail = expr->avail_occr; avail != NULL; avail = avail->next)
4641               {
4642                 rtx insn = avail->insn;
4643
4644                 /* No need to handle this one if handled already.  */
4645                 if (avail->copied_p)
4646                   continue;
4647
4648                 /* Don't handle this one if it's a redundant one.  */
4649                 if (TEST_BIT (pre_redundant_insns, INSN_CUID (insn)))
4650                   continue;
4651
4652                 /* Or if the expression doesn't reach the deleted one.  */
4653                 if (! pre_expr_reaches_here_p (BLOCK_NUM (avail->insn), expr,
4654                                                BLOCK_NUM (occr->insn)))
4655                   continue;
4656
4657                 /* Copy the result of avail to reaching_reg.  */
4658                 pre_insert_copy_insn (expr, insn);
4659                 avail->copied_p = 1;
4660               }
4661           }
4662       }
4663 }
4664
4665 /* Delete redundant computations.
4666    Deletion is done by changing the insn to copy the `reaching_reg' of
4667    the expression into the result of the SET.  It is left to later passes
4668    (cprop, cse2, flow, combine, regmove) to propagate the copy or eliminate it.
4669
4670    Returns non-zero if a change is made.  */
4671
4672 static int
4673 pre_delete ()
4674 {
4675   unsigned int i;
4676   int changed;
4677   struct expr *expr;
4678   struct occr *occr;
4679
4680   changed = 0;
4681   for (i = 0; i < expr_hash_table_size; i++)
4682     for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
4683       {
4684         int indx = expr->bitmap_index;
4685
4686         /* We only need to search antic_occr since we require
4687            ANTLOC != 0.  */
4688
4689         for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
4690           {
4691             rtx insn = occr->insn;
4692             rtx set;
4693             int bb = BLOCK_NUM (insn);
4694
4695             if (TEST_BIT (pre_delete_map[bb], indx))
4696               {
4697                 set = single_set (insn);
4698                 if (! set)
4699                   abort ();
4700
4701                 /* Create a pseudo-reg to store the result of reaching
4702                    expressions into.  Get the mode for the new pseudo from
4703                    the mode of the original destination pseudo.  */
4704                 if (expr->reaching_reg == NULL)
4705                   expr->reaching_reg
4706                     = gen_reg_rtx (GET_MODE (SET_DEST (set)));
4707
4708                 /* In theory this should never fail since we're creating
4709                    a reg->reg copy.
4710
4711                    However, on the x86 some of the movXX patterns actually
4712                    contain clobbers of scratch regs.  This may cause the
4713                    insn created by validate_change to not match any pattern
4714                    and thus cause validate_change to fail.   */
4715                 if (validate_change (insn, &SET_SRC (set),
4716                                      expr->reaching_reg, 0))
4717                   {
4718                     occr->deleted_p = 1;
4719                     SET_BIT (pre_redundant_insns, INSN_CUID (insn));
4720                     changed = 1;
4721                     gcse_subst_count++;
4722                   }
4723
4724                 if (gcse_file)
4725                   {
4726                     fprintf (gcse_file,
4727                              "PRE: redundant insn %d (expression %d) in ",
4728                                INSN_UID (insn), indx);
4729                     fprintf (gcse_file, "bb %d, reaching reg is %d\n",
4730                              bb, REGNO (expr->reaching_reg));
4731                   }
4732               }
4733           }
4734       }
4735
4736   return changed;
4737 }
4738
4739 /* Perform GCSE optimizations using PRE.
4740    This is called by one_pre_gcse_pass after all the dataflow analysis
4741    has been done.
4742
4743    This is based on the original Morel-Renvoise paper Fred Chow's thesis, and
4744    lazy code motion from Knoop, Ruthing and Steffen as described in Advanced
4745    Compiler Design and Implementation.
4746
4747    ??? A new pseudo reg is created to hold the reaching expression.  The nice
4748    thing about the classical approach is that it would try to use an existing
4749    reg.  If the register can't be adequately optimized [i.e. we introduce
4750    reload problems], one could add a pass here to propagate the new register
4751    through the block.
4752
4753    ??? We don't handle single sets in PARALLELs because we're [currently] not
4754    able to copy the rest of the parallel when we insert copies to create full
4755    redundancies from partial redundancies.  However, there's no reason why we
4756    can't handle PARALLELs in the cases where there are no partial
4757    redundancies.  */
4758
4759 static int
4760 pre_gcse ()
4761 {
4762   unsigned int i;
4763   int did_insert, changed;
4764   struct expr **index_map;
4765   struct expr *expr;
4766
4767   /* Compute a mapping from expression number (`bitmap_index') to
4768      hash table entry.  */
4769
4770   index_map = (struct expr **) xcalloc (n_exprs, sizeof (struct expr *));
4771   for (i = 0; i < expr_hash_table_size; i++)
4772     for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
4773       index_map[expr->bitmap_index] = expr;
4774
4775   /* Reset bitmap used to track which insns are redundant.  */
4776   pre_redundant_insns = sbitmap_alloc (max_cuid);
4777   sbitmap_zero (pre_redundant_insns);
4778
4779   /* Delete the redundant insns first so that
4780      - we know what register to use for the new insns and for the other
4781        ones with reaching expressions
4782      - we know which insns are redundant when we go to create copies  */
4783
4784   changed = pre_delete ();
4785
4786   did_insert = pre_edge_insert (edge_list, index_map);
4787
4788   /* In other places with reaching expressions, copy the expression to the
4789      specially allocated pseudo-reg that reaches the redundant expr.  */
4790   pre_insert_copies ();
4791   if (did_insert)
4792     {
4793       commit_edge_insertions ();
4794       changed = 1;
4795     }
4796
4797   free (index_map);
4798   free (pre_redundant_insns);
4799   return changed;
4800 }
4801
4802 /* Top level routine to perform one PRE GCSE pass.
4803
4804    Return non-zero if a change was made.  */
4805
4806 static int
4807 one_pre_gcse_pass (pass)
4808      int pass;
4809 {
4810   int changed = 0;
4811
4812   gcse_subst_count = 0;
4813   gcse_create_count = 0;
4814
4815   alloc_expr_hash_table (max_cuid);
4816   add_noreturn_fake_exit_edges ();
4817   compute_expr_hash_table ();
4818   if (gcse_file)
4819     dump_hash_table (gcse_file, "Expression", expr_hash_table,
4820                      expr_hash_table_size, n_exprs);
4821
4822   if (n_exprs > 0)
4823     {
4824       alloc_pre_mem (n_basic_blocks, n_exprs);
4825       compute_pre_data ();
4826       changed |= pre_gcse ();
4827       free_edge_list (edge_list);
4828       free_pre_mem ();
4829     }
4830
4831   remove_fake_edges ();
4832   free_expr_hash_table ();
4833
4834   if (gcse_file)
4835     {
4836       fprintf (gcse_file, "\nPRE GCSE of %s, pass %d: %d bytes needed, ",
4837                current_function_name, pass, bytes_used);
4838       fprintf (gcse_file, "%d substs, %d insns created\n",
4839                gcse_subst_count, gcse_create_count);
4840     }
4841
4842   return changed;
4843 }
4844 \f
4845 /* If X contains any LABEL_REF's, add REG_LABEL notes for them to INSN.
4846    If notes are added to an insn which references a CODE_LABEL, the
4847    LABEL_NUSES count is incremented.  We have to add REG_LABEL notes,
4848    because the following loop optimization pass requires them.  */
4849
4850 /* ??? This is very similar to the loop.c add_label_notes function.  We
4851    could probably share code here.  */
4852
4853 /* ??? If there was a jump optimization pass after gcse and before loop,
4854    then we would not need to do this here, because jump would add the
4855    necessary REG_LABEL notes.  */
4856
4857 static void
4858 add_label_notes (x, insn)
4859      rtx x;
4860      rtx insn;
4861 {
4862   enum rtx_code code = GET_CODE (x);
4863   int i, j;
4864   const char *fmt;
4865
4866   if (code == LABEL_REF && !LABEL_REF_NONLOCAL_P (x))
4867     {
4868       /* This code used to ignore labels that referred to dispatch tables to
4869          avoid flow generating (slighly) worse code.
4870
4871          We no longer ignore such label references (see LABEL_REF handling in
4872          mark_jump_label for additional information).  */
4873
4874       REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_LABEL, XEXP (x, 0),
4875                                             REG_NOTES (insn));
4876       if (LABEL_P (XEXP (x, 0)))
4877         LABEL_NUSES (XEXP (x, 0))++;
4878       return;
4879     }
4880
4881   for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
4882     {
4883       if (fmt[i] == 'e')
4884         add_label_notes (XEXP (x, i), insn);
4885       else if (fmt[i] == 'E')
4886         for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4887           add_label_notes (XVECEXP (x, i, j), insn);
4888     }
4889 }
4890
4891 /* Compute transparent outgoing information for each block.
4892
4893    An expression is transparent to an edge unless it is killed by
4894    the edge itself.  This can only happen with abnormal control flow,
4895    when the edge is traversed through a call.  This happens with
4896    non-local labels and exceptions.
4897
4898    This would not be necessary if we split the edge.  While this is
4899    normally impossible for abnormal critical edges, with some effort
4900    it should be possible with exception handling, since we still have
4901    control over which handler should be invoked.  But due to increased
4902    EH table sizes, this may not be worthwhile.  */
4903
4904 static void
4905 compute_transpout ()
4906 {
4907   int bb;
4908   unsigned int i;
4909   struct expr *expr;
4910
4911   sbitmap_vector_ones (transpout, n_basic_blocks);
4912
4913   for (bb = 0; bb < n_basic_blocks; ++bb)
4914     {
4915       /* Note that flow inserted a nop a the end of basic blocks that
4916          end in call instructions for reasons other than abnormal
4917          control flow.  */
4918       if (GET_CODE (BLOCK_END (bb)) != CALL_INSN)
4919         continue;
4920
4921       for (i = 0; i < expr_hash_table_size; i++)
4922         for (expr = expr_hash_table[i]; expr ; expr = expr->next_same_hash)
4923           if (GET_CODE (expr->expr) == MEM)
4924             {
4925               if (GET_CODE (XEXP (expr->expr, 0)) == SYMBOL_REF
4926                   && CONSTANT_POOL_ADDRESS_P (XEXP (expr->expr, 0)))
4927                 continue;
4928
4929               /* ??? Optimally, we would use interprocedural alias
4930                  analysis to determine if this mem is actually killed
4931                  by this call.  */
4932               RESET_BIT (transpout[bb], expr->bitmap_index);
4933             }
4934     }
4935 }
4936
4937 /* Removal of useless null pointer checks */
4938
4939 /* Called via note_stores.  X is set by SETTER.  If X is a register we must
4940    invalidate nonnull_local and set nonnull_killed.  DATA is really a
4941    `null_pointer_info *'.
4942
4943    We ignore hard registers.  */
4944
4945 static void
4946 invalidate_nonnull_info (x, setter, data)
4947      rtx x;
4948      rtx setter ATTRIBUTE_UNUSED;
4949      void *data;
4950 {
4951   unsigned int regno;
4952   struct null_pointer_info *npi = (struct null_pointer_info *) data;
4953
4954   while (GET_CODE (x) == SUBREG)
4955     x = SUBREG_REG (x);
4956
4957   /* Ignore anything that is not a register or is a hard register.  */
4958   if (GET_CODE (x) != REG
4959       || REGNO (x) < npi->min_reg
4960       || REGNO (x) >= npi->max_reg)
4961     return;
4962
4963   regno = REGNO (x) - npi->min_reg;
4964
4965   RESET_BIT (npi->nonnull_local[npi->current_block], regno);
4966   SET_BIT (npi->nonnull_killed[npi->current_block], regno);
4967 }
4968
4969 /* Do null-pointer check elimination for the registers indicated in
4970    NPI.  NONNULL_AVIN and NONNULL_AVOUT are pre-allocated sbitmaps;
4971    they are not our responsibility to free.  */
4972
4973 static void
4974 delete_null_pointer_checks_1 (block_reg, nonnull_avin, nonnull_avout, npi)
4975      unsigned int *block_reg;
4976      sbitmap *nonnull_avin;
4977      sbitmap *nonnull_avout;
4978      struct null_pointer_info *npi;
4979 {
4980   int bb;
4981   int current_block;
4982   sbitmap *nonnull_local = npi->nonnull_local;
4983   sbitmap *nonnull_killed = npi->nonnull_killed;
4984
4985   /* Compute local properties, nonnull and killed.  A register will have
4986      the nonnull property if at the end of the current block its value is
4987      known to be nonnull.  The killed property indicates that somewhere in
4988      the block any information we had about the register is killed.
4989
4990      Note that a register can have both properties in a single block.  That
4991      indicates that it's killed, then later in the block a new value is
4992      computed.  */
4993   sbitmap_vector_zero (nonnull_local, n_basic_blocks);
4994   sbitmap_vector_zero (nonnull_killed, n_basic_blocks);
4995
4996   for (current_block = 0; current_block < n_basic_blocks; current_block++)
4997     {
4998       rtx insn, stop_insn;
4999
5000       /* Set the current block for invalidate_nonnull_info.  */
5001       npi->current_block = current_block;
5002
5003       /* Scan each insn in the basic block looking for memory references and
5004          register sets.  */
5005       stop_insn = NEXT_INSN (BLOCK_END (current_block));
5006       for (insn = BLOCK_HEAD (current_block);
5007            insn != stop_insn;
5008            insn = NEXT_INSN (insn))
5009         {
5010           rtx set;
5011           rtx reg;
5012
5013           /* Ignore anything that is not a normal insn.  */
5014           if (! INSN_P (insn))
5015             continue;
5016
5017           /* Basically ignore anything that is not a simple SET.  We do have
5018              to make sure to invalidate nonnull_local and set nonnull_killed
5019              for such insns though.  */
5020           set = single_set (insn);
5021           if (!set)
5022             {
5023               note_stores (PATTERN (insn), invalidate_nonnull_info, npi);
5024               continue;
5025             }
5026
5027           /* See if we've got a useable memory load.  We handle it first
5028              in case it uses its address register as a dest (which kills
5029              the nonnull property).  */
5030           if (GET_CODE (SET_SRC (set)) == MEM
5031               && GET_CODE ((reg = XEXP (SET_SRC (set), 0))) == REG
5032               && REGNO (reg) >= npi->min_reg
5033               && REGNO (reg) < npi->max_reg)
5034             SET_BIT (nonnull_local[current_block],
5035                      REGNO (reg) - npi->min_reg);
5036
5037           /* Now invalidate stuff clobbered by this insn.  */
5038           note_stores (PATTERN (insn), invalidate_nonnull_info, npi);
5039
5040           /* And handle stores, we do these last since any sets in INSN can
5041              not kill the nonnull property if it is derived from a MEM
5042              appearing in a SET_DEST.  */
5043           if (GET_CODE (SET_DEST (set)) == MEM
5044               && GET_CODE ((reg = XEXP (SET_DEST (set), 0))) == REG
5045               && REGNO (reg) >= npi->min_reg
5046               && REGNO (reg) < npi->max_reg)
5047             SET_BIT (nonnull_local[current_block],
5048                      REGNO (reg) - npi->min_reg);
5049         }
5050     }
5051
5052   /* Now compute global properties based on the local properties.   This
5053      is a classic global availablity algorithm.  */
5054   compute_available (nonnull_local, nonnull_killed,
5055                      nonnull_avout, nonnull_avin);
5056
5057   /* Now look at each bb and see if it ends with a compare of a value
5058      against zero.  */
5059   for (bb = 0; bb < n_basic_blocks; bb++)
5060     {
5061       rtx last_insn = BLOCK_END (bb);
5062       rtx condition, earliest;
5063       int compare_and_branch;
5064
5065       /* Since MIN_REG is always at least FIRST_PSEUDO_REGISTER, and
5066          since BLOCK_REG[BB] is zero if this block did not end with a
5067          comparison against zero, this condition works.  */
5068       if (block_reg[bb] < npi->min_reg
5069           || block_reg[bb] >= npi->max_reg)
5070         continue;
5071
5072       /* LAST_INSN is a conditional jump.  Get its condition.  */
5073       condition = get_condition (last_insn, &earliest);
5074
5075       /* If we can't determine the condition then skip.  */
5076       if (! condition)
5077         continue;
5078
5079       /* Is the register known to have a nonzero value?  */
5080       if (!TEST_BIT (nonnull_avout[bb], block_reg[bb] - npi->min_reg))
5081         continue;
5082
5083       /* Try to compute whether the compare/branch at the loop end is one or
5084          two instructions.  */
5085       if (earliest == last_insn)
5086         compare_and_branch = 1;
5087       else if (earliest == prev_nonnote_insn (last_insn))
5088         compare_and_branch = 2;
5089       else
5090         continue;
5091
5092       /* We know the register in this comparison is nonnull at exit from
5093          this block.  We can optimize this comparison.  */
5094       if (GET_CODE (condition) == NE)
5095         {
5096           rtx new_jump;
5097
5098           new_jump = emit_jump_insn_before (gen_jump (JUMP_LABEL (last_insn)),
5099                                             last_insn);
5100           JUMP_LABEL (new_jump) = JUMP_LABEL (last_insn);
5101           LABEL_NUSES (JUMP_LABEL (new_jump))++;
5102           emit_barrier_after (new_jump);
5103         }
5104       delete_insn (last_insn);
5105       if (compare_and_branch == 2)
5106         delete_insn (earliest);
5107
5108       /* Don't check this block again.  (Note that BLOCK_END is
5109          invalid here; we deleted the last instruction in the
5110          block.)  */
5111       block_reg[bb] = 0;
5112     }
5113 }
5114
5115 /* Find EQ/NE comparisons against zero which can be (indirectly) evaluated
5116    at compile time.
5117
5118    This is conceptually similar to global constant/copy propagation and
5119    classic global CSE (it even uses the same dataflow equations as cprop).
5120
5121    If a register is used as memory address with the form (mem (reg)), then we
5122    know that REG can not be zero at that point in the program.  Any instruction
5123    which sets REG "kills" this property.
5124
5125    So, if every path leading to a conditional branch has an available memory
5126    reference of that form, then we know the register can not have the value
5127    zero at the conditional branch.
5128
5129    So we merely need to compute the local properies and propagate that data
5130    around the cfg, then optimize where possible.
5131
5132    We run this pass two times.  Once before CSE, then again after CSE.  This
5133    has proven to be the most profitable approach.  It is rare for new
5134    optimization opportunities of this nature to appear after the first CSE
5135    pass.
5136
5137    This could probably be integrated with global cprop with a little work.  */
5138
5139 void
5140 delete_null_pointer_checks (f)
5141      rtx f ATTRIBUTE_UNUSED;
5142 {
5143   sbitmap *nonnull_avin, *nonnull_avout;
5144   unsigned int *block_reg;
5145   int bb;
5146   int reg;
5147   int regs_per_pass;
5148   int max_reg;
5149   struct null_pointer_info npi;
5150
5151   /* If we have only a single block, then there's nothing to do.  */
5152   if (n_basic_blocks <= 1)
5153     return;
5154
5155   /* Trying to perform global optimizations on flow graphs which have
5156      a high connectivity will take a long time and is unlikely to be
5157      particularly useful.
5158
5159      In normal circumstances a cfg should have about twice has many edges
5160      as blocks.  But we do not want to punish small functions which have
5161      a couple switch statements.  So we require a relatively large number
5162      of basic blocks and the ratio of edges to blocks to be high.  */
5163   if (n_basic_blocks > 1000 && n_edges / n_basic_blocks >= 20)
5164     return;
5165
5166   /* We need four bitmaps, each with a bit for each register in each
5167      basic block.  */
5168   max_reg = max_reg_num ();
5169   regs_per_pass = get_bitmap_width (4, n_basic_blocks, max_reg);
5170
5171   /* Allocate bitmaps to hold local and global properties.  */
5172   npi.nonnull_local = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);
5173   npi.nonnull_killed = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);
5174   nonnull_avin = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);
5175   nonnull_avout = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);
5176
5177   /* Go through the basic blocks, seeing whether or not each block
5178      ends with a conditional branch whose condition is a comparison
5179      against zero.  Record the register compared in BLOCK_REG.  */
5180   block_reg = (unsigned int *) xcalloc (n_basic_blocks, sizeof (int));
5181   for (bb = 0; bb < n_basic_blocks; bb++)
5182     {
5183       rtx last_insn = BLOCK_END (bb);
5184       rtx condition, earliest, reg;
5185
5186       /* We only want conditional branches.  */
5187       if (GET_CODE (last_insn) != JUMP_INSN
5188           || !any_condjump_p (last_insn)
5189           || !onlyjump_p (last_insn))
5190         continue;
5191
5192       /* LAST_INSN is a conditional jump.  Get its condition.  */
5193       condition = get_condition (last_insn, &earliest);
5194
5195       /* If we were unable to get the condition, or it is not a equality
5196          comparison against zero then there's nothing we can do.  */
5197       if (!condition
5198           || (GET_CODE (condition) != NE && GET_CODE (condition) != EQ)
5199           || GET_CODE (XEXP (condition, 1)) != CONST_INT
5200           || (XEXP (condition, 1)
5201               != CONST0_RTX (GET_MODE (XEXP (condition, 0)))))
5202         continue;
5203
5204       /* We must be checking a register against zero.  */
5205       reg = XEXP (condition, 0);
5206       if (GET_CODE (reg) != REG)
5207         continue;
5208
5209       block_reg[bb] = REGNO (reg);
5210     }
5211
5212   /* Go through the algorithm for each block of registers.  */
5213   for (reg = FIRST_PSEUDO_REGISTER; reg < max_reg; reg += regs_per_pass)
5214     {
5215       npi.min_reg = reg;
5216       npi.max_reg = MIN (reg + regs_per_pass, max_reg);
5217       delete_null_pointer_checks_1 (block_reg, nonnull_avin,
5218                                     nonnull_avout, &npi);
5219     }
5220
5221   /* Free the table of registers compared at the end of every block.  */
5222   free (block_reg);
5223
5224   /* Free bitmaps.  */
5225   free (npi.nonnull_local);
5226   free (npi.nonnull_killed);
5227   free (nonnull_avin);
5228   free (nonnull_avout);
5229 }
5230
5231 /* Code Hoisting variables and subroutines.  */
5232
5233 /* Very busy expressions.  */
5234 static sbitmap *hoist_vbein;
5235 static sbitmap *hoist_vbeout;
5236
5237 /* Hoistable expressions.  */
5238 static sbitmap *hoist_exprs;
5239
5240 /* Dominator bitmaps.  */
5241 static sbitmap *dominators;
5242
5243 /* ??? We could compute post dominators and run this algorithm in
5244    reverse to to perform tail merging, doing so would probably be
5245    more effective than the tail merging code in jump.c.
5246
5247    It's unclear if tail merging could be run in parallel with
5248    code hoisting.  It would be nice.  */
5249
5250 /* Allocate vars used for code hoisting analysis.  */
5251
5252 static void
5253 alloc_code_hoist_mem (n_blocks, n_exprs)
5254      int n_blocks, n_exprs;
5255 {
5256   antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
5257   transp = sbitmap_vector_alloc (n_blocks, n_exprs);
5258   comp = sbitmap_vector_alloc (n_blocks, n_exprs);
5259
5260   hoist_vbein = sbitmap_vector_alloc (n_blocks, n_exprs);
5261   hoist_vbeout = sbitmap_vector_alloc (n_blocks, n_exprs);
5262   hoist_exprs = sbitmap_vector_alloc (n_blocks, n_exprs);
5263   transpout = sbitmap_vector_alloc (n_blocks, n_exprs);
5264
5265   dominators = sbitmap_vector_alloc (n_blocks, n_blocks);
5266 }
5267
5268 /* Free vars used for code hoisting analysis.  */
5269
5270 static void
5271 free_code_hoist_mem ()
5272 {
5273   free (antloc);
5274   free (transp);
5275   free (comp);
5276
5277   free (hoist_vbein);
5278   free (hoist_vbeout);
5279   free (hoist_exprs);
5280   free (transpout);
5281
5282   free (dominators);
5283 }
5284
5285 /* Compute the very busy expressions at entry/exit from each block.
5286
5287    An expression is very busy if all paths from a given point
5288    compute the expression.  */
5289
5290 static void
5291 compute_code_hoist_vbeinout ()
5292 {
5293   int bb, changed, passes;
5294
5295   sbitmap_vector_zero (hoist_vbeout, n_basic_blocks);
5296   sbitmap_vector_zero (hoist_vbein, n_basic_blocks);
5297
5298   passes = 0;
5299   changed = 1;
5300
5301   while (changed)
5302     {
5303       changed = 0;
5304
5305       /* We scan the blocks in the reverse order to speed up
5306          the convergence.  */
5307       for (bb = n_basic_blocks - 1; bb >= 0; bb--)
5308         {
5309           changed |= sbitmap_a_or_b_and_c (hoist_vbein[bb], antloc[bb],
5310                                            hoist_vbeout[bb], transp[bb]);
5311           if (bb != n_basic_blocks - 1)
5312             sbitmap_intersection_of_succs (hoist_vbeout[bb], hoist_vbein, bb);
5313         }
5314
5315       passes++;
5316     }
5317
5318   if (gcse_file)
5319     fprintf (gcse_file, "hoisting vbeinout computation: %d passes\n", passes);
5320 }
5321
5322 /* Top level routine to do the dataflow analysis needed by code hoisting.  */
5323
5324 static void
5325 compute_code_hoist_data ()
5326 {
5327   compute_local_properties (transp, comp, antloc, 0);
5328   compute_transpout ();
5329   compute_code_hoist_vbeinout ();
5330   calculate_dominance_info (NULL, dominators, CDI_DOMINATORS);
5331   if (gcse_file)
5332     fprintf (gcse_file, "\n");
5333 }
5334
5335 /* Determine if the expression identified by EXPR_INDEX would
5336    reach BB unimpared if it was placed at the end of EXPR_BB.
5337
5338    It's unclear exactly what Muchnick meant by "unimpared".  It seems
5339    to me that the expression must either be computed or transparent in
5340    *every* block in the path(s) from EXPR_BB to BB.  Any other definition
5341    would allow the expression to be hoisted out of loops, even if
5342    the expression wasn't a loop invariant.
5343
5344    Contrast this to reachability for PRE where an expression is
5345    considered reachable if *any* path reaches instead of *all*
5346    paths.  */
5347
5348 static int
5349 hoist_expr_reaches_here_p (expr_bb, expr_index, bb, visited)
5350      int expr_bb;
5351      int expr_index;
5352      int bb;
5353      char *visited;
5354 {
5355   edge pred;
5356   int visited_allocated_locally = 0;
5357
5358
5359   if (visited == NULL)
5360     {
5361        visited_allocated_locally = 1;
5362        visited = xcalloc (n_basic_blocks, 1);
5363     }
5364
5365   for (pred = BASIC_BLOCK (bb)->pred; pred != NULL; pred = pred->pred_next)
5366     {
5367       int pred_bb = pred->src->index;
5368
5369       if (pred->src == ENTRY_BLOCK_PTR)
5370         break;
5371       else if (visited[pred_bb])
5372         continue;
5373
5374       /* Does this predecessor generate this expression?  */
5375       else if (TEST_BIT (comp[pred_bb], expr_index))
5376         break;
5377       else if (! TEST_BIT (transp[pred_bb], expr_index))
5378         break;
5379
5380       /* Not killed.  */
5381       else
5382         {
5383           visited[pred_bb] = 1;
5384           if (! hoist_expr_reaches_here_p (expr_bb, expr_index,
5385                                            pred_bb, visited))
5386             break;
5387         }
5388     }
5389   if (visited_allocated_locally)
5390     free (visited);
5391
5392   return (pred == NULL);
5393 }
5394 \f
5395 /* Actually perform code hoisting.  */
5396
5397 static void
5398 hoist_code ()
5399 {
5400   int bb, dominated;
5401   unsigned int i;
5402   struct expr **index_map;
5403   struct expr *expr;
5404
5405   sbitmap_vector_zero (hoist_exprs, n_basic_blocks);
5406
5407   /* Compute a mapping from expression number (`bitmap_index') to
5408      hash table entry.  */
5409
5410   index_map = (struct expr **) xcalloc (n_exprs, sizeof (struct expr *));
5411   for (i = 0; i < expr_hash_table_size; i++)
5412     for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
5413       index_map[expr->bitmap_index] = expr;
5414
5415   /* Walk over each basic block looking for potentially hoistable
5416      expressions, nothing gets hoisted from the entry block.  */
5417   for (bb = 0; bb < n_basic_blocks; bb++)
5418     {
5419       int found = 0;
5420       int insn_inserted_p;
5421
5422       /* Examine each expression that is very busy at the exit of this
5423          block.  These are the potentially hoistable expressions.  */
5424       for (i = 0; i < hoist_vbeout[bb]->n_bits; i++)
5425         {
5426           int hoistable = 0;
5427
5428           if (TEST_BIT (hoist_vbeout[bb], i) && TEST_BIT (transpout[bb], i))
5429             {
5430               /* We've found a potentially hoistable expression, now
5431                  we look at every block BB dominates to see if it
5432                  computes the expression.  */
5433               for (dominated = 0; dominated < n_basic_blocks; dominated++)
5434                 {
5435                   /* Ignore self dominance.  */
5436                   if (bb == dominated
5437                       || ! TEST_BIT (dominators[dominated], bb))
5438                     continue;
5439
5440                   /* We've found a dominated block, now see if it computes
5441                      the busy expression and whether or not moving that
5442                      expression to the "beginning" of that block is safe.  */
5443                   if (!TEST_BIT (antloc[dominated], i))
5444                     continue;
5445
5446                   /* Note if the expression would reach the dominated block
5447                      unimpared if it was placed at the end of BB.
5448
5449                      Keep track of how many times this expression is hoistable
5450                      from a dominated block into BB.  */
5451                   if (hoist_expr_reaches_here_p (bb, i, dominated, NULL))
5452                     hoistable++;
5453                 }
5454
5455               /* If we found more than one hoistable occurence of this
5456                  expression, then note it in the bitmap of expressions to
5457                  hoist.  It makes no sense to hoist things which are computed
5458                  in only one BB, and doing so tends to pessimize register
5459                  allocation.  One could increase this value to try harder
5460                  to avoid any possible code expansion due to register
5461                  allocation issues; however experiments have shown that
5462                  the vast majority of hoistable expressions are only movable
5463                  from two successors, so raising this threshhold is likely
5464                  to nullify any benefit we get from code hoisting.  */
5465               if (hoistable > 1)
5466                 {
5467                   SET_BIT (hoist_exprs[bb], i);
5468                   found = 1;
5469                 }
5470             }
5471         }
5472
5473       /* If we found nothing to hoist, then quit now.  */
5474       if (! found)
5475         continue;
5476
5477       /* Loop over all the hoistable expressions.  */
5478       for (i = 0; i < hoist_exprs[bb]->n_bits; i++)
5479         {
5480           /* We want to insert the expression into BB only once, so
5481              note when we've inserted it.  */
5482           insn_inserted_p = 0;
5483
5484           /* These tests should be the same as the tests above.  */
5485           if (TEST_BIT (hoist_vbeout[bb], i))
5486             {
5487               /* We've found a potentially hoistable expression, now
5488                  we look at every block BB dominates to see if it
5489                  computes the expression.  */
5490               for (dominated = 0; dominated < n_basic_blocks; dominated++)
5491                 {
5492                   /* Ignore self dominance.  */
5493                   if (bb == dominated
5494                       || ! TEST_BIT (dominators[dominated], bb))
5495                     continue;
5496
5497                   /* We've found a dominated block, now see if it computes
5498                      the busy expression and whether or not moving that
5499                      expression to the "beginning" of that block is safe.  */
5500                   if (!TEST_BIT (antloc[dominated], i))
5501                     continue;
5502
5503                   /* The expression is computed in the dominated block and
5504                      it would be safe to compute it at the start of the
5505                      dominated block.  Now we have to determine if the
5506                      expresion would reach the dominated block if it was
5507                      placed at the end of BB.  */
5508                   if (hoist_expr_reaches_here_p (bb, i, dominated, NULL))
5509                     {
5510                       struct expr *expr = index_map[i];
5511                       struct occr *occr = expr->antic_occr;
5512                       rtx insn;
5513                       rtx set;
5514
5515                       /* Find the right occurence of this expression.  */
5516                       while (BLOCK_NUM (occr->insn) != dominated && occr)
5517                         occr = occr->next;
5518
5519                       /* Should never happen.  */
5520                       if (!occr)
5521                         abort ();
5522
5523                       insn = occr->insn;
5524
5525                       set = single_set (insn);
5526                       if (! set)
5527                         abort ();
5528
5529                       /* Create a pseudo-reg to store the result of reaching
5530                          expressions into.  Get the mode for the new pseudo
5531                          from the mode of the original destination pseudo.  */
5532                       if (expr->reaching_reg == NULL)
5533                         expr->reaching_reg
5534                           = gen_reg_rtx (GET_MODE (SET_DEST (set)));
5535
5536                       /* In theory this should never fail since we're creating
5537                          a reg->reg copy.
5538
5539                          However, on the x86 some of the movXX patterns
5540                          actually contain clobbers of scratch regs.  This may
5541                          cause the insn created by validate_change to not
5542                          match any pattern and thus cause validate_change to
5543                          fail.  */
5544                       if (validate_change (insn, &SET_SRC (set),
5545                                            expr->reaching_reg, 0))
5546                         {
5547                           occr->deleted_p = 1;
5548                           if (!insn_inserted_p)
5549                             {
5550                               insert_insn_end_bb (index_map[i], bb, 0);
5551                               insn_inserted_p = 1;
5552                             }
5553                         }
5554                     }
5555                 }
5556             }
5557         }
5558     }
5559
5560     free (index_map);
5561 }
5562
5563 /* Top level routine to perform one code hoisting (aka unification) pass
5564
5565    Return non-zero if a change was made.  */
5566
5567 static int
5568 one_code_hoisting_pass ()
5569 {
5570   int changed = 0;
5571
5572   alloc_expr_hash_table (max_cuid);
5573   compute_expr_hash_table ();
5574   if (gcse_file)
5575     dump_hash_table (gcse_file, "Code Hosting Expressions", expr_hash_table,
5576                      expr_hash_table_size, n_exprs);
5577
5578   if (n_exprs > 0)
5579     {
5580       alloc_code_hoist_mem (n_basic_blocks, n_exprs);
5581       compute_code_hoist_data ();
5582       hoist_code ();
5583       free_code_hoist_mem ();
5584     }
5585
5586   free_expr_hash_table ();
5587
5588   return changed;
5589 }