gcc/gcse.c

   1 /* Global common subexpression elimination/Partial redundancy elimination
   2    and global constant/copy propagation for GNU compiler.
   3    Copyright (C) 1997, 1998, 1999, 2000 Free Software Foundation, Inc.
   4
   5 This file is part of GNU CC.
   6
   7 GNU CC is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 2, or (at your option)
  10 any later version.
  11
  12 GNU CC is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GNU CC; see the file COPYING.  If not, write to
  19 the Free Software Foundation, 59 Temple Place - Suite 330,
  20 Boston, MA 02111-1307, USA.  */
  21
  22 /* TODO
  23    - reordering of memory allocation and freeing to be more space efficient
  24    - do rough calc of how many regs are needed in each block, and a rough
  25      calc of how many regs are available in each class and use that to
  26      throttle back the code in cases where RTX_COST is minimal.
  27    - dead store elimination
  28    - a store to the same address as a load does not kill the load if the
  29      source of the store is also the destination of the load.  Handling this
  30      allows more load motion, particularly out of loops.
  31    - ability to realloc sbitmap vectors would allow one initial computation
  32      of reg_set_in_block with only subsequent additions, rather than
  33      recomputing it for each pass
  34
  35 */
  36
  37 /* References searched while implementing this.
  38
  39    Compilers Principles, Techniques and Tools
  40    Aho, Sethi, Ullman
  41    Addison-Wesley, 1988
  42
  43    Global Optimization by Suppression of Partial Redundancies
  44    E. Morel, C. Renvoise
  45    communications of the acm, Vol. 22, Num. 2, Feb. 1979
  46
  47    A Portable Machine-Independent Global Optimizer - Design and Measurements
  48    Frederick Chow
  49    Stanford Ph.D. thesis, Dec. 1983
  50
  51    A Fast Algorithm for Code Movement Optimization
  52    D.M. Dhamdhere
  53    SIGPLAN Notices, Vol. 23, Num. 10, Oct. 1988
  54
  55    A Solution to a Problem with Morel and Renvoise's
  56    Global Optimization by Suppression of Partial Redundancies
  57    K-H Drechsler, M.P. Stadel
  58    ACM TOPLAS, Vol. 10, Num. 4, Oct. 1988
  59
  60    Practical Adaptation of the Global Optimization
  61    Algorithm of Morel and Renvoise
  62    D.M. Dhamdhere
  63    ACM TOPLAS, Vol. 13, Num. 2. Apr. 1991
  64
  65    Efficiently Computing Static Single Assignment Form and the Control
  66    Dependence Graph
  67    R. Cytron, J. Ferrante, B.K. Rosen, M.N. Wegman, and F.K. Zadeck
  68    ACM TOPLAS, Vol. 13, Num. 4, Oct. 1991
  69
  70    Lazy Code Motion
  71    J. Knoop, O. Ruthing, B. Steffen
  72    ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI
  73
  74    What's In a Region?  Or Computing Control Dependence Regions in Near-Linear
  75    Time for Reducible Flow Control
  76    Thomas Ball
  77    ACM Letters on Programming Languages and Systems,
  78    Vol. 2, Num. 1-4, Mar-Dec 1993
  79
  80    An Efficient Representation for Sparse Sets
  81    Preston Briggs, Linda Torczon
  82    ACM Letters on Programming Languages and Systems,
  83    Vol. 2, Num. 1-4, Mar-Dec 1993
  84
  85    A Variation of Knoop, Ruthing, and Steffen's Lazy Code Motion
  86    K-H Drechsler, M.P. Stadel
  87    ACM SIGPLAN Notices, Vol. 28, Num. 5, May 1993
  88
  89    Partial Dead Code Elimination
  90    J. Knoop, O. Ruthing, B. Steffen
  91    ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
  92
  93    Effective Partial Redundancy Elimination
  94    P. Briggs, K.D. Cooper
  95    ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
  96
  97    The Program Structure Tree: Computing Control Regions in Linear Time
  98    R. Johnson, D. Pearson, K. Pingali
  99    ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
 100
 101    Optimal Code Motion: Theory and Practice
 102    J. Knoop, O. Ruthing, B. Steffen
 103    ACM TOPLAS, Vol. 16, Num. 4, Jul. 1994
 104
 105    The power of assignment motion
 106    J. Knoop, O. Ruthing, B. Steffen
 107    ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI
 108
 109    Global code motion / global value numbering
 110    C. Click
 111    ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI
 112
 113    Value Driven Redundancy Elimination
 114    L.T. Simpson
 115    Rice University Ph.D. thesis, Apr. 1996
 116
 117    Value Numbering
 118    L.T. Simpson
 119    Massively Scalar Compiler Project, Rice University, Sep. 1996
 120
 121    High Performance Compilers for Parallel Computing
 122    Michael Wolfe
 123    Addison-Wesley, 1996
 124
 125    Advanced Compiler Design and Implementation
 126    Steven Muchnick
 127    Morgan Kaufmann, 1997
 128
 129    Building an Optimizing Compiler
 130    Robert Morgan
 131    Digital Press, 1998
 132
 133    People wishing to speed up the code here should read:
 134      Elimination Algorithms for Data Flow Analysis
 135      B.G. Ryder, M.C. Paull
 136      ACM Computing Surveys, Vol. 18, Num. 3, Sep. 1986
 137
 138      How to Analyze Large Programs Efficiently and Informatively
 139      D.M. Dhamdhere, B.K. Rosen, F.K. Zadeck
 140      ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI
 141
 142    People wishing to do something different can find various possibilities
 143    in the above papers and elsewhere.
 144 */
 145
 146 #include "config.h"
 147 #include "system.h"
 148 #include "toplev.h"
 149
 150 #include "rtl.h"
 151 #include "tm_p.h"
 152 #include "regs.h"
 153 #include "hard-reg-set.h"
 154 #include "flags.h"
 155 #include "real.h"
 156 #include "insn-config.h"
 157 #include "recog.h"
 158 #include "basic-block.h"
 159 #include "output.h"
 160 #include "function.h"
 161 #include "expr.h"
 162
 163 #include "obstack.h"
 164 #define obstack_chunk_alloc gmalloc
 165 #define obstack_chunk_free free
 166
 167 /* Maximum number of passes to perform.  */
 168 #define MAX_PASSES 1
 169
 170 /* Propagate flow information through back edges and thus enable PRE's
 171    moving loop invariant calculations out of loops.
 172
 173    Originally this tended to create worse overall code, but several
 174    improvements during the development of PRE seem to have made following
 175    back edges generally a win.
 176
 177    Note much of the loop invariant code motion done here would normally
 178    be done by loop.c, which has more heuristics for when to move invariants
 179    out of loops.  At some point we might need to move some of those
 180    heuristics into gcse.c.  */
 181 #define FOLLOW_BACK_EDGES 1
 182
 183 /* We support GCSE via Partial Redundancy Elimination.  PRE optimizations
 184    are a superset of those done by GCSE.
 185
 186    We perform the following steps:
 187
 188    1) Compute basic block information.
 189
 190    2) Compute table of places where registers are set.
 191
 192    3) Perform copy/constant propagation.
 193
 194    4) Perform global cse.
 195
 196    5) Perform another pass of copy/constant propagation.
 197
 198    Two passes of copy/constant propagation are done because the first one
 199    enables more GCSE and the second one helps to clean up the copies that
 200    GCSE creates.  This is needed more for PRE than for Classic because Classic
 201    GCSE will try to use an existing register containing the common
 202    subexpression rather than create a new one.  This is harder to do for PRE
 203    because of the code motion (which Classic GCSE doesn't do).
 204
 205    Expressions we are interested in GCSE-ing are of the form
 206    (set (pseudo-reg) (expression)).
 207    Function want_to_gcse_p says what these are.
 208
 209    PRE handles moving invariant expressions out of loops (by treating them as
 210    partially redundant).
 211
 212    Eventually it would be nice to replace cse.c/gcse.c with SSA (static single
 213    assignment) based GVN (global value numbering).  L. T. Simpson's paper
 214    (Rice University) on value numbering is a useful reference for this.
 215
 216    **********************
 217
 218    We used to support multiple passes but there are diminishing returns in
 219    doing so.  The first pass usually makes 90% of the changes that are doable.
 220    A second pass can make a few more changes made possible by the first pass.
 221    Experiments show any further passes don't make enough changes to justify
 222    the expense.
 223
 224    A study of spec92 using an unlimited number of passes:
 225    [1 pass] = 1208 substitutions, [2] = 577, [3] = 202, [4] = 192, [5] = 83,
 226    [6] = 34, [7] = 17, [8] = 9, [9] = 4, [10] = 4, [11] = 2,
 227    [12] = 2, [13] = 1, [15] = 1, [16] = 2, [41] = 1
 228
 229    It was found doing copy propagation between each pass enables further
 230    substitutions.
 231
 232    PRE is quite expensive in complicated functions because the DFA can take
 233    awhile to converge.  Hence we only perform one pass.  Macro MAX_PASSES can
 234    be modified if one wants to experiment.
 235
 236    **********************
 237
 238    The steps for PRE are:
 239
 240    1) Build the hash table of expressions we wish to GCSE (expr_hash_table).
 241
 242    2) Perform the data flow analysis for PRE.
 243
 244    3) Delete the redundant instructions
 245
 246    4) Insert the required copies [if any] that make the partially
 247       redundant instructions fully redundant.
 248
 249    5) For other reaching expressions, insert an instruction to copy the value
 250       to a newly created pseudo that will reach the redundant instruction.
 251
 252    The deletion is done first so that when we do insertions we
 253    know which pseudo reg to use.
 254
 255    Various papers have argued that PRE DFA is expensive (O(n^2)) and others
 256    argue it is not.  The number of iterations for the algorithm to converge
 257    is typically 2-4 so I don't view it as that expensive (relatively speaking).
 258
 259    PRE GCSE depends heavily on the second CSE pass to clean up the copies
 260    we create.  To make an expression reach the place where it's redundant,
 261    the result of the expression is copied to a new register, and the redundant
 262    expression is deleted by replacing it with this new register.  Classic GCSE
 263    doesn't have this problem as much as it computes the reaching defs of
 264    each register in each block and thus can try to use an existing register.
 265
 266    **********************
 267
 268    A fair bit of simplicity is created by creating small functions for simple
 269    tasks, even when the function is only called in one place.  This may
 270    measurably slow things down [or may not] by creating more function call
 271    overhead than is necessary.  The source is laid out so that it's trivial
 272    to make the affected functions inline so that one can measure what speed
 273    up, if any, can be achieved, and maybe later when things settle things can
 274    be rearranged.
 275
 276    Help stamp out big monolithic functions!  */
 277 \f
 278 /* GCSE global vars.  */
 279
 280 /* -dG dump file.  */
 281 static FILE *gcse_file;
 282
 283 /* Note whether or not we should run jump optimization after gcse.  We
 284    want to do this for two cases.
 285
 286     * If we changed any jumps via cprop.
 287
 288     * If we added any labels via edge splitting.  */
 289
 290 static int run_jump_opt_after_gcse;
 291
 292 /* Bitmaps are normally not included in debugging dumps.
 293    However it's useful to be able to print them from GDB.
 294    We could create special functions for this, but it's simpler to
 295    just allow passing stderr to the dump_foo fns.  Since stderr can
 296    be a macro, we store a copy here.  */
 297 static FILE *debug_stderr;
 298
 299 /* An obstack for our working variables.  */
 300 static struct obstack gcse_obstack;
 301
 302 /* Non-zero for each mode that supports (set (reg) (reg)).
 303    This is trivially true for integer and floating point values.
 304    It may or may not be true for condition codes.  */
 305 static char can_copy_p[(int) NUM_MACHINE_MODES];
 306
 307 /* Non-zero if can_copy_p has been initialized.  */
 308 static int can_copy_init_p;
 309
 310 struct reg_use {rtx reg_rtx; };
 311
 312 /* Hash table of expressions.  */
 313
 314 struct expr
 315 {
 316   /* The expression (SET_SRC for expressions, PATTERN for assignments).  */
 317   rtx expr;
 318   /* Index in the available expression bitmaps.  */
 319   int bitmap_index;
 320   /* Next entry with the same hash.  */
 321   struct expr *next_same_hash;
 322   /* List of anticipatable occurrences in basic blocks in the function.
 323      An "anticipatable occurrence" is one that is the first occurrence in the
 324      basic block, the operands are not modified in the basic block prior
 325      to the occurrence and the output is not used between the start of
 326      the block and the occurrence.  */
 327   struct occr *antic_occr;
 328   /* List of available occurrence in basic blocks in the function.
 329      An "available occurrence" is one that is the last occurrence in the
 330      basic block and the operands are not modified by following statements in
 331      the basic block [including this insn].  */
 332   struct occr *avail_occr;
 333   /* Non-null if the computation is PRE redundant.
 334      The value is the newly created pseudo-reg to record a copy of the
 335      expression in all the places that reach the redundant copy.  */
 336   rtx reaching_reg;
 337 };
 338
 339 /* Occurrence of an expression.
 340    There is one per basic block.  If a pattern appears more than once the
 341    last appearance is used [or first for anticipatable expressions].  */
 342
 343 struct occr
 344 {
 345   /* Next occurrence of this expression.  */
 346   struct occr *next;
 347   /* The insn that computes the expression.  */
 348   rtx insn;
 349   /* Non-zero if this [anticipatable] occurrence has been deleted.  */
 350   char deleted_p;
 351   /* Non-zero if this [available] occurrence has been copied to
 352      reaching_reg.  */
 353   /* ??? This is mutually exclusive with deleted_p, so they could share
 354      the same byte.  */
 355   char copied_p;
 356 };
 357
 358 /* Expression and copy propagation hash tables.
 359    Each hash table is an array of buckets.
 360    ??? It is known that if it were an array of entries, structure elements
 361    `next_same_hash' and `bitmap_index' wouldn't be necessary.  However, it is
 362    not clear whether in the final analysis a sufficient amount of memory would
 363    be saved as the size of the available expression bitmaps would be larger
 364    [one could build a mapping table without holes afterwards though].
 365    Someday I'll perform the computation and figure it out.  */
 366
 367 /* Total size of the expression hash table, in elements.  */
 368 static int expr_hash_table_size;
 369 /* The table itself.
 370    This is an array of `expr_hash_table_size' elements.  */
 371 static struct expr **expr_hash_table;
 372
 373 /* Total size of the copy propagation hash table, in elements.  */
 374 static int set_hash_table_size;
 375
 376 /* The table itself.
 377    This is an array of `set_hash_table_size' elements.  */
 378 static struct expr **set_hash_table;
 379
 380 /* Mapping of uids to cuids.
 381    Only real insns get cuids.  */
 382 static int *uid_cuid;
 383
 384 /* Highest UID in UID_CUID.  */
 385 static int max_uid;
 386
 387 /* Get the cuid of an insn.  */
 388 #define INSN_CUID(INSN) (uid_cuid[INSN_UID (INSN)])
 389
 390 /* Number of cuids.  */
 391 static int max_cuid;
 392
 393 /* Mapping of cuids to insns.  */
 394 static rtx *cuid_insn;
 395
 396 /* Get insn from cuid.  */
 397 #define CUID_INSN(CUID) (cuid_insn[CUID])
 398
 399 /* Maximum register number in function prior to doing gcse + 1.
 400    Registers created during this pass have regno >= max_gcse_regno.
 401    This is named with "gcse" to not collide with global of same name.  */
 402 static int max_gcse_regno;
 403
 404 /* Maximum number of cse-able expressions found.  */
 405 static int n_exprs;
 406
 407 /* Maximum number of assignments for copy propagation found.  */
 408 static int n_sets;
 409
 410 /* Table of registers that are modified.
 411
 412    For each register, each element is a list of places where the pseudo-reg
 413    is set.
 414
 415    For simplicity, GCSE is done on sets of pseudo-regs only.  PRE GCSE only
 416    requires knowledge of which blocks kill which regs [and thus could use
 417    a bitmap instead of the lists `reg_set_table' uses].
 418
 419    `reg_set_table' and could be turned into an array of bitmaps (num-bbs x
 420    num-regs) [however perhaps it may be useful to keep the data as is].  One
 421    advantage of recording things this way is that `reg_set_table' is fairly
 422    sparse with respect to pseudo regs but for hard regs could be fairly dense
 423    [relatively speaking].  And recording sets of pseudo-regs in lists speeds
 424    up functions like compute_transp since in the case of pseudo-regs we only
 425    need to iterate over the number of times a pseudo-reg is set, not over the
 426    number of basic blocks [clearly there is a bit of a slow down in the cases
 427    where a pseudo is set more than once in a block, however it is believed
 428    that the net effect is to speed things up].  This isn't done for hard-regs
 429    because recording call-clobbered hard-regs in `reg_set_table' at each
 430    function call can consume a fair bit of memory, and iterating over
 431    hard-regs stored this way in compute_transp will be more expensive.  */
 432
 433 typedef struct reg_set
 434 {
 435   /* The next setting of this register.  */
 436   struct reg_set *next;
 437   /* The insn where it was set.  */
 438   rtx insn;
 439 } reg_set;
 440
 441 static reg_set **reg_set_table;
 442
 443 /* Size of `reg_set_table'.
 444    The table starts out at max_gcse_regno + slop, and is enlarged as
 445    necessary.  */
 446 static int reg_set_table_size;
 447
 448 /* Amount to grow `reg_set_table' by when it's full.  */
 449 #define REG_SET_TABLE_SLOP 100
 450
 451 /* Bitmap containing one bit for each register in the program.
 452    Used when performing GCSE to track which registers have been set since
 453    the start of the basic block.  */
 454 static sbitmap reg_set_bitmap;
 455
 456 /* For each block, a bitmap of registers set in the block.
 457    This is used by expr_killed_p and compute_transp.
 458    It is computed during hash table computation and not by compute_sets
 459    as it includes registers added since the last pass (or between cprop and
 460    gcse) and it's currently not easy to realloc sbitmap vectors.  */
 461 static sbitmap *reg_set_in_block;
 462
 463 /* For each block, non-zero if memory is set in that block.
 464    This is computed during hash table computation and is used by
 465    expr_killed_p and compute_transp.
 466    ??? Handling of memory is very simple, we don't make any attempt
 467    to optimize things (later).
 468    ??? This can be computed by compute_sets since the information
 469    doesn't change.  */
 470 static char *mem_set_in_block;
 471
 472 /* Various variables for statistics gathering.  */
 473
 474 /* Memory used in a pass.
 475    This isn't intended to be absolutely precise.  Its intent is only
 476    to keep an eye on memory usage.  */
 477 static int bytes_used;
 478
 479 /* GCSE substitutions made.  */
 480 static int gcse_subst_count;
 481 /* Number of copy instructions created.  */
 482 static int gcse_create_count;
 483 /* Number of constants propagated.  */
 484 static int const_prop_count;
 485 /* Number of copys propagated.  */
 486 static int copy_prop_count;
 487 \f
 488 /* These variables are used by classic GCSE.
 489    Normally they'd be defined a bit later, but `rd_gen' needs to
 490    be declared sooner.  */
 491
 492 /* A bitmap of all ones for implementing the algorithm for available
 493    expressions and reaching definitions.  */
 494 /* ??? Available expression bitmaps have a different size than reaching
 495    definition bitmaps.  This should be the larger of the two, however, it
 496    is not currently used for reaching definitions.  */
 497 static sbitmap u_bitmap;
 498
 499 /* Each block has a bitmap of each type.
 500    The length of each blocks bitmap is:
 501
 502        max_cuid  - for reaching definitions
 503        n_exprs - for available expressions
 504
 505    Thus we view the bitmaps as 2 dimensional arrays.  i.e.
 506    rd_kill[block_num][cuid_num]
 507    ae_kill[block_num][expr_num]                  */
 508
 509 /* For reaching defs */
 510 static sbitmap *rd_kill, *rd_gen, *reaching_defs, *rd_out;
 511
 512 /* for available exprs */
 513 static sbitmap *ae_kill, *ae_gen, *ae_in, *ae_out;
 514
 515 /* Objects of this type are passed around by the null-pointer check
 516    removal routines.  */
 517 struct null_pointer_info
 518 {
 519   /* The basic block being processed.  */
 520   int current_block;
 521   /* The first register to be handled in this pass.  */
 522   int min_reg;
 523   /* One greater than the last register to be handled in this pass.  */
 524   int max_reg;
 525   sbitmap *nonnull_local;
 526   sbitmap *nonnull_killed;
 527 };
 528 \f
 529 static void compute_can_copy    PARAMS ((void));
 530 static char *gmalloc            PARAMS ((unsigned int));
 531 static char *grealloc           PARAMS ((char *, unsigned int));
 532 static char *gcse_alloc         PARAMS ((unsigned long));
 533 static void alloc_gcse_mem      PARAMS ((rtx));
 534 static void free_gcse_mem       PARAMS ((void));
 535 static void alloc_reg_set_mem   PARAMS ((int));
 536 static void free_reg_set_mem    PARAMS ((void));
 537 static int get_bitmap_width     PARAMS ((int, int, int));
 538 static void record_one_set      PARAMS ((int, rtx));
 539 static void record_set_info     PARAMS ((rtx, rtx, void *));
 540 static void compute_sets        PARAMS ((rtx));
 541 static void hash_scan_insn      PARAMS ((rtx, int, int));
 542 static void hash_scan_set       PARAMS ((rtx, rtx, int));
 543 static void hash_scan_clobber   PARAMS ((rtx, rtx));
 544 static void hash_scan_call      PARAMS ((rtx, rtx));
 545 static int want_to_gcse_p       PARAMS ((rtx));
 546 static int oprs_unchanged_p     PARAMS ((rtx, rtx, int));
 547 static int oprs_anticipatable_p PARAMS ((rtx, rtx));
 548 static int oprs_available_p     PARAMS ((rtx, rtx));
 549 static void insert_expr_in_table PARAMS ((rtx, enum machine_mode, rtx,
 550                                           int, int));
 551 static void insert_set_in_table PARAMS ((rtx, rtx));
 552 static unsigned int hash_expr   PARAMS ((rtx, enum machine_mode, int *, int));
 553 static unsigned int hash_expr_1 PARAMS ((rtx, enum machine_mode, int *));
 554 static unsigned int hash_set    PARAMS ((int, int));
 555 static int expr_equiv_p         PARAMS ((rtx, rtx));
 556 static void record_last_reg_set_info PARAMS ((rtx, int));
 557 static void record_last_mem_set_info PARAMS ((rtx));
 558 static void record_last_set_info PARAMS ((rtx, rtx, void *));
 559 static void compute_hash_table  PARAMS ((int));
 560 static void alloc_set_hash_table PARAMS ((int));
 561 static void free_set_hash_table PARAMS ((void));
 562 static void compute_set_hash_table PARAMS ((void));
 563 static void alloc_expr_hash_table PARAMS ((int));
 564 static void free_expr_hash_table PARAMS ((void));
 565 static void compute_expr_hash_table PARAMS ((void));
 566 static void dump_hash_table     PARAMS ((FILE *, const char *, struct expr **,
 567                                          int, int));
 568 static struct expr *lookup_expr PARAMS ((rtx));
 569 static struct expr *lookup_set  PARAMS ((int, rtx));
 570 static struct expr *next_set    PARAMS ((int, struct expr *));
 571 static void reset_opr_set_tables PARAMS ((void));
 572 static int oprs_not_set_p       PARAMS ((rtx, rtx));
 573 static void mark_call           PARAMS ((rtx));
 574 static void mark_set            PARAMS ((rtx, rtx));
 575 static void mark_clobber        PARAMS ((rtx, rtx));
 576 static void mark_oprs_set       PARAMS ((rtx));
 577 static void alloc_cprop_mem     PARAMS ((int, int));
 578 static void free_cprop_mem      PARAMS ((void));
 579 static void compute_transp      PARAMS ((rtx, int, sbitmap *, int));
 580 static void compute_transpout   PARAMS ((void));
 581 static void compute_local_properties PARAMS ((sbitmap *, sbitmap *, sbitmap *,
 582                                               int));
 583 static void compute_cprop_data  PARAMS ((void));
 584 static void find_used_regs      PARAMS ((rtx));
 585 static int try_replace_reg      PARAMS ((rtx, rtx, rtx));
 586 static struct expr *find_avail_set PARAMS ((int, rtx));
 587 static int cprop_jump           PARAMS ((rtx, rtx, struct reg_use *, rtx));
 588 #ifdef HAVE_cc0
 589 static int cprop_cc0_jump       PARAMS ((rtx, struct reg_use *, rtx));
 590 #endif
 591 static int cprop_insn           PARAMS ((rtx, int));
 592 static int cprop                PARAMS ((int));
 593 static int one_cprop_pass       PARAMS ((int, int));
 594 static void alloc_pre_mem       PARAMS ((int, int));
 595 static void free_pre_mem        PARAMS ((void));
 596 static void compute_pre_data    PARAMS ((void));
 597 static int pre_expr_reaches_here_p PARAMS ((int, struct expr *, int));
 598 static void insert_insn_end_bb  PARAMS ((struct expr *, int, int));
 599 static void pre_insert_copy_insn PARAMS ((struct expr *, rtx));
 600 static void pre_insert_copies   PARAMS ((void));
 601 static int pre_delete           PARAMS ((void));
 602 static int pre_gcse             PARAMS ((void));
 603 static int one_pre_gcse_pass    PARAMS ((int));
 604 static void add_label_notes     PARAMS ((rtx, rtx));
 605 static void alloc_code_hoist_mem PARAMS ((int, int));
 606 static void free_code_hoist_mem PARAMS ((void));
 607 static void compute_code_hoist_vbeinout PARAMS ((void));
 608 static void compute_code_hoist_data PARAMS ((void));
 609 static int hoist_expr_reaches_here_p PARAMS ((int, int, int, char *));
 610 static void hoist_code          PARAMS ((void));
 611 static int one_code_hoisting_pass PARAMS ((void));
 612 static void alloc_rd_mem        PARAMS ((int, int));
 613 static void free_rd_mem         PARAMS ((void));
 614 static void handle_rd_kill_set  PARAMS ((rtx, int, int));
 615 static void compute_kill_rd     PARAMS ((void));
 616 static void compute_rd          PARAMS ((void));
 617 static void alloc_avail_expr_mem PARAMS ((int, int));
 618 static void free_avail_expr_mem PARAMS ((void));
 619 static void compute_ae_gen      PARAMS ((void));
 620 static int expr_killed_p        PARAMS ((rtx, int));
 621 static void compute_ae_kill     PARAMS ((sbitmap *, sbitmap *));
 622 static int expr_reaches_here_p  PARAMS ((struct occr *, struct expr *,
 623                                          int, int));
 624 static rtx computing_insn       PARAMS ((struct expr *, rtx));
 625 static int def_reaches_here_p   PARAMS ((rtx, rtx));
 626 static int can_disregard_other_sets PARAMS ((struct reg_set **, rtx, int));
 627 static int handle_avail_expr    PARAMS ((rtx, struct expr *));
 628 static int classic_gcse         PARAMS ((void));
 629 static int one_classic_gcse_pass PARAMS ((int));
 630 static void invalidate_nonnull_info PARAMS ((rtx, rtx, void *));
 631 static void delete_null_pointer_checks_1 PARAMS ((int *, sbitmap *, sbitmap *,
 632                                                   struct null_pointer_info *));
 633 static rtx process_insert_insn  PARAMS ((struct expr *));
 634 static int pre_edge_insert      PARAMS ((struct edge_list *, struct expr **));
 635 static int expr_reaches_here_p_work PARAMS ((struct occr *, struct expr *,
 636                                              int, int, char *));
 637 static int pre_expr_reaches_here_p_work PARAMS ((int, struct expr *,
 638                                                  int, char *));
 639 \f
 640 /* Entry point for global common subexpression elimination.
 641    F is the first instruction in the function.  */
 642
 643 int
 644 gcse_main (f, file)
 645      rtx f;
 646      FILE *file;
 647 {
 648   int changed, pass;
 649   /* Bytes used at start of pass.  */
 650   int initial_bytes_used;
 651   /* Maximum number of bytes used by a pass.  */
 652   int max_pass_bytes;
 653   /* Point to release obstack data from for each pass.  */
 654   char *gcse_obstack_bottom;
 655
 656   /* We do not construct an accurate cfg in functions which call
 657      setjmp, so just punt to be safe.  */
 658   if (current_function_calls_setjmp)
 659     return 0;
 660
 661   /* Assume that we do not need to run jump optimizations after gcse.  */
 662   run_jump_opt_after_gcse = 0;
 663
 664   /* For calling dump_foo fns from gdb.  */
 665   debug_stderr = stderr;
 666   gcse_file = file;
 667
 668   /* Identify the basic block information for this function, including
 669      successors and predecessors.  */
 670   max_gcse_regno = max_reg_num ();
 671   find_basic_blocks (f, max_gcse_regno, file);
 672   cleanup_cfg (f);
 673
 674   if (file)
 675     dump_flow_info (file);
 676
 677   /* Return if there's nothing to do.  */
 678   if (n_basic_blocks <= 1)
 679     {
 680       free_basic_block_vars (0);
 681       return 0;
 682     }
 683
 684   /* Trying to perform global optimizations on flow graphs which have
 685      a high connectivity will take a long time and is unlikely to be
 686      particularly useful.
 687
 688      In normal circumstances a cfg should have about twice has many edges
 689      as blocks.  But we do not want to punish small functions which have
 690      a couple switch statements.  So we require a relatively large number
 691      of basic blocks and the ratio of edges to blocks to be high.  */
 692   if (n_basic_blocks > 1000 && n_edges / n_basic_blocks >= 20)
 693     {
 694       free_basic_block_vars (0);
 695       return 0;
 696     }
 697
 698   /* See what modes support reg/reg copy operations.  */
 699   if (! can_copy_init_p)
 700     {
 701       compute_can_copy ();
 702       can_copy_init_p = 1;
 703     }
 704
 705   gcc_obstack_init (&gcse_obstack);
 706   bytes_used = 0;
 707
 708   /* Record where pseudo-registers are set.  This data is kept accurate
 709      during each pass.  ??? We could also record hard-reg information here
 710      [since it's unchanging], however it is currently done during hash table
 711      computation.
 712
 713      It may be tempting to compute MEM set information here too, but MEM sets
 714      will be subject to code motion one day and thus we need to compute
 715      information about memory sets when we build the hash tables.  */
 716
 717   alloc_reg_set_mem (max_gcse_regno);
 718   compute_sets (f);
 719
 720   pass = 0;
 721   initial_bytes_used = bytes_used;
 722   max_pass_bytes = 0;
 723   gcse_obstack_bottom = gcse_alloc (1);
 724   changed = 1;
 725   while (changed && pass < MAX_PASSES)
 726     {
 727       changed = 0;
 728       if (file)
 729         fprintf (file, "GCSE pass %d\n\n", pass + 1);
 730
 731       /* Initialize bytes_used to the space for the pred/succ lists,
 732          and the reg_set_table data.  */
 733       bytes_used = initial_bytes_used;
 734
 735       /* Each pass may create new registers, so recalculate each time.  */
 736       max_gcse_regno = max_reg_num ();
 737
 738       alloc_gcse_mem (f);
 739
 740       /* Don't allow constant propagation to modify jumps
 741          during this pass.  */
 742       changed = one_cprop_pass (pass + 1, 0);
 743
 744       if (optimize_size)
 745         changed |= one_classic_gcse_pass (pass + 1);
 746       else
 747         {
 748           changed |= one_pre_gcse_pass (pass + 1);
 749           free_reg_set_mem ();
 750           alloc_reg_set_mem (max_reg_num ());
 751           compute_sets (f);
 752           run_jump_opt_after_gcse = 1;
 753         }
 754
 755       if (max_pass_bytes < bytes_used)
 756         max_pass_bytes = bytes_used;
 757
 758       /* Free up memory, then reallocate for code hoisting.  We can
 759          not re-use the existing allocated memory because the tables
 760          will not have info for the insns or registers created by
 761          partial redundancy elimination.  */
 762       free_gcse_mem ();
 763
 764       /* It does not make sense to run code hoisting unless we optimizing
 765          for code size -- it rarely makes programs faster, and can make
 766          them bigger if we did partial redundancy elimination (when optimizing
 767          for space, we use a classic gcse algorithm instead of partial
 768          redundancy algorithms).  */
 769       if (optimize_size)
 770         {
 771           max_gcse_regno = max_reg_num ();
 772           alloc_gcse_mem (f);
 773           changed |= one_code_hoisting_pass ();
 774           free_gcse_mem ();
 775
 776           if (max_pass_bytes < bytes_used)
 777             max_pass_bytes = bytes_used;
 778         }
 779
 780       if (file)
 781         {
 782           fprintf (file, "\n");
 783           fflush (file);
 784         }
 785
 786       obstack_free (&gcse_obstack, gcse_obstack_bottom);
 787       pass++;
 788     }
 789
 790   /* Do one last pass of copy propagation, including cprop into
 791      conditional jumps.  */
 792
 793   max_gcse_regno = max_reg_num ();
 794   alloc_gcse_mem (f);
 795   /* This time, go ahead and allow cprop to alter jumps.  */
 796   one_cprop_pass (pass + 1, 1);
 797   free_gcse_mem ();
 798
 799   if (file)
 800     {
 801       fprintf (file, "GCSE of %s: %d basic blocks, ",
 802                current_function_name, n_basic_blocks);
 803       fprintf (file, "%d pass%s, %d bytes\n\n",
 804                pass, pass > 1 ? "es" : "", max_pass_bytes);
 805     }
 806
 807   obstack_free (&gcse_obstack, NULL_PTR);
 808   free_reg_set_mem ();
 809   free_basic_block_vars (0);
 810   return run_jump_opt_after_gcse;
 811 }
 812 \f
 813 /* Misc. utilities.  */
 814
 815 /* Compute which modes support reg/reg copy operations.  */
 816
 817 static void
 818 compute_can_copy ()
 819 {
 820   int i;
 821 #ifndef AVOID_CCMODE_COPIES
 822   rtx reg,insn;
 823 #endif
 824   char *free_point = (char *) oballoc (1);
 825
 826   bzero (can_copy_p, NUM_MACHINE_MODES);
 827
 828   start_sequence ();
 829   for (i = 0; i < NUM_MACHINE_MODES; i++)
 830     if (GET_MODE_CLASS (i) == MODE_CC)
 831       {
 832 #ifdef AVOID_CCMODE_COPIES
 833         can_copy_p[i] = 0;
 834 #else
 835         reg = gen_rtx_REG ((enum machine_mode) i, LAST_VIRTUAL_REGISTER + 1);
 836         insn = emit_insn (gen_rtx_SET (VOIDmode, reg, reg));
 837         if (recog (PATTERN (insn), insn, NULL_PTR) >= 0)
 838           can_copy_p[i] = 1;
 839 #endif
 840       }
 841     else
 842       can_copy_p[i] = 1;
 843
 844   end_sequence ();
 845
 846   /* Free the objects we just allocated.  */
 847   obfree (free_point);
 848 }
 849 \f
 850 /* Cover function to xmalloc to record bytes allocated.  */
 851
 852 static char *
 853 gmalloc (size)
 854      unsigned int size;
 855 {
 856   bytes_used += size;
 857   return xmalloc (size);
 858 }
 859
 860 /* Cover function to xrealloc.
 861    We don't record the additional size since we don't know it.
 862    It won't affect memory usage stats much anyway.  */
 863
 864 static char *
 865 grealloc (ptr, size)
 866      char *ptr;
 867      unsigned int size;
 868 {
 869   return xrealloc (ptr, size);
 870 }
 871
 872 /* Cover function to obstack_alloc.
 873    We don't need to record the bytes allocated here since
 874    obstack_chunk_alloc is set to gmalloc.  */
 875
 876 static char *
 877 gcse_alloc (size)
 878      unsigned long size;
 879 {
 880   return (char *) obstack_alloc (&gcse_obstack, size);
 881 }
 882
 883 /* Allocate memory for the cuid mapping array,
 884    and reg/memory set tracking tables.
 885
 886    This is called at the start of each pass.  */
 887
 888 static void
 889 alloc_gcse_mem (f)
 890      rtx f;
 891 {
 892   int i,n;
 893   rtx insn;
 894
 895   /* Find the largest UID and create a mapping from UIDs to CUIDs.
 896      CUIDs are like UIDs except they increase monotonically, have no gaps,
 897      and only apply to real insns.  */
 898
 899   max_uid = get_max_uid ();
 900   n = (max_uid + 1) * sizeof (int);
 901   uid_cuid = (int *) gmalloc (n);
 902   bzero ((char *) uid_cuid, n);
 903   for (insn = f, i = 0; insn; insn = NEXT_INSN (insn))
 904     {
 905       if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
 906         INSN_CUID (insn) = i++;
 907       else
 908         INSN_CUID (insn) = i;
 909     }
 910
 911   /* Create a table mapping cuids to insns.  */
 912
 913   max_cuid = i;
 914   n = (max_cuid + 1) * sizeof (rtx);
 915   cuid_insn = (rtx *) gmalloc (n);
 916   bzero ((char *) cuid_insn, n);
 917   for (insn = f, i = 0; insn; insn = NEXT_INSN (insn))
 918     if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
 919       CUID_INSN (i++) = insn;
 920
 921   /* Allocate vars to track sets of regs.  */
 922   reg_set_bitmap = (sbitmap) sbitmap_alloc (max_gcse_regno);
 923
 924   /* Allocate vars to track sets of regs, memory per block.  */
 925   reg_set_in_block = (sbitmap *) sbitmap_vector_alloc (n_basic_blocks,
 926                                                        max_gcse_regno);
 927   mem_set_in_block = (char *) gmalloc (n_basic_blocks);
 928 }
 929
 930 /* Free memory allocated by alloc_gcse_mem.  */
 931
 932 static void
 933 free_gcse_mem ()
 934 {
 935   free (uid_cuid);
 936   free (cuid_insn);
 937
 938   free (reg_set_bitmap);
 939
 940   free (reg_set_in_block);
 941   free (mem_set_in_block);
 942 }
 943
 944 /* Many of the global optimization algorithms work by solving dataflow
 945    equations for various expressions.  Initially, some local value is
 946    computed for each expression in each block.  Then, the values across the
 947    various blocks are combined (by following flow graph edges) to arrive at
 948    global values.  Conceptually, each set of equations is independent.  We
 949    may therefore solve all the equations in parallel, solve them one at a
 950    time, or pick any intermediate approach.
 951
 952    When you're going to need N two-dimensional bitmaps, each X (say, the
 953    number of blocks) by Y (say, the number of expressions), call this
 954    function.  It's not important what X and Y represent; only that Y
 955    correspond to the things that can be done in parallel.  This function will
 956    return an appropriate chunking factor C; you should solve C sets of
 957    equations in parallel.  By going through this function, we can easily
 958    trade space against time; by solving fewer equations in parallel we use
 959    less space.  */
 960
 961 static int
 962 get_bitmap_width (n, x, y)
 963      int n;
 964      int x;
 965      int y;
 966 {
 967   /* It's not really worth figuring out *exactly* how much memory will
 968      be used by a particular choice.  The important thing is to get
 969      something approximately right.  */
 970   size_t max_bitmap_memory = 10 * 1024 * 1024;
 971
 972   /* The number of bytes we'd use for a single column of minimum
 973      width.  */
 974   size_t column_size = n * x * sizeof (SBITMAP_ELT_TYPE);
 975
 976   /* Often, it's reasonable just to solve all the equations in
 977      parallel.  */
 978   if (column_size * SBITMAP_SET_SIZE (y) <= max_bitmap_memory)
 979     return y;
 980
 981   /* Otherwise, pick the largest width we can, without going over the
 982      limit.  */
 983   return SBITMAP_ELT_BITS * ((max_bitmap_memory + column_size - 1)
 984                              / column_size);
 985 }
 986 \f
 987 /* Compute the local properties of each recorded expression.
 988
 989    Local properties are those that are defined by the block, irrespective of
 990    other blocks.
 991
 992    An expression is transparent in a block if its operands are not modified
 993    in the block.
 994
 995    An expression is computed (locally available) in a block if it is computed
 996    at least once and expression would contain the same value if the
 997    computation was moved to the end of the block.
 998
 999    An expression is locally anticipatable in a block if it is computed at
1000    least once and expression would contain the same value if the computation
1001    was moved to the beginning of the block.
1002
1003    We call this routine for cprop, pre and code hoisting.  They all compute
1004    basically the same information and thus can easily share this code.
1005
1006    TRANSP, COMP, and ANTLOC are destination sbitmaps for recording local
1007    properties.  If NULL, then it is not necessary to compute or record that
1008    particular property.
1009
1010    SETP controls which hash table to look at.  If zero, this routine looks at
1011    the expr hash table; if nonzero this routine looks at the set hash table.
1012    Additionally, TRANSP is computed as ~TRANSP, since this is really cprop's
1013    ABSALTERED.  */
1014
1015 static void
1016 compute_local_properties (transp, comp, antloc, setp)
1017      sbitmap *transp;
1018      sbitmap *comp;
1019      sbitmap *antloc;
1020      int setp;
1021 {
1022   int i, hash_table_size;
1023   struct expr **hash_table;
1024
1025   /* Initialize any bitmaps that were passed in.  */
1026   if (transp)
1027     {
1028       if (setp)
1029         sbitmap_vector_zero (transp, n_basic_blocks);
1030       else
1031         sbitmap_vector_ones (transp, n_basic_blocks);
1032     }
1033
1034   if (comp)
1035     sbitmap_vector_zero (comp, n_basic_blocks);
1036   if (antloc)
1037     sbitmap_vector_zero (antloc, n_basic_blocks);
1038
1039   /* We use the same code for cprop, pre and hoisting.  For cprop
1040      we care about the set hash table, for pre and hoisting we
1041      care about the expr hash table.  */
1042   hash_table_size = setp ? set_hash_table_size : expr_hash_table_size;
1043   hash_table = setp ? set_hash_table : expr_hash_table;
1044
1045   for (i = 0; i < hash_table_size; i++)
1046     {
1047       struct expr *expr;
1048
1049       for (expr = hash_table[i]; expr != NULL; expr = expr->next_same_hash)
1050         {
1051           int indx = expr->bitmap_index;
1052           struct occr *occr;
1053
1054           /* The expression is transparent in this block if it is not killed.
1055              We start by assuming all are transparent [none are killed], and
1056              then reset the bits for those that are.  */
1057           if (transp)
1058             compute_transp (expr->expr, indx, transp, setp);
1059
1060           /* The occurrences recorded in antic_occr are exactly those that
1061              we want to set to non-zero in ANTLOC.  */
1062           if (antloc)
1063             for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
1064               {
1065                 SET_BIT (antloc[BLOCK_NUM (occr->insn)], indx);
1066
1067                 /* While we're scanning the table, this is a good place to
1068                    initialize this.  */
1069                 occr->deleted_p = 0;
1070               }
1071
1072           /* The occurrences recorded in avail_occr are exactly those that
1073              we want to set to non-zero in COMP.  */
1074           if (comp)
1075             for (occr = expr->avail_occr; occr != NULL; occr = occr->next)
1076               {
1077                 SET_BIT (comp[BLOCK_NUM (occr->insn)], indx);
1078
1079                 /* While we're scanning the table, this is a good place to
1080                    initialize this.  */
1081                 occr->copied_p = 0;
1082               }
1083
1084           /* While we're scanning the table, this is a good place to
1085              initialize this.  */
1086           expr->reaching_reg = 0;
1087         }
1088     }
1089 }
1090 \f
1091 /* Register set information.
1092
1093    `reg_set_table' records where each register is set or otherwise
1094    modified.  */
1095
1096 static struct obstack reg_set_obstack;
1097
1098 static void
1099 alloc_reg_set_mem (n_regs)
1100      int n_regs;
1101 {
1102   unsigned int n;
1103
1104   reg_set_table_size = n_regs + REG_SET_TABLE_SLOP;
1105   n = reg_set_table_size * sizeof (struct reg_set *);
1106   reg_set_table = (struct reg_set **) gmalloc (n);
1107   bzero ((char *) reg_set_table, n);
1108
1109   gcc_obstack_init (&reg_set_obstack);
1110 }
1111
1112 static void
1113 free_reg_set_mem ()
1114 {
1115   free (reg_set_table);
1116   obstack_free (&reg_set_obstack, NULL_PTR);
1117 }
1118
1119 /* Record REGNO in the reg_set table.  */
1120
1121 static void
1122 record_one_set (regno, insn)
1123      int regno;
1124      rtx insn;
1125 {
1126   /* allocate a new reg_set element and link it onto the list */
1127   struct reg_set *new_reg_info, *reg_info_ptr1, *reg_info_ptr2;
1128
1129   /* If the table isn't big enough, enlarge it.  */
1130   if (regno >= reg_set_table_size)
1131     {
1132       int new_size = regno + REG_SET_TABLE_SLOP;
1133
1134       reg_set_table
1135         = (struct reg_set **) grealloc ((char *) reg_set_table,
1136                                         new_size * sizeof (struct reg_set *));
1137       bzero ((char *) (reg_set_table + reg_set_table_size),
1138              (new_size - reg_set_table_size) * sizeof (struct reg_set *));
1139       reg_set_table_size = new_size;
1140     }
1141
1142   new_reg_info = (struct reg_set *) obstack_alloc (&reg_set_obstack,
1143                                                    sizeof (struct reg_set));
1144   bytes_used += sizeof (struct reg_set);
1145   new_reg_info->insn = insn;
1146   new_reg_info->next = NULL;
1147   if (reg_set_table[regno] == NULL)
1148     reg_set_table[regno] = new_reg_info;
1149   else
1150     {
1151       reg_info_ptr1 = reg_info_ptr2 = reg_set_table[regno];
1152       /* ??? One could keep a "last" pointer to speed this up.  */
1153       while (reg_info_ptr1 != NULL)
1154         {
1155           reg_info_ptr2 = reg_info_ptr1;
1156           reg_info_ptr1 = reg_info_ptr1->next;
1157         }
1158
1159       reg_info_ptr2->next = new_reg_info;
1160     }
1161 }
1162
1163 /* Called from compute_sets via note_stores to handle one SET or CLOBBER in
1164    an insn.  The DATA is really the instruction in which the SET is
1165    occurring.  */
1166
1167 static void
1168 record_set_info (dest, setter, data)
1169      rtx dest, setter ATTRIBUTE_UNUSED;
1170      void *data;
1171 {
1172   rtx record_set_insn = (rtx) data;
1173
1174   if (GET_CODE (dest) == REG && REGNO (dest) >= FIRST_PSEUDO_REGISTER)
1175     record_one_set (REGNO (dest), record_set_insn);
1176 }
1177
1178 /* Scan the function and record each set of each pseudo-register.
1179
1180    This is called once, at the start of the gcse pass.  See the comments for
1181    `reg_set_table' for further documenation.  */
1182
1183 static void
1184 compute_sets (f)
1185      rtx f;
1186 {
1187   rtx insn;
1188
1189   for (insn = f; insn != 0; insn = NEXT_INSN (insn))
1190     if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
1191       note_stores (PATTERN (insn), record_set_info, insn);
1192 }
1193 \f
1194 /* Hash table support.  */
1195
1196 /* For each register, the cuid of the first/last insn in the block to set it,
1197    or -1 if not set.  */
1198 #define NEVER_SET -1
1199 static int *reg_first_set;
1200 static int *reg_last_set;
1201
1202 /* While computing "first/last set" info, this is the CUID of first/last insn
1203    to set memory or -1 if not set.  `mem_last_set' is also used when
1204    performing GCSE to record whether memory has been set since the beginning
1205    of the block.
1206
1207    Note that handling of memory is very simple, we don't make any attempt
1208    to optimize things (later).  */
1209 static int mem_first_set;
1210 static int mem_last_set;
1211
1212 /* Perform a quick check whether X, the source of a set, is something
1213    we want to consider for GCSE.  */
1214
1215 static int
1216 want_to_gcse_p (x)
1217      rtx x;
1218 {
1219   switch (GET_CODE (x))
1220     {
1221     case REG:
1222     case SUBREG:
1223     case CONST_INT:
1224     case CONST_DOUBLE:
1225     case CALL:
1226       return 0;
1227
1228     default:
1229       break;
1230     }
1231
1232   return 1;
1233 }
1234
1235 /* Return non-zero if the operands of expression X are unchanged from the
1236    start of INSN's basic block up to but not including INSN (if AVAIL_P == 0),
1237    or from INSN to the end of INSN's basic block (if AVAIL_P != 0).  */
1238
1239 static int
1240 oprs_unchanged_p (x, insn, avail_p)
1241      rtx x, insn;
1242      int avail_p;
1243 {
1244   int i, j;
1245   enum rtx_code code;
1246   const char *fmt;
1247
1248   if (x == 0)
1249     return 1;
1250
1251   code = GET_CODE (x);
1252   switch (code)
1253     {
1254     case REG:
1255       if (avail_p)
1256         return (reg_last_set[REGNO (x)] == NEVER_SET
1257                 || reg_last_set[REGNO (x)] < INSN_CUID (insn));
1258       else
1259         return (reg_first_set[REGNO (x)] == NEVER_SET
1260                 || reg_first_set[REGNO (x)] >= INSN_CUID (insn));
1261
1262     case MEM:
1263       if (avail_p && mem_last_set != NEVER_SET
1264           && mem_last_set >= INSN_CUID (insn))
1265         return 0;
1266       else if (! avail_p && mem_first_set != NEVER_SET
1267                && mem_first_set < INSN_CUID (insn))
1268         return 0;
1269       else
1270         return oprs_unchanged_p (XEXP (x, 0), insn, avail_p);
1271
1272     case PRE_DEC:
1273     case PRE_INC:
1274     case POST_DEC:
1275     case POST_INC:
1276       return 0;
1277
1278     case PC:
1279     case CC0: /*FIXME*/
1280     case CONST:
1281     case CONST_INT:
1282     case CONST_DOUBLE:
1283     case SYMBOL_REF:
1284     case LABEL_REF:
1285     case ADDR_VEC:
1286     case ADDR_DIFF_VEC:
1287       return 1;
1288
1289     default:
1290       break;
1291     }
1292
1293   for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
1294     {
1295       if (fmt[i] == 'e')
1296         {
1297           /* If we are about to do the last recursive call needed at this
1298              level, change it into iteration.  This function is called enough
1299              to be worth it.  */
1300           if (i == 0)
1301             return oprs_unchanged_p (XEXP (x, i), insn, avail_p);
1302
1303           else if (! oprs_unchanged_p (XEXP (x, i), insn, avail_p))
1304             return 0;
1305         }
1306       else if (fmt[i] == 'E')
1307         for (j = 0; j < XVECLEN (x, i); j++)
1308           if (! oprs_unchanged_p (XVECEXP (x, i, j), insn, avail_p))
1309             return 0;
1310     }
1311
1312   return 1;
1313 }
1314
1315 /* Return non-zero if the operands of expression X are unchanged from
1316    the start of INSN's basic block up to but not including INSN.  */
1317
1318 static int
1319 oprs_anticipatable_p (x, insn)
1320      rtx x, insn;
1321 {
1322   return oprs_unchanged_p (x, insn, 0);
1323 }
1324
1325 /* Return non-zero if the operands of expression X are unchanged from
1326    INSN to the end of INSN's basic block.  */
1327
1328 static int
1329 oprs_available_p (x, insn)
1330      rtx x, insn;
1331 {
1332   return oprs_unchanged_p (x, insn, 1);
1333 }
1334
1335 /* Hash expression X.
1336
1337    MODE is only used if X is a CONST_INT.  DO_NOT_RECORD_P is a boolean
1338    indicating if a volatile operand is found or if the expression contains
1339    something we don't want to insert in the table.
1340
1341    ??? One might want to merge this with canon_hash.  Later.  */
1342
1343 static unsigned int
1344 hash_expr (x, mode, do_not_record_p, hash_table_size)
1345      rtx x;
1346      enum machine_mode mode;
1347      int *do_not_record_p;
1348      int hash_table_size;
1349 {
1350   unsigned int hash;
1351
1352   *do_not_record_p = 0;
1353
1354   hash = hash_expr_1 (x, mode, do_not_record_p);
1355   return hash % hash_table_size;
1356 }
1357
1358 /* Subroutine of hash_expr to do the actual work.  */
1359
1360 static unsigned int
1361 hash_expr_1 (x, mode, do_not_record_p)
1362      rtx x;
1363      enum machine_mode mode;
1364      int *do_not_record_p;
1365 {
1366   int i, j;
1367   unsigned hash = 0;
1368   enum rtx_code code;
1369   const char *fmt;
1370
1371   /* Used to turn recursion into iteration.  We can't rely on GCC's
1372      tail-recursion eliminatio since we need to keep accumulating values
1373      in HASH.  */
1374
1375   if (x == 0)
1376     return hash;
1377
1378  repeat:
1379   code = GET_CODE (x);
1380   switch (code)
1381     {
1382     case REG:
1383       hash += ((unsigned int) REG << 7) + REGNO (x);
1384       return hash;
1385
1386     case CONST_INT:
1387       hash += (((unsigned int) CONST_INT << 7) + (unsigned int) mode
1388                + (unsigned int) INTVAL (x));
1389       return hash;
1390
1391     case CONST_DOUBLE:
1392       /* This is like the general case, except that it only counts
1393          the integers representing the constant.  */
1394       hash += (unsigned int) code + (unsigned int) GET_MODE (x);
1395       if (GET_MODE (x) != VOIDmode)
1396         for (i = 2; i < GET_RTX_LENGTH (CONST_DOUBLE); i++)
1397           hash += (unsigned int) XWINT (x, i);
1398       else
1399         hash += ((unsigned int) CONST_DOUBLE_LOW (x)
1400                  + (unsigned int) CONST_DOUBLE_HIGH (x));
1401       return hash;
1402
1403       /* Assume there is only one rtx object for any given label.  */
1404     case LABEL_REF:
1405       /* We don't hash on the address of the CODE_LABEL to avoid bootstrap
1406          differences and differences between each stage's debugging dumps.  */
1407       hash += (((unsigned int) LABEL_REF << 7)
1408                + CODE_LABEL_NUMBER (XEXP (x, 0)));
1409       return hash;
1410
1411     case SYMBOL_REF:
1412       {
1413         /* Don't hash on the symbol's address to avoid bootstrap differences.
1414            Different hash values may cause expressions to be recorded in
1415            different orders and thus different registers to be used in the
1416            final assembler.  This also avoids differences in the dump files
1417            between various stages.  */
1418         unsigned int h = 0;
1419         const unsigned char *p = (const unsigned char *) XSTR (x, 0);
1420
1421         while (*p)
1422           h += (h << 7) + *p++; /* ??? revisit */
1423
1424         hash += ((unsigned int) SYMBOL_REF << 7) + h;
1425         return hash;
1426       }
1427
1428     case MEM:
1429       if (MEM_VOLATILE_P (x))
1430         {
1431           *do_not_record_p = 1;
1432           return 0;
1433         }
1434
1435       hash += (unsigned int) MEM;
1436       hash += MEM_ALIAS_SET (x);
1437       x = XEXP (x, 0);
1438       goto repeat;
1439
1440     case PRE_DEC:
1441     case PRE_INC:
1442     case POST_DEC:
1443     case POST_INC:
1444     case PC:
1445     case CC0:
1446     case CALL:
1447     case UNSPEC_VOLATILE:
1448       *do_not_record_p = 1;
1449       return 0;
1450
1451     case ASM_OPERANDS:
1452       if (MEM_VOLATILE_P (x))
1453         {
1454           *do_not_record_p = 1;
1455           return 0;
1456         }
1457
1458     default:
1459       break;
1460     }
1461
1462   hash += (unsigned) code + (unsigned) GET_MODE (x);
1463   for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
1464     {
1465       if (fmt[i] == 'e')
1466         {
1467           /* If we are about to do the last recursive call
1468              needed at this level, change it into iteration.
1469              This function is called enough to be worth it.  */
1470           if (i == 0)
1471             {
1472               x = XEXP (x, i);
1473               goto repeat;
1474             }
1475
1476           hash += hash_expr_1 (XEXP (x, i), 0, do_not_record_p);
1477           if (*do_not_record_p)
1478             return 0;
1479         }
1480
1481       else if (fmt[i] == 'E')
1482         for (j = 0; j < XVECLEN (x, i); j++)
1483           {
1484             hash += hash_expr_1 (XVECEXP (x, i, j), 0, do_not_record_p);
1485             if (*do_not_record_p)
1486               return 0;
1487           }
1488
1489       else if (fmt[i] == 's')
1490         {
1491           register const unsigned char *p =
1492             (const unsigned char *) XSTR (x, i);
1493
1494           if (p)
1495             while (*p)
1496               hash += *p++;
1497         }
1498       else if (fmt[i] == 'i')
1499         hash += (unsigned int) XINT (x, i);
1500       else
1501         abort ();
1502     }
1503
1504   return hash;
1505 }
1506
1507 /* Hash a set of register REGNO.
1508
1509    Sets are hashed on the register that is set.  This simplifies the PRE copy
1510    propagation code.
1511
1512    ??? May need to make things more elaborate.  Later, as necessary.  */
1513
1514 static unsigned int
1515 hash_set (regno, hash_table_size)
1516      int regno;
1517      int hash_table_size;
1518 {
1519   unsigned int hash;
1520
1521   hash = regno;
1522   return hash % hash_table_size;
1523 }
1524
1525 /* Return non-zero if exp1 is equivalent to exp2.
1526    ??? Borrowed from cse.c.  Might want to remerge with cse.c.  Later.  */
1527
1528 static int
1529 expr_equiv_p (x, y)
1530      rtx x, y;
1531 {
1532   register int i, j;
1533   register enum rtx_code code;
1534   register const char *fmt;
1535
1536   if (x == y)
1537     return 1;
1538
1539   if (x == 0 || y == 0)
1540     return x == y;
1541
1542   code = GET_CODE (x);
1543   if (code != GET_CODE (y))
1544     return 0;
1545
1546   /* (MULT:SI x y) and (MULT:HI x y) are NOT equivalent.  */
1547   if (GET_MODE (x) != GET_MODE (y))
1548     return 0;
1549
1550   switch (code)
1551     {
1552     case PC:
1553     case CC0:
1554       return x == y;
1555
1556     case CONST_INT:
1557       return INTVAL (x) == INTVAL (y);
1558
1559     case LABEL_REF:
1560       return XEXP (x, 0) == XEXP (y, 0);
1561
1562     case SYMBOL_REF:
1563       return XSTR (x, 0) == XSTR (y, 0);
1564
1565     case REG:
1566       return REGNO (x) == REGNO (y);
1567
1568     case MEM:
1569       /* Can't merge two expressions in different alias sets, since we can
1570          decide that the expression is transparent in a block when it isn't,
1571          due to it being set with the different alias set.  */
1572       if (MEM_ALIAS_SET (x) != MEM_ALIAS_SET (y))
1573         return 0;
1574       break;
1575
1576     /*  For commutative operations, check both orders.  */
1577     case PLUS:
1578     case MULT:
1579     case AND:
1580     case IOR:
1581     case XOR:
1582     case NE:
1583     case EQ:
1584       return ((expr_equiv_p (XEXP (x, 0), XEXP (y, 0))
1585                && expr_equiv_p (XEXP (x, 1), XEXP (y, 1)))
1586               || (expr_equiv_p (XEXP (x, 0), XEXP (y, 1))
1587                   && expr_equiv_p (XEXP (x, 1), XEXP (y, 0))));
1588
1589     default:
1590       break;
1591     }
1592
1593   /* Compare the elements.  If any pair of corresponding elements
1594      fail to match, return 0 for the whole thing.  */
1595
1596   fmt = GET_RTX_FORMAT (code);
1597   for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
1598     {
1599       switch (fmt[i])
1600         {
1601         case 'e':
1602           if (! expr_equiv_p (XEXP (x, i), XEXP (y, i)))
1603             return 0;
1604           break;
1605
1606         case 'E':
1607           if (XVECLEN (x, i) != XVECLEN (y, i))
1608             return 0;
1609           for (j = 0; j < XVECLEN (x, i); j++)
1610             if (! expr_equiv_p (XVECEXP (x, i, j), XVECEXP (y, i, j)))
1611               return 0;
1612           break;
1613
1614         case 's':
1615           if (strcmp (XSTR (x, i), XSTR (y, i)))
1616             return 0;
1617           break;
1618
1619         case 'i':
1620           if (XINT (x, i) != XINT (y, i))
1621             return 0;
1622           break;
1623
1624         case 'w':
1625           if (XWINT (x, i) != XWINT (y, i))
1626             return 0;
1627         break;
1628
1629         case '0':
1630           break;
1631
1632         default:
1633           abort ();
1634         }
1635       }
1636
1637   return 1;
1638 }
1639
1640 /* Insert expression X in INSN in the hash table.
1641    If it is already present, record it as the last occurrence in INSN's
1642    basic block.
1643
1644    MODE is the mode of the value X is being stored into.
1645    It is only used if X is a CONST_INT.
1646
1647    ANTIC_P is non-zero if X is an anticipatable expression.
1648    AVAIL_P is non-zero if X is an available expression.  */
1649
1650 static void
1651 insert_expr_in_table (x, mode, insn, antic_p, avail_p)
1652      rtx x;
1653      enum machine_mode mode;
1654      rtx insn;
1655      int antic_p, avail_p;
1656 {
1657   int found, do_not_record_p;
1658   unsigned int hash;
1659   struct expr *cur_expr, *last_expr = NULL;
1660   struct occr *antic_occr, *avail_occr;
1661   struct occr *last_occr = NULL;
1662
1663   hash = hash_expr (x, mode, &do_not_record_p, expr_hash_table_size);
1664
1665   /* Do not insert expression in table if it contains volatile operands,
1666      or if hash_expr determines the expression is something we don't want
1667      to or can't handle.  */
1668   if (do_not_record_p)
1669     return;
1670
1671   cur_expr = expr_hash_table[hash];
1672   found = 0;
1673
1674   while (cur_expr && 0 == (found = expr_equiv_p (cur_expr->expr, x)))
1675     {
1676       /* If the expression isn't found, save a pointer to the end of
1677          the list.  */
1678       last_expr = cur_expr;
1679       cur_expr = cur_expr->next_same_hash;
1680     }
1681
1682   if (! found)
1683     {
1684       cur_expr = (struct expr *) gcse_alloc (sizeof (struct expr));
1685       bytes_used += sizeof (struct expr);
1686       if (expr_hash_table[hash] == NULL)
1687         /* This is the first pattern that hashed to this index.  */
1688         expr_hash_table[hash] = cur_expr;
1689       else
1690         /* Add EXPR to end of this hash chain.  */
1691         last_expr->next_same_hash = cur_expr;
1692
1693       /* Set the fields of the expr element.  */
1694       cur_expr->expr = x;
1695       cur_expr->bitmap_index = n_exprs++;
1696       cur_expr->next_same_hash = NULL;
1697       cur_expr->antic_occr = NULL;
1698       cur_expr->avail_occr = NULL;
1699     }
1700
1701   /* Now record the occurrence(s).  */
1702   if (antic_p)
1703     {
1704       antic_occr = cur_expr->antic_occr;
1705
1706       /* Search for another occurrence in the same basic block.  */
1707       while (antic_occr && BLOCK_NUM (antic_occr->insn) != BLOCK_NUM (insn))
1708         {
1709           /* If an occurrence isn't found, save a pointer to the end of
1710              the list.  */
1711           last_occr = antic_occr;
1712           antic_occr = antic_occr->next;
1713         }
1714
1715       if (antic_occr)
1716         /* Found another instance of the expression in the same basic block.
1717            Prefer the currently recorded one.  We want the first one in the
1718            block and the block is scanned from start to end.  */
1719         ; /* nothing to do */
1720       else
1721         {
1722           /* First occurrence of this expression in this basic block.  */
1723           antic_occr = (struct occr *) gcse_alloc (sizeof (struct occr));
1724           bytes_used += sizeof (struct occr);
1725           /* First occurrence of this expression in any block?  */
1726           if (cur_expr->antic_occr == NULL)
1727             cur_expr->antic_occr = antic_occr;
1728           else
1729             last_occr->next = antic_occr;
1730
1731           antic_occr->insn = insn;
1732           antic_occr->next = NULL;
1733         }
1734     }
1735
1736   if (avail_p)
1737     {
1738       avail_occr = cur_expr->avail_occr;
1739
1740       /* Search for another occurrence in the same basic block.  */
1741       while (avail_occr && BLOCK_NUM (avail_occr->insn) != BLOCK_NUM (insn))
1742         {
1743           /* If an occurrence isn't found, save a pointer to the end of
1744              the list.  */
1745           last_occr = avail_occr;
1746           avail_occr = avail_occr->next;
1747         }
1748
1749       if (avail_occr)
1750         /* Found another instance of the expression in the same basic block.
1751            Prefer this occurrence to the currently recorded one.  We want
1752            the last one in the block and the block is scanned from start
1753            to end.  */
1754         avail_occr->insn = insn;
1755       else
1756         {
1757           /* First occurrence of this expression in this basic block.  */
1758           avail_occr = (struct occr *) gcse_alloc (sizeof (struct occr));
1759           bytes_used += sizeof (struct occr);
1760
1761           /* First occurrence of this expression in any block?  */
1762           if (cur_expr->avail_occr == NULL)
1763             cur_expr->avail_occr = avail_occr;
1764           else
1765             last_occr->next = avail_occr;
1766
1767           avail_occr->insn = insn;
1768           avail_occr->next = NULL;
1769         }
1770     }
1771 }
1772
1773 /* Insert pattern X in INSN in the hash table.
1774    X is a SET of a reg to either another reg or a constant.
1775    If it is already present, record it as the last occurrence in INSN's
1776    basic block.  */
1777
1778 static void
1779 insert_set_in_table (x, insn)
1780      rtx x;
1781      rtx insn;
1782 {
1783   int found;
1784   unsigned int hash;
1785   struct expr *cur_expr, *last_expr = NULL;
1786   struct occr *cur_occr, *last_occr = NULL;
1787
1788   if (GET_CODE (x) != SET
1789       || GET_CODE (SET_DEST (x)) != REG)
1790     abort ();
1791
1792   hash = hash_set (REGNO (SET_DEST (x)), set_hash_table_size);
1793
1794   cur_expr = set_hash_table[hash];
1795   found = 0;
1796
1797   while (cur_expr && 0 == (found = expr_equiv_p (cur_expr->expr, x)))
1798     {
1799       /* If the expression isn't found, save a pointer to the end of
1800          the list.  */
1801       last_expr = cur_expr;
1802       cur_expr = cur_expr->next_same_hash;
1803     }
1804
1805   if (! found)
1806     {
1807       cur_expr = (struct expr *) gcse_alloc (sizeof (struct expr));
1808       bytes_used += sizeof (struct expr);
1809       if (set_hash_table[hash] == NULL)
1810         /* This is the first pattern that hashed to this index.  */
1811         set_hash_table[hash] = cur_expr;
1812       else
1813         /* Add EXPR to end of this hash chain.  */
1814         last_expr->next_same_hash = cur_expr;
1815
1816       /* Set the fields of the expr element.
1817          We must copy X because it can be modified when copy propagation is
1818          performed on its operands.  */
1819       /* ??? Should this go in a different obstack?  */
1820       cur_expr->expr = copy_rtx (x);
1821       cur_expr->bitmap_index = n_sets++;
1822       cur_expr->next_same_hash = NULL;
1823       cur_expr->antic_occr = NULL;
1824       cur_expr->avail_occr = NULL;
1825     }
1826
1827   /* Now record the occurrence.  */
1828   cur_occr = cur_expr->avail_occr;
1829
1830   /* Search for another occurrence in the same basic block.  */
1831   while (cur_occr && BLOCK_NUM (cur_occr->insn) != BLOCK_NUM (insn))
1832     {
1833       /* If an occurrence isn't found, save a pointer to the end of
1834          the list.  */
1835       last_occr = cur_occr;
1836       cur_occr = cur_occr->next;
1837     }
1838
1839   if (cur_occr)
1840     /* Found another instance of the expression in the same basic block.
1841        Prefer this occurrence to the currently recorded one.  We want the
1842        last one in the block and the block is scanned from start to end.  */
1843     cur_occr->insn = insn;
1844   else
1845     {
1846       /* First occurrence of this expression in this basic block.  */
1847       cur_occr = (struct occr *) gcse_alloc (sizeof (struct occr));
1848       bytes_used += sizeof (struct occr);
1849
1850       /* First occurrence of this expression in any block?  */
1851       if (cur_expr->avail_occr == NULL)
1852         cur_expr->avail_occr = cur_occr;
1853       else
1854         last_occr->next = cur_occr;
1855
1856       cur_occr->insn = insn;
1857       cur_occr->next = NULL;
1858     }
1859 }
1860
1861 /* Scan pattern PAT of INSN and add an entry to the hash table.  If SET_P is
1862    non-zero, this is for the assignment hash table, otherwise it is for the
1863    expression hash table.  */
1864
1865 static void
1866 hash_scan_set (pat, insn, set_p)
1867      rtx pat, insn;
1868      int set_p;
1869 {
1870   rtx src = SET_SRC (pat);
1871   rtx dest = SET_DEST (pat);
1872
1873   if (GET_CODE (src) == CALL)
1874     hash_scan_call (src, insn);
1875
1876   if (GET_CODE (dest) == REG)
1877     {
1878       int regno = REGNO (dest);
1879       rtx tmp;
1880
1881       /* Only record sets of pseudo-regs in the hash table.  */
1882       if (! set_p
1883           && regno >= FIRST_PSEUDO_REGISTER
1884           /* Don't GCSE something if we can't do a reg/reg copy.  */
1885           && can_copy_p [GET_MODE (dest)]
1886           /* Is SET_SRC something we want to gcse?  */
1887           && want_to_gcse_p (src))
1888         {
1889           /* An expression is not anticipatable if its operands are
1890              modified before this insn.  */
1891           int antic_p = oprs_anticipatable_p (src, insn);
1892           /* An expression is not available if its operands are
1893              subsequently modified, including this insn.  */
1894           int avail_p = oprs_available_p (src, insn);
1895
1896           insert_expr_in_table (src, GET_MODE (dest), insn, antic_p, avail_p);
1897         }
1898
1899       /* Record sets for constant/copy propagation.  */
1900       else if (set_p
1901                && regno >= FIRST_PSEUDO_REGISTER
1902                && ((GET_CODE (src) == REG
1903                     && REGNO (src) >= FIRST_PSEUDO_REGISTER
1904                     && can_copy_p [GET_MODE (dest)])
1905                    || GET_CODE (src) == CONST_INT
1906                    || GET_CODE (src) == SYMBOL_REF
1907                    || GET_CODE (src) == CONST_DOUBLE)
1908                /* A copy is not available if its src or dest is subsequently
1909                   modified.  Here we want to search from INSN+1 on, but
1910                   oprs_available_p searches from INSN on.  */
1911                && (insn == BLOCK_END (BLOCK_NUM (insn))
1912                    || ((tmp = next_nonnote_insn (insn)) != NULL_RTX
1913                        && oprs_available_p (pat, tmp))))
1914         insert_set_in_table (pat, insn);
1915     }
1916 }
1917
1918 static void
1919 hash_scan_clobber (x, insn)
1920      rtx x ATTRIBUTE_UNUSED, insn ATTRIBUTE_UNUSED;
1921 {
1922   /* Currently nothing to do.  */
1923 }
1924
1925 static void
1926 hash_scan_call (x, insn)
1927      rtx x ATTRIBUTE_UNUSED, insn ATTRIBUTE_UNUSED;
1928 {
1929   /* Currently nothing to do.  */
1930 }
1931
1932 /* Process INSN and add hash table entries as appropriate.
1933
1934    Only available expressions that set a single pseudo-reg are recorded.
1935
1936    Single sets in a PARALLEL could be handled, but it's an extra complication
1937    that isn't dealt with right now.  The trick is handling the CLOBBERs that
1938    are also in the PARALLEL.  Later.
1939
1940    If SET_P is non-zero, this is for the assignment hash table,
1941    otherwise it is for the expression hash table.
1942    If IN_LIBCALL_BLOCK nonzero, we are in a libcall block, and should
1943    not record any expressions.  */
1944
1945 static void
1946 hash_scan_insn (insn, set_p, in_libcall_block)
1947      rtx insn;
1948      int set_p;
1949      int in_libcall_block;
1950 {
1951   rtx pat = PATTERN (insn);
1952   int i;
1953
1954   /* Pick out the sets of INSN and for other forms of instructions record
1955      what's been modified.  */
1956
1957   if (GET_CODE (pat) == SET && ! in_libcall_block)
1958     {
1959       /* Ignore obvious no-ops.  */
1960       if (SET_SRC (pat) != SET_DEST (pat))
1961         hash_scan_set (pat, insn, set_p);
1962     }
1963   else if (GET_CODE (pat) == PARALLEL)
1964     for (i = 0; i < XVECLEN (pat, 0); i++)
1965       {
1966         rtx x = XVECEXP (pat, 0, i);
1967
1968         if (GET_CODE (x) == SET)
1969           {
1970             if (GET_CODE (SET_SRC (x)) == CALL)
1971               hash_scan_call (SET_SRC (x), insn);
1972           }
1973         else if (GET_CODE (x) == CLOBBER)
1974           hash_scan_clobber (x, insn);
1975         else if (GET_CODE (x) == CALL)
1976           hash_scan_call (x, insn);
1977       }
1978
1979   else if (GET_CODE (pat) == CLOBBER)
1980     hash_scan_clobber (pat, insn);
1981   else if (GET_CODE (pat) == CALL)
1982     hash_scan_call (pat, insn);
1983 }
1984
1985 static void
1986 dump_hash_table (file, name, table, table_size, total_size)
1987      FILE *file;
1988      const char *name;
1989      struct expr **table;
1990      int table_size, total_size;
1991 {
1992   int i;
1993   /* Flattened out table, so it's printed in proper order.  */
1994   struct expr **flat_table;
1995   unsigned int *hash_val;
1996   struct expr *expr;
1997
1998   flat_table
1999     = (struct expr **) xcalloc (total_size, sizeof (struct expr *));
2000   hash_val = (unsigned int *) xmalloc (total_size * sizeof (unsigned int));
2001
2002   for (i = 0; i < table_size; i++)
2003     for (expr = table[i]; expr != NULL; expr = expr->next_same_hash)
2004       {
2005         flat_table[expr->bitmap_index] = expr;
2006         hash_val[expr->bitmap_index] = i;
2007       }
2008
2009   fprintf (file, "%s hash table (%d buckets, %d entries)\n",
2010            name, table_size, total_size);
2011
2012   for (i = 0; i < total_size; i++)
2013     if (flat_table[i] != 0)
2014       {
2015         expr = flat_table[i];
2016         fprintf (file, "Index %d (hash value %d)\n  ",
2017                  expr->bitmap_index, hash_val[i]);
2018         print_rtl (file, expr->expr);
2019         fprintf (file, "\n");
2020       }
2021
2022   fprintf (file, "\n");
2023
2024   free (flat_table);
2025   free (hash_val);
2026 }
2027
2028 /* Record register first/last/block set information for REGNO in INSN.
2029
2030    reg_first_set records the first place in the block where the register
2031    is set and is used to compute "anticipatability".
2032
2033    reg_last_set records the last place in the block where the register
2034    is set and is used to compute "availability".
2035
2036    reg_set_in_block records whether the register is set in the block
2037    and is used to compute "transparency".  */
2038
2039 static void
2040 record_last_reg_set_info (insn, regno)
2041      rtx insn;
2042      int regno;
2043 {
2044   if (reg_first_set[regno] == NEVER_SET)
2045     reg_first_set[regno] = INSN_CUID (insn);
2046
2047   reg_last_set[regno] = INSN_CUID (insn);
2048   SET_BIT (reg_set_in_block[BLOCK_NUM (insn)], regno);
2049 }
2050
2051 /* Record memory first/last/block set information for INSN.  */
2052
2053 static void
2054 record_last_mem_set_info (insn)
2055      rtx insn;
2056 {
2057   if (mem_first_set == NEVER_SET)
2058     mem_first_set = INSN_CUID (insn);
2059
2060   mem_last_set = INSN_CUID (insn);
2061   mem_set_in_block[BLOCK_NUM (insn)] = 1;
2062 }
2063
2064 /* Called from compute_hash_table via note_stores to handle one
2065    SET or CLOBBER in an insn.  DATA is really the instruction in which
2066    the SET is taking place.  */
2067
2068 static void
2069 record_last_set_info (dest, setter, data)
2070      rtx dest, setter ATTRIBUTE_UNUSED;
2071      void *data;
2072 {
2073   rtx last_set_insn = (rtx) data;
2074
2075   if (GET_CODE (dest) == SUBREG)
2076     dest = SUBREG_REG (dest);
2077
2078   if (GET_CODE (dest) == REG)
2079     record_last_reg_set_info (last_set_insn, REGNO (dest));
2080   else if (GET_CODE (dest) == MEM
2081            /* Ignore pushes, they clobber nothing.  */
2082            && ! push_operand (dest, GET_MODE (dest)))
2083     record_last_mem_set_info (last_set_insn);
2084 }
2085
2086 /* Top level function to create an expression or assignment hash table.
2087
2088    Expression entries are placed in the hash table if
2089    - they are of the form (set (pseudo-reg) src),
2090    - src is something we want to perform GCSE on,
2091    - none of the operands are subsequently modified in the block
2092
2093    Assignment entries are placed in the hash table if
2094    - they are of the form (set (pseudo-reg) src),
2095    - src is something we want to perform const/copy propagation on,
2096    - none of the operands or target are subsequently modified in the block
2097
2098    Currently src must be a pseudo-reg or a const_int.
2099
2100    F is the first insn.
2101    SET_P is non-zero for computing the assignment hash table.  */
2102
2103 static void
2104 compute_hash_table (set_p)
2105      int set_p;
2106 {
2107   int bb;
2108
2109   /* While we compute the hash table we also compute a bit array of which
2110      registers are set in which blocks.
2111      We also compute which blocks set memory, in the absence of aliasing
2112      support [which is TODO].
2113      ??? This isn't needed during const/copy propagation, but it's cheap to
2114      compute.  Later.  */
2115   sbitmap_vector_zero (reg_set_in_block, n_basic_blocks);
2116   bzero ((char *) mem_set_in_block, n_basic_blocks);
2117
2118   /* Some working arrays used to track first and last set in each block.  */
2119   /* ??? One could use alloca here, but at some size a threshold is crossed
2120      beyond which one should use malloc.  Are we at that threshold here?  */
2121   reg_first_set = (int *) gmalloc (max_gcse_regno * sizeof (int));
2122   reg_last_set = (int *) gmalloc (max_gcse_regno * sizeof (int));
2123
2124   for (bb = 0; bb < n_basic_blocks; bb++)
2125     {
2126       rtx insn;
2127       int regno;
2128       int in_libcall_block;
2129       int i;
2130
2131       /* First pass over the instructions records information used to
2132          determine when registers and memory are first and last set.
2133          ??? The mem_set_in_block and hard-reg reg_set_in_block computation
2134          could be moved to compute_sets since they currently don't change.  */
2135
2136       for (i = 0; i < max_gcse_regno; i++)
2137         reg_first_set[i] = reg_last_set[i] = NEVER_SET;
2138       mem_first_set = NEVER_SET;
2139       mem_last_set = NEVER_SET;
2140
2141       for (insn = BLOCK_HEAD (bb);
2142            insn && insn != NEXT_INSN (BLOCK_END (bb));
2143            insn = NEXT_INSN (insn))
2144         {
2145 #ifdef NON_SAVING_SETJMP
2146           if (NON_SAVING_SETJMP && GET_CODE (insn) == NOTE
2147               && NOTE_LINE_NUMBER (insn) == NOTE_INSN_SETJMP)
2148             {
2149               for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2150                 record_last_reg_set_info (insn, regno);
2151               continue;
2152             }
2153 #endif
2154
2155           if (GET_RTX_CLASS (GET_CODE (insn)) != 'i')
2156             continue;
2157
2158           if (GET_CODE (insn) == CALL_INSN)
2159             {
2160               for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2161                 if ((call_used_regs[regno]
2162                      && regno != STACK_POINTER_REGNUM
2163 #if HARD_FRAME_POINTER_REGNUM != FRAME_POINTER_REGNUM
2164                      && regno != HARD_FRAME_POINTER_REGNUM
2165 #endif
2166 #if ARG_POINTER_REGNUM != FRAME_POINTER_REGNUM
2167                      && ! (regno == ARG_POINTER_REGNUM && fixed_regs[regno])
2168 #endif
2169 #if defined (PIC_OFFSET_TABLE_REGNUM) && !defined (PIC_OFFSET_TABLE_REG_CALL_CLOBBERED)
2170                      && ! (regno == PIC_OFFSET_TABLE_REGNUM && flag_pic)
2171 #endif
2172
2173                      && regno != FRAME_POINTER_REGNUM)
2174                     || global_regs[regno])
2175                   record_last_reg_set_info (insn, regno);
2176
2177               if (! CONST_CALL_P (insn))
2178                 record_last_mem_set_info (insn);
2179             }
2180
2181           note_stores (PATTERN (insn), record_last_set_info, insn);
2182         }
2183
2184       /* The next pass builds the hash table.  */
2185
2186       for (insn = BLOCK_HEAD (bb), in_libcall_block = 0;
2187            insn && insn != NEXT_INSN (BLOCK_END (bb));
2188            insn = NEXT_INSN (insn))
2189         if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
2190           {
2191             if (find_reg_note (insn, REG_LIBCALL, NULL_RTX))
2192               in_libcall_block = 1;
2193             else if (find_reg_note (insn, REG_RETVAL, NULL_RTX))
2194               in_libcall_block = 0;
2195             hash_scan_insn (insn, set_p, in_libcall_block);
2196         }
2197     }
2198
2199   free (reg_first_set);
2200   free (reg_last_set);
2201
2202   /* Catch bugs early.  */
2203   reg_first_set = reg_last_set = 0;
2204 }
2205
2206 /* Allocate space for the set hash table.
2207    N_INSNS is the number of instructions in the function.
2208    It is used to determine the number of buckets to use.  */
2209
2210 static void
2211 alloc_set_hash_table (n_insns)
2212      int n_insns;
2213 {
2214   int n;
2215
2216   set_hash_table_size = n_insns / 4;
2217   if (set_hash_table_size < 11)
2218     set_hash_table_size = 11;
2219
2220   /* Attempt to maintain efficient use of hash table.
2221      Making it an odd number is simplest for now.
2222      ??? Later take some measurements.  */
2223   set_hash_table_size |= 1;
2224   n = set_hash_table_size * sizeof (struct expr *);
2225   set_hash_table = (struct expr **) gmalloc (n);
2226 }
2227
2228 /* Free things allocated by alloc_set_hash_table.  */
2229
2230 static void
2231 free_set_hash_table ()
2232 {
2233   free (set_hash_table);
2234 }
2235
2236 /* Compute the hash table for doing copy/const propagation.  */
2237
2238 static void
2239 compute_set_hash_table ()
2240 {
2241   /* Initialize count of number of entries in hash table.  */
2242   n_sets = 0;
2243   bzero ((char *) set_hash_table,
2244          set_hash_table_size * sizeof (struct expr *));
2245
2246   compute_hash_table (1);
2247 }
2248
2249 /* Allocate space for the expression hash table.
2250    N_INSNS is the number of instructions in the function.
2251    It is used to determine the number of buckets to use.  */
2252
2253 static void
2254 alloc_expr_hash_table (n_insns)
2255      int n_insns;
2256 {
2257   int n;
2258
2259   expr_hash_table_size = n_insns / 2;
2260   /* Make sure the amount is usable.  */
2261   if (expr_hash_table_size < 11)
2262     expr_hash_table_size = 11;
2263
2264   /* Attempt to maintain efficient use of hash table.
2265      Making it an odd number is simplest for now.
2266      ??? Later take some measurements.  */
2267   expr_hash_table_size |= 1;
2268   n = expr_hash_table_size * sizeof (struct expr *);
2269   expr_hash_table = (struct expr **) gmalloc (n);
2270 }
2271
2272 /* Free things allocated by alloc_expr_hash_table.  */
2273
2274 static void
2275 free_expr_hash_table ()
2276 {
2277   free (expr_hash_table);
2278 }
2279
2280 /* Compute the hash table for doing GCSE.  */
2281
2282 static void
2283 compute_expr_hash_table ()
2284 {
2285   /* Initialize count of number of entries in hash table.  */
2286   n_exprs = 0;
2287   bzero ((char *) expr_hash_table,
2288          expr_hash_table_size * sizeof (struct expr *));
2289
2290   compute_hash_table (0);
2291 }
2292 \f
2293 /* Expression tracking support.  */
2294
2295 /* Lookup pattern PAT in the expression table.
2296    The result is a pointer to the table entry, or NULL if not found.  */
2297
2298 static struct expr *
2299 lookup_expr (pat)
2300      rtx pat;
2301 {
2302   int do_not_record_p;
2303   unsigned int hash = hash_expr (pat, GET_MODE (pat), &do_not_record_p,
2304                                  expr_hash_table_size);
2305   struct expr *expr;
2306
2307   if (do_not_record_p)
2308     return NULL;
2309
2310   expr = expr_hash_table[hash];
2311
2312   while (expr && ! expr_equiv_p (expr->expr, pat))
2313     expr = expr->next_same_hash;
2314
2315   return expr;
2316 }
2317
2318 /* Lookup REGNO in the set table.  If PAT is non-NULL look for the entry that
2319    matches it, otherwise return the first entry for REGNO.  The result is a
2320    pointer to the table entry, or NULL if not found.  */
2321
2322 static struct expr *
2323 lookup_set (regno, pat)
2324      int regno;
2325      rtx pat;
2326 {
2327   unsigned int hash = hash_set (regno, set_hash_table_size);
2328   struct expr *expr;
2329
2330   expr = set_hash_table[hash];
2331
2332   if (pat)
2333     {
2334       while (expr && ! expr_equiv_p (expr->expr, pat))
2335         expr = expr->next_same_hash;
2336     }
2337   else
2338     {
2339       while (expr && REGNO (SET_DEST (expr->expr)) != regno)
2340         expr = expr->next_same_hash;
2341     }
2342
2343   return expr;
2344 }
2345
2346 /* Return the next entry for REGNO in list EXPR.  */
2347
2348 static struct expr *
2349 next_set (regno, expr)
2350      int regno;
2351      struct expr *expr;
2352 {
2353   do
2354     expr = expr->next_same_hash;
2355   while (expr && REGNO (SET_DEST (expr->expr)) != regno);
2356
2357   return expr;
2358 }
2359
2360 /* Reset tables used to keep track of what's still available [since the
2361    start of the block].  */
2362
2363 static void
2364 reset_opr_set_tables ()
2365 {
2366   /* Maintain a bitmap of which regs have been set since beginning of
2367      the block.  */
2368   sbitmap_zero (reg_set_bitmap);
2369
2370   /* Also keep a record of the last instruction to modify memory.
2371      For now this is very trivial, we only record whether any memory
2372      location has been modified.  */
2373   mem_last_set = 0;
2374 }
2375
2376 /* Return non-zero if the operands of X are not set before INSN in
2377    INSN's basic block.  */
2378
2379 static int
2380 oprs_not_set_p (x, insn)
2381      rtx x, insn;
2382 {
2383   int i, j;
2384   enum rtx_code code;
2385   const char *fmt;
2386
2387   if (x == 0)
2388     return 1;
2389
2390   code = GET_CODE (x);
2391   switch (code)
2392     {
2393     case PC:
2394     case CC0:
2395     case CONST:
2396     case CONST_INT:
2397     case CONST_DOUBLE:
2398     case SYMBOL_REF:
2399     case LABEL_REF:
2400     case ADDR_VEC:
2401     case ADDR_DIFF_VEC:
2402       return 1;
2403
2404     case MEM:
2405       if (mem_last_set != 0)
2406         return 0;
2407       else
2408         return oprs_not_set_p (XEXP (x, 0), insn);
2409
2410     case REG:
2411       return ! TEST_BIT (reg_set_bitmap, REGNO (x));
2412
2413     default:
2414       break;
2415     }
2416
2417   for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
2418     {
2419       if (fmt[i] == 'e')
2420         {
2421           /* If we are about to do the last recursive call
2422              needed at this level, change it into iteration.
2423              This function is called enough to be worth it.  */
2424           if (i == 0)
2425             return oprs_not_set_p (XEXP (x, i), insn);
2426
2427           if (! oprs_not_set_p (XEXP (x, i), insn))
2428             return 0;
2429         }
2430       else if (fmt[i] == 'E')
2431         for (j = 0; j < XVECLEN (x, i); j++)
2432           if (! oprs_not_set_p (XVECEXP (x, i, j), insn))
2433             return 0;
2434     }
2435
2436   return 1;
2437 }
2438
2439 /* Mark things set by a CALL.  */
2440
2441 static void
2442 mark_call (insn)
2443      rtx insn;
2444 {
2445   mem_last_set = INSN_CUID (insn);
2446 }
2447
2448 /* Mark things set by a SET.  */
2449
2450 static void
2451 mark_set (pat, insn)
2452      rtx pat, insn;
2453 {
2454   rtx dest = SET_DEST (pat);
2455
2456   while (GET_CODE (dest) == SUBREG
2457          || GET_CODE (dest) == ZERO_EXTRACT
2458          || GET_CODE (dest) == SIGN_EXTRACT
2459          || GET_CODE (dest) == STRICT_LOW_PART)
2460     dest = XEXP (dest, 0);
2461
2462   if (GET_CODE (dest) == REG)
2463     SET_BIT (reg_set_bitmap, REGNO (dest));
2464   else if (GET_CODE (dest) == MEM)
2465     mem_last_set = INSN_CUID (insn);
2466
2467   if (GET_CODE (SET_SRC (pat)) == CALL)
2468     mark_call (insn);
2469 }
2470
2471 /* Record things set by a CLOBBER.  */
2472
2473 static void
2474 mark_clobber (pat, insn)
2475      rtx pat, insn;
2476 {
2477   rtx clob = XEXP (pat, 0);
2478
2479   while (GET_CODE (clob) == SUBREG || GET_CODE (clob) == STRICT_LOW_PART)
2480     clob = XEXP (clob, 0);
2481
2482   if (GET_CODE (clob) == REG)
2483     SET_BIT (reg_set_bitmap, REGNO (clob));
2484   else
2485     mem_last_set = INSN_CUID (insn);
2486 }
2487
2488 /* Record things set by INSN.
2489    This data is used by oprs_not_set_p.  */
2490
2491 static void
2492 mark_oprs_set (insn)
2493      rtx insn;
2494 {
2495   rtx pat = PATTERN (insn);
2496   int i;
2497
2498   if (GET_CODE (pat) == SET)
2499     mark_set (pat, insn);
2500   else if (GET_CODE (pat) == PARALLEL)
2501     for (i = 0; i < XVECLEN (pat, 0); i++)
2502       {
2503         rtx x = XVECEXP (pat, 0, i);
2504
2505         if (GET_CODE (x) == SET)
2506           mark_set (x, insn);
2507         else if (GET_CODE (x) == CLOBBER)
2508           mark_clobber (x, insn);
2509         else if (GET_CODE (x) == CALL)
2510           mark_call (insn);
2511       }
2512
2513   else if (GET_CODE (pat) == CLOBBER)
2514     mark_clobber (pat, insn);
2515   else if (GET_CODE (pat) == CALL)
2516     mark_call (insn);
2517 }
2518
2519 \f
2520 /* Classic GCSE reaching definition support.  */
2521
2522 /* Allocate reaching def variables.  */
2523
2524 static void
2525 alloc_rd_mem (n_blocks, n_insns)
2526      int n_blocks, n_insns;
2527 {
2528   rd_kill = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2529   sbitmap_vector_zero (rd_kill, n_basic_blocks);
2530
2531   rd_gen = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2532   sbitmap_vector_zero (rd_gen, n_basic_blocks);
2533
2534   reaching_defs = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2535   sbitmap_vector_zero (reaching_defs, n_basic_blocks);
2536
2537   rd_out = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2538   sbitmap_vector_zero (rd_out, n_basic_blocks);
2539 }
2540
2541 /* Free reaching def variables.  */
2542
2543 static void
2544 free_rd_mem ()
2545 {
2546   free (rd_kill);
2547   free (rd_gen);
2548   free (reaching_defs);
2549   free (rd_out);
2550 }
2551
2552 /* Add INSN to the kills of BB.  REGNO, set in BB, is killed by INSN.  */
2553
2554 static void
2555 handle_rd_kill_set (insn, regno, bb)
2556      rtx insn;
2557      int regno, bb;
2558 {
2559   struct reg_set *this_reg;
2560
2561   for (this_reg = reg_set_table[regno]; this_reg; this_reg = this_reg ->next)
2562     if (BLOCK_NUM (this_reg->insn) != BLOCK_NUM (insn))
2563       SET_BIT (rd_kill[bb], INSN_CUID (this_reg->insn));
2564 }
2565
2566 /* Compute the set of kill's for reaching definitions.  */
2567
2568 static void
2569 compute_kill_rd ()
2570 {
2571   int bb, cuid;
2572   int regno, i;
2573
2574   /* For each block
2575        For each set bit in `gen' of the block (i.e each insn which
2576            generates a definition in the block)
2577          Call the reg set by the insn corresponding to that bit regx
2578          Look at the linked list starting at reg_set_table[regx]
2579          For each setting of regx in the linked list, which is not in
2580              this block
2581            Set the bit in `kill' corresponding to that insn.   */
2582   for (bb = 0; bb < n_basic_blocks; bb++)
2583     for (cuid = 0; cuid < max_cuid; cuid++)
2584       if (TEST_BIT (rd_gen[bb], cuid))
2585         {
2586           rtx insn = CUID_INSN (cuid);
2587           rtx pat = PATTERN (insn);
2588
2589           if (GET_CODE (insn) == CALL_INSN)
2590             {
2591               for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2592                 {
2593                   if ((call_used_regs[regno]
2594                        && regno != STACK_POINTER_REGNUM
2595 #if HARD_FRAME_POINTER_REGNUM != FRAME_POINTER_REGNUM
2596                        && regno != HARD_FRAME_POINTER_REGNUM
2597 #endif
2598 #if ARG_POINTER_REGNUM != FRAME_POINTER_REGNUM
2599                        && ! (regno == ARG_POINTER_REGNUM
2600                              && fixed_regs[regno])
2601 #endif
2602 #if defined (PIC_OFFSET_TABLE_REGNUM) && !defined (PIC_OFFSET_TABLE_REG_CALL_CLOBBERED)
2603                        && ! (regno == PIC_OFFSET_TABLE_REGNUM && flag_pic)
2604 #endif
2605                        && regno != FRAME_POINTER_REGNUM)
2606                       || global_regs[regno])
2607                     handle_rd_kill_set (insn, regno, bb);
2608                 }
2609             }
2610
2611           if (GET_CODE (pat) == PARALLEL)
2612             {
2613               for (i = XVECLEN (pat, 0) - 1; i >= 0; i--)
2614                 {
2615                   enum rtx_code code = GET_CODE (XVECEXP (pat, 0, i));
2616
2617                   if ((code == SET || code == CLOBBER)
2618                       && GET_CODE (XEXP (XVECEXP (pat, 0, i), 0)) == REG)
2619                     handle_rd_kill_set (insn,
2620                                         REGNO (XEXP (XVECEXP (pat, 0, i), 0)),
2621                                         bb);
2622                 }
2623             }
2624           else if (GET_CODE (pat) == SET && GET_CODE (SET_DEST (pat)) == REG)
2625             /* Each setting of this register outside of this block
2626                must be marked in the set of kills in this block.  */
2627             handle_rd_kill_set (insn, REGNO (SET_DEST (pat)), bb);
2628         }
2629 }
2630
2631 /* Compute the reaching definitions as in
2632    Compilers Principles, Techniques, and Tools. Aho, Sethi, Ullman,
2633    Chapter 10.  It is the same algorithm as used for computing available
2634    expressions but applied to the gens and kills of reaching definitions.  */
2635
2636 static void
2637 compute_rd ()
2638 {
2639   int bb, changed, passes;
2640
2641   for (bb = 0; bb < n_basic_blocks; bb++)
2642     sbitmap_copy (rd_out[bb] /*dst*/, rd_gen[bb] /*src*/);
2643
2644   passes = 0;
2645   changed = 1;
2646   while (changed)
2647     {
2648       changed = 0;
2649       for (bb = 0; bb < n_basic_blocks; bb++)
2650         {
2651           sbitmap_union_of_preds (reaching_defs[bb], rd_out, bb);
2652           changed |= sbitmap_union_of_diff (rd_out[bb], rd_gen[bb],
2653                                             reaching_defs[bb], rd_kill[bb]);
2654         }
2655       passes++;
2656     }
2657
2658   if (gcse_file)
2659     fprintf (gcse_file, "reaching def computation: %d passes\n", passes);
2660 }
2661 \f
2662 /* Classic GCSE available expression support.  */
2663
2664 /* Allocate memory for available expression computation.  */
2665
2666 static void
2667 alloc_avail_expr_mem (n_blocks, n_exprs)
2668      int n_blocks, n_exprs;
2669 {
2670   ae_kill = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2671   sbitmap_vector_zero (ae_kill, n_basic_blocks);
2672
2673   ae_gen = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2674   sbitmap_vector_zero (ae_gen, n_basic_blocks);
2675
2676   ae_in = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2677   sbitmap_vector_zero (ae_in, n_basic_blocks);
2678
2679   ae_out = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2680   sbitmap_vector_zero (ae_out, n_basic_blocks);
2681
2682   u_bitmap = (sbitmap) sbitmap_alloc (n_exprs);
2683   sbitmap_ones (u_bitmap);
2684 }
2685
2686 static void
2687 free_avail_expr_mem ()
2688 {
2689   free (ae_kill);
2690   free (ae_gen);
2691   free (ae_in);
2692   free (ae_out);
2693   free (u_bitmap);
2694 }
2695
2696 /* Compute the set of available expressions generated in each basic block.  */
2697
2698 static void
2699 compute_ae_gen ()
2700 {
2701   int i;
2702   struct expr *expr;
2703   struct occr *occr;
2704
2705   /* For each recorded occurrence of each expression, set ae_gen[bb][expr].
2706      This is all we have to do because an expression is not recorded if it
2707      is not available, and the only expressions we want to work with are the
2708      ones that are recorded.  */
2709   for (i = 0; i < expr_hash_table_size; i++)
2710     for (expr = expr_hash_table[i]; expr != 0; expr = expr->next_same_hash)
2711       for (occr = expr->avail_occr; occr != 0; occr = occr->next)
2712         SET_BIT (ae_gen[BLOCK_NUM (occr->insn)], expr->bitmap_index);
2713 }
2714
2715 /* Return non-zero if expression X is killed in BB.  */
2716
2717 static int
2718 expr_killed_p (x, bb)
2719      rtx x;
2720      int bb;
2721 {
2722   int i, j;
2723   enum rtx_code code;
2724   const char *fmt;
2725
2726   if (x == 0)
2727     return 1;
2728
2729   code = GET_CODE (x);
2730   switch (code)
2731     {
2732     case REG:
2733       return TEST_BIT (reg_set_in_block[bb], REGNO (x));
2734
2735     case MEM:
2736       if (mem_set_in_block[bb])
2737         return 1;
2738       else
2739         return expr_killed_p (XEXP (x, 0), bb);
2740
2741     case PC:
2742     case CC0: /*FIXME*/
2743     case CONST:
2744     case CONST_INT:
2745     case CONST_DOUBLE:
2746     case SYMBOL_REF:
2747     case LABEL_REF:
2748     case ADDR_VEC:
2749     case ADDR_DIFF_VEC:
2750       return 0;
2751
2752     default:
2753       break;
2754     }
2755
2756   for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
2757     {
2758       if (fmt[i] == 'e')
2759         {
2760           /* If we are about to do the last recursive call
2761              needed at this level, change it into iteration.
2762              This function is called enough to be worth it.  */
2763           if (i == 0)
2764             return expr_killed_p (XEXP (x, i), bb);
2765           else if (expr_killed_p (XEXP (x, i), bb))
2766             return 1;
2767         }
2768       else if (fmt[i] == 'E')
2769         for (j = 0; j < XVECLEN (x, i); j++)
2770           if (expr_killed_p (XVECEXP (x, i, j), bb))
2771             return 1;
2772     }
2773
2774   return 0;
2775 }
2776
2777 /* Compute the set of available expressions killed in each basic block.  */
2778
2779 static void
2780 compute_ae_kill (ae_gen, ae_kill)
2781      sbitmap *ae_gen, *ae_kill;
2782 {
2783   int bb, i;
2784   struct expr *expr;
2785
2786   for (bb = 0; bb < n_basic_blocks; bb++)
2787     for (i = 0; i < expr_hash_table_size; i++)
2788       for (expr = expr_hash_table[i]; expr; expr = expr->next_same_hash)
2789         {
2790           /* Skip EXPR if generated in this block.  */
2791           if (TEST_BIT (ae_gen[bb], expr->bitmap_index))
2792             continue;
2793
2794           if (expr_killed_p (expr->expr, bb))
2795             SET_BIT (ae_kill[bb], expr->bitmap_index);
2796         }
2797 }
2798 \f
2799 /* Actually perform the Classic GCSE optimizations.  */
2800
2801 /* Return non-zero if occurrence OCCR of expression EXPR reaches block BB.
2802
2803    CHECK_SELF_LOOP is non-zero if we should consider a block reaching itself
2804    as a positive reach.  We want to do this when there are two computations
2805    of the expression in the block.
2806
2807    VISITED is a pointer to a working buffer for tracking which BB's have
2808    been visited.  It is NULL for the top-level call.
2809
2810    We treat reaching expressions that go through blocks containing the same
2811    reaching expression as "not reaching".  E.g. if EXPR is generated in blocks
2812    2 and 3, INSN is in block 4, and 2->3->4, we treat the expression in block
2813    2 as not reaching.  The intent is to improve the probability of finding
2814    only one reaching expression and to reduce register lifetimes by picking
2815    the closest such expression.  */
2816
2817 static int
2818 expr_reaches_here_p_work (occr, expr, bb, check_self_loop, visited)
2819      struct occr *occr;
2820      struct expr *expr;
2821      int bb;
2822      int check_self_loop;
2823      char *visited;
2824 {
2825   edge pred;
2826
2827   for (pred = BASIC_BLOCK(bb)->pred; pred != NULL; pred = pred->pred_next)
2828     {
2829       int pred_bb = pred->src->index;
2830
2831       if (visited[pred_bb])
2832         /* This predecessor has already been visited. Nothing to do.  */
2833           ;
2834       else if (pred_bb == bb)
2835         {
2836           /* BB loops on itself.  */
2837           if (check_self_loop
2838               && TEST_BIT (ae_gen[pred_bb], expr->bitmap_index)
2839               && BLOCK_NUM (occr->insn) == pred_bb)
2840             return 1;
2841
2842           visited[pred_bb] = 1;
2843         }
2844
2845       /* Ignore this predecessor if it kills the expression.  */
2846       else if (TEST_BIT (ae_kill[pred_bb], expr->bitmap_index))
2847         visited[pred_bb] = 1;
2848
2849       /* Does this predecessor generate this expression?  */
2850       else if (TEST_BIT (ae_gen[pred_bb], expr->bitmap_index))
2851         {
2852           /* Is this the occurrence we're looking for?
2853              Note that there's only one generating occurrence per block
2854              so we just need to check the block number.  */
2855           if (BLOCK_NUM (occr->insn) == pred_bb)
2856             return 1;
2857
2858           visited[pred_bb] = 1;
2859         }
2860
2861       /* Neither gen nor kill.  */
2862       else
2863         {
2864           visited[pred_bb] = 1;
2865           if (expr_reaches_here_p_work (occr, expr, pred_bb, check_self_loop,
2866               visited))
2867
2868             return 1;
2869         }
2870     }
2871
2872   /* All paths have been checked.  */
2873   return 0;
2874 }
2875
2876 /* This wrapper for expr_reaches_here_p_work() is to ensure that any
2877    memory allocated for that function is returned. */
2878
2879 static int
2880 expr_reaches_here_p (occr, expr, bb, check_self_loop)
2881      struct occr *occr;
2882      struct expr *expr;
2883      int bb;
2884      int check_self_loop;
2885 {
2886   int rval;
2887   char *visited = (char *) xcalloc (n_basic_blocks, 1);
2888
2889   rval = expr_reaches_here_p_work (occr, expr, bb, check_self_loop, visited);
2890
2891   free (visited);
2892   return rval;
2893 }
2894
2895 /* Return the instruction that computes EXPR that reaches INSN's basic block.
2896    If there is more than one such instruction, return NULL.
2897
2898    Called only by handle_avail_expr.  */
2899
2900 static rtx
2901 computing_insn (expr, insn)
2902      struct expr *expr;
2903      rtx insn;
2904 {
2905   int bb = BLOCK_NUM (insn);
2906
2907   if (expr->avail_occr->next == NULL)
2908     {
2909       if (BLOCK_NUM (expr->avail_occr->insn) == bb)
2910         /* The available expression is actually itself
2911            (i.e. a loop in the flow graph) so do nothing.  */
2912         return NULL;
2913
2914       /* (FIXME) Case that we found a pattern that was created by
2915          a substitution that took place.  */
2916       return expr->avail_occr->insn;
2917     }
2918   else
2919     {
2920       /* Pattern is computed more than once.
2921          Search backwards from this insn to see how many of these
2922          computations actually reach this insn.  */
2923       struct occr *occr;
2924       rtx insn_computes_expr = NULL;
2925       int can_reach = 0;
2926
2927       for (occr = expr->avail_occr; occr != NULL; occr = occr->next)
2928         {
2929           if (BLOCK_NUM (occr->insn) == bb)
2930             {
2931               /* The expression is generated in this block.
2932                  The only time we care about this is when the expression
2933                  is generated later in the block [and thus there's a loop].
2934                  We let the normal cse pass handle the other cases.  */
2935               if (INSN_CUID (insn) < INSN_CUID (occr->insn)
2936                   && expr_reaches_here_p (occr, expr, bb, 1))
2937                 {
2938                   can_reach++;
2939                   if (can_reach > 1)
2940                     return NULL;
2941
2942                   insn_computes_expr = occr->insn;
2943                 }
2944             }
2945           else if (expr_reaches_here_p (occr, expr, bb, 0))
2946             {
2947               can_reach++;
2948               if (can_reach > 1)
2949                 return NULL;
2950
2951               insn_computes_expr = occr->insn;
2952             }
2953         }
2954
2955       if (insn_computes_expr == NULL)
2956         abort ();
2957
2958       return insn_computes_expr;
2959     }
2960 }
2961
2962 /* Return non-zero if the definition in DEF_INSN can reach INSN.
2963    Only called by can_disregard_other_sets.  */
2964
2965 static int
2966 def_reaches_here_p (insn, def_insn)
2967      rtx insn, def_insn;
2968 {
2969   rtx reg;
2970
2971   if (TEST_BIT (reaching_defs[BLOCK_NUM (insn)], INSN_CUID (def_insn)))
2972     return 1;
2973
2974   if (BLOCK_NUM (insn) == BLOCK_NUM (def_insn))
2975     {
2976       if (INSN_CUID (def_insn) < INSN_CUID (insn))
2977         {
2978           if (GET_CODE (PATTERN (def_insn)) == PARALLEL)
2979             return 1;
2980           else if (GET_CODE (PATTERN (def_insn)) == CLOBBER)
2981             reg = XEXP (PATTERN (def_insn), 0);
2982           else if (GET_CODE (PATTERN (def_insn)) == SET)
2983             reg = SET_DEST (PATTERN (def_insn));
2984           else
2985             abort ();
2986
2987           return ! reg_set_between_p (reg, NEXT_INSN (def_insn), insn);
2988         }
2989       else
2990         return 0;
2991     }
2992
2993   return 0;
2994 }
2995
2996 /* Return non-zero if *ADDR_THIS_REG can only have one value at INSN.  The
2997    value returned is the number of definitions that reach INSN.  Returning a
2998    value of zero means that [maybe] more than one definition reaches INSN and
2999    the caller can't perform whatever optimization it is trying.  i.e. it is
3000    always safe to return zero.  */
3001
3002 static int
3003 can_disregard_other_sets (addr_this_reg, insn, for_combine)
3004      struct reg_set **addr_this_reg;
3005      rtx insn;
3006      int for_combine;
3007 {
3008   int number_of_reaching_defs = 0;
3009   struct reg_set *this_reg;
3010
3011   for (this_reg = *addr_this_reg; this_reg != 0; this_reg = this_reg->next)
3012     if (def_reaches_here_p (insn, this_reg->insn))
3013       {
3014         number_of_reaching_defs++;
3015         /* Ignore parallels for now.  */
3016         if (GET_CODE (PATTERN (this_reg->insn)) == PARALLEL)
3017           return 0;
3018
3019         if (!for_combine
3020             && (GET_CODE (PATTERN (this_reg->insn)) == CLOBBER
3021                 || ! rtx_equal_p (SET_SRC (PATTERN (this_reg->insn)),
3022                                   SET_SRC (PATTERN (insn)))))
3023           /* A setting of the reg to a different value reaches INSN.  */
3024           return 0;
3025
3026         if (number_of_reaching_defs > 1)
3027           {
3028             /* If in this setting the value the register is being set to is
3029                equal to the previous value the register was set to and this
3030                setting reaches the insn we are trying to do the substitution
3031                on then we are ok.  */
3032             if (GET_CODE (PATTERN (this_reg->insn)) == CLOBBER)
3033               return 0;
3034             else if (! rtx_equal_p (SET_SRC (PATTERN (this_reg->insn)),
3035                                     SET_SRC (PATTERN (insn))))
3036               return 0;
3037           }
3038
3039         *addr_this_reg = this_reg;
3040       }
3041
3042   return number_of_reaching_defs;
3043 }
3044
3045 /* Expression computed by insn is available and the substitution is legal,
3046    so try to perform the substitution.
3047
3048    The result is non-zero if any changes were made.  */
3049
3050 static int
3051 handle_avail_expr (insn, expr)
3052      rtx insn;
3053      struct expr *expr;
3054 {
3055   rtx pat, insn_computes_expr;
3056   rtx to;
3057   struct reg_set *this_reg;
3058   int found_setting, use_src;
3059   int changed = 0;
3060
3061   /* We only handle the case where one computation of the expression
3062      reaches this instruction.  */
3063   insn_computes_expr = computing_insn (expr, insn);
3064   if (insn_computes_expr == NULL)
3065     return 0;
3066
3067   found_setting = 0;
3068   use_src = 0;
3069
3070   /* At this point we know only one computation of EXPR outside of this
3071      block reaches this insn.  Now try to find a register that the
3072      expression is computed into.  */
3073   if (GET_CODE (SET_SRC (PATTERN (insn_computes_expr))) == REG)
3074     {
3075       /* This is the case when the available expression that reaches
3076          here has already been handled as an available expression.  */
3077       int regnum_for_replacing
3078         = REGNO (SET_SRC (PATTERN (insn_computes_expr)));
3079
3080       /* If the register was created by GCSE we can't use `reg_set_table',
3081          however we know it's set only once.  */
3082       if (regnum_for_replacing >= max_gcse_regno
3083           /* If the register the expression is computed into is set only once,
3084              or only one set reaches this insn, we can use it.  */
3085           || (((this_reg = reg_set_table[regnum_for_replacing]),
3086                this_reg->next == NULL)
3087               || can_disregard_other_sets (&this_reg, insn, 0)))
3088        {
3089          use_src = 1;
3090          found_setting = 1;
3091        }
3092     }
3093
3094   if (!found_setting)
3095     {
3096       int regnum_for_replacing
3097         = REGNO (SET_DEST (PATTERN (insn_computes_expr)));
3098
3099       /* This shouldn't happen.  */
3100       if (regnum_for_replacing >= max_gcse_regno)
3101         abort ();
3102
3103       this_reg = reg_set_table[regnum_for_replacing];
3104
3105       /* If the register the expression is computed into is set only once,
3106          or only one set reaches this insn, use it.  */
3107       if (this_reg->next == NULL
3108           || can_disregard_other_sets (&this_reg, insn, 0))
3109         found_setting = 1;
3110     }
3111
3112   if (found_setting)
3113     {
3114       pat = PATTERN (insn);
3115       if (use_src)
3116         to = SET_SRC (PATTERN (insn_computes_expr));
3117       else
3118         to = SET_DEST (PATTERN (insn_computes_expr));
3119       changed = validate_change (insn, &SET_SRC (pat), to, 0);
3120
3121       /* We should be able to ignore the return code from validate_change but
3122          to play it safe we check.  */
3123       if (changed)
3124         {
3125           gcse_subst_count++;
3126           if (gcse_file != NULL)
3127             {
3128               fprintf (gcse_file, "GCSE: Replacing the source in insn %d with",
3129                        INSN_UID (insn));
3130               fprintf (gcse_file, " reg %d %s insn %d\n",
3131                        REGNO (to), use_src ? "from" : "set in",
3132                        INSN_UID (insn_computes_expr));
3133             }
3134         }
3135     }
3136
3137   /* The register that the expr is computed into is set more than once.  */
3138   else if (1 /*expensive_op(this_pattrn->op) && do_expensive_gcse)*/)
3139     {
3140       /* Insert an insn after insnx that copies the reg set in insnx
3141          into a new pseudo register call this new register REGN.
3142          From insnb until end of basic block or until REGB is set
3143          replace all uses of REGB with REGN.  */
3144       rtx new_insn;
3145
3146       to = gen_reg_rtx (GET_MODE (SET_DEST (PATTERN (insn_computes_expr))));
3147
3148       /* Generate the new insn.  */
3149       /* ??? If the change fails, we return 0, even though we created
3150          an insn.  I think this is ok.  */
3151       new_insn
3152         = emit_insn_after (gen_rtx_SET (VOIDmode, to,
3153                                         SET_DEST (PATTERN
3154                                                   (insn_computes_expr))),
3155                            insn_computes_expr);
3156
3157       /* Keep block number table up to date.  */
3158       set_block_num (new_insn, BLOCK_NUM (insn_computes_expr));
3159
3160       /* Keep register set table up to date.  */
3161       record_one_set (REGNO (to), new_insn);
3162
3163       gcse_create_count++;
3164       if (gcse_file != NULL)
3165         {
3166           fprintf (gcse_file, "GCSE: Creating insn %d to copy value of reg %d",
3167                    INSN_UID (NEXT_INSN (insn_computes_expr)),
3168                    REGNO (SET_SRC (PATTERN (NEXT_INSN (insn_computes_expr)))));
3169           fprintf (gcse_file, ", computed in insn %d,\n",
3170                    INSN_UID (insn_computes_expr));
3171           fprintf (gcse_file, "      into newly allocated reg %d\n",
3172                    REGNO (to));
3173         }
3174
3175       pat = PATTERN (insn);
3176
3177       /* Do register replacement for INSN.  */
3178       changed = validate_change (insn, &SET_SRC (pat),
3179                                  SET_DEST (PATTERN
3180                                            (NEXT_INSN (insn_computes_expr))),
3181                                  0);
3182
3183       /* We should be able to ignore the return code from validate_change but
3184          to play it safe we check.  */
3185       if (changed)
3186         {
3187           gcse_subst_count++;
3188           if (gcse_file != NULL)
3189             {
3190               fprintf (gcse_file,
3191                        "GCSE: Replacing the source in insn %d with reg %d ",
3192                        INSN_UID (insn),
3193                        REGNO (SET_DEST (PATTERN (NEXT_INSN
3194                                                  (insn_computes_expr)))));
3195               fprintf (gcse_file, "set in insn %d\n",
3196                        INSN_UID (insn_computes_expr));
3197             }
3198         }
3199     }
3200
3201   return changed;
3202 }
3203
3204 /* Perform classic GCSE.  This is called by one_classic_gcse_pass after all
3205    the dataflow analysis has been done.
3206
3207    The result is non-zero if a change was made.  */
3208
3209 static int
3210 classic_gcse ()
3211 {
3212   int bb, changed;
3213   rtx insn;
3214
3215   /* Note we start at block 1.  */
3216
3217   changed = 0;
3218   for (bb = 1; bb < n_basic_blocks; bb++)
3219     {
3220       /* Reset tables used to keep track of what's still valid [since the
3221          start of the block].  */
3222       reset_opr_set_tables ();
3223
3224       for (insn = BLOCK_HEAD (bb);
3225            insn != NULL && insn != NEXT_INSN (BLOCK_END (bb));
3226            insn = NEXT_INSN (insn))
3227         {
3228           /* Is insn of form (set (pseudo-reg) ...)?  */
3229           if (GET_CODE (insn) == INSN
3230               && GET_CODE (PATTERN (insn)) == SET
3231               && GET_CODE (SET_DEST (PATTERN (insn))) == REG
3232               && REGNO (SET_DEST (PATTERN (insn))) >= FIRST_PSEUDO_REGISTER)
3233             {
3234               rtx pat = PATTERN (insn);
3235               rtx src = SET_SRC (pat);
3236               struct expr *expr;
3237
3238               if (want_to_gcse_p (src)
3239                   /* Is the expression recorded?  */
3240                   && ((expr = lookup_expr (src)) != NULL)
3241                   /* Is the expression available [at the start of the
3242                      block]?  */
3243                   && TEST_BIT (ae_in[bb], expr->bitmap_index)
3244                   /* Are the operands unchanged since the start of the
3245                      block?  */
3246                   && oprs_not_set_p (src, insn))
3247                 changed |= handle_avail_expr (insn, expr);
3248             }
3249
3250           /* Keep track of everything modified by this insn.  */
3251           /* ??? Need to be careful w.r.t. mods done to INSN.  */
3252           if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
3253             mark_oprs_set (insn);
3254         }
3255     }
3256
3257   return changed;
3258 }
3259
3260 /* Top level routine to perform one classic GCSE pass.
3261
3262    Return non-zero if a change was made.  */
3263
3264 static int
3265 one_classic_gcse_pass (pass)
3266      int pass;
3267 {
3268   int changed = 0;
3269
3270   gcse_subst_count = 0;
3271   gcse_create_count = 0;
3272
3273   alloc_expr_hash_table (max_cuid);
3274   alloc_rd_mem (n_basic_blocks, max_cuid);
3275   compute_expr_hash_table ();
3276   if (gcse_file)
3277     dump_hash_table (gcse_file, "Expression", expr_hash_table,
3278                      expr_hash_table_size, n_exprs);
3279
3280   if (n_exprs > 0)
3281     {
3282       compute_kill_rd ();
3283       compute_rd ();
3284       alloc_avail_expr_mem (n_basic_blocks, n_exprs);
3285       compute_ae_gen ();
3286       compute_ae_kill (ae_gen, ae_kill);
3287       compute_available (ae_gen, ae_kill, ae_out, ae_in);
3288       changed = classic_gcse ();
3289       free_avail_expr_mem ();
3290     }
3291
3292   free_rd_mem ();
3293   free_expr_hash_table ();
3294
3295   if (gcse_file)
3296     {
3297       fprintf (gcse_file, "\n");
3298       fprintf (gcse_file, "GCSE of %s, pass %d: %d bytes needed, %d substs,",
3299                current_function_name, pass, bytes_used, gcse_subst_count);
3300       fprintf (gcse_file, "%d insns created\n", gcse_create_count);
3301     }
3302
3303   return changed;
3304 }
3305 \f
3306 /* Compute copy/constant propagation working variables.  */
3307
3308 /* Local properties of assignments.  */
3309 static sbitmap *cprop_pavloc;
3310 static sbitmap *cprop_absaltered;
3311
3312 /* Global properties of assignments (computed from the local properties).  */
3313 static sbitmap *cprop_avin;
3314 static sbitmap *cprop_avout;
3315
3316 /* Allocate vars used for copy/const propagation.  N_BLOCKS is the number of
3317    basic blocks.  N_SETS is the number of sets.  */
3318
3319 static void
3320 alloc_cprop_mem (n_blocks, n_sets)
3321      int n_blocks, n_sets;
3322 {
3323   cprop_pavloc = sbitmap_vector_alloc (n_blocks, n_sets);
3324   cprop_absaltered = sbitmap_vector_alloc (n_blocks, n_sets);
3325
3326   cprop_avin = sbitmap_vector_alloc (n_blocks, n_sets);
3327   cprop_avout = sbitmap_vector_alloc (n_blocks, n_sets);
3328 }
3329
3330 /* Free vars used by copy/const propagation.  */
3331
3332 static void
3333 free_cprop_mem ()
3334 {
3335   free (cprop_pavloc);
3336   free (cprop_absaltered);
3337   free (cprop_avin);
3338   free (cprop_avout);
3339 }
3340
3341 /* For each block, compute whether X is transparent.  X is either an
3342    expression or an assignment [though we don't care which, for this context
3343    an assignment is treated as an expression].  For each block where an
3344    element of X is modified, set (SET_P == 1) or reset (SET_P == 0) the INDX
3345    bit in BMAP.  */
3346
3347 static void
3348 compute_transp (x, indx, bmap, set_p)
3349      rtx x;
3350      int indx;
3351      sbitmap *bmap;
3352      int set_p;
3353 {
3354   int bb, i, j;
3355   enum rtx_code code;
3356   reg_set *r;
3357   const char *fmt;
3358
3359   /* repeat is used to turn tail-recursion into iteration since GCC
3360      can't do it when there's no return value.  */
3361  repeat:
3362
3363   if (x == 0)
3364     return;
3365
3366   code = GET_CODE (x);
3367   switch (code)
3368     {
3369     case REG:
3370       if (set_p)
3371         {
3372           if (REGNO (x) < FIRST_PSEUDO_REGISTER)
3373             {
3374               for (bb = 0; bb < n_basic_blocks; bb++)
3375                 if (TEST_BIT (reg_set_in_block[bb], REGNO (x)))
3376                   SET_BIT (bmap[bb], indx);
3377             }
3378           else
3379             {
3380               for (r = reg_set_table[REGNO (x)]; r != NULL; r = r->next)
3381                 SET_BIT (bmap[BLOCK_NUM (r->insn)], indx);
3382             }
3383         }
3384       else
3385         {
3386           if (REGNO (x) < FIRST_PSEUDO_REGISTER)
3387             {
3388               for (bb = 0; bb < n_basic_blocks; bb++)
3389                 if (TEST_BIT (reg_set_in_block[bb], REGNO (x)))
3390                   RESET_BIT (bmap[bb], indx);
3391             }
3392           else
3393             {
3394               for (r = reg_set_table[REGNO (x)]; r != NULL; r = r->next)
3395                 RESET_BIT (bmap[BLOCK_NUM (r->insn)], indx);
3396             }
3397         }
3398
3399       return;
3400
3401     case MEM:
3402       if (set_p)
3403         {
3404           for (bb = 0; bb < n_basic_blocks; bb++)
3405             if (mem_set_in_block[bb])
3406               SET_BIT (bmap[bb], indx);
3407         }
3408       else
3409         {
3410           for (bb = 0; bb < n_basic_blocks; bb++)
3411             if (mem_set_in_block[bb])
3412               RESET_BIT (bmap[bb], indx);
3413         }
3414
3415       x = XEXP (x, 0);
3416       goto repeat;
3417
3418     case PC:
3419     case CC0: /*FIXME*/
3420     case CONST:
3421     case CONST_INT:
3422     case CONST_DOUBLE:
3423     case SYMBOL_REF:
3424     case LABEL_REF:
3425     case ADDR_VEC:
3426     case ADDR_DIFF_VEC:
3427       return;
3428
3429     default:
3430       break;
3431     }
3432
3433   for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
3434     {
3435       if (fmt[i] == 'e')
3436         {
3437           /* If we are about to do the last recursive call
3438              needed at this level, change it into iteration.
3439              This function is called enough to be worth it.  */
3440           if (i == 0)
3441             {
3442               x = XEXP (x, i);
3443               goto repeat;
3444             }
3445
3446           compute_transp (XEXP (x, i), indx, bmap, set_p);
3447         }
3448       else if (fmt[i] == 'E')
3449         for (j = 0; j < XVECLEN (x, i); j++)
3450           compute_transp (XVECEXP (x, i, j), indx, bmap, set_p);
3451     }
3452 }
3453
3454 /* Top level routine to do the dataflow analysis needed by copy/const
3455    propagation.  */
3456
3457 static void
3458 compute_cprop_data ()
3459 {
3460   compute_local_properties (cprop_absaltered, cprop_pavloc, NULL, 1);
3461   compute_available (cprop_pavloc, cprop_absaltered,
3462                      cprop_avout, cprop_avin);
3463 }
3464 \f
3465 /* Copy/constant propagation.  */
3466
3467 /* Maximum number of register uses in an insn that we handle.  */
3468 #define MAX_USES 8
3469
3470 /* Table of uses found in an insn.
3471    Allocated statically to avoid alloc/free complexity and overhead.  */
3472 static struct reg_use reg_use_table[MAX_USES];
3473
3474 /* Index into `reg_use_table' while building it.  */
3475 static int reg_use_count;
3476
3477 /* Set up a list of register numbers used in INSN.  The found uses are stored
3478    in `reg_use_table'.  `reg_use_count' is initialized to zero before entry,
3479    and contains the number of uses in the table upon exit.
3480
3481    ??? If a register appears multiple times we will record it multiple times.
3482    This doesn't hurt anything but it will slow things down.  */
3483
3484 static void
3485 find_used_regs (x)
3486      rtx x;
3487 {
3488   int i, j;
3489   enum rtx_code code;
3490   const char *fmt;
3491
3492   /* repeat is used to turn tail-recursion into iteration since GCC
3493      can't do it when there's no return value.  */
3494  repeat:
3495
3496   if (x == 0)
3497     return;
3498
3499   code = GET_CODE (x);
3500   switch (code)
3501     {
3502     case REG:
3503       if (reg_use_count == MAX_USES)
3504         return;
3505
3506       reg_use_table[reg_use_count].reg_rtx = x;
3507       reg_use_count++;
3508       return;
3509
3510     case MEM:
3511       x = XEXP (x, 0);
3512       goto repeat;
3513
3514     case PC:
3515     case CC0:
3516     case CONST:
3517     case CONST_INT:
3518     case CONST_DOUBLE:
3519     case SYMBOL_REF:
3520     case LABEL_REF:
3521     case CLOBBER:
3522     case ADDR_VEC:
3523     case ADDR_DIFF_VEC:
3524     case ASM_INPUT: /*FIXME*/
3525       return;
3526
3527     case SET:
3528       if (GET_CODE (SET_DEST (x)) == MEM)
3529         find_used_regs (SET_DEST (x));
3530       x = SET_SRC (x);
3531       goto repeat;
3532
3533     default:
3534       break;
3535     }
3536
3537   /* Recursively scan the operands of this expression.  */
3538
3539   for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
3540     {
3541       if (fmt[i] == 'e')
3542         {
3543           /* If we are about to do the last recursive call
3544              needed at this level, change it into iteration.
3545              This function is called enough to be worth it.  */
3546           if (i == 0)
3547             {
3548               x = XEXP (x, 0);
3549               goto repeat;
3550             }
3551
3552           find_used_regs (XEXP (x, i));
3553         }
3554       else if (fmt[i] == 'E')
3555         for (j = 0; j < XVECLEN (x, i); j++)
3556           find_used_regs (XVECEXP (x, i, j));
3557     }
3558 }
3559
3560 /* Try to replace all non-SET_DEST occurrences of FROM in INSN with TO.
3561    Returns non-zero is successful.  */
3562
3563 static int
3564 try_replace_reg (from, to, insn)
3565      rtx from, to, insn;
3566 {
3567   rtx note;
3568   rtx src;
3569   int success;
3570   rtx set;
3571
3572   note = find_reg_note (insn, REG_EQUAL, NULL_RTX);
3573
3574   if (!note)
3575     note = find_reg_note (insn, REG_EQUIV, NULL_RTX);
3576
3577   /* If this fails we could try to simplify the result of the
3578      replacement and attempt to recognize the simplified insn.
3579
3580      But we need a general simplify_rtx that doesn't have pass
3581      specific state variables.  I'm not aware of one at the moment.  */
3582
3583   success = validate_replace_src (from, to, insn);
3584   set = single_set (insn);
3585
3586   /* We've failed to do replacement. Try to add REG_EQUAL note to not loose
3587      information.  */
3588   if (!success && !note)
3589     {
3590       if (!set)
3591         return 0;
3592
3593       note = REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_EQUAL,
3594                                                    copy_rtx (SET_SRC (set)),
3595                                                    REG_NOTES (insn));
3596     }
3597
3598   /* Always do the replacement in REQ_EQUAL and REG_EQUIV notes.  Also
3599      try to simplify them.  */
3600   if (note)
3601     {
3602       rtx simplified;
3603
3604       src = XEXP (note, 0);
3605       replace_rtx (src, from, to);
3606
3607       /* Try to simplify resulting note. */
3608       simplified = simplify_rtx (src);
3609       if (simplified)
3610         {
3611           src = simplified;
3612           XEXP (note, 0) = src;
3613         }
3614
3615       /* REG_EQUAL may get simplified into register.
3616          We don't allow that. Remove that note. This code ought
3617          not to hapen, because previous code ought to syntetize
3618          reg-reg move, but be on the safe side.  */
3619       else if (REG_P (src))
3620         remove_note (insn, note);
3621     }
3622   return success;
3623 }
3624
3625 /* Find a set of REGNOs that are available on entry to INSN's block.  Returns
3626    NULL no such set is found.  */
3627
3628 static struct expr *
3629 find_avail_set (regno, insn)
3630      int regno;
3631      rtx insn;
3632 {
3633   /* SET1 contains the last set found that can be returned to the caller for
3634      use in a substitution.  */
3635   struct expr *set1 = 0;
3636
3637   /* Loops are not possible here.  To get a loop we would need two sets
3638      available at the start of the block containing INSN.  ie we would
3639      need two sets like this available at the start of the block:
3640
3641        (set (reg X) (reg Y))
3642        (set (reg Y) (reg X))
3643
3644      This can not happen since the set of (reg Y) would have killed the
3645      set of (reg X) making it unavailable at the start of this block.  */
3646   while (1)
3647      {
3648       rtx src;
3649       struct expr *set = lookup_set (regno, NULL_RTX);
3650
3651       /* Find a set that is available at the start of the block
3652          which contains INSN.  */
3653       while (set)
3654         {
3655           if (TEST_BIT (cprop_avin[BLOCK_NUM (insn)], set->bitmap_index))
3656             break;
3657           set = next_set (regno, set);
3658         }
3659
3660       /* If no available set was found we've reached the end of the
3661          (possibly empty) copy chain.  */
3662       if (set == 0)
3663         break;
3664
3665       if (GET_CODE (set->expr) != SET)
3666         abort ();
3667
3668       src = SET_SRC (set->expr);
3669
3670       /* We know the set is available.
3671          Now check that SRC is ANTLOC (i.e. none of the source operands
3672          have changed since the start of the block).
3673
3674          If the source operand changed, we may still use it for the next
3675          iteration of this loop, but we may not use it for substitutions.  */
3676
3677       if (CONSTANT_P (src) || oprs_not_set_p (src, insn))
3678         set1 = set;
3679
3680       /* If the source of the set is anything except a register, then
3681          we have reached the end of the copy chain.  */
3682       if (GET_CODE (src) != REG)
3683         break;
3684
3685       /* Follow the copy chain, ie start another iteration of the loop
3686          and see if we have an available copy into SRC.  */
3687       regno = REGNO (src);
3688      }
3689
3690   /* SET1 holds the last set that was available and anticipatable at
3691      INSN.  */
3692   return set1;
3693 }
3694
3695 /* Subroutine of cprop_insn that tries to propagate constants into
3696    JUMP_INSNS.  INSN must be a conditional jump; COPY is a copy of it
3697    that we can use for substitutions.
3698    REG_USED is the use we will try to replace, SRC is the constant we
3699    will try to substitute for it.
3700    Returns nonzero if a change was made.  */
3701
3702 static int
3703 cprop_jump (insn, copy, reg_used, src)
3704      rtx insn, copy;
3705      struct reg_use *reg_used;
3706      rtx src;
3707 {
3708   rtx set = PATTERN (copy);
3709   rtx temp;
3710
3711   /* Replace the register with the appropriate constant.  */
3712   replace_rtx (SET_SRC (set), reg_used->reg_rtx, src);
3713
3714   temp = simplify_ternary_operation (GET_CODE (SET_SRC (set)),
3715                                      GET_MODE (SET_SRC (set)),
3716                                      GET_MODE (XEXP (SET_SRC (set), 0)),
3717                                      XEXP (SET_SRC (set), 0),
3718                                      XEXP (SET_SRC (set), 1),
3719                                      XEXP (SET_SRC (set), 2));
3720
3721   /* If no simplification can be made, then try the next
3722      register.  */
3723   if (temp == 0)
3724     return 0;
3725
3726   SET_SRC (set) = temp;
3727
3728   /* That may have changed the structure of TEMP, so
3729      force it to be rerecognized if it has not turned
3730      into a nop or unconditional jump.  */
3731
3732   INSN_CODE (copy) = -1;
3733   if ((SET_DEST (set) == pc_rtx
3734        && (SET_SRC (set) == pc_rtx
3735            || GET_CODE (SET_SRC (set)) == LABEL_REF))
3736       || recog (PATTERN (copy), copy, NULL) >= 0)
3737     {
3738       /* This has either become an unconditional jump
3739          or a nop-jump.  We'd like to delete nop jumps
3740          here, but doing so confuses gcse.  So we just
3741          make the replacement and let later passes
3742          sort things out.  */
3743       PATTERN (insn) = set;
3744       INSN_CODE (insn) = -1;
3745
3746       /* One less use of the label this insn used to jump to
3747          if we turned this into a NOP jump.  */
3748       if (SET_SRC (set) == pc_rtx && JUMP_LABEL (insn) != 0)
3749         --LABEL_NUSES (JUMP_LABEL (insn));
3750
3751       /* If this has turned into an unconditional jump,
3752          then put a barrier after it so that the unreachable
3753          code will be deleted.  */
3754       if (GET_CODE (SET_SRC (set)) == LABEL_REF)
3755         emit_barrier_after (insn);
3756
3757       run_jump_opt_after_gcse = 1;
3758
3759       const_prop_count++;
3760       if (gcse_file != NULL)
3761         {
3762           fprintf (gcse_file,
3763                    "CONST-PROP: Replacing reg %d in insn %d with constant ",
3764                    REGNO (reg_used->reg_rtx), INSN_UID (insn));
3765           print_rtl (gcse_file, src);
3766           fprintf (gcse_file, "\n");
3767         }
3768
3769       return 1;
3770     }
3771   return 0;
3772 }
3773
3774 #ifdef HAVE_cc0
3775
3776 /* Subroutine of cprop_insn that tries to propagate constants into JUMP_INSNS
3777    for machines that have CC0.  INSN is a single set that stores into CC0;
3778    the insn following it is a conditional jump.  REG_USED is the use we will
3779    try to replace, SRC is the constant we will try to substitute for it.
3780    Returns nonzero if a change was made.  */
3781
3782 static int
3783 cprop_cc0_jump (insn, reg_used, src)
3784      rtx insn;
3785      struct reg_use *reg_used;
3786      rtx src;
3787 {
3788   rtx jump = NEXT_INSN (insn);
3789   rtx copy = copy_rtx (jump);
3790   rtx set = PATTERN (copy);
3791
3792   /* We need to copy the source of the cc0 setter, as cprop_jump is going to
3793      substitute into it.  */
3794   replace_rtx (SET_SRC (set), cc0_rtx, copy_rtx (SET_SRC (PATTERN (insn))));
3795   if (! cprop_jump (jump, copy, reg_used, src))
3796     return 0;
3797
3798   /* If we succeeded, delete the cc0 setter.  */
3799   PUT_CODE (insn, NOTE);
3800   NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
3801   NOTE_SOURCE_FILE (insn) = 0;
3802   return 1;
3803  }
3804 #endif
3805
3806 /* Perform constant and copy propagation on INSN.
3807    The result is non-zero if a change was made.  */
3808
3809 static int
3810 cprop_insn (insn, alter_jumps)
3811      rtx insn;
3812      int alter_jumps;
3813 {
3814   struct reg_use *reg_used;
3815   int changed = 0;
3816   rtx note;
3817
3818   /* Only propagate into SETs.  Note that a conditional jump is a
3819      SET with pc_rtx as the destination.  */
3820   if ((GET_CODE (insn) != INSN
3821        && GET_CODE (insn) != JUMP_INSN)
3822       || GET_CODE (PATTERN (insn)) != SET)
3823     return 0;
3824
3825   reg_use_count = 0;
3826   find_used_regs (PATTERN (insn));
3827
3828   note = find_reg_note (insn, REG_EQUIV, NULL_RTX);
3829   if (!note)
3830     note = find_reg_note (insn, REG_EQUAL, NULL_RTX);
3831
3832   /* We may win even when propagating constants into notes. */
3833   if (note)
3834     find_used_regs (XEXP (note, 0));
3835
3836   for (reg_used = &reg_use_table[0]; reg_use_count > 0;
3837        reg_used++, reg_use_count--)
3838     {
3839       int regno = REGNO (reg_used->reg_rtx);
3840       rtx pat, src;
3841       struct expr *set;
3842
3843       /* Ignore registers created by GCSE.
3844          We do this because ... */
3845       if (regno >= max_gcse_regno)
3846         continue;
3847
3848       /* If the register has already been set in this block, there's
3849          nothing we can do.  */
3850       if (! oprs_not_set_p (reg_used->reg_rtx, insn))
3851         continue;
3852
3853       /* Find an assignment that sets reg_used and is available
3854          at the start of the block.  */
3855       set = find_avail_set (regno, insn);
3856       if (! set)
3857         continue;
3858
3859       pat = set->expr;
3860       /* ??? We might be able to handle PARALLELs.  Later.  */
3861       if (GET_CODE (pat) != SET)
3862         abort ();
3863
3864       src = SET_SRC (pat);
3865
3866       /* Constant propagation.  */
3867       if (GET_CODE (src) == CONST_INT || GET_CODE (src) == CONST_DOUBLE
3868           || GET_CODE (src) == SYMBOL_REF)
3869         {
3870           /* Handle normal insns first.  */
3871           if (GET_CODE (insn) == INSN
3872               && try_replace_reg (reg_used->reg_rtx, src, insn))
3873             {
3874               changed = 1;
3875               const_prop_count++;
3876               if (gcse_file != NULL)
3877                 {
3878                   fprintf (gcse_file, "CONST-PROP: Replacing reg %d in ",
3879                            regno);
3880                   fprintf (gcse_file, "insn %d with constant ",
3881                            INSN_UID (insn));
3882                   print_rtl (gcse_file, src);
3883                   fprintf (gcse_file, "\n");
3884                 }
3885
3886               /* The original insn setting reg_used may or may not now be
3887                  deletable.  We leave the deletion to flow.  */
3888             }
3889
3890           /* Try to propagate a CONST_INT into a conditional jump.
3891              We're pretty specific about what we will handle in this
3892              code, we can extend this as necessary over time.
3893
3894              Right now the insn in question must look like
3895              (set (pc) (if_then_else ...))  */
3896           else if (alter_jumps
3897                    && GET_CODE (insn) == JUMP_INSN
3898                    && condjump_p (insn)
3899                    && ! simplejump_p (insn))
3900             changed |= cprop_jump (insn, copy_rtx (insn), reg_used, src);
3901 #ifdef HAVE_cc0
3902           /* Similar code for machines that use a pair of CC0 setter and
3903              conditional jump insn.  */
3904           else if (alter_jumps
3905                    && GET_CODE (PATTERN (insn)) == SET
3906                    && SET_DEST (PATTERN (insn)) == cc0_rtx
3907                    && GET_CODE (NEXT_INSN (insn)) == JUMP_INSN
3908                    && condjump_p (NEXT_INSN (insn))
3909                    && ! simplejump_p (NEXT_INSN (insn)))
3910             changed |= cprop_cc0_jump (insn, reg_used, src);
3911 #endif
3912         }
3913       else if (GET_CODE (src) == REG
3914                && REGNO (src) >= FIRST_PSEUDO_REGISTER
3915                && REGNO (src) != regno)
3916         {
3917           if (try_replace_reg (reg_used->reg_rtx, src, insn))
3918             {
3919               changed = 1;
3920               copy_prop_count++;
3921               if (gcse_file != NULL)
3922                 {
3923                   fprintf (gcse_file, "COPY-PROP: Replacing reg %d in insn %d",
3924                            regno, INSN_UID (insn));
3925                   fprintf (gcse_file, " with reg %d\n", REGNO (src));
3926                 }
3927
3928               /* The original insn setting reg_used may or may not now be
3929                  deletable.  We leave the deletion to flow.  */
3930               /* FIXME: If it turns out that the insn isn't deletable,
3931                  then we may have unnecessarily extended register lifetimes
3932                  and made things worse.  */
3933             }
3934         }
3935     }
3936
3937   return changed;
3938 }
3939
3940 /* Forward propagate copies.  This includes copies and constants.  Return
3941    non-zero if a change was made.  */
3942
3943 static int
3944 cprop (alter_jumps)
3945      int alter_jumps;
3946 {
3947   int bb, changed;
3948   rtx insn;
3949
3950   /* Note we start at block 1.  */
3951
3952   changed = 0;
3953   for (bb = 1; bb < n_basic_blocks; bb++)
3954     {
3955       /* Reset tables used to keep track of what's still valid [since the
3956          start of the block].  */
3957       reset_opr_set_tables ();
3958
3959       for (insn = BLOCK_HEAD (bb);
3960            insn != NULL && insn != NEXT_INSN (BLOCK_END (bb));
3961            insn = NEXT_INSN (insn))
3962         {
3963           if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
3964             {
3965               changed |= cprop_insn (insn, alter_jumps);
3966
3967               /* Keep track of everything modified by this insn.  */
3968               /* ??? Need to be careful w.r.t. mods done to INSN.  Don't
3969                  call mark_oprs_set if we turned the insn into a NOTE.  */
3970               if (GET_CODE (insn) != NOTE)
3971                 mark_oprs_set (insn);
3972             }
3973         }
3974     }
3975
3976   if (gcse_file != NULL)
3977     fprintf (gcse_file, "\n");
3978
3979   return changed;
3980 }
3981
3982 /* Perform one copy/constant propagation pass.
3983    F is the first insn in the function.
3984    PASS is the pass count.  */
3985
3986 static int
3987 one_cprop_pass (pass, alter_jumps)
3988      int pass;
3989      int alter_jumps;
3990 {
3991   int changed = 0;
3992
3993   const_prop_count = 0;
3994   copy_prop_count = 0;
3995
3996   alloc_set_hash_table (max_cuid);
3997   compute_set_hash_table ();
3998   if (gcse_file)
3999     dump_hash_table (gcse_file, "SET", set_hash_table, set_hash_table_size,
4000                      n_sets);
4001   if (n_sets > 0)
4002     {
4003       alloc_cprop_mem (n_basic_blocks, n_sets);
4004       compute_cprop_data ();
4005       changed = cprop (alter_jumps);
4006       free_cprop_mem ();
4007     }
4008
4009   free_set_hash_table ();
4010
4011   if (gcse_file)
4012     {
4013       fprintf (gcse_file, "CPROP of %s, pass %d: %d bytes needed, ",
4014                current_function_name, pass, bytes_used);
4015       fprintf (gcse_file, "%d const props, %d copy props\n\n",
4016                const_prop_count, copy_prop_count);
4017     }
4018
4019   return changed;
4020 }
4021 \f
4022 /* Compute PRE+LCM working variables.  */
4023
4024 /* Local properties of expressions.  */
4025 /* Nonzero for expressions that are transparent in the block.  */
4026 static sbitmap *transp;
4027
4028 /* Nonzero for expressions that are transparent at the end of the block.
4029    This is only zero for expressions killed by abnormal critical edge
4030    created by a calls.  */
4031 static sbitmap *transpout;
4032
4033 /* Nonzero for expressions that are computed (available) in the block.  */
4034 static sbitmap *comp;
4035
4036 /* Nonzero for expressions that are locally anticipatable in the block.  */
4037 static sbitmap *antloc;
4038
4039 /* Nonzero for expressions where this block is an optimal computation
4040    point.  */
4041 static sbitmap *pre_optimal;
4042
4043 /* Nonzero for expressions which are redundant in a particular block.  */
4044 static sbitmap *pre_redundant;
4045
4046 /* Nonzero for expressions which should be inserted on a specific edge.  */
4047 static sbitmap *pre_insert_map;
4048
4049 /* Nonzero for expressions which should be deleted in a specific block.  */
4050 static sbitmap *pre_delete_map;
4051
4052 /* Contains the edge_list returned by pre_edge_lcm.  */
4053 static struct edge_list *edge_list;
4054
4055 static sbitmap *temp_bitmap;
4056
4057 /* Redundant insns.  */
4058 static sbitmap pre_redundant_insns;
4059
4060 /* Allocate vars used for PRE analysis.  */
4061
4062 static void
4063 alloc_pre_mem (n_blocks, n_exprs)
4064      int n_blocks, n_exprs;
4065 {
4066   transp = sbitmap_vector_alloc (n_blocks, n_exprs);
4067   comp = sbitmap_vector_alloc (n_blocks, n_exprs);
4068   antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
4069   temp_bitmap = sbitmap_vector_alloc (n_blocks, n_exprs);
4070
4071   pre_optimal = NULL;
4072   pre_redundant = NULL;
4073   pre_insert_map = NULL;
4074   pre_delete_map = NULL;
4075   ae_in = NULL;
4076   ae_out = NULL;
4077   u_bitmap = NULL;
4078   transpout = sbitmap_vector_alloc (n_blocks, n_exprs);
4079   ae_kill = sbitmap_vector_alloc (n_blocks, n_exprs);
4080
4081   /* pre_insert and pre_delete are allocated later.  */
4082 }
4083
4084 /* Free vars used for PRE analysis.  */
4085
4086 static void
4087 free_pre_mem ()
4088 {
4089   free (transp);
4090   free (comp);
4091   free (antloc);
4092   free (temp_bitmap);
4093
4094   if (pre_optimal)
4095     free (pre_optimal);
4096   if (pre_redundant)
4097     free (pre_redundant);
4098   if (pre_insert_map)
4099     free (pre_insert_map);
4100   if (pre_delete_map)
4101     free (pre_delete_map);
4102   if (transpout)
4103     free (transpout);
4104
4105   if (ae_in)
4106     free (ae_in);
4107   if (ae_out)
4108     free (ae_out);
4109   if (ae_kill)
4110     free (ae_kill);
4111   if (u_bitmap)
4112     free (u_bitmap);
4113
4114   transp = comp = antloc = NULL;
4115   pre_optimal = pre_redundant = pre_insert_map = pre_delete_map = NULL;
4116   transpout = ae_in = ae_out = ae_kill = NULL;
4117   u_bitmap = NULL;
4118
4119 }
4120
4121 /* Top level routine to do the dataflow analysis needed by PRE.  */
4122
4123 static void
4124 compute_pre_data ()
4125 {
4126   compute_local_properties (transp, comp, antloc, 0);
4127   compute_transpout ();
4128   sbitmap_vector_zero (ae_kill, n_basic_blocks);
4129   compute_ae_kill (comp, ae_kill);
4130   edge_list = pre_edge_lcm (gcse_file, n_exprs, transp, comp, antloc,
4131                             ae_kill, &pre_insert_map, &pre_delete_map);
4132 }
4133 \f
4134 /* PRE utilities */
4135
4136 /* Return non-zero if an occurrence of expression EXPR in OCCR_BB would reach
4137    block BB.
4138
4139    VISITED is a pointer to a working buffer for tracking which BB's have
4140    been visited.  It is NULL for the top-level call.
4141
4142    We treat reaching expressions that go through blocks containing the same
4143    reaching expression as "not reaching".  E.g. if EXPR is generated in blocks
4144    2 and 3, INSN is in block 4, and 2->3->4, we treat the expression in block
4145    2 as not reaching.  The intent is to improve the probability of finding
4146    only one reaching expression and to reduce register lifetimes by picking
4147    the closest such expression.  */
4148
4149 static int
4150 pre_expr_reaches_here_p_work (occr_bb, expr, bb, visited)
4151      int occr_bb;
4152      struct expr *expr;
4153      int bb;
4154      char *visited;
4155 {
4156   edge pred;
4157
4158   for (pred = BASIC_BLOCK (bb)->pred; pred != NULL; pred = pred->pred_next)
4159     {
4160       int pred_bb = pred->src->index;
4161
4162       if (pred->src == ENTRY_BLOCK_PTR
4163           /* Has predecessor has already been visited?  */
4164           || visited[pred_bb])
4165         ;/* Nothing to do.  */
4166
4167       /* Does this predecessor generate this expression?  */
4168       else if (TEST_BIT (comp[pred_bb], expr->bitmap_index))
4169         {
4170           /* Is this the occurrence we're looking for?
4171              Note that there's only one generating occurrence per block
4172              so we just need to check the block number.  */
4173           if (occr_bb == pred_bb)
4174             return 1;
4175
4176           visited[pred_bb] = 1;
4177         }
4178       /* Ignore this predecessor if it kills the expression.  */
4179       else if (! TEST_BIT (transp[pred_bb], expr->bitmap_index))
4180         visited[pred_bb] = 1;
4181
4182       /* Neither gen nor kill.  */
4183       else
4184         {
4185           visited[pred_bb] = 1;
4186           if (pre_expr_reaches_here_p_work (occr_bb, expr, pred_bb, visited))
4187             return 1;
4188         }
4189     }
4190
4191   /* All paths have been checked.  */
4192   return 0;
4193 }
4194
4195 /* The wrapper for pre_expr_reaches_here_work that ensures that any
4196    memory allocated for that function is returned. */
4197
4198 static int
4199 pre_expr_reaches_here_p (occr_bb, expr, bb)
4200      int occr_bb;
4201      struct expr *expr;
4202      int bb;
4203 {
4204   int rval;
4205   char *visited = (char *) xcalloc (n_basic_blocks, 1);
4206
4207   rval = pre_expr_reaches_here_p_work(occr_bb, expr, bb, visited);
4208
4209   free (visited);
4210   return rval;
4211 }
4212 \f
4213
4214 /* Given an expr, generate RTL which we can insert at the end of a BB,
4215    or on an edge.  Set the block number of any insns generated to
4216    the value of BB.  */
4217
4218 static rtx
4219 process_insert_insn (expr)
4220      struct expr *expr;
4221 {
4222   rtx reg = expr->reaching_reg;
4223   rtx pat, copied_expr;
4224   rtx first_new_insn;
4225
4226   start_sequence ();
4227   copied_expr = copy_rtx (expr->expr);
4228   emit_move_insn (reg, copied_expr);
4229   first_new_insn = get_insns ();
4230   pat = gen_sequence ();
4231   end_sequence ();
4232
4233   return pat;
4234 }
4235
4236 /* Add EXPR to the end of basic block BB.
4237
4238    This is used by both the PRE and code hoisting.
4239
4240    For PRE, we want to verify that the expr is either transparent
4241    or locally anticipatable in the target block.  This check makes
4242    no sense for code hoisting.  */
4243
4244 static void
4245 insert_insn_end_bb (expr, bb, pre)
4246      struct expr *expr;
4247      int bb;
4248      int pre;
4249 {
4250   rtx insn = BLOCK_END (bb);
4251   rtx new_insn;
4252   rtx reg = expr->reaching_reg;
4253   int regno = REGNO (reg);
4254   rtx pat;
4255   int i;
4256
4257   pat = process_insert_insn (expr);
4258
4259   /* If the last insn is a jump, insert EXPR in front [taking care to
4260      handle cc0, etc. properly].  */
4261
4262   if (GET_CODE (insn) == JUMP_INSN)
4263     {
4264 #ifdef HAVE_cc0
4265       rtx note;
4266 #endif
4267
4268       /* If this is a jump table, then we can't insert stuff here.  Since
4269          we know the previous real insn must be the tablejump, we insert
4270          the new instruction just before the tablejump.  */
4271       if (GET_CODE (PATTERN (insn)) == ADDR_VEC
4272           || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
4273         insn = prev_real_insn (insn);
4274
4275 #ifdef HAVE_cc0
4276       /* FIXME: 'twould be nice to call prev_cc0_setter here but it aborts
4277          if cc0 isn't set.  */
4278       note = find_reg_note (insn, REG_CC_SETTER, NULL_RTX);
4279       if (note)
4280         insn = XEXP (note, 0);
4281       else
4282         {
4283           rtx maybe_cc0_setter = prev_nonnote_insn (insn);
4284           if (maybe_cc0_setter
4285               && GET_RTX_CLASS (GET_CODE (maybe_cc0_setter)) == 'i'
4286               && sets_cc0_p (PATTERN (maybe_cc0_setter)))
4287             insn = maybe_cc0_setter;
4288         }
4289 #endif
4290       /* FIXME: What if something in cc0/jump uses value set in new insn?  */
4291       new_insn = emit_block_insn_before (pat, insn, BASIC_BLOCK (bb));
4292     }
4293
4294   /* Likewise if the last insn is a call, as will happen in the presence
4295      of exception handling.  */
4296   else if (GET_CODE (insn) == CALL_INSN)
4297     {
4298       HARD_REG_SET parm_regs;
4299       int nparm_regs;
4300       rtx p;
4301
4302       /* Keeping in mind SMALL_REGISTER_CLASSES and parameters in registers,
4303          we search backward and place the instructions before the first
4304          parameter is loaded.  Do this for everyone for consistency and a
4305          presumtion that we'll get better code elsewhere as well.
4306
4307          It should always be the case that we can put these instructions
4308          anywhere in the basic block with performing PRE optimizations.
4309          Check this.  */
4310
4311       if (pre
4312           && !TEST_BIT (antloc[bb], expr->bitmap_index)
4313           && !TEST_BIT (transp[bb], expr->bitmap_index))
4314         abort ();
4315
4316       /* Since different machines initialize their parameter registers
4317          in different orders, assume nothing.  Collect the set of all
4318          parameter registers.  */
4319       CLEAR_HARD_REG_SET (parm_regs);
4320       nparm_regs = 0;
4321       for (p = CALL_INSN_FUNCTION_USAGE (insn); p ; p = XEXP (p, 1))
4322         if (GET_CODE (XEXP (p, 0)) == USE
4323             && GET_CODE (XEXP (XEXP (p, 0), 0)) == REG)
4324           {
4325             if (REGNO (XEXP (XEXP (p, 0), 0)) >= FIRST_PSEUDO_REGISTER)
4326               abort ();
4327
4328             SET_HARD_REG_BIT (parm_regs, REGNO (XEXP (XEXP (p, 0), 0)));
4329             nparm_regs++;
4330           }
4331
4332       /* Search backward for the first set of a register in this set.  */
4333       while (nparm_regs && BLOCK_HEAD (bb) != insn)
4334         {
4335           insn = PREV_INSN (insn);
4336           p = single_set (insn);
4337           if (p && GET_CODE (SET_DEST (p)) == REG
4338               && REGNO (SET_DEST (p)) < FIRST_PSEUDO_REGISTER
4339               && TEST_HARD_REG_BIT (parm_regs, REGNO (SET_DEST (p))))
4340             {
4341               CLEAR_HARD_REG_BIT (parm_regs, REGNO (SET_DEST (p)));
4342               nparm_regs--;
4343             }
4344         }
4345
4346       /* If we found all the parameter loads, then we want to insert
4347          before the first parameter load.
4348
4349          If we did not find all the parameter loads, then we might have
4350          stopped on the head of the block, which could be a CODE_LABEL.
4351          If we inserted before the CODE_LABEL, then we would be putting
4352          the insn in the wrong basic block.  In that case, put the insn
4353          after the CODE_LABEL.  Also, respect NOTE_INSN_BASIC_BLOCK.  */
4354       if (GET_CODE (insn) == CODE_LABEL)
4355         insn = NEXT_INSN (insn);
4356       else if (GET_CODE (insn) == NOTE
4357                && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
4358         insn = NEXT_INSN (insn);
4359
4360       new_insn = emit_block_insn_before (pat, insn, BASIC_BLOCK (bb));
4361     }
4362   else
4363     {
4364       new_insn = emit_insn_after (pat, insn);
4365       BLOCK_END (bb) = new_insn;
4366     }
4367
4368   /* Keep block number table up to date.
4369      Note, PAT could be a multiple insn sequence, we have to make
4370      sure that each insn in the sequence is handled.  */
4371   if (GET_CODE (pat) == SEQUENCE)
4372     {
4373       for (i = 0; i < XVECLEN (pat, 0); i++)
4374         {
4375           rtx insn = XVECEXP (pat, 0, i);
4376
4377           set_block_num (insn, bb);
4378           if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
4379             add_label_notes (PATTERN (insn), new_insn);
4380
4381           note_stores (PATTERN (insn), record_set_info, insn);
4382         }
4383     }
4384   else
4385     {
4386       add_label_notes (SET_SRC (pat), new_insn);
4387       set_block_num (new_insn, bb);
4388
4389       /* Keep register set table up to date.  */
4390       record_one_set (regno, new_insn);
4391     }
4392
4393   gcse_create_count++;
4394
4395   if (gcse_file)
4396     {
4397       fprintf (gcse_file, "PRE/HOIST: end of bb %d, insn %d, ",
4398                bb, INSN_UID (new_insn));
4399       fprintf (gcse_file, "copying expression %d to reg %d\n",
4400                expr->bitmap_index, regno);
4401     }
4402 }
4403
4404 /* Insert partially redundant expressions on edges in the CFG to make
4405    the expressions fully redundant.  */
4406
4407 static int
4408 pre_edge_insert (edge_list, index_map)
4409      struct edge_list *edge_list;
4410      struct expr **index_map;
4411 {
4412   int e, i, j, num_edges, set_size, did_insert = 0;
4413   sbitmap *inserted;
4414
4415   /* Where PRE_INSERT_MAP is nonzero, we add the expression on that edge
4416      if it reaches any of the deleted expressions.  */
4417
4418   set_size = pre_insert_map[0]->size;
4419   num_edges = NUM_EDGES (edge_list);
4420   inserted = sbitmap_vector_alloc (num_edges, n_exprs);
4421   sbitmap_vector_zero (inserted, num_edges);
4422
4423   for (e = 0; e < num_edges; e++)
4424     {
4425       int indx;
4426       basic_block pred = INDEX_EDGE_PRED_BB (edge_list, e);
4427       int bb = pred->index;
4428
4429       for (i = indx = 0; i < set_size; i++, indx += SBITMAP_ELT_BITS)
4430         {
4431           SBITMAP_ELT_TYPE insert = pre_insert_map[e]->elms[i];
4432
4433           for (j = indx; insert && j < n_exprs; j++, insert >>= 1)
4434             if ((insert & 1) != 0 && index_map[j]->reaching_reg != NULL_RTX)
4435               {
4436                 struct expr *expr = index_map[j];
4437                 struct occr *occr;
4438
4439                 /* Now look at each deleted occurence of this expression.  */
4440                 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
4441                   {
4442                     if (! occr->deleted_p)
4443                       continue;
4444
4445                     /* Insert this expression on this edge if if it would
4446                        reach the deleted occurence in BB.  */
4447                     if (!TEST_BIT (inserted[e], j))
4448                       {
4449                         rtx insn;
4450                         edge eg = INDEX_EDGE (edge_list, e);
4451
4452                         /* We can't insert anything on an abnormal and
4453                            critical edge, so we insert the insn at the end of
4454                            the previous block. There are several alternatives
4455                            detailed in Morgans book P277 (sec 10.5) for
4456                            handling this situation.  This one is easiest for
4457                            now.  */
4458
4459                         if ((eg->flags & EDGE_ABNORMAL) == EDGE_ABNORMAL)
4460                           insert_insn_end_bb (index_map[j], bb, 0);
4461                         else
4462                           {
4463                             insn = process_insert_insn (index_map[j]);
4464                             insert_insn_on_edge (insn, eg);
4465                           }
4466
4467                         if (gcse_file)
4468                           {
4469                             fprintf (gcse_file, "PRE/HOIST: edge (%d,%d), ",
4470                                      bb,
4471                                      INDEX_EDGE_SUCC_BB (edge_list, e)->index);
4472                             fprintf (gcse_file, "copy expression %d\n",
4473                                      expr->bitmap_index);
4474                           }
4475
4476                         SET_BIT (inserted[e], j);
4477                         did_insert = 1;
4478                         gcse_create_count++;
4479                       }
4480                   }
4481               }
4482         }
4483     }
4484
4485   free (inserted);
4486   return did_insert;
4487 }
4488
4489 /* Copy the result of INSN to REG.  INDX is the expression number.  */
4490
4491 static void
4492 pre_insert_copy_insn (expr, insn)
4493      struct expr *expr;
4494      rtx insn;
4495 {
4496   rtx reg = expr->reaching_reg;
4497   int regno = REGNO (reg);
4498   int indx = expr->bitmap_index;
4499   rtx set = single_set (insn);
4500   rtx new_insn;
4501   int bb = BLOCK_NUM (insn);
4502
4503   if (!set)
4504     abort ();
4505
4506   new_insn = emit_insn_after (gen_rtx_SET (VOIDmode, reg, SET_DEST (set)),
4507                               insn);
4508
4509   /* Keep block number table up to date.  */
4510   set_block_num (new_insn, bb);
4511
4512   /* Keep register set table up to date.  */
4513   record_one_set (regno, new_insn);
4514   if (insn == BLOCK_END (bb))
4515     BLOCK_END (bb) = new_insn;
4516
4517   gcse_create_count++;
4518
4519   if (gcse_file)
4520     fprintf (gcse_file,
4521              "PRE: bb %d, insn %d, copy expression %d in insn %d to reg %d\n",
4522               BLOCK_NUM (insn), INSN_UID (new_insn), indx,
4523               INSN_UID (insn), regno);
4524 }
4525
4526 /* Copy available expressions that reach the redundant expression
4527    to `reaching_reg'.  */
4528
4529 static void
4530 pre_insert_copies ()
4531 {
4532   int i;
4533   struct expr *expr;
4534   struct occr *occr;
4535   struct occr *avail;
4536
4537   /* For each available expression in the table, copy the result to
4538      `reaching_reg' if the expression reaches a deleted one.
4539
4540      ??? The current algorithm is rather brute force.
4541      Need to do some profiling.  */
4542
4543   for (i = 0; i < expr_hash_table_size; i++)
4544     for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
4545       {
4546         /* If the basic block isn't reachable, PPOUT will be TRUE.  However,
4547            we don't want to insert a copy here because the expression may not
4548            really be redundant.  So only insert an insn if the expression was
4549            deleted.  This test also avoids further processing if the
4550            expression wasn't deleted anywhere.  */
4551         if (expr->reaching_reg == NULL)
4552           continue;
4553
4554         for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
4555           {
4556             if (! occr->deleted_p)
4557               continue;
4558
4559             for (avail = expr->avail_occr; avail != NULL; avail = avail->next)
4560               {
4561                 rtx insn = avail->insn;
4562
4563                 /* No need to handle this one if handled already.  */
4564                 if (avail->copied_p)
4565                   continue;
4566
4567                 /* Don't handle this one if it's a redundant one.  */
4568                 if (TEST_BIT (pre_redundant_insns, INSN_CUID (insn)))
4569                   continue;
4570
4571                 /* Or if the expression doesn't reach the deleted one.  */
4572                 if (! pre_expr_reaches_here_p (BLOCK_NUM (avail->insn), expr,
4573                                                BLOCK_NUM (occr->insn)))
4574                   continue;
4575
4576                 /* Copy the result of avail to reaching_reg.  */
4577                 pre_insert_copy_insn (expr, insn);
4578                 avail->copied_p = 1;
4579               }
4580           }
4581       }
4582 }
4583
4584 /* Delete redundant computations.
4585    Deletion is done by changing the insn to copy the `reaching_reg' of
4586    the expression into the result of the SET.  It is left to later passes
4587    (cprop, cse2, flow, combine, regmove) to propagate the copy or eliminate it.
4588
4589    Returns non-zero if a change is made.  */
4590
4591 static int
4592 pre_delete ()
4593 {
4594   int i, bb, changed;
4595   struct expr *expr;
4596   struct occr *occr;
4597
4598   /* Compute the expressions which are redundant and need to be replaced by
4599      copies from the reaching reg to the target reg.  */
4600   for (bb = 0; bb < n_basic_blocks; bb++)
4601     sbitmap_copy (temp_bitmap[bb], pre_delete_map[bb]);
4602
4603   changed = 0;
4604   for (i = 0; i < expr_hash_table_size; i++)
4605     for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
4606       {
4607         int indx = expr->bitmap_index;
4608
4609         /* We only need to search antic_occr since we require
4610            ANTLOC != 0.  */
4611
4612         for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
4613           {
4614             rtx insn = occr->insn;
4615             rtx set;
4616             int bb = BLOCK_NUM (insn);
4617
4618             if (TEST_BIT (temp_bitmap[bb], indx))
4619               {
4620                 set = single_set (insn);
4621                 if (! set)
4622                   abort ();
4623
4624                 /* Create a pseudo-reg to store the result of reaching
4625                    expressions into.  Get the mode for the new pseudo from
4626                    the mode of the original destination pseudo.  */
4627                 if (expr->reaching_reg == NULL)
4628                   expr->reaching_reg
4629                     = gen_reg_rtx (GET_MODE (SET_DEST (set)));
4630
4631                 /* In theory this should never fail since we're creating
4632                    a reg->reg copy.
4633
4634                    However, on the x86 some of the movXX patterns actually
4635                    contain clobbers of scratch regs.  This may cause the
4636                    insn created by validate_change to not match any pattern
4637                    and thus cause validate_change to fail.   */
4638                 if (validate_change (insn, &SET_SRC (set),
4639                                      expr->reaching_reg, 0))
4640                   {
4641                     occr->deleted_p = 1;
4642                     SET_BIT (pre_redundant_insns, INSN_CUID (insn));
4643                     changed = 1;
4644                     gcse_subst_count++;
4645                   }
4646
4647                 if (gcse_file)
4648                   {
4649                     fprintf (gcse_file,
4650                              "PRE: redundant insn %d (expression %d) in ",
4651                                INSN_UID (insn), indx);
4652                     fprintf (gcse_file, "bb %d, reaching reg is %d\n",
4653                              bb, REGNO (expr->reaching_reg));
4654                   }
4655               }
4656           }
4657       }
4658
4659   return changed;
4660 }
4661
4662 /* Perform GCSE optimizations using PRE.
4663    This is called by one_pre_gcse_pass after all the dataflow analysis
4664    has been done.
4665
4666    This is based on the original Morel-Renvoise paper Fred Chow's thesis, and
4667    lazy code motion from Knoop, Ruthing and Steffen as described in Advanced
4668    Compiler Design and Implementation.
4669
4670    ??? A new pseudo reg is created to hold the reaching expression.  The nice
4671    thing about the classical approach is that it would try to use an existing
4672    reg.  If the register can't be adequately optimized [i.e. we introduce
4673    reload problems], one could add a pass here to propagate the new register
4674    through the block.
4675
4676    ??? We don't handle single sets in PARALLELs because we're [currently] not
4677    able to copy the rest of the parallel when we insert copies to create full
4678    redundancies from partial redundancies.  However, there's no reason why we
4679    can't handle PARALLELs in the cases where there are no partial
4680    redundancies.  */
4681
4682 static int
4683 pre_gcse ()
4684 {
4685   int i, did_insert;
4686   int changed;
4687   struct expr **index_map;
4688   struct expr *expr;
4689
4690   /* Compute a mapping from expression number (`bitmap_index') to
4691      hash table entry.  */
4692
4693   index_map = (struct expr **) xcalloc (n_exprs, sizeof (struct expr *));
4694   for (i = 0; i < expr_hash_table_size; i++)
4695     for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
4696       index_map[expr->bitmap_index] = expr;
4697
4698   /* Reset bitmap used to track which insns are redundant.  */
4699   pre_redundant_insns = sbitmap_alloc (max_cuid);
4700   sbitmap_zero (pre_redundant_insns);
4701
4702   /* Delete the redundant insns first so that
4703      - we know what register to use for the new insns and for the other
4704        ones with reaching expressions
4705      - we know which insns are redundant when we go to create copies  */
4706
4707   changed = pre_delete ();
4708
4709   did_insert = pre_edge_insert (edge_list, index_map);
4710
4711   /* In other places with reaching expressions, copy the expression to the
4712      specially allocated pseudo-reg that reaches the redundant expr.  */
4713   pre_insert_copies ();
4714   if (did_insert)
4715     {
4716       commit_edge_insertions ();
4717       changed = 1;
4718     }
4719
4720   free (index_map);
4721   free (pre_redundant_insns);
4722   return changed;
4723 }
4724
4725 /* Top level routine to perform one PRE GCSE pass.
4726
4727    Return non-zero if a change was made.  */
4728
4729 static int
4730 one_pre_gcse_pass (pass)
4731      int pass;
4732 {
4733   int changed = 0;
4734
4735   gcse_subst_count = 0;
4736   gcse_create_count = 0;
4737
4738   alloc_expr_hash_table (max_cuid);
4739   add_noreturn_fake_exit_edges ();
4740   compute_expr_hash_table ();
4741   if (gcse_file)
4742     dump_hash_table (gcse_file, "Expression", expr_hash_table,
4743                      expr_hash_table_size, n_exprs);
4744
4745   if (n_exprs > 0)
4746     {
4747       alloc_pre_mem (n_basic_blocks, n_exprs);
4748       compute_pre_data ();
4749       changed |= pre_gcse ();
4750       free_edge_list (edge_list);
4751       free_pre_mem ();
4752     }
4753
4754   remove_fake_edges ();
4755   free_expr_hash_table ();
4756
4757   if (gcse_file)
4758     {
4759       fprintf (gcse_file, "\nPRE GCSE of %s, pass %d: %d bytes needed, ",
4760                current_function_name, pass, bytes_used);
4761       fprintf (gcse_file, "%d substs, %d insns created\n",
4762                gcse_subst_count, gcse_create_count);
4763     }
4764
4765   return changed;
4766 }
4767 \f
4768 /* If X contains any LABEL_REF's, add REG_LABEL notes for them to INSN.
4769    We have to add REG_LABEL notes, because the following loop optimization
4770    pass requires them.  */
4771
4772 /* ??? This is very similar to the loop.c add_label_notes function.  We
4773    could probably share code here.  */
4774
4775 /* ??? If there was a jump optimization pass after gcse and before loop,
4776    then we would not need to do this here, because jump would add the
4777    necessary REG_LABEL notes.  */
4778
4779 static void
4780 add_label_notes (x, insn)
4781      rtx x;
4782      rtx insn;
4783 {
4784   enum rtx_code code = GET_CODE (x);
4785   int i, j;
4786   const char *fmt;
4787
4788   if (code == LABEL_REF && !LABEL_REF_NONLOCAL_P (x))
4789     {
4790       /* This code used to ignore labels that referred to dispatch tables to
4791          avoid flow generating (slighly) worse code.
4792
4793          We no longer ignore such label references (see LABEL_REF handling in
4794          mark_jump_label for additional information).  */
4795
4796       REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_LABEL, XEXP (x, 0),
4797                                             REG_NOTES (insn));
4798       return;
4799     }
4800
4801   for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
4802     {
4803       if (fmt[i] == 'e')
4804         add_label_notes (XEXP (x, i), insn);
4805       else if (fmt[i] == 'E')
4806         for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4807           add_label_notes (XVECEXP (x, i, j), insn);
4808     }
4809 }
4810
4811 /* Compute transparent outgoing information for each block.
4812
4813    An expression is transparent to an edge unless it is killed by
4814    the edge itself.  This can only happen with abnormal control flow,
4815    when the edge is traversed through a call.  This happens with
4816    non-local labels and exceptions.
4817
4818    This would not be necessary if we split the edge.  While this is
4819    normally impossible for abnormal critical edges, with some effort
4820    it should be possible with exception handling, since we still have
4821    control over which handler should be invoked.  But due to increased
4822    EH table sizes, this may not be worthwhile.  */
4823
4824 static void
4825 compute_transpout ()
4826 {
4827   int bb;
4828   int i;
4829   struct expr *expr;
4830
4831   sbitmap_vector_ones (transpout, n_basic_blocks);
4832
4833   for (bb = 0; bb < n_basic_blocks; ++bb)
4834     {
4835       /* Note that flow inserted a nop a the end of basic blocks that
4836          end in call instructions for reasons other than abnormal
4837          control flow.  */
4838       if (GET_CODE (BLOCK_END (bb)) != CALL_INSN)
4839         continue;
4840
4841       for (i = 0; i < expr_hash_table_size; i++)
4842         for (expr = expr_hash_table[i]; expr ; expr = expr->next_same_hash)
4843           if (GET_CODE (expr->expr) == MEM)
4844             {
4845               if (GET_CODE (XEXP (expr->expr, 0)) == SYMBOL_REF
4846                   && CONSTANT_POOL_ADDRESS_P (XEXP (expr->expr, 0)))
4847                 continue;
4848
4849               /* ??? Optimally, we would use interprocedural alias
4850                  analysis to determine if this mem is actually killed
4851                  by this call.  */
4852               RESET_BIT (transpout[bb], expr->bitmap_index);
4853             }
4854     }
4855 }
4856
4857 /* Removal of useless null pointer checks */
4858
4859 /* Called via note_stores.  X is set by SETTER.  If X is a register we must
4860    invalidate nonnull_local and set nonnull_killed.  DATA is really a
4861    `null_pointer_info *'.
4862
4863    We ignore hard registers.  */
4864
4865 static void
4866 invalidate_nonnull_info (x, setter, data)
4867      rtx x;
4868      rtx setter ATTRIBUTE_UNUSED;
4869      void *data;
4870 {
4871   int offset, regno;
4872   struct null_pointer_info* npi = (struct null_pointer_info *) data;
4873
4874   offset = 0;
4875
4876   while (GET_CODE (x) == SUBREG)
4877     x = SUBREG_REG (x);
4878
4879   /* Ignore anything that is not a register or is a hard register.  */
4880   if (GET_CODE (x) != REG
4881       || REGNO (x) < npi->min_reg
4882       || REGNO (x) >= npi->max_reg)
4883     return;
4884
4885   regno = REGNO (x) - npi->min_reg;
4886
4887   RESET_BIT (npi->nonnull_local[npi->current_block], regno);
4888   SET_BIT (npi->nonnull_killed[npi->current_block], regno);
4889 }
4890
4891 /* Do null-pointer check elimination for the registers indicated in
4892    NPI.  NONNULL_AVIN and NONNULL_AVOUT are pre-allocated sbitmaps;
4893    they are not our responsibility to free.  */
4894
4895 static void
4896 delete_null_pointer_checks_1 (block_reg, nonnull_avin, nonnull_avout, npi)
4897      int *block_reg;
4898      sbitmap *nonnull_avin;
4899      sbitmap *nonnull_avout;
4900      struct null_pointer_info *npi;
4901 {
4902   int bb;
4903   int current_block;
4904   sbitmap *nonnull_local = npi->nonnull_local;
4905   sbitmap *nonnull_killed = npi->nonnull_killed;
4906
4907   /* Compute local properties, nonnull and killed.  A register will have
4908      the nonnull property if at the end of the current block its value is
4909      known to be nonnull.  The killed property indicates that somewhere in
4910      the block any information we had about the register is killed.
4911
4912      Note that a register can have both properties in a single block.  That
4913      indicates that it's killed, then later in the block a new value is
4914      computed.  */
4915   sbitmap_vector_zero (nonnull_local, n_basic_blocks);
4916   sbitmap_vector_zero (nonnull_killed, n_basic_blocks);
4917
4918   for (current_block = 0; current_block < n_basic_blocks; current_block++)
4919     {
4920       rtx insn, stop_insn;
4921
4922       /* Set the current block for invalidate_nonnull_info.  */
4923       npi->current_block = current_block;
4924
4925       /* Scan each insn in the basic block looking for memory references and
4926          register sets.  */
4927       stop_insn = NEXT_INSN (BLOCK_END (current_block));
4928       for (insn = BLOCK_HEAD (current_block);
4929            insn != stop_insn;
4930            insn = NEXT_INSN (insn))
4931         {
4932           rtx set;
4933           rtx reg;
4934
4935           /* Ignore anything that is not a normal insn.  */
4936           if (GET_RTX_CLASS (GET_CODE (insn)) != 'i')
4937             continue;
4938
4939           /* Basically ignore anything that is not a simple SET.  We do have
4940              to make sure to invalidate nonnull_local and set nonnull_killed
4941              for such insns though.  */
4942           set = single_set (insn);
4943           if (!set)
4944             {
4945               note_stores (PATTERN (insn), invalidate_nonnull_info, npi);
4946               continue;
4947             }
4948
4949           /* See if we've got a useable memory load.  We handle it first
4950              in case it uses its address register as a dest (which kills
4951              the nonnull property).  */
4952           if (GET_CODE (SET_SRC (set)) == MEM
4953               && GET_CODE ((reg = XEXP (SET_SRC (set), 0))) == REG
4954               && REGNO (reg) >= npi->min_reg
4955               && REGNO (reg) < npi->max_reg)
4956             SET_BIT (nonnull_local[current_block],
4957                      REGNO (reg) - npi->min_reg);
4958
4959           /* Now invalidate stuff clobbered by this insn.  */
4960           note_stores (PATTERN (insn), invalidate_nonnull_info, npi);
4961
4962           /* And handle stores, we do these last since any sets in INSN can
4963              not kill the nonnull property if it is derived from a MEM
4964              appearing in a SET_DEST.  */
4965           if (GET_CODE (SET_DEST (set)) == MEM
4966               && GET_CODE ((reg = XEXP (SET_DEST (set), 0))) == REG
4967               && REGNO (reg) >= npi->min_reg
4968               && REGNO (reg) < npi->max_reg)
4969             SET_BIT (nonnull_local[current_block],
4970                      REGNO (reg) - npi->min_reg);
4971         }
4972     }
4973
4974   /* Now compute global properties based on the local properties.   This
4975      is a classic global availablity algorithm.  */
4976   compute_available (nonnull_local, nonnull_killed,
4977                      nonnull_avout, nonnull_avin);
4978
4979   /* Now look at each bb and see if it ends with a compare of a value
4980      against zero.  */
4981   for (bb = 0; bb < n_basic_blocks; bb++)
4982     {
4983       rtx last_insn = BLOCK_END (bb);
4984       rtx condition, earliest;
4985       int compare_and_branch;
4986
4987       /* Since MIN_REG is always at least FIRST_PSEUDO_REGISTER, and
4988          since BLOCK_REG[BB] is zero if this block did not end with a
4989          comparison against zero, this condition works.  */
4990       if (block_reg[bb] < npi->min_reg
4991           || block_reg[bb] >= npi->max_reg)
4992         continue;
4993
4994       /* LAST_INSN is a conditional jump.  Get its condition.  */
4995       condition = get_condition (last_insn, &earliest);
4996
4997       /* If we can't determine the condition then skip.  */
4998       if (! condition)
4999         continue;
5000
5001       /* Is the register known to have a nonzero value?  */
5002       if (!TEST_BIT (nonnull_avout[bb], block_reg[bb] - npi->min_reg))
5003         continue;
5004
5005       /* Try to compute whether the compare/branch at the loop end is one or
5006          two instructions.  */
5007       if (earliest == last_insn)
5008         compare_and_branch = 1;
5009       else if (earliest == prev_nonnote_insn (last_insn))
5010         compare_and_branch = 2;
5011       else
5012         continue;
5013
5014       /* We know the register in this comparison is nonnull at exit from
5015          this block.  We can optimize this comparison.  */
5016       if (GET_CODE (condition) == NE)
5017         {
5018           rtx new_jump;
5019
5020           new_jump = emit_jump_insn_before (gen_jump (JUMP_LABEL (last_insn)),
5021                                             last_insn);
5022           JUMP_LABEL (new_jump) = JUMP_LABEL (last_insn);
5023           LABEL_NUSES (JUMP_LABEL (new_jump))++;
5024           emit_barrier_after (new_jump);
5025         }
5026       delete_insn (last_insn);
5027       if (compare_and_branch == 2)
5028         delete_insn (earliest);
5029
5030       /* Don't check this block again.  (Note that BLOCK_END is
5031          invalid here; we deleted the last instruction in the
5032          block.)  */
5033       block_reg[bb] = 0;
5034     }
5035 }
5036
5037 /* Find EQ/NE comparisons against zero which can be (indirectly) evaluated
5038    at compile time.
5039
5040    This is conceptually similar to global constant/copy propagation and
5041    classic global CSE (it even uses the same dataflow equations as cprop).
5042
5043    If a register is used as memory address with the form (mem (reg)), then we
5044    know that REG can not be zero at that point in the program.  Any instruction
5045    which sets REG "kills" this property.
5046
5047    So, if every path leading to a conditional branch has an available memory
5048    reference of that form, then we know the register can not have the value
5049    zero at the conditional branch.
5050
5051    So we merely need to compute the local properies and propagate that data
5052    around the cfg, then optimize where possible.
5053
5054    We run this pass two times.  Once before CSE, then again after CSE.  This
5055    has proven to be the most profitable approach.  It is rare for new
5056    optimization opportunities of this nature to appear after the first CSE
5057    pass.
5058
5059    This could probably be integrated with global cprop with a little work.  */
5060
5061 void
5062 delete_null_pointer_checks (f)
5063      rtx f;
5064 {
5065   sbitmap *nonnull_avin, *nonnull_avout;
5066   int *block_reg;
5067   int bb;
5068   int reg;
5069   int regs_per_pass;
5070   int max_reg;
5071   struct null_pointer_info npi;
5072
5073   /* First break the program into basic blocks.  */
5074   find_basic_blocks (f, max_reg_num (), NULL);
5075   cleanup_cfg (f);
5076
5077   /* If we have only a single block, then there's nothing to do.  */
5078   if (n_basic_blocks <= 1)
5079     {
5080       /* Free storage allocated by find_basic_blocks.  */
5081       free_basic_block_vars (0);
5082       return;
5083     }
5084
5085   /* Trying to perform global optimizations on flow graphs which have
5086      a high connectivity will take a long time and is unlikely to be
5087      particularly useful.
5088
5089      In normal circumstances a cfg should have about twice has many edges
5090      as blocks.  But we do not want to punish small functions which have
5091      a couple switch statements.  So we require a relatively large number
5092      of basic blocks and the ratio of edges to blocks to be high.  */
5093   if (n_basic_blocks > 1000 && n_edges / n_basic_blocks >= 20)
5094     {
5095       /* Free storage allocated by find_basic_blocks.  */
5096       free_basic_block_vars (0);
5097       return;
5098     }
5099
5100   /* We need four bitmaps, each with a bit for each register in each
5101      basic block.  */
5102   max_reg = max_reg_num ();
5103   regs_per_pass = get_bitmap_width (4, n_basic_blocks, max_reg);
5104
5105   /* Allocate bitmaps to hold local and global properties.  */
5106   npi.nonnull_local = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);
5107   npi.nonnull_killed = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);
5108   nonnull_avin = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);
5109   nonnull_avout = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);
5110
5111   /* Go through the basic blocks, seeing whether or not each block
5112      ends with a conditional branch whose condition is a comparison
5113      against zero.  Record the register compared in BLOCK_REG.  */
5114   block_reg = (int *) xcalloc (n_basic_blocks, sizeof (int));
5115   for (bb = 0; bb < n_basic_blocks; bb++)
5116     {
5117       rtx last_insn = BLOCK_END (bb);
5118       rtx condition, earliest, reg;
5119
5120       /* We only want conditional branches.  */
5121       if (GET_CODE (last_insn) != JUMP_INSN
5122           || !condjump_p (last_insn)
5123           || simplejump_p (last_insn))
5124         continue;
5125
5126       /* LAST_INSN is a conditional jump.  Get its condition.  */
5127       condition = get_condition (last_insn, &earliest);
5128
5129       /* If we were unable to get the condition, or it is not a equality
5130          comparison against zero then there's nothing we can do.  */
5131       if (!condition
5132           || (GET_CODE (condition) != NE && GET_CODE (condition) != EQ)
5133           || GET_CODE (XEXP (condition, 1)) != CONST_INT
5134           || (XEXP (condition, 1)
5135               != CONST0_RTX (GET_MODE (XEXP (condition, 0)))))
5136         continue;
5137
5138       /* We must be checking a register against zero.  */
5139       reg = XEXP (condition, 0);
5140       if (GET_CODE (reg) != REG)
5141         continue;
5142
5143       block_reg[bb] = REGNO (reg);
5144     }
5145
5146   /* Go through the algorithm for each block of registers.  */
5147   for (reg = FIRST_PSEUDO_REGISTER; reg < max_reg; reg += regs_per_pass)
5148     {
5149       npi.min_reg = reg;
5150       npi.max_reg = MIN (reg + regs_per_pass, max_reg);
5151       delete_null_pointer_checks_1 (block_reg, nonnull_avin,
5152                                     nonnull_avout, &npi);
5153     }
5154
5155   /* Free storage allocated by find_basic_blocks.  */
5156   free_basic_block_vars (0);
5157
5158   /* Free the table of registers compared at the end of every block.  */
5159   free (block_reg);
5160
5161   /* Free bitmaps.  */
5162   free (npi.nonnull_local);
5163   free (npi.nonnull_killed);
5164   free (nonnull_avin);
5165   free (nonnull_avout);
5166 }
5167
5168 /* Code Hoisting variables and subroutines.  */
5169
5170 /* Very busy expressions.  */
5171 static sbitmap *hoist_vbein;
5172 static sbitmap *hoist_vbeout;
5173
5174 /* Hoistable expressions.  */
5175 static sbitmap *hoist_exprs;
5176
5177 /* Dominator bitmaps.  */
5178 static sbitmap *dominators;
5179
5180 /* ??? We could compute post dominators and run this algorithm in
5181    reverse to to perform tail merging, doing so would probably be
5182    more effective than the tail merging code in jump.c.
5183
5184    It's unclear if tail merging could be run in parallel with
5185    code hoisting.  It would be nice.  */
5186
5187 /* Allocate vars used for code hoisting analysis.  */
5188
5189 static void
5190 alloc_code_hoist_mem (n_blocks, n_exprs)
5191      int n_blocks, n_exprs;
5192 {
5193   antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
5194   transp = sbitmap_vector_alloc (n_blocks, n_exprs);
5195   comp = sbitmap_vector_alloc (n_blocks, n_exprs);
5196
5197   hoist_vbein = sbitmap_vector_alloc (n_blocks, n_exprs);
5198   hoist_vbeout = sbitmap_vector_alloc (n_blocks, n_exprs);
5199   hoist_exprs = sbitmap_vector_alloc (n_blocks, n_exprs);
5200   transpout = sbitmap_vector_alloc (n_blocks, n_exprs);
5201
5202   dominators = sbitmap_vector_alloc (n_blocks, n_blocks);
5203 }
5204
5205 /* Free vars used for code hoisting analysis.  */
5206
5207 static void
5208 free_code_hoist_mem ()
5209 {
5210   free (antloc);
5211   free (transp);
5212   free (comp);
5213
5214   free (hoist_vbein);
5215   free (hoist_vbeout);
5216   free (hoist_exprs);
5217   free (transpout);
5218
5219   free (dominators);
5220 }
5221
5222 /* Compute the very busy expressions at entry/exit from each block.
5223
5224    An expression is very busy if all paths from a given point
5225    compute the expression.  */
5226
5227 static void
5228 compute_code_hoist_vbeinout ()
5229 {
5230   int bb, changed, passes;
5231
5232   sbitmap_vector_zero (hoist_vbeout, n_basic_blocks);
5233   sbitmap_vector_zero (hoist_vbein, n_basic_blocks);
5234
5235   passes = 0;
5236   changed = 1;
5237
5238   while (changed)
5239     {
5240       changed = 0;
5241
5242       /* We scan the blocks in the reverse order to speed up
5243          the convergence.  */
5244       for (bb = n_basic_blocks - 1; bb >= 0; bb--)
5245         {
5246           changed |= sbitmap_a_or_b_and_c (hoist_vbein[bb], antloc[bb],
5247                                            hoist_vbeout[bb], transp[bb]);
5248           if (bb != n_basic_blocks - 1)
5249             sbitmap_intersection_of_succs (hoist_vbeout[bb], hoist_vbein, bb);
5250         }
5251
5252       passes++;
5253     }
5254
5255   if (gcse_file)
5256     fprintf (gcse_file, "hoisting vbeinout computation: %d passes\n", passes);
5257 }
5258
5259 /* Top level routine to do the dataflow analysis needed by code hoisting.  */
5260
5261 static void
5262 compute_code_hoist_data ()
5263 {
5264   compute_local_properties (transp, comp, antloc, 0);
5265   compute_transpout ();
5266   compute_code_hoist_vbeinout ();
5267   compute_flow_dominators (dominators, NULL);
5268   if (gcse_file)
5269     fprintf (gcse_file, "\n");
5270 }
5271
5272 /* Determine if the expression identified by EXPR_INDEX would
5273    reach BB unimpared if it was placed at the end of EXPR_BB.
5274
5275    It's unclear exactly what Muchnick meant by "unimpared".  It seems
5276    to me that the expression must either be computed or transparent in
5277    *every* block in the path(s) from EXPR_BB to BB.  Any other definition
5278    would allow the expression to be hoisted out of loops, even if
5279    the expression wasn't a loop invariant.
5280
5281    Contrast this to reachability for PRE where an expression is
5282    considered reachable if *any* path reaches instead of *all*
5283    paths.  */
5284
5285 static int
5286 hoist_expr_reaches_here_p (expr_bb, expr_index, bb, visited)
5287      int expr_bb;
5288      int expr_index;
5289      int bb;
5290      char *visited;
5291 {
5292   edge pred;
5293   int visited_allocated_locally = 0;
5294
5295
5296   if (visited == NULL)
5297     {
5298        visited_allocated_locally = 1;
5299        visited = xcalloc (n_basic_blocks, 1);
5300     }
5301
5302   visited[expr_bb] = 1;
5303   for (pred = BASIC_BLOCK (bb)->pred; pred != NULL; pred = pred->pred_next)
5304     {
5305       int pred_bb = pred->src->index;
5306
5307       if (pred->src == ENTRY_BLOCK_PTR)
5308         break;
5309       else if (visited[pred_bb])
5310         continue;
5311
5312       /* Does this predecessor generate this expression?  */
5313       else if (TEST_BIT (comp[pred_bb], expr_index))
5314         break;
5315       else if (! TEST_BIT (transp[pred_bb], expr_index))
5316         break;
5317
5318       /* Not killed.  */
5319       else
5320         {
5321           visited[pred_bb] = 1;
5322           if (! hoist_expr_reaches_here_p (expr_bb, expr_index,
5323                                            pred_bb, visited))
5324             break;
5325         }
5326     }
5327   if (visited_allocated_locally)
5328     free (visited);
5329
5330   return (pred == NULL);
5331 }
5332 \f
5333 /* Actually perform code hoisting.  */
5334
5335 static void
5336 hoist_code ()
5337 {
5338   int bb, dominated, i;
5339   struct expr **index_map;
5340   struct expr *expr;
5341
5342   sbitmap_vector_zero (hoist_exprs, n_basic_blocks);
5343
5344   /* Compute a mapping from expression number (`bitmap_index') to
5345      hash table entry.  */
5346
5347   index_map = (struct expr **) xcalloc (n_exprs, sizeof (struct expr *));
5348   for (i = 0; i < expr_hash_table_size; i++)
5349     for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
5350       index_map[expr->bitmap_index] = expr;
5351
5352   /* Walk over each basic block looking for potentially hoistable
5353      expressions, nothing gets hoisted from the entry block.  */
5354   for (bb = 0; bb < n_basic_blocks; bb++)
5355     {
5356       int found = 0;
5357       int insn_inserted_p;
5358
5359       /* Examine each expression that is very busy at the exit of this
5360          block.  These are the potentially hoistable expressions.  */
5361       for (i = 0; i < hoist_vbeout[bb]->n_bits; i++)
5362         {
5363           int hoistable = 0;
5364
5365           if (TEST_BIT (hoist_vbeout[bb], i) && TEST_BIT (transpout[bb], i))
5366             {
5367               /* We've found a potentially hoistable expression, now
5368                  we look at every block BB dominates to see if it
5369                  computes the expression.  */
5370               for (dominated = 0; dominated < n_basic_blocks; dominated++)
5371                 {
5372                   /* Ignore self dominance.  */
5373                   if (bb == dominated
5374                       || ! TEST_BIT (dominators[dominated], bb))
5375                     continue;
5376
5377                   /* We've found a dominated block, now see if it computes
5378                      the busy expression and whether or not moving that
5379                      expression to the "beginning" of that block is safe.  */
5380                   if (!TEST_BIT (antloc[dominated], i))
5381                     continue;
5382
5383                   /* Note if the expression would reach the dominated block
5384                      unimpared if it was placed at the end of BB.
5385
5386                      Keep track of how many times this expression is hoistable
5387                      from a dominated block into BB.  */
5388                   if (hoist_expr_reaches_here_p (bb, i, dominated, NULL))
5389                     hoistable++;
5390                 }
5391
5392               /* If we found more than one hoistable occurence of this
5393                  expression, then note it in the bitmap of expressions to
5394                  hoist.  It makes no sense to hoist things which are computed
5395                  in only one BB, and doing so tends to pessimize register
5396                  allocation.  One could increase this value to try harder
5397                  to avoid any possible code expansion due to register
5398                  allocation issues; however experiments have shown that
5399                  the vast majority of hoistable expressions are only movable
5400                  from two successors, so raising this threshhold is likely
5401                  to nullify any benefit we get from code hoisting.  */
5402               if (hoistable > 1)
5403                 {
5404                   SET_BIT (hoist_exprs[bb], i);
5405                   found = 1;
5406                 }
5407             }
5408         }
5409
5410       /* If we found nothing to hoist, then quit now.  */
5411       if (! found)
5412         continue;
5413
5414       /* Loop over all the hoistable expressions.  */
5415       for (i = 0; i < hoist_exprs[bb]->n_bits; i++)
5416         {
5417           /* We want to insert the expression into BB only once, so
5418              note when we've inserted it.  */
5419           insn_inserted_p = 0;
5420
5421           /* These tests should be the same as the tests above.  */
5422           if (TEST_BIT (hoist_vbeout[bb], i))
5423             {
5424               /* We've found a potentially hoistable expression, now
5425                  we look at every block BB dominates to see if it
5426                  computes the expression.  */
5427               for (dominated = 0; dominated < n_basic_blocks; dominated++)
5428                 {
5429                   /* Ignore self dominance.  */
5430                   if (bb == dominated
5431                       || ! TEST_BIT (dominators[dominated], bb))
5432                     continue;
5433
5434                   /* We've found a dominated block, now see if it computes
5435                      the busy expression and whether or not moving that
5436                      expression to the "beginning" of that block is safe.  */
5437                   if (!TEST_BIT (antloc[dominated], i))
5438                     continue;
5439
5440                   /* The expression is computed in the dominated block and
5441                      it would be safe to compute it at the start of the
5442                      dominated block.  Now we have to determine if the
5443                      expresion would reach the dominated block if it was
5444                      placed at the end of BB.  */
5445                   if (hoist_expr_reaches_here_p (bb, i, dominated, NULL))
5446                     {
5447                       struct expr *expr = index_map[i];
5448                       struct occr *occr = expr->antic_occr;
5449                       rtx insn;
5450                       rtx set;
5451
5452                       /* Find the right occurence of this expression.  */
5453                       while (BLOCK_NUM (occr->insn) != dominated && occr)
5454                         occr = occr->next;
5455
5456                       /* Should never happen.  */
5457                       if (!occr)
5458                         abort ();
5459
5460                       insn = occr->insn;
5461
5462                       set = single_set (insn);
5463                       if (! set)
5464                         abort ();
5465
5466                       /* Create a pseudo-reg to store the result of reaching
5467                          expressions into.  Get the mode for the new pseudo
5468                          from the mode of the original destination pseudo.  */
5469                       if (expr->reaching_reg == NULL)
5470                         expr->reaching_reg
5471                           = gen_reg_rtx (GET_MODE (SET_DEST (set)));
5472
5473                       /* In theory this should never fail since we're creating
5474                          a reg->reg copy.
5475
5476                          However, on the x86 some of the movXX patterns
5477                          actually contain clobbers of scratch regs.  This may
5478                          cause the insn created by validate_change to not
5479                          match any pattern and thus cause validate_change to
5480                          fail.  */
5481                       if (validate_change (insn, &SET_SRC (set),
5482                                            expr->reaching_reg, 0))
5483                         {
5484                           occr->deleted_p = 1;
5485                           if (!insn_inserted_p)
5486                             {
5487                               insert_insn_end_bb (index_map[i], bb, 0);
5488                               insn_inserted_p = 1;
5489                             }
5490                         }
5491                     }
5492                 }
5493             }
5494         }
5495     }
5496
5497     free (index_map);
5498 }
5499
5500 /* Top level routine to perform one code hoisting (aka unification) pass
5501
5502    Return non-zero if a change was made.  */
5503
5504 static int
5505 one_code_hoisting_pass ()
5506 {
5507   int changed = 0;
5508
5509   alloc_expr_hash_table (max_cuid);
5510   compute_expr_hash_table ();
5511   if (gcse_file)
5512     dump_hash_table (gcse_file, "Code Hosting Expressions", expr_hash_table,
5513                      expr_hash_table_size, n_exprs);
5514
5515   if (n_exprs > 0)
5516     {
5517       alloc_code_hoist_mem (n_basic_blocks, n_exprs);
5518       compute_code_hoist_data ();
5519       hoist_code ();
5520       free_code_hoist_mem ();
5521     }
5522
5523   free_expr_hash_table ();
5524
5525   return changed;
5526 }