gcc/gcse.c

   1 /* Global common subexpression elimination/Partial redundancy elimination
   2    and global constant/copy propagation for GNU compiler.
   3    Copyright (C) 1997, 1998, 1999, 2000 Free Software Foundation, Inc.
   4
   5 This file is part of GNU CC.
   6
   7 GNU CC is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 2, or (at your option)
  10 any later version.
  11
  12 GNU CC is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GNU CC; see the file COPYING.  If not, write to
  19 the Free Software Foundation, 59 Temple Place - Suite 330,
  20 Boston, MA 02111-1307, USA.  */
  21
  22 /* TODO
  23    - reordering of memory allocation and freeing to be more space efficient
  24    - do rough calc of how many regs are needed in each block, and a rough
  25      calc of how many regs are available in each class and use that to
  26      throttle back the code in cases where RTX_COST is minimal.
  27    - dead store elimination
  28    - a store to the same address as a load does not kill the load if the
  29      source of the store is also the destination of the load.  Handling this
  30      allows more load motion, particularly out of loops.
  31    - ability to realloc sbitmap vectors would allow one initial computation
  32      of reg_set_in_block with only subsequent additions, rather than
  33      recomputing it for each pass
  34
  35 */
  36
  37 /* References searched while implementing this.
  38
  39    Compilers Principles, Techniques and Tools
  40    Aho, Sethi, Ullman
  41    Addison-Wesley, 1988
  42
  43    Global Optimization by Suppression of Partial Redundancies
  44    E. Morel, C. Renvoise
  45    communications of the acm, Vol. 22, Num. 2, Feb. 1979
  46
  47    A Portable Machine-Independent Global Optimizer - Design and Measurements
  48    Frederick Chow
  49    Stanford Ph.D. thesis, Dec. 1983
  50
  51    A Fast Algorithm for Code Movement Optimization
  52    D.M. Dhamdhere
  53    SIGPLAN Notices, Vol. 23, Num. 10, Oct. 1988
  54
  55    A Solution to a Problem with Morel and Renvoise's
  56    Global Optimization by Suppression of Partial Redundancies
  57    K-H Drechsler, M.P. Stadel
  58    ACM TOPLAS, Vol. 10, Num. 4, Oct. 1988
  59
  60    Practical Adaptation of the Global Optimization
  61    Algorithm of Morel and Renvoise
  62    D.M. Dhamdhere
  63    ACM TOPLAS, Vol. 13, Num. 2. Apr. 1991
  64
  65    Efficiently Computing Static Single Assignment Form and the Control
  66    Dependence Graph
  67    R. Cytron, J. Ferrante, B.K. Rosen, M.N. Wegman, and F.K. Zadeck
  68    ACM TOPLAS, Vol. 13, Num. 4, Oct. 1991
  69
  70    Lazy Code Motion
  71    J. Knoop, O. Ruthing, B. Steffen
  72    ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI
  73
  74    What's In a Region?  Or Computing Control Dependence Regions in Near-Linear
  75    Time for Reducible Flow Control
  76    Thomas Ball
  77    ACM Letters on Programming Languages and Systems,
  78    Vol. 2, Num. 1-4, Mar-Dec 1993
  79
  80    An Efficient Representation for Sparse Sets
  81    Preston Briggs, Linda Torczon
  82    ACM Letters on Programming Languages and Systems,
  83    Vol. 2, Num. 1-4, Mar-Dec 1993
  84
  85    A Variation of Knoop, Ruthing, and Steffen's Lazy Code Motion
  86    K-H Drechsler, M.P. Stadel
  87    ACM SIGPLAN Notices, Vol. 28, Num. 5, May 1993
  88
  89    Partial Dead Code Elimination
  90    J. Knoop, O. Ruthing, B. Steffen
  91    ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
  92
  93    Effective Partial Redundancy Elimination
  94    P. Briggs, K.D. Cooper
  95    ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
  96
  97    The Program Structure Tree: Computing Control Regions in Linear Time
  98    R. Johnson, D. Pearson, K. Pingali
  99    ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
 100
 101    Optimal Code Motion: Theory and Practice
 102    J. Knoop, O. Ruthing, B. Steffen
 103    ACM TOPLAS, Vol. 16, Num. 4, Jul. 1994
 104
 105    The power of assignment motion
 106    J. Knoop, O. Ruthing, B. Steffen
 107    ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI
 108
 109    Global code motion / global value numbering
 110    C. Click
 111    ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI
 112
 113    Value Driven Redundancy Elimination
 114    L.T. Simpson
 115    Rice University Ph.D. thesis, Apr. 1996
 116
 117    Value Numbering
 118    L.T. Simpson
 119    Massively Scalar Compiler Project, Rice University, Sep. 1996
 120
 121    High Performance Compilers for Parallel Computing
 122    Michael Wolfe
 123    Addison-Wesley, 1996
 124
 125    Advanced Compiler Design and Implementation
 126    Steven Muchnick
 127    Morgan Kaufmann, 1997
 128
 129    Building an Optimizing Compiler
 130    Robert Morgan
 131    Digital Press, 1998
 132
 133    People wishing to speed up the code here should read:
 134      Elimination Algorithms for Data Flow Analysis
 135      B.G. Ryder, M.C. Paull
 136      ACM Computing Surveys, Vol. 18, Num. 3, Sep. 1986
 137
 138      How to Analyze Large Programs Efficiently and Informatively
 139      D.M. Dhamdhere, B.K. Rosen, F.K. Zadeck
 140      ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI
 141
 142    People wishing to do something different can find various possibilities
 143    in the above papers and elsewhere.
 144 */
 145
 146 #include "config.h"
 147 #include "system.h"
 148 #include "toplev.h"
 149
 150 #include "rtl.h"
 151 #include "tm_p.h"
 152 #include "regs.h"
 153 #include "hard-reg-set.h"
 154 #include "flags.h"
 155 #include "real.h"
 156 #include "insn-config.h"
 157 #include "recog.h"
 158 #include "basic-block.h"
 159 #include "output.h"
 160 #include "function.h"
 161 #include "expr.h"
 162
 163 #include "obstack.h"
 164 #define obstack_chunk_alloc gmalloc
 165 #define obstack_chunk_free free
 166
 167 /* Maximum number of passes to perform.  */
 168 #define MAX_PASSES 1
 169
 170 /* Propagate flow information through back edges and thus enable PRE's
 171    moving loop invariant calculations out of loops.
 172
 173    Originally this tended to create worse overall code, but several
 174    improvements during the development of PRE seem to have made following
 175    back edges generally a win.
 176
 177    Note much of the loop invariant code motion done here would normally
 178    be done by loop.c, which has more heuristics for when to move invariants
 179    out of loops.  At some point we might need to move some of those
 180    heuristics into gcse.c.  */
 181 #define FOLLOW_BACK_EDGES 1
 182
 183 /* We support GCSE via Partial Redundancy Elimination.  PRE optimizations
 184    are a superset of those done by GCSE.
 185
 186    We perform the following steps:
 187
 188    1) Compute basic block information.
 189
 190    2) Compute table of places where registers are set.
 191
 192    3) Perform copy/constant propagation.
 193
 194    4) Perform global cse.
 195
 196    5) Perform another pass of copy/constant propagation.
 197
 198    Two passes of copy/constant propagation are done because the first one
 199    enables more GCSE and the second one helps to clean up the copies that
 200    GCSE creates.  This is needed more for PRE than for Classic because Classic
 201    GCSE will try to use an existing register containing the common
 202    subexpression rather than create a new one.  This is harder to do for PRE
 203    because of the code motion (which Classic GCSE doesn't do).
 204
 205    Expressions we are interested in GCSE-ing are of the form
 206    (set (pseudo-reg) (expression)).
 207    Function want_to_gcse_p says what these are.
 208
 209    PRE handles moving invariant expressions out of loops (by treating them as
 210    partially redundant).
 211
 212    Eventually it would be nice to replace cse.c/gcse.c with SSA (static single
 213    assignment) based GVN (global value numbering).  L. T. Simpson's paper
 214    (Rice University) on value numbering is a useful reference for this.
 215
 216    **********************
 217
 218    We used to support multiple passes but there are diminishing returns in
 219    doing so.  The first pass usually makes 90% of the changes that are doable.
 220    A second pass can make a few more changes made possible by the first pass.
 221    Experiments show any further passes don't make enough changes to justify
 222    the expense.
 223
 224    A study of spec92 using an unlimited number of passes:
 225    [1 pass] = 1208 substitutions, [2] = 577, [3] = 202, [4] = 192, [5] = 83,
 226    [6] = 34, [7] = 17, [8] = 9, [9] = 4, [10] = 4, [11] = 2,
 227    [12] = 2, [13] = 1, [15] = 1, [16] = 2, [41] = 1
 228
 229    It was found doing copy propagation between each pass enables further
 230    substitutions.
 231
 232    PRE is quite expensive in complicated functions because the DFA can take
 233    awhile to converge.  Hence we only perform one pass.  Macro MAX_PASSES can
 234    be modified if one wants to experiment.
 235
 236    **********************
 237
 238    The steps for PRE are:
 239
 240    1) Build the hash table of expressions we wish to GCSE (expr_hash_table).
 241
 242    2) Perform the data flow analysis for PRE.
 243
 244    3) Delete the redundant instructions
 245
 246    4) Insert the required copies [if any] that make the partially
 247       redundant instructions fully redundant.
 248
 249    5) For other reaching expressions, insert an instruction to copy the value
 250       to a newly created pseudo that will reach the redundant instruction.
 251
 252    The deletion is done first so that when we do insertions we
 253    know which pseudo reg to use.
 254
 255    Various papers have argued that PRE DFA is expensive (O(n^2)) and others
 256    argue it is not.  The number of iterations for the algorithm to converge
 257    is typically 2-4 so I don't view it as that expensive (relatively speaking).
 258
 259    PRE GCSE depends heavily on the second CSE pass to clean up the copies
 260    we create.  To make an expression reach the place where it's redundant,
 261    the result of the expression is copied to a new register, and the redundant
 262    expression is deleted by replacing it with this new register.  Classic GCSE
 263    doesn't have this problem as much as it computes the reaching defs of
 264    each register in each block and thus can try to use an existing register.
 265
 266    **********************
 267
 268    A fair bit of simplicity is created by creating small functions for simple
 269    tasks, even when the function is only called in one place.  This may
 270    measurably slow things down [or may not] by creating more function call
 271    overhead than is necessary.  The source is laid out so that it's trivial
 272    to make the affected functions inline so that one can measure what speed
 273    up, if any, can be achieved, and maybe later when things settle things can
 274    be rearranged.
 275
 276    Help stamp out big monolithic functions!  */
 277 \f
 278 /* GCSE global vars.  */
 279
 280 /* -dG dump file.  */
 281 static FILE *gcse_file;
 282
 283 /* Note whether or not we should run jump optimization after gcse.  We
 284    want to do this for two cases.
 285
 286     * If we changed any jumps via cprop.
 287
 288     * If we added any labels via edge splitting.  */
 289
 290 static int run_jump_opt_after_gcse;
 291
 292 /* Bitmaps are normally not included in debugging dumps.
 293    However it's useful to be able to print them from GDB.
 294    We could create special functions for this, but it's simpler to
 295    just allow passing stderr to the dump_foo fns.  Since stderr can
 296    be a macro, we store a copy here.  */
 297 static FILE *debug_stderr;
 298
 299 /* An obstack for our working variables.  */
 300 static struct obstack gcse_obstack;
 301
 302 /* Non-zero for each mode that supports (set (reg) (reg)).
 303    This is trivially true for integer and floating point values.
 304    It may or may not be true for condition codes.  */
 305 static char can_copy_p[(int) NUM_MACHINE_MODES];
 306
 307 /* Non-zero if can_copy_p has been initialized.  */
 308 static int can_copy_init_p;
 309
 310 struct reg_use {
 311   rtx reg_rtx;
 312 };
 313
 314 /* Hash table of expressions.  */
 315
 316 struct expr
 317 {
 318   /* The expression (SET_SRC for expressions, PATTERN for assignments).  */
 319   rtx expr;
 320   /* Index in the available expression bitmaps.  */
 321   int bitmap_index;
 322   /* Next entry with the same hash.  */
 323   struct expr *next_same_hash;
 324   /* List of anticipatable occurrences in basic blocks in the function.
 325      An "anticipatable occurrence" is one that is the first occurrence in the
 326      basic block, the operands are not modified in the basic block prior
 327      to the occurrence and the output is not used between the start of
 328      the block and the occurrence.  */
 329   struct occr *antic_occr;
 330   /* List of available occurrence in basic blocks in the function.
 331      An "available occurrence" is one that is the last occurrence in the
 332      basic block and the operands are not modified by following statements in
 333      the basic block [including this insn].  */
 334   struct occr *avail_occr;
 335   /* Non-null if the computation is PRE redundant.
 336      The value is the newly created pseudo-reg to record a copy of the
 337      expression in all the places that reach the redundant copy.  */
 338   rtx reaching_reg;
 339 };
 340
 341 /* Occurrence of an expression.
 342    There is one per basic block.  If a pattern appears more than once the
 343    last appearance is used [or first for anticipatable expressions].  */
 344
 345 struct occr
 346 {
 347   /* Next occurrence of this expression.  */
 348   struct occr *next;
 349   /* The insn that computes the expression.  */
 350   rtx insn;
 351   /* Non-zero if this [anticipatable] occurrence has been deleted.  */
 352   char deleted_p;
 353   /* Non-zero if this [available] occurrence has been copied to
 354      reaching_reg.  */
 355   /* ??? This is mutually exclusive with deleted_p, so they could share
 356      the same byte.  */
 357   char copied_p;
 358 };
 359
 360 /* Expression and copy propagation hash tables.
 361    Each hash table is an array of buckets.
 362    ??? It is known that if it were an array of entries, structure elements
 363    `next_same_hash' and `bitmap_index' wouldn't be necessary.  However, it is
 364    not clear whether in the final analysis a sufficient amount of memory would
 365    be saved as the size of the available expression bitmaps would be larger
 366    [one could build a mapping table without holes afterwards though].
 367    Someday I'll perform the computation and figure it out.
 368 */
 369
 370 /* Total size of the expression hash table, in elements.  */
 371 static int expr_hash_table_size;
 372 /* The table itself.
 373    This is an array of `expr_hash_table_size' elements.  */
 374 static struct expr **expr_hash_table;
 375
 376 /* Total size of the copy propagation hash table, in elements.  */
 377 static int set_hash_table_size;
 378 /* The table itself.
 379    This is an array of `set_hash_table_size' elements.  */
 380 static struct expr **set_hash_table;
 381
 382 /* Mapping of uids to cuids.
 383    Only real insns get cuids.  */
 384 static int *uid_cuid;
 385
 386 /* Highest UID in UID_CUID.  */
 387 static int max_uid;
 388
 389 /* Get the cuid of an insn.  */
 390 #define INSN_CUID(INSN) (uid_cuid[INSN_UID (INSN)])
 391
 392 /* Number of cuids.  */
 393 static int max_cuid;
 394
 395 /* Mapping of cuids to insns.  */
 396 static rtx *cuid_insn;
 397
 398 /* Get insn from cuid.  */
 399 #define CUID_INSN(CUID) (cuid_insn[CUID])
 400
 401 /* Maximum register number in function prior to doing gcse + 1.
 402    Registers created during this pass have regno >= max_gcse_regno.
 403    This is named with "gcse" to not collide with global of same name.  */
 404 static int max_gcse_regno;
 405
 406 /* Maximum number of cse-able expressions found.  */
 407 static int n_exprs;
 408 /* Maximum number of assignments for copy propagation found.  */
 409 static int n_sets;
 410
 411 /* Table of registers that are modified.
 412    For each register, each element is a list of places where the pseudo-reg
 413    is set.
 414
 415    For simplicity, GCSE is done on sets of pseudo-regs only.  PRE GCSE only
 416    requires knowledge of which blocks kill which regs [and thus could use
 417    a bitmap instead of the lists `reg_set_table' uses].
 418
 419    `reg_set_table' and could be turned into an array of bitmaps
 420    (num-bbs x num-regs)
 421    [however perhaps it may be useful to keep the data as is].
 422    One advantage of recording things this way is that `reg_set_table' is
 423    fairly sparse with respect to pseudo regs but for hard regs could be
 424    fairly dense [relatively speaking].
 425    And recording sets of pseudo-regs in lists speeds
 426    up functions like compute_transp since in the case of pseudo-regs we only
 427    need to iterate over the number of times a pseudo-reg is set, not over the
 428    number of basic blocks [clearly there is a bit of a slow down in the cases
 429    where a pseudo is set more than once in a block, however it is believed
 430    that the net effect is to speed things up].  This isn't done for hard-regs
 431    because recording call-clobbered hard-regs in `reg_set_table' at each
 432    function call can consume a fair bit of memory, and iterating over hard-regs
 433    stored this way in compute_transp will be more expensive.  */
 434
 435 typedef struct reg_set {
 436   /* The next setting of this register.  */
 437   struct reg_set *next;
 438   /* The insn where it was set.  */
 439   rtx insn;
 440 } reg_set;
 441 static reg_set **reg_set_table;
 442 /* Size of `reg_set_table'.
 443    The table starts out at max_gcse_regno + slop, and is enlarged as
 444    necessary.  */
 445 static int reg_set_table_size;
 446 /* Amount to grow `reg_set_table' by when it's full.  */
 447 #define REG_SET_TABLE_SLOP 100
 448
 449 /* Bitmap containing one bit for each register in the program.
 450    Used when performing GCSE to track which registers have been set since
 451    the start of the basic block.  */
 452 static sbitmap reg_set_bitmap;
 453
 454 /* For each block, a bitmap of registers set in the block.
 455    This is used by expr_killed_p and compute_transp.
 456    It is computed during hash table computation and not by compute_sets
 457    as it includes registers added since the last pass (or between cprop and
 458    gcse) and it's currently not easy to realloc sbitmap vectors.  */
 459 static sbitmap *reg_set_in_block;
 460
 461 /* For each block, non-zero if memory is set in that block.
 462    This is computed during hash table computation and is used by
 463    expr_killed_p and compute_transp.
 464    ??? Handling of memory is very simple, we don't make any attempt
 465    to optimize things (later).
 466    ??? This can be computed by compute_sets since the information
 467    doesn't change.  */
 468 static char *mem_set_in_block;
 469
 470 /* Various variables for statistics gathering.  */
 471
 472 /* Memory used in a pass.
 473    This isn't intended to be absolutely precise.  Its intent is only
 474    to keep an eye on memory usage.  */
 475 static int bytes_used;
 476 /* GCSE substitutions made.  */
 477 static int gcse_subst_count;
 478 /* Number of copy instructions created.  */
 479 static int gcse_create_count;
 480 /* Number of constants propagated.  */
 481 static int const_prop_count;
 482 /* Number of copys propagated.  */
 483 static int copy_prop_count;
 484 \f
 485 /* These variables are used by classic GCSE.
 486    Normally they'd be defined a bit later, but `rd_gen' needs to
 487    be declared sooner.  */
 488
 489 /* A bitmap of all ones for implementing the algorithm for available
 490    expressions and reaching definitions.  */
 491 /* ??? Available expression bitmaps have a different size than reaching
 492    definition bitmaps.  This should be the larger of the two, however, it
 493    is not currently used for reaching definitions.  */
 494 static sbitmap u_bitmap;
 495
 496 /* Each block has a bitmap of each type.
 497    The length of each blocks bitmap is:
 498
 499        max_cuid  - for reaching definitions
 500        n_exprs - for available expressions
 501
 502    Thus we view the bitmaps as 2 dimensional arrays.  i.e.
 503    rd_kill[block_num][cuid_num]
 504    ae_kill[block_num][expr_num]
 505 */
 506
 507 /* For reaching defs */
 508 static sbitmap *rd_kill, *rd_gen, *reaching_defs, *rd_out;
 509
 510 /* for available exprs */
 511 static sbitmap *ae_kill, *ae_gen, *ae_in, *ae_out;
 512
 513 /* Objects of this type are passed around by the null-pointer check
 514    removal routines.  */
 515 struct null_pointer_info {
 516   /* The basic block being processed.  */
 517   int current_block;
 518   /* The first register to be handled in this pass.  */
 519   int min_reg;
 520   /* One greater than the last register to be handled in this pass.  */
 521   int max_reg;
 522   sbitmap *nonnull_local;
 523   sbitmap *nonnull_killed;
 524 };
 525 \f
 526 static void compute_can_copy      PARAMS ((void));
 527
 528 static char *gmalloc              PARAMS ((unsigned int));
 529 static char *grealloc            PARAMS ((char *, unsigned int));
 530 static char *gcse_alloc        PARAMS ((unsigned long));
 531 static void alloc_gcse_mem          PARAMS ((rtx));
 532 static void free_gcse_mem            PARAMS ((void));
 533 static void alloc_reg_set_mem    PARAMS ((int));
 534 static void free_reg_set_mem      PARAMS ((void));
 535 static int get_bitmap_width           PARAMS ((int, int, int));
 536 static void record_one_set          PARAMS ((int, rtx));
 537 static void record_set_info        PARAMS ((rtx, rtx, void *));
 538 static void compute_sets              PARAMS ((rtx));
 539
 540 static void hash_scan_insn          PARAMS ((rtx, int, int));
 541 static void hash_scan_set            PARAMS ((rtx, rtx, int));
 542 static void hash_scan_clobber    PARAMS ((rtx, rtx));
 543 static void hash_scan_call          PARAMS ((rtx, rtx));
 544 static int want_to_gcse_p            PARAMS ((rtx));
 545 static int oprs_unchanged_p        PARAMS ((rtx, rtx, int));
 546 static int oprs_anticipatable_p       PARAMS ((rtx, rtx));
 547 static int oprs_available_p        PARAMS ((rtx, rtx));
 548 static void insert_expr_in_table      PARAMS ((rtx, enum machine_mode,
 549                                               rtx, int, int));
 550 static void insert_set_in_table       PARAMS ((rtx, rtx));
 551 static unsigned int hash_expr    PARAMS ((rtx, enum machine_mode,
 552                                           int *, int));
 553 static unsigned int hash_expr_1       PARAMS ((rtx, enum machine_mode, int *));
 554 static unsigned int hash_set      PARAMS ((int, int));
 555 static int expr_equiv_p        PARAMS ((rtx, rtx));
 556 static void record_last_reg_set_info  PARAMS ((rtx, int));
 557 static void record_last_mem_set_info  PARAMS ((rtx));
 558 static void record_last_set_info      PARAMS ((rtx, rtx, void *));
 559 static void compute_hash_table  PARAMS ((int));
 560 static void alloc_set_hash_table      PARAMS ((int));
 561 static void free_set_hash_table       PARAMS ((void));
 562 static void compute_set_hash_table    PARAMS ((void));
 563 static void alloc_expr_hash_table     PARAMS ((int));
 564 static void free_expr_hash_table      PARAMS ((void));
 565 static void compute_expr_hash_table   PARAMS ((void));
 566 static void dump_hash_table        PARAMS ((FILE *, const char *,
 567                                             struct expr **, int, int));
 568 static struct expr *lookup_expr       PARAMS ((rtx));
 569 static struct expr *lookup_set  PARAMS ((int, rtx));
 570 static struct expr *next_set      PARAMS ((int, struct expr *));
 571 static void reset_opr_set_tables      PARAMS ((void));
 572 static int oprs_not_set_p            PARAMS ((rtx, rtx));
 573 static void mark_call            PARAMS ((rtx));
 574 static void mark_set              PARAMS ((rtx, rtx));
 575 static void mark_clobber              PARAMS ((rtx, rtx));
 576 static void mark_oprs_set            PARAMS ((rtx));
 577
 578 static void alloc_cprop_mem        PARAMS ((int, int));
 579 static void free_cprop_mem          PARAMS ((void));
 580 static void compute_transp          PARAMS ((rtx, int, sbitmap *, int));
 581 static void compute_transpout       PARAMS ((void));
 582 static void compute_local_properties  PARAMS ((sbitmap *, sbitmap *,
 583                                                sbitmap *, int));
 584 static void compute_cprop_data  PARAMS ((void));
 585 static void find_used_regs          PARAMS ((rtx));
 586 static int try_replace_reg          PARAMS ((rtx, rtx, rtx));
 587 static struct expr *find_avail_set    PARAMS ((int, rtx));
 588 static int cprop_jump                   PARAMS ((rtx, rtx, struct reg_use *, rtx));
 589 #ifdef HAVE_cc0
 590 static int cprop_cc0_jump               PARAMS ((rtx, struct reg_use *, rtx));
 591 #endif
 592 static int cprop_insn            PARAMS ((rtx, int));
 593 static int cprop                      PARAMS ((int));
 594 static int one_cprop_pass            PARAMS ((int, int));
 595
 596 static void alloc_pre_mem            PARAMS ((int, int));
 597 static void free_pre_mem              PARAMS ((void));
 598 static void compute_pre_data      PARAMS ((void));
 599 static int pre_expr_reaches_here_p    PARAMS ((int, struct expr *, int));
 600 static void insert_insn_end_bb  PARAMS ((struct expr *, int, int));
 601 static void pre_insert_copy_insn      PARAMS ((struct expr *, rtx));
 602 static void pre_insert_copies    PARAMS ((void));
 603 static int pre_delete            PARAMS ((void));
 604 static int pre_gcse                PARAMS ((void));
 605 static int one_pre_gcse_pass      PARAMS ((int));
 606
 607 static void add_label_notes           PARAMS ((rtx, rtx));
 608
 609 static void alloc_code_hoist_mem        PARAMS ((int, int));
 610 static void free_code_hoist_mem         PARAMS ((void));
 611 static void compute_code_hoist_vbeinout PARAMS ((void));
 612 static void compute_code_hoist_data     PARAMS ((void));
 613 static int hoist_expr_reaches_here_p    PARAMS ((int, int, int, char *));
 614 static void hoist_code                  PARAMS ((void));
 615 static int one_code_hoisting_pass       PARAMS ((void));
 616
 617 static void alloc_rd_mem              PARAMS ((int, int));
 618 static void free_rd_mem        PARAMS ((void));
 619 static void handle_rd_kill_set  PARAMS ((rtx, int, int));
 620 static void compute_kill_rd        PARAMS ((void));
 621 static void compute_rd          PARAMS ((void));
 622 static void alloc_avail_expr_mem      PARAMS ((int, int));
 623 static void free_avail_expr_mem       PARAMS ((void));
 624 static void compute_ae_gen          PARAMS ((void));
 625 static int expr_killed_p              PARAMS ((rtx, int));
 626 static void compute_ae_kill        PARAMS ((sbitmap *, sbitmap *));
 627 static int expr_reaches_here_p  PARAMS ((struct occr *, struct expr *,
 628                                          int, int));
 629 static rtx computing_insn            PARAMS ((struct expr *, rtx));
 630 static int def_reaches_here_p    PARAMS ((rtx, rtx));
 631 static int can_disregard_other_sets   PARAMS ((struct reg_set **, rtx, int));
 632 static int handle_avail_expr      PARAMS ((rtx, struct expr *));
 633 static int classic_gcse        PARAMS ((void));
 634 static int one_classic_gcse_pass      PARAMS ((int));
 635 static void invalidate_nonnull_info     PARAMS ((rtx, rtx, void *));
 636 static void delete_null_pointer_checks_1 PARAMS ((int *, sbitmap *, sbitmap *,
 637                                                   struct null_pointer_info *));
 638 static rtx process_insert_insn  PARAMS ((struct expr *));
 639 static int pre_edge_insert      PARAMS ((struct edge_list *, struct expr **));
 640 static int expr_reaches_here_p_work     PARAMS ((struct occr *, struct expr *,
 641                                                  int, int, char *));
 642 static int pre_expr_reaches_here_p_work PARAMS ((int, struct expr *,
 643                                                  int, char *));
 644 \f
 645 /* Entry point for global common subexpression elimination.
 646    F is the first instruction in the function.  */
 647
 648 int
 649 gcse_main (f, file)
 650      rtx f;
 651      FILE *file;
 652 {
 653   int changed, pass;
 654   /* Bytes used at start of pass.  */
 655   int initial_bytes_used;
 656   /* Maximum number of bytes used by a pass.  */
 657   int max_pass_bytes;
 658   /* Point to release obstack data from for each pass.  */
 659   char *gcse_obstack_bottom;
 660
 661   /* We do not construct an accurate cfg in functions which call
 662      setjmp, so just punt to be safe.  */
 663   if (current_function_calls_setjmp)
 664     return 0;
 665
 666   /* Assume that we do not need to run jump optimizations after gcse.  */
 667   run_jump_opt_after_gcse = 0;
 668
 669   /* For calling dump_foo fns from gdb.  */
 670   debug_stderr = stderr;
 671   gcse_file = file;
 672
 673   /* Identify the basic block information for this function, including
 674      successors and predecessors.  */
 675   max_gcse_regno = max_reg_num ();
 676   find_basic_blocks (f, max_gcse_regno, file, 1);
 677
 678   if (file)
 679     dump_flow_info (file);
 680
 681   /* Return if there's nothing to do.  */
 682   if (n_basic_blocks <= 1)
 683     {
 684       /* Free storage allocated by find_basic_blocks.  */
 685       free_basic_block_vars (0);
 686       return 0;
 687     }
 688
 689   /* Trying to perform global optimizations on flow graphs which have
 690      a high connectivity will take a long time and is unlikely to be
 691      particularly useful.
 692
 693      In normal circumstances a cfg should have about twice has many edges
 694      as blocks.  But we do not want to punish small functions which have
 695      a couple switch statements.  So we require a relatively large number
 696      of basic blocks and the ratio of edges to blocks to be high.  */
 697   if (n_basic_blocks > 1000 && n_edges / n_basic_blocks >= 20)
 698     {
 699       /* Free storage allocated by find_basic_blocks.  */
 700       free_basic_block_vars (0);
 701       return 0;
 702     }
 703
 704   /* See what modes support reg/reg copy operations.  */
 705   if (! can_copy_init_p)
 706     {
 707       compute_can_copy ();
 708       can_copy_init_p = 1;
 709     }
 710
 711   gcc_obstack_init (&gcse_obstack);
 712   bytes_used = 0;
 713
 714   /* Record where pseudo-registers are set.
 715      This data is kept accurate during each pass.
 716      ??? We could also record hard-reg information here
 717      [since it's unchanging], however it is currently done during
 718      hash table computation.
 719
 720      It may be tempting to compute MEM set information here too, but MEM
 721      sets will be subject to code motion one day and thus we need to compute
 722      information about memory sets when we build the hash tables.  */
 723
 724   alloc_reg_set_mem (max_gcse_regno);
 725   compute_sets (f);
 726
 727   pass = 0;
 728   initial_bytes_used = bytes_used;
 729   max_pass_bytes = 0;
 730   gcse_obstack_bottom = gcse_alloc (1);
 731   changed = 1;
 732   while (changed && pass < MAX_PASSES)
 733     {
 734       changed = 0;
 735       if (file)
 736         fprintf (file, "GCSE pass %d\n\n", pass + 1);
 737
 738       /* Initialize bytes_used to the space for the pred/succ lists,
 739          and the reg_set_table data.  */
 740       bytes_used = initial_bytes_used;
 741
 742       /* Each pass may create new registers, so recalculate each time.  */
 743       max_gcse_regno = max_reg_num ();
 744
 745       alloc_gcse_mem (f);
 746
 747       /* Don't allow constant propagation to modify jumps
 748          during this pass.  */
 749       changed = one_cprop_pass (pass + 1, 0);
 750
 751       if (optimize_size)
 752         changed |= one_classic_gcse_pass (pass + 1);
 753       else
 754         {
 755           changed |= one_pre_gcse_pass (pass + 1);
 756           free_reg_set_mem ();
 757           alloc_reg_set_mem (max_reg_num ());
 758           compute_sets (f);
 759           run_jump_opt_after_gcse = 1;
 760         }
 761
 762       if (max_pass_bytes < bytes_used)
 763         max_pass_bytes = bytes_used;
 764
 765       /* Free up memory, then reallocate for code hoisting.  We can
 766          not re-use the existing allocated memory because the tables
 767          will not have info for the insns or registers created by
 768          partial redundancy elimination.  */
 769       free_gcse_mem ();
 770
 771       /* It does not make sense to run code hoisting unless we optimizing
 772          for code size -- it rarely makes programs faster, and can make
 773          them bigger if we did partial redundancy elimination (when optimizing
 774          for space, we use a classic gcse algorithm instead of partial
 775          redundancy algorithms).  */
 776       if (optimize_size)
 777         {
 778           max_gcse_regno = max_reg_num ();
 779           alloc_gcse_mem (f);
 780           changed |= one_code_hoisting_pass ();
 781           free_gcse_mem ();
 782
 783           if (max_pass_bytes < bytes_used)
 784             max_pass_bytes = bytes_used;
 785         }
 786
 787       if (file)
 788         {
 789           fprintf (file, "\n");
 790           fflush (file);
 791         }
 792       obstack_free (&gcse_obstack, gcse_obstack_bottom);
 793       pass++;
 794     }
 795
 796   /* Do one last pass of copy propagation, including cprop into
 797      conditional jumps.  */
 798
 799   max_gcse_regno = max_reg_num ();
 800   alloc_gcse_mem (f);
 801   /* This time, go ahead and allow cprop to alter jumps.  */
 802   one_cprop_pass (pass + 1, 1);
 803   free_gcse_mem ();
 804
 805   if (file)
 806     {
 807       fprintf (file, "GCSE of %s: %d basic blocks, ",
 808                current_function_name, n_basic_blocks);
 809       fprintf (file, "%d pass%s, %d bytes\n\n",
 810                pass, pass > 1 ? "es" : "", max_pass_bytes);
 811     }
 812
 813   /* Free our obstack.  */
 814   obstack_free (&gcse_obstack, NULL_PTR);
 815   /* Free reg_set_table.  */
 816   free_reg_set_mem ();
 817   /* Free storage allocated by find_basic_blocks.  */
 818   free_basic_block_vars (0);
 819   return run_jump_opt_after_gcse;
 820 }
 821 \f
 822 /* Misc. utilities.  */
 823
 824 /* Compute which modes support reg/reg copy operations.  */
 825
 826 static void
 827 compute_can_copy ()
 828 {
 829   int i;
 830 #ifndef AVOID_CCMODE_COPIES
 831   rtx reg,insn;
 832 #endif
 833   char *free_point = (char *) oballoc (1);
 834
 835   bzero (can_copy_p, NUM_MACHINE_MODES);
 836
 837   start_sequence ();
 838   for (i = 0; i < NUM_MACHINE_MODES; i++)
 839     {
 840       switch (GET_MODE_CLASS (i))
 841         {
 842         case MODE_CC :
 843 #ifdef AVOID_CCMODE_COPIES
 844           can_copy_p[i] = 0;
 845 #else
 846           reg = gen_rtx_REG ((enum machine_mode) i, LAST_VIRTUAL_REGISTER + 1);
 847           insn = emit_insn (gen_rtx_SET (VOIDmode, reg, reg));
 848           if (recog (PATTERN (insn), insn, NULL_PTR) >= 0)
 849             can_copy_p[i] = 1;
 850 #endif
 851           break;
 852         default :
 853           can_copy_p[i] = 1;
 854           break;
 855         }
 856     }
 857   end_sequence ();
 858
 859   /* Free the objects we just allocated.  */
 860   obfree (free_point);
 861 }
 862 \f
 863 /* Cover function to xmalloc to record bytes allocated.  */
 864
 865 static char *
 866 gmalloc (size)
 867      unsigned int size;
 868 {
 869   bytes_used += size;
 870   return xmalloc (size);
 871 }
 872
 873 /* Cover function to xrealloc.
 874    We don't record the additional size since we don't know it.
 875    It won't affect memory usage stats much anyway.  */
 876
 877 static char *
 878 grealloc (ptr, size)
 879      char *ptr;
 880      unsigned int size;
 881 {
 882   return xrealloc (ptr, size);
 883 }
 884
 885 /* Cover function to obstack_alloc.
 886    We don't need to record the bytes allocated here since
 887    obstack_chunk_alloc is set to gmalloc.  */
 888
 889 static char *
 890 gcse_alloc (size)
 891      unsigned long size;
 892 {
 893   return (char *) obstack_alloc (&gcse_obstack, size);
 894 }
 895
 896 /* Allocate memory for the cuid mapping array,
 897    and reg/memory set tracking tables.
 898
 899    This is called at the start of each pass.  */
 900
 901 static void
 902 alloc_gcse_mem (f)
 903      rtx f;
 904 {
 905   int i,n;
 906   rtx insn;
 907
 908   /* Find the largest UID and create a mapping from UIDs to CUIDs.
 909      CUIDs are like UIDs except they increase monotonically, have no gaps,
 910      and only apply to real insns.  */
 911
 912   max_uid = get_max_uid ();
 913   n = (max_uid + 1) * sizeof (int);
 914   uid_cuid = (int *) gmalloc (n);
 915   bzero ((char *) uid_cuid, n);
 916   for (insn = f, i = 0; insn; insn = NEXT_INSN (insn))
 917     {
 918       if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
 919         INSN_CUID (insn) = i++;
 920       else
 921         INSN_CUID (insn) = i;
 922     }
 923
 924   /* Create a table mapping cuids to insns.  */
 925
 926   max_cuid = i;
 927   n = (max_cuid + 1) * sizeof (rtx);
 928   cuid_insn = (rtx *) gmalloc (n);
 929   bzero ((char *) cuid_insn, n);
 930   for (insn = f, i = 0; insn; insn = NEXT_INSN (insn))
 931     {
 932       if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
 933         {
 934           CUID_INSN (i) = insn;
 935           i++;
 936         }
 937     }
 938
 939   /* Allocate vars to track sets of regs.  */
 940
 941   reg_set_bitmap = (sbitmap) sbitmap_alloc (max_gcse_regno);
 942
 943   /* Allocate vars to track sets of regs, memory per block.  */
 944
 945   reg_set_in_block = (sbitmap *) sbitmap_vector_alloc (n_basic_blocks,
 946                                                        max_gcse_regno);
 947   mem_set_in_block = (char *) gmalloc (n_basic_blocks);
 948 }
 949
 950 /* Free memory allocated by alloc_gcse_mem.  */
 951
 952 static void
 953 free_gcse_mem ()
 954 {
 955   free (uid_cuid);
 956   free (cuid_insn);
 957
 958   free (reg_set_bitmap);
 959
 960   free (reg_set_in_block);
 961   free (mem_set_in_block);
 962 }
 963
 964 /* Many of the global optimization algorithms work by solving dataflow
 965    equations for various expressions.  Initially, some local value is
 966    computed for each expression in each block.  Then, the values
 967    across the various blocks are combined (by following flow graph
 968    edges) to arrive at global values.  Conceptually, each set of
 969    equations is independent.  We may therefore solve all the equations
 970    in parallel, solve them one at a time, or pick any intermediate
 971    approach.
 972
 973    When you're going to need N two-dimensional bitmaps, each X (say,
 974    the number of blocks) by Y (say, the number of expressions), call
 975    this function.  It's not important what X and Y represent; only
 976    that Y correspond to the things that can be done in parallel.  This
 977    function will return an appropriate chunking factor C; you should
 978    solve C sets of equations in parallel.  By going through this
 979    function, we can easily trade space against time; by solving fewer
 980    equations in parallel we use less space.  */
 981
 982 static int
 983 get_bitmap_width (n, x, y)
 984      int n;
 985      int x;
 986      int y;
 987 {
 988   /* It's not really worth figuring out *exactly* how much memory will
 989      be used by a particular choice.  The important thing is to get
 990      something approximately right.  */
 991   size_t max_bitmap_memory = 10 * 1024 * 1024;
 992
 993   /* The number of bytes we'd use for a single column of minimum
 994      width.  */
 995   size_t column_size = n * x * sizeof (SBITMAP_ELT_TYPE);
 996
 997   /* Often, it's reasonable just to solve all the equations in
 998      parallel.  */
 999   if (column_size * SBITMAP_SET_SIZE (y) <= max_bitmap_memory)
1000     return y;
1001
1002   /* Otherwise, pick the largest width we can, without going over the
1003      limit.  */
1004   return SBITMAP_ELT_BITS * ((max_bitmap_memory + column_size - 1)
1005                              / column_size);
1006 }
1007
1008 \f
1009 /* Compute the local properties of each recorded expression.
1010    Local properties are those that are defined by the block, irrespective
1011    of other blocks.
1012
1013    An expression is transparent in a block if its operands are not modified
1014    in the block.
1015
1016    An expression is computed (locally available) in a block if it is computed
1017    at least once and expression would contain the same value if the
1018    computation was moved to the end of the block.
1019
1020    An expression is locally anticipatable in a block if it is computed at
1021    least once and expression would contain the same value if the computation
1022    was moved to the beginning of the block.
1023
1024    We call this routine for cprop, pre and code hoisting.  They all
1025    compute basically the same information and thus can easily share
1026    this code.
1027
1028    TRANSP, COMP, and ANTLOC are destination sbitmaps for recording
1029    local properties.  If NULL, then it is not necessary to compute
1030    or record that particular property.
1031
1032    SETP controls which hash table to look at.  If zero, this routine
1033    looks at the expr hash table; if nonzero this routine looks at
1034    the set hash table.  Additionally, TRANSP is computed as ~TRANSP,
1035    since this is really cprop's ABSALTERED.  */
1036
1037 static void
1038 compute_local_properties (transp, comp, antloc, setp)
1039      sbitmap *transp;
1040      sbitmap *comp;
1041      sbitmap *antloc;
1042      int setp;
1043 {
1044   int i, hash_table_size;
1045   struct expr **hash_table;
1046
1047   /* Initialize any bitmaps that were passed in.  */
1048   if (transp)
1049     {
1050       if (setp)
1051         sbitmap_vector_zero (transp, n_basic_blocks);
1052       else
1053         sbitmap_vector_ones (transp, n_basic_blocks);
1054     }
1055   if (comp)
1056     sbitmap_vector_zero (comp, n_basic_blocks);
1057   if (antloc)
1058     sbitmap_vector_zero (antloc, n_basic_blocks);
1059
1060   /* We use the same code for cprop, pre and hoisting.  For cprop
1061      we care about the set hash table, for pre and hoisting we
1062      care about the expr hash table.  */
1063   hash_table_size = setp ? set_hash_table_size : expr_hash_table_size;
1064   hash_table = setp ? set_hash_table : expr_hash_table;
1065
1066   for (i = 0; i < hash_table_size; i++)
1067     {
1068       struct expr *expr;
1069
1070       for (expr = hash_table[i]; expr != NULL; expr = expr->next_same_hash)
1071         {
1072           struct occr *occr;
1073           int indx = expr->bitmap_index;
1074
1075           /* The expression is transparent in this block if it is not killed.
1076              We start by assuming all are transparent [none are killed], and
1077              then reset the bits for those that are.  */
1078
1079           if (transp)
1080             compute_transp (expr->expr, indx, transp, setp);
1081
1082           /* The occurrences recorded in antic_occr are exactly those that
1083              we want to set to non-zero in ANTLOC.  */
1084
1085           if (antloc)
1086             {
1087               for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
1088                 {
1089                   int bb = BLOCK_NUM (occr->insn);
1090                   SET_BIT (antloc[bb], indx);
1091
1092                   /* While we're scanning the table, this is a good place to
1093                      initialize this.  */
1094                   occr->deleted_p = 0;
1095                 }
1096             }
1097
1098           /* The occurrences recorded in avail_occr are exactly those that
1099              we want to set to non-zero in COMP.  */
1100           if (comp)
1101             {
1102
1103               for (occr = expr->avail_occr; occr != NULL; occr = occr->next)
1104                 {
1105                   int bb = BLOCK_NUM (occr->insn);
1106                   SET_BIT (comp[bb], indx);
1107
1108                   /* While we're scanning the table, this is a good place to
1109                      initialize this.  */
1110                   occr->copied_p = 0;
1111                 }
1112             }
1113
1114           /* While we're scanning the table, this is a good place to
1115              initialize this.  */
1116           expr->reaching_reg = 0;
1117         }
1118     }
1119 }
1120
1121 \f
1122 /* Register set information.
1123
1124    `reg_set_table' records where each register is set or otherwise
1125    modified.  */
1126
1127 static struct obstack reg_set_obstack;
1128
1129 static void
1130 alloc_reg_set_mem (n_regs)
1131      int n_regs;
1132 {
1133   int n;
1134
1135   reg_set_table_size = n_regs + REG_SET_TABLE_SLOP;
1136   n = reg_set_table_size * sizeof (struct reg_set *);
1137   reg_set_table = (struct reg_set **) gmalloc (n);
1138   bzero ((char *) reg_set_table, n);
1139
1140   gcc_obstack_init (&reg_set_obstack);
1141 }
1142
1143 static void
1144 free_reg_set_mem ()
1145 {
1146   free (reg_set_table);
1147   obstack_free (&reg_set_obstack, NULL_PTR);
1148 }
1149
1150 /* Record REGNO in the reg_set table.  */
1151
1152 static void
1153 record_one_set (regno, insn)
1154      int regno;
1155      rtx insn;
1156 {
1157   /* allocate a new reg_set element and link it onto the list */
1158   struct reg_set *new_reg_info, *reg_info_ptr1, *reg_info_ptr2;
1159
1160   /* If the table isn't big enough, enlarge it.  */
1161   if (regno >= reg_set_table_size)
1162     {
1163       int new_size = regno + REG_SET_TABLE_SLOP;
1164       reg_set_table = (struct reg_set **)
1165         grealloc ((char *) reg_set_table,
1166                   new_size * sizeof (struct reg_set *));
1167       bzero ((char *) (reg_set_table + reg_set_table_size),
1168              (new_size - reg_set_table_size) * sizeof (struct reg_set *));
1169       reg_set_table_size = new_size;
1170     }
1171
1172   new_reg_info = (struct reg_set *) obstack_alloc (&reg_set_obstack,
1173                                                    sizeof (struct reg_set));
1174   bytes_used += sizeof (struct reg_set);
1175   new_reg_info->insn = insn;
1176   new_reg_info->next = NULL;
1177   if (reg_set_table[regno] == NULL)
1178     reg_set_table[regno] = new_reg_info;
1179   else
1180     {
1181       reg_info_ptr1 = reg_info_ptr2 = reg_set_table[regno];
1182       /* ??? One could keep a "last" pointer to speed this up.  */
1183       while (reg_info_ptr1 != NULL)
1184         {
1185           reg_info_ptr2 = reg_info_ptr1;
1186           reg_info_ptr1 = reg_info_ptr1->next;
1187         }
1188       reg_info_ptr2->next = new_reg_info;
1189     }
1190 }
1191
1192 /* Called from compute_sets via note_stores to handle one
1193    SET or CLOBBER in an insn.  The DATA is really the instruction
1194    in which the SET is occurring.  */
1195
1196 static void
1197 record_set_info (dest, setter, data)
1198      rtx dest, setter ATTRIBUTE_UNUSED;
1199      void *data;
1200 {
1201   rtx record_set_insn = (rtx) data;
1202
1203   if (GET_CODE (dest) == SUBREG)
1204     dest = SUBREG_REG (dest);
1205
1206   if (GET_CODE (dest) == REG)
1207     {
1208       if (REGNO (dest) >= FIRST_PSEUDO_REGISTER)
1209         record_one_set (REGNO (dest), record_set_insn);
1210     }
1211 }
1212
1213 /* Scan the function and record each set of each pseudo-register.
1214
1215    This is called once, at the start of the gcse pass.
1216    See the comments for `reg_set_table' for further docs.  */
1217
1218 static void
1219 compute_sets (f)
1220      rtx f;
1221 {
1222   rtx insn = f;
1223
1224   while (insn)
1225     {
1226       if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
1227         note_stores (PATTERN (insn), record_set_info, insn);
1228       insn = NEXT_INSN (insn);
1229     }
1230 }
1231 \f
1232 /* Hash table support.  */
1233
1234 #define NEVER_SET -1
1235
1236 /* For each register, the cuid of the first/last insn in the block to set it,
1237    or -1 if not set.  */
1238 static int *reg_first_set;
1239 static int *reg_last_set;
1240
1241 /* While computing "first/last set" info, this is the CUID of first/last insn
1242    to set memory or -1 if not set.  `mem_last_set' is also used when
1243    performing GCSE to record whether memory has been set since the beginning
1244    of the block.
1245    Note that handling of memory is very simple, we don't make any attempt
1246    to optimize things (later).  */
1247 static int mem_first_set;
1248 static int mem_last_set;
1249
1250 /* Perform a quick check whether X, the source of a set, is something
1251    we want to consider for GCSE.  */
1252
1253 static int
1254 want_to_gcse_p (x)
1255      rtx x;
1256 {
1257   enum rtx_code code = GET_CODE (x);
1258
1259   switch (code)
1260     {
1261     case REG:
1262     case SUBREG:
1263     case CONST_INT:
1264     case CONST_DOUBLE:
1265     case CALL:
1266       return 0;
1267
1268     default:
1269       break;
1270     }
1271
1272   return 1;
1273 }
1274
1275 /* Return non-zero if the operands of expression X are unchanged from the
1276    start of INSN's basic block up to but not including INSN (if AVAIL_P == 0),
1277    or from INSN to the end of INSN's basic block (if AVAIL_P != 0).  */
1278
1279 static int
1280 oprs_unchanged_p (x, insn, avail_p)
1281      rtx x, insn;
1282      int avail_p;
1283 {
1284   int i;
1285   enum rtx_code code;
1286   const char *fmt;
1287
1288   /* repeat is used to turn tail-recursion into iteration.  */
1289  repeat:
1290
1291   if (x == 0)
1292     return 1;
1293
1294   code = GET_CODE (x);
1295   switch (code)
1296     {
1297     case REG:
1298       if (avail_p)
1299         return (reg_last_set[REGNO (x)] == NEVER_SET
1300                 || reg_last_set[REGNO (x)] < INSN_CUID (insn));
1301       else
1302         return (reg_first_set[REGNO (x)] == NEVER_SET
1303                 || reg_first_set[REGNO (x)] >= INSN_CUID (insn));
1304
1305     case MEM:
1306       if (avail_p)
1307         {
1308           if (mem_last_set != NEVER_SET
1309               && mem_last_set >= INSN_CUID (insn))
1310             return 0;
1311         }
1312       else
1313         {
1314           if (mem_first_set != NEVER_SET
1315               && mem_first_set < INSN_CUID (insn))
1316             return 0;
1317         }
1318       x = XEXP (x, 0);
1319       goto repeat;
1320
1321     case PRE_DEC:
1322     case PRE_INC:
1323     case POST_DEC:
1324     case POST_INC:
1325       return 0;
1326
1327     case PC:
1328     case CC0: /*FIXME*/
1329     case CONST:
1330     case CONST_INT:
1331     case CONST_DOUBLE:
1332     case SYMBOL_REF:
1333     case LABEL_REF:
1334     case ADDR_VEC:
1335     case ADDR_DIFF_VEC:
1336       return 1;
1337
1338     default:
1339       break;
1340     }
1341
1342   i = GET_RTX_LENGTH (code) - 1;
1343   fmt = GET_RTX_FORMAT (code);
1344   for (; i >= 0; i--)
1345     {
1346       if (fmt[i] == 'e')
1347         {
1348           rtx tem = XEXP (x, i);
1349
1350           /* If we are about to do the last recursive call
1351              needed at this level, change it into iteration.
1352              This function is called enough to be worth it.  */
1353           if (i == 0)
1354             {
1355               x = tem;
1356               goto repeat;
1357             }
1358           if (! oprs_unchanged_p (tem, insn, avail_p))
1359             return 0;
1360         }
1361       else if (fmt[i] == 'E')
1362         {
1363           int j;
1364           for (j = 0; j < XVECLEN (x, i); j++)
1365             {
1366               if (! oprs_unchanged_p (XVECEXP (x, i, j), insn, avail_p))
1367                 return 0;
1368             }
1369         }
1370     }
1371
1372   return 1;
1373 }
1374
1375 /* Return non-zero if the operands of expression X are unchanged from
1376    the start of INSN's basic block up to but not including INSN.  */
1377
1378 static int
1379 oprs_anticipatable_p (x, insn)
1380      rtx x, insn;
1381 {
1382   return oprs_unchanged_p (x, insn, 0);
1383 }
1384
1385 /* Return non-zero if the operands of expression X are unchanged from
1386    INSN to the end of INSN's basic block.  */
1387
1388 static int
1389 oprs_available_p (x, insn)
1390      rtx x, insn;
1391 {
1392   return oprs_unchanged_p (x, insn, 1);
1393 }
1394
1395 /* Hash expression X.
1396    MODE is only used if X is a CONST_INT.
1397    A boolean indicating if a volatile operand is found or if the expression
1398    contains something we don't want to insert in the table is stored in
1399    DO_NOT_RECORD_P.
1400
1401    ??? One might want to merge this with canon_hash.  Later.  */
1402
1403 static unsigned int
1404 hash_expr (x, mode, do_not_record_p, hash_table_size)
1405      rtx x;
1406      enum machine_mode mode;
1407      int *do_not_record_p;
1408      int hash_table_size;
1409 {
1410   unsigned int hash;
1411
1412   *do_not_record_p = 0;
1413
1414   hash = hash_expr_1 (x, mode, do_not_record_p);
1415   return hash % hash_table_size;
1416 }
1417
1418 /* Subroutine of hash_expr to do the actual work.  */
1419
1420 static unsigned int
1421 hash_expr_1 (x, mode, do_not_record_p)
1422      rtx x;
1423      enum machine_mode mode;
1424      int *do_not_record_p;
1425 {
1426   int i, j;
1427   unsigned hash = 0;
1428   enum rtx_code code;
1429   const char *fmt;
1430
1431   /* repeat is used to turn tail-recursion into iteration.  */
1432  repeat:
1433
1434   if (x == 0)
1435     return hash;
1436
1437   code = GET_CODE (x);
1438   switch (code)
1439     {
1440     case REG:
1441       {
1442         register int regno = REGNO (x);
1443         hash += ((unsigned) REG << 7) + regno;
1444         return hash;
1445       }
1446
1447     case CONST_INT:
1448       {
1449         unsigned HOST_WIDE_INT tem = INTVAL (x);
1450         hash += ((unsigned) CONST_INT << 7) + (unsigned) mode + tem;
1451         return hash;
1452       }
1453
1454     case CONST_DOUBLE:
1455       /* This is like the general case, except that it only counts
1456          the integers representing the constant.  */
1457       hash += (unsigned) code + (unsigned) GET_MODE (x);
1458       if (GET_MODE (x) != VOIDmode)
1459         for (i = 2; i < GET_RTX_LENGTH (CONST_DOUBLE); i++)
1460           {
1461             unsigned tem = XWINT (x, i);
1462             hash += tem;
1463           }
1464       else
1465         hash += ((unsigned) CONST_DOUBLE_LOW (x)
1466                  + (unsigned) CONST_DOUBLE_HIGH (x));
1467       return hash;
1468
1469       /* Assume there is only one rtx object for any given label.  */
1470     case LABEL_REF:
1471       /* We don't hash on the address of the CODE_LABEL to avoid bootstrap
1472          differences and differences between each stage's debugging dumps.  */
1473       hash += ((unsigned) LABEL_REF << 7) + CODE_LABEL_NUMBER (XEXP (x, 0));
1474       return hash;
1475
1476     case SYMBOL_REF:
1477       {
1478         /* Don't hash on the symbol's address to avoid bootstrap differences.
1479            Different hash values may cause expressions to be recorded in
1480            different orders and thus different registers to be used in the
1481            final assembler.  This also avoids differences in the dump files
1482            between various stages.  */
1483         unsigned int h = 0;
1484         unsigned char *p = (unsigned char *) XSTR (x, 0);
1485         while (*p)
1486           h += (h << 7) + *p++; /* ??? revisit */
1487         hash += ((unsigned) SYMBOL_REF << 7) + h;
1488         return hash;
1489       }
1490
1491     case MEM:
1492       if (MEM_VOLATILE_P (x))
1493         {
1494           *do_not_record_p = 1;
1495           return 0;
1496         }
1497       hash += (unsigned) MEM;
1498       hash += MEM_ALIAS_SET (x);
1499       x = XEXP (x, 0);
1500       goto repeat;
1501
1502     case PRE_DEC:
1503     case PRE_INC:
1504     case POST_DEC:
1505     case POST_INC:
1506     case PC:
1507     case CC0:
1508     case CALL:
1509     case UNSPEC_VOLATILE:
1510       *do_not_record_p = 1;
1511       return 0;
1512
1513     case ASM_OPERANDS:
1514       if (MEM_VOLATILE_P (x))
1515         {
1516           *do_not_record_p = 1;
1517           return 0;
1518         }
1519
1520     default:
1521       break;
1522     }
1523
1524   i = GET_RTX_LENGTH (code) - 1;
1525   hash += (unsigned) code + (unsigned) GET_MODE (x);
1526   fmt = GET_RTX_FORMAT (code);
1527   for (; i >= 0; i--)
1528     {
1529       if (fmt[i] == 'e')
1530         {
1531           rtx tem = XEXP (x, i);
1532
1533           /* If we are about to do the last recursive call
1534              needed at this level, change it into iteration.
1535              This function is called enough to be worth it.  */
1536           if (i == 0)
1537             {
1538               x = tem;
1539               goto repeat;
1540             }
1541           hash += hash_expr_1 (tem, 0, do_not_record_p);
1542           if (*do_not_record_p)
1543             return 0;
1544         }
1545       else if (fmt[i] == 'E')
1546         for (j = 0; j < XVECLEN (x, i); j++)
1547           {
1548             hash += hash_expr_1 (XVECEXP (x, i, j), 0, do_not_record_p);
1549             if (*do_not_record_p)
1550               return 0;
1551           }
1552       else if (fmt[i] == 's')
1553         {
1554           register unsigned char *p = (unsigned char *) XSTR (x, i);
1555           if (p)
1556             while (*p)
1557               hash += *p++;
1558         }
1559       else if (fmt[i] == 'i')
1560         {
1561           register unsigned tem = XINT (x, i);
1562           hash += tem;
1563         }
1564       else
1565         abort ();
1566     }
1567
1568   return hash;
1569 }
1570
1571 /* Hash a set of register REGNO.
1572
1573    Sets are hashed on the register that is set.
1574    This simplifies the PRE copy propagation code.
1575
1576    ??? May need to make things more elaborate.  Later, as necessary.  */
1577
1578 static unsigned int
1579 hash_set (regno, hash_table_size)
1580      int regno;
1581      int hash_table_size;
1582 {
1583   unsigned int hash;
1584
1585   hash = regno;
1586   return hash % hash_table_size;
1587 }
1588
1589 /* Return non-zero if exp1 is equivalent to exp2.
1590    ??? Borrowed from cse.c.  Might want to remerge with cse.c.  Later.  */
1591
1592 static int
1593 expr_equiv_p (x, y)
1594      rtx x, y;
1595 {
1596   register int i, j;
1597   register enum rtx_code code;
1598   register const char *fmt;
1599
1600   if (x == y)
1601     return 1;
1602   if (x == 0 || y == 0)
1603     return x == y;
1604
1605   code = GET_CODE (x);
1606   if (code != GET_CODE (y))
1607     return 0;
1608
1609   /* (MULT:SI x y) and (MULT:HI x y) are NOT equivalent.  */
1610   if (GET_MODE (x) != GET_MODE (y))
1611     return 0;
1612
1613   switch (code)
1614     {
1615     case PC:
1616     case CC0:
1617       return x == y;
1618
1619     case CONST_INT:
1620       return INTVAL (x) == INTVAL (y);
1621
1622     case LABEL_REF:
1623       return XEXP (x, 0) == XEXP (y, 0);
1624
1625     case SYMBOL_REF:
1626       return XSTR (x, 0) == XSTR (y, 0);
1627
1628     case REG:
1629       return REGNO (x) == REGNO (y);
1630
1631     case MEM:
1632       /* Can't merge two expressions in different alias sets, since we can
1633          decide that the expression is transparent in a block when it isn't,
1634          due to it being set with the different alias set.  */
1635       if (MEM_ALIAS_SET (x) != MEM_ALIAS_SET (y))
1636         return 0;
1637       break;
1638
1639     /*  For commutative operations, check both orders.  */
1640     case PLUS:
1641     case MULT:
1642     case AND:
1643     case IOR:
1644     case XOR:
1645     case NE:
1646     case EQ:
1647       return ((expr_equiv_p (XEXP (x, 0), XEXP (y, 0))
1648                && expr_equiv_p (XEXP (x, 1), XEXP (y, 1)))
1649               || (expr_equiv_p (XEXP (x, 0), XEXP (y, 1))
1650                   && expr_equiv_p (XEXP (x, 1), XEXP (y, 0))));
1651
1652     default:
1653       break;
1654     }
1655
1656   /* Compare the elements.  If any pair of corresponding elements
1657      fail to match, return 0 for the whole thing.  */
1658
1659   fmt = GET_RTX_FORMAT (code);
1660   for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
1661     {
1662       switch (fmt[i])
1663         {
1664         case 'e':
1665           if (! expr_equiv_p (XEXP (x, i), XEXP (y, i)))
1666             return 0;
1667           break;
1668
1669         case 'E':
1670           if (XVECLEN (x, i) != XVECLEN (y, i))
1671             return 0;
1672           for (j = 0; j < XVECLEN (x, i); j++)
1673             if (! expr_equiv_p (XVECEXP (x, i, j), XVECEXP (y, i, j)))
1674               return 0;
1675           break;
1676
1677         case 's':
1678           if (strcmp (XSTR (x, i), XSTR (y, i)))
1679             return 0;
1680           break;
1681
1682         case 'i':
1683           if (XINT (x, i) != XINT (y, i))
1684             return 0;
1685           break;
1686
1687         case 'w':
1688           if (XWINT (x, i) != XWINT (y, i))
1689             return 0;
1690         break;
1691
1692         case '0':
1693           break;
1694
1695         default:
1696           abort ();
1697         }
1698       }
1699
1700   return 1;
1701 }
1702
1703 /* Insert expression X in INSN in the hash table.
1704    If it is already present, record it as the last occurrence in INSN's
1705    basic block.
1706
1707    MODE is the mode of the value X is being stored into.
1708    It is only used if X is a CONST_INT.
1709
1710    ANTIC_P is non-zero if X is an anticipatable expression.
1711    AVAIL_P is non-zero if X is an available expression.  */
1712
1713 static void
1714 insert_expr_in_table (x, mode, insn, antic_p, avail_p)
1715      rtx x;
1716      enum machine_mode mode;
1717      rtx insn;
1718      int antic_p, avail_p;
1719 {
1720   int found, do_not_record_p;
1721   unsigned int hash;
1722   struct expr *cur_expr, *last_expr = NULL;
1723   struct occr *antic_occr, *avail_occr;
1724   struct occr *last_occr = NULL;
1725
1726   hash = hash_expr (x, mode, &do_not_record_p, expr_hash_table_size);
1727
1728   /* Do not insert expression in table if it contains volatile operands,
1729      or if hash_expr determines the expression is something we don't want
1730      to or can't handle.  */
1731   if (do_not_record_p)
1732     return;
1733
1734   cur_expr = expr_hash_table[hash];
1735   found = 0;
1736
1737   while (cur_expr && ! (found = expr_equiv_p (cur_expr->expr, x)))
1738     {
1739       /* If the expression isn't found, save a pointer to the end of
1740          the list.  */
1741       last_expr = cur_expr;
1742       cur_expr = cur_expr->next_same_hash;
1743     }
1744
1745   if (! found)
1746     {
1747       cur_expr = (struct expr *) gcse_alloc (sizeof (struct expr));
1748       bytes_used += sizeof (struct expr);
1749       if (expr_hash_table[hash] == NULL)
1750         {
1751           /* This is the first pattern that hashed to this index.  */
1752           expr_hash_table[hash] = cur_expr;
1753         }
1754       else
1755         {
1756           /* Add EXPR to end of this hash chain.  */
1757           last_expr->next_same_hash = cur_expr;
1758         }
1759       /* Set the fields of the expr element.  */
1760       cur_expr->expr = x;
1761       cur_expr->bitmap_index = n_exprs++;
1762       cur_expr->next_same_hash = NULL;
1763       cur_expr->antic_occr = NULL;
1764       cur_expr->avail_occr = NULL;
1765     }
1766
1767   /* Now record the occurrence(s).  */
1768
1769   if (antic_p)
1770     {
1771       antic_occr = cur_expr->antic_occr;
1772
1773       /* Search for another occurrence in the same basic block.  */
1774       while (antic_occr && BLOCK_NUM (antic_occr->insn) != BLOCK_NUM (insn))
1775         {
1776           /* If an occurrence isn't found, save a pointer to the end of
1777              the list.  */
1778           last_occr = antic_occr;
1779           antic_occr = antic_occr->next;
1780         }
1781
1782       if (antic_occr)
1783         {
1784           /* Found another instance of the expression in the same basic block.
1785              Prefer the currently recorded one.  We want the first one in the
1786              block and the block is scanned from start to end.  */
1787           ; /* nothing to do */
1788         }
1789       else
1790         {
1791           /* First occurrence of this expression in this basic block.  */
1792           antic_occr = (struct occr *) gcse_alloc (sizeof (struct occr));
1793           bytes_used += sizeof (struct occr);
1794           /* First occurrence of this expression in any block?  */
1795           if (cur_expr->antic_occr == NULL)
1796             cur_expr->antic_occr = antic_occr;
1797           else
1798             last_occr->next = antic_occr;
1799           antic_occr->insn = insn;
1800           antic_occr->next = NULL;
1801         }
1802     }
1803
1804   if (avail_p)
1805     {
1806       avail_occr = cur_expr->avail_occr;
1807
1808       /* Search for another occurrence in the same basic block.  */
1809       while (avail_occr && BLOCK_NUM (avail_occr->insn) != BLOCK_NUM (insn))
1810         {
1811           /* If an occurrence isn't found, save a pointer to the end of
1812              the list.  */
1813           last_occr = avail_occr;
1814           avail_occr = avail_occr->next;
1815         }
1816
1817       if (avail_occr)
1818         {
1819           /* Found another instance of the expression in the same basic block.
1820              Prefer this occurrence to the currently recorded one.  We want
1821              the last one in the block and the block is scanned from start
1822              to end.  */
1823           avail_occr->insn = insn;
1824         }
1825       else
1826         {
1827           /* First occurrence of this expression in this basic block.  */
1828           avail_occr = (struct occr *) gcse_alloc (sizeof (struct occr));
1829           bytes_used += sizeof (struct occr);
1830           /* First occurrence of this expression in any block?  */
1831           if (cur_expr->avail_occr == NULL)
1832             cur_expr->avail_occr = avail_occr;
1833           else
1834             last_occr->next = avail_occr;
1835           avail_occr->insn = insn;
1836           avail_occr->next = NULL;
1837         }
1838     }
1839 }
1840
1841 /* Insert pattern X in INSN in the hash table.
1842    X is a SET of a reg to either another reg or a constant.
1843    If it is already present, record it as the last occurrence in INSN's
1844    basic block.  */
1845
1846 static void
1847 insert_set_in_table (x, insn)
1848      rtx x;
1849      rtx insn;
1850 {
1851   int found;
1852   unsigned int hash;
1853   struct expr *cur_expr, *last_expr = NULL;
1854   struct occr *cur_occr, *last_occr = NULL;
1855
1856   if (GET_CODE (x) != SET
1857       || GET_CODE (SET_DEST (x)) != REG)
1858     abort ();
1859
1860   hash = hash_set (REGNO (SET_DEST (x)), set_hash_table_size);
1861
1862   cur_expr = set_hash_table[hash];
1863   found = 0;
1864
1865   while (cur_expr && ! (found = expr_equiv_p (cur_expr->expr, x)))
1866     {
1867       /* If the expression isn't found, save a pointer to the end of
1868          the list.  */
1869       last_expr = cur_expr;
1870       cur_expr = cur_expr->next_same_hash;
1871     }
1872
1873   if (! found)
1874     {
1875       cur_expr = (struct expr *) gcse_alloc (sizeof (struct expr));
1876       bytes_used += sizeof (struct expr);
1877       if (set_hash_table[hash] == NULL)
1878         {
1879           /* This is the first pattern that hashed to this index.  */
1880           set_hash_table[hash] = cur_expr;
1881         }
1882       else
1883         {
1884           /* Add EXPR to end of this hash chain.  */
1885           last_expr->next_same_hash = cur_expr;
1886         }
1887       /* Set the fields of the expr element.
1888          We must copy X because it can be modified when copy propagation is
1889          performed on its operands.  */
1890       /* ??? Should this go in a different obstack?  */
1891       cur_expr->expr = copy_rtx (x);
1892       cur_expr->bitmap_index = n_sets++;
1893       cur_expr->next_same_hash = NULL;
1894       cur_expr->antic_occr = NULL;
1895       cur_expr->avail_occr = NULL;
1896     }
1897
1898   /* Now record the occurrence.  */
1899
1900   cur_occr = cur_expr->avail_occr;
1901
1902   /* Search for another occurrence in the same basic block.  */
1903   while (cur_occr && BLOCK_NUM (cur_occr->insn) != BLOCK_NUM (insn))
1904     {
1905       /* If an occurrence isn't found, save a pointer to the end of
1906          the list.  */
1907       last_occr = cur_occr;
1908       cur_occr = cur_occr->next;
1909     }
1910
1911   if (cur_occr)
1912     {
1913       /* Found another instance of the expression in the same basic block.
1914          Prefer this occurrence to the currently recorded one.  We want
1915          the last one in the block and the block is scanned from start
1916          to end.  */
1917       cur_occr->insn = insn;
1918     }
1919   else
1920     {
1921       /* First occurrence of this expression in this basic block.  */
1922       cur_occr = (struct occr *) gcse_alloc (sizeof (struct occr));
1923       bytes_used += sizeof (struct occr);
1924       /* First occurrence of this expression in any block?  */
1925       if (cur_expr->avail_occr == NULL)
1926         cur_expr->avail_occr = cur_occr;
1927       else
1928         last_occr->next = cur_occr;
1929       cur_occr->insn = insn;
1930       cur_occr->next = NULL;
1931     }
1932 }
1933
1934 /* Scan pattern PAT of INSN and add an entry to the hash table.
1935    If SET_P is non-zero, this is for the assignment hash table,
1936    otherwise it is for the expression hash table.  */
1937
1938 static void
1939 hash_scan_set (pat, insn, set_p)
1940      rtx pat, insn;
1941      int set_p;
1942 {
1943   rtx src = SET_SRC (pat);
1944   rtx dest = SET_DEST (pat);
1945
1946   if (GET_CODE (src) == CALL)
1947     hash_scan_call (src, insn);
1948
1949   if (GET_CODE (dest) == REG)
1950     {
1951       int regno = REGNO (dest);
1952       rtx tmp;
1953
1954       /* Only record sets of pseudo-regs in the hash table.  */
1955       if (! set_p
1956           && regno >= FIRST_PSEUDO_REGISTER
1957           /* Don't GCSE something if we can't do a reg/reg copy.  */
1958           && can_copy_p [GET_MODE (dest)]
1959           /* Is SET_SRC something we want to gcse?  */
1960           && want_to_gcse_p (src))
1961         {
1962           /* An expression is not anticipatable if its operands are
1963              modified before this insn.  */
1964           int antic_p = oprs_anticipatable_p (src, insn);
1965           /* An expression is not available if its operands are
1966              subsequently modified, including this insn.  */
1967           int avail_p = oprs_available_p (src, insn);
1968           insert_expr_in_table (src, GET_MODE (dest), insn, antic_p, avail_p);
1969         }
1970       /* Record sets for constant/copy propagation.  */
1971       else if (set_p
1972                && regno >= FIRST_PSEUDO_REGISTER
1973                && ((GET_CODE (src) == REG
1974                     && REGNO (src) >= FIRST_PSEUDO_REGISTER
1975                     && can_copy_p [GET_MODE (dest)])
1976                    || GET_CODE (src) == CONST_INT
1977                    || GET_CODE (src) == SYMBOL_REF
1978                    || GET_CODE (src) == CONST_DOUBLE)
1979                /* A copy is not available if its src or dest is subsequently
1980                   modified.  Here we want to search from INSN+1 on, but
1981                   oprs_available_p searches from INSN on.  */
1982                && (insn == BLOCK_END (BLOCK_NUM (insn))
1983                    || ((tmp = next_nonnote_insn (insn)) != NULL_RTX
1984                        && oprs_available_p (pat, tmp))))
1985         insert_set_in_table (pat, insn);
1986     }
1987 }
1988
1989 static void
1990 hash_scan_clobber (x, insn)
1991      rtx x ATTRIBUTE_UNUSED, insn ATTRIBUTE_UNUSED;
1992 {
1993   /* Currently nothing to do.  */
1994 }
1995
1996 static void
1997 hash_scan_call (x, insn)
1998      rtx x ATTRIBUTE_UNUSED, insn ATTRIBUTE_UNUSED;
1999 {
2000   /* Currently nothing to do.  */
2001 }
2002
2003 /* Process INSN and add hash table entries as appropriate.
2004
2005    Only available expressions that set a single pseudo-reg are recorded.
2006
2007    Single sets in a PARALLEL could be handled, but it's an extra complication
2008    that isn't dealt with right now.  The trick is handling the CLOBBERs that
2009    are also in the PARALLEL.  Later.
2010
2011    If SET_P is non-zero, this is for the assignment hash table,
2012    otherwise it is for the expression hash table.
2013    If IN_LIBCALL_BLOCK nonzero, we are in a libcall block, and should
2014    not record any expressions.  */
2015
2016 static void
2017 hash_scan_insn (insn, set_p, in_libcall_block)
2018      rtx insn;
2019      int set_p;
2020      int in_libcall_block;
2021 {
2022   rtx pat = PATTERN (insn);
2023
2024   /* Pick out the sets of INSN and for other forms of instructions record
2025      what's been modified.  */
2026
2027   if (GET_CODE (pat) == SET && ! in_libcall_block)
2028     {
2029       /* Ignore obvious no-ops.  */
2030       if (SET_SRC (pat) != SET_DEST (pat))
2031         hash_scan_set (pat, insn, set_p);
2032     }
2033   else if (GET_CODE (pat) == PARALLEL)
2034     {
2035       int i;
2036
2037       for (i = 0; i < XVECLEN (pat, 0); i++)
2038         {
2039           rtx x = XVECEXP (pat, 0, i);
2040
2041           if (GET_CODE (x) == SET)
2042             {
2043               if (GET_CODE (SET_SRC (x)) == CALL)
2044                 hash_scan_call (SET_SRC (x), insn);
2045             }
2046           else if (GET_CODE (x) == CLOBBER)
2047             hash_scan_clobber (x, insn);
2048           else if (GET_CODE (x) == CALL)
2049             hash_scan_call (x, insn);
2050         }
2051     }
2052   else if (GET_CODE (pat) == CLOBBER)
2053     hash_scan_clobber (pat, insn);
2054   else if (GET_CODE (pat) == CALL)
2055     hash_scan_call (pat, insn);
2056 }
2057
2058 static void
2059 dump_hash_table (file, name, table, table_size, total_size)
2060      FILE *file;
2061      const char *name;
2062      struct expr **table;
2063      int table_size, total_size;
2064 {
2065   int i;
2066   /* Flattened out table, so it's printed in proper order.  */
2067   struct expr **flat_table;
2068   unsigned int *hash_val;
2069
2070   flat_table
2071     = (struct expr **) xcalloc (total_size, sizeof (struct expr *));
2072   hash_val = (unsigned int *) xmalloc (total_size * sizeof (unsigned int));
2073
2074   for (i = 0; i < table_size; i++)
2075     {
2076       struct expr *expr;
2077
2078       for (expr = table[i]; expr != NULL; expr = expr->next_same_hash)
2079         {
2080           flat_table[expr->bitmap_index] = expr;
2081           hash_val[expr->bitmap_index] = i;
2082         }
2083     }
2084
2085   fprintf (file, "%s hash table (%d buckets, %d entries)\n",
2086            name, table_size, total_size);
2087
2088   for (i = 0; i < total_size; i++)
2089     {
2090       struct expr *expr = flat_table[i];
2091
2092       fprintf (file, "Index %d (hash value %d)\n  ",
2093                expr->bitmap_index, hash_val[i]);
2094       print_rtl (file, expr->expr);
2095       fprintf (file, "\n");
2096     }
2097
2098   fprintf (file, "\n");
2099
2100   /* Clean up.  */
2101   free (flat_table);
2102   free (hash_val);
2103 }
2104
2105 /* Record register first/last/block set information for REGNO in INSN.
2106    reg_first_set records the first place in the block where the register
2107    is set and is used to compute "anticipatability".
2108    reg_last_set records the last place in the block where the register
2109    is set and is used to compute "availability".
2110    reg_set_in_block records whether the register is set in the block
2111    and is used to compute "transparency".  */
2112
2113 static void
2114 record_last_reg_set_info (insn, regno)
2115      rtx insn;
2116      int regno;
2117 {
2118   if (reg_first_set[regno] == NEVER_SET)
2119     reg_first_set[regno] = INSN_CUID (insn);
2120   reg_last_set[regno] = INSN_CUID (insn);
2121   SET_BIT (reg_set_in_block[BLOCK_NUM (insn)], regno);
2122 }
2123
2124 /* Record memory first/last/block set information for INSN.  */
2125
2126 static void
2127 record_last_mem_set_info (insn)
2128      rtx insn;
2129 {
2130   if (mem_first_set == NEVER_SET)
2131     mem_first_set = INSN_CUID (insn);
2132   mem_last_set = INSN_CUID (insn);
2133   mem_set_in_block[BLOCK_NUM (insn)] = 1;
2134 }
2135
2136 /* Called from compute_hash_table via note_stores to handle one
2137    SET or CLOBBER in an insn.  DATA is really the instruction in which
2138    the SET is taking place.  */
2139
2140 static void
2141 record_last_set_info (dest, setter, data)
2142      rtx dest, setter ATTRIBUTE_UNUSED;
2143      void *data;
2144 {
2145   rtx last_set_insn = (rtx) data;
2146
2147   if (GET_CODE (dest) == SUBREG)
2148     dest = SUBREG_REG (dest);
2149
2150   if (GET_CODE (dest) == REG)
2151     record_last_reg_set_info (last_set_insn, REGNO (dest));
2152   else if (GET_CODE (dest) == MEM
2153            /* Ignore pushes, they clobber nothing.  */
2154            && ! push_operand (dest, GET_MODE (dest)))
2155     record_last_mem_set_info (last_set_insn);
2156 }
2157
2158 /* Top level function to create an expression or assignment hash table.
2159
2160    Expression entries are placed in the hash table if
2161    - they are of the form (set (pseudo-reg) src),
2162    - src is something we want to perform GCSE on,
2163    - none of the operands are subsequently modified in the block
2164
2165    Assignment entries are placed in the hash table if
2166    - they are of the form (set (pseudo-reg) src),
2167    - src is something we want to perform const/copy propagation on,
2168    - none of the operands or target are subsequently modified in the block
2169    Currently src must be a pseudo-reg or a const_int.
2170
2171    F is the first insn.
2172    SET_P is non-zero for computing the assignment hash table.  */
2173
2174 static void
2175 compute_hash_table (set_p)
2176      int set_p;
2177 {
2178   int bb;
2179
2180   /* While we compute the hash table we also compute a bit array of which
2181      registers are set in which blocks.
2182      We also compute which blocks set memory, in the absence of aliasing
2183      support [which is TODO].
2184      ??? This isn't needed during const/copy propagation, but it's cheap to
2185      compute.  Later.  */
2186   sbitmap_vector_zero (reg_set_in_block, n_basic_blocks);
2187   bzero ((char *) mem_set_in_block, n_basic_blocks);
2188
2189   /* Some working arrays used to track first and last set in each block.  */
2190   /* ??? One could use alloca here, but at some size a threshold is crossed
2191      beyond which one should use malloc.  Are we at that threshold here?  */
2192   reg_first_set = (int *) gmalloc (max_gcse_regno * sizeof (int));
2193   reg_last_set = (int *) gmalloc (max_gcse_regno * sizeof (int));
2194
2195   for (bb = 0; bb < n_basic_blocks; bb++)
2196     {
2197       rtx insn;
2198       int regno;
2199       int in_libcall_block;
2200       int i;
2201
2202       /* First pass over the instructions records information used to
2203          determine when registers and memory are first and last set.
2204          ??? The mem_set_in_block and hard-reg reg_set_in_block computation
2205          could be moved to compute_sets since they currently don't change.  */
2206
2207       for (i = 0; i < max_gcse_regno; i++)
2208         reg_first_set[i] = reg_last_set[i] = NEVER_SET;
2209       mem_first_set = NEVER_SET;
2210       mem_last_set = NEVER_SET;
2211
2212       for (insn = BLOCK_HEAD (bb);
2213            insn && insn != NEXT_INSN (BLOCK_END (bb));
2214            insn = NEXT_INSN (insn))
2215         {
2216 #ifdef NON_SAVING_SETJMP
2217           if (NON_SAVING_SETJMP && GET_CODE (insn) == NOTE
2218               && NOTE_LINE_NUMBER (insn) == NOTE_INSN_SETJMP)
2219             {
2220               for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2221                 record_last_reg_set_info (insn, regno);
2222               continue;
2223             }
2224 #endif
2225
2226           if (GET_RTX_CLASS (GET_CODE (insn)) != 'i')
2227             continue;
2228
2229           if (GET_CODE (insn) == CALL_INSN)
2230             {
2231               for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2232                 if ((call_used_regs[regno]
2233                      && regno != STACK_POINTER_REGNUM
2234 #if HARD_FRAME_POINTER_REGNUM != FRAME_POINTER_REGNUM
2235                      && regno != HARD_FRAME_POINTER_REGNUM
2236 #endif
2237 #if ARG_POINTER_REGNUM != FRAME_POINTER_REGNUM
2238                      && ! (regno == ARG_POINTER_REGNUM && fixed_regs[regno])
2239 #endif
2240 #if defined (PIC_OFFSET_TABLE_REGNUM) && !defined (PIC_OFFSET_TABLE_REG_CALL_CLOBBERED)
2241                      && ! (regno == PIC_OFFSET_TABLE_REGNUM && flag_pic)
2242 #endif
2243
2244                      && regno != FRAME_POINTER_REGNUM)
2245                     || global_regs[regno])
2246                   record_last_reg_set_info (insn, regno);
2247               if (! CONST_CALL_P (insn))
2248                 record_last_mem_set_info (insn);
2249             }
2250
2251           note_stores (PATTERN (insn), record_last_set_info, insn);
2252         }
2253
2254       /* The next pass builds the hash table.  */
2255
2256       for (insn = BLOCK_HEAD (bb), in_libcall_block = 0;
2257            insn && insn != NEXT_INSN (BLOCK_END (bb));
2258            insn = NEXT_INSN (insn))
2259         {
2260           if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
2261             {
2262               if (find_reg_note (insn, REG_LIBCALL, NULL_RTX))
2263                 in_libcall_block = 1;
2264               else if (find_reg_note (insn, REG_RETVAL, NULL_RTX))
2265                 in_libcall_block = 0;
2266               hash_scan_insn (insn, set_p, in_libcall_block);
2267             }
2268         }
2269     }
2270
2271   free (reg_first_set);
2272   free (reg_last_set);
2273   /* Catch bugs early.  */
2274   reg_first_set = reg_last_set = 0;
2275 }
2276
2277 /* Allocate space for the set hash table.
2278    N_INSNS is the number of instructions in the function.
2279    It is used to determine the number of buckets to use.  */
2280
2281 static void
2282 alloc_set_hash_table (n_insns)
2283      int n_insns;
2284 {
2285   int n;
2286
2287   set_hash_table_size = n_insns / 4;
2288   if (set_hash_table_size < 11)
2289     set_hash_table_size = 11;
2290   /* Attempt to maintain efficient use of hash table.
2291      Making it an odd number is simplest for now.
2292      ??? Later take some measurements.  */
2293   set_hash_table_size |= 1;
2294   n = set_hash_table_size * sizeof (struct expr *);
2295   set_hash_table = (struct expr **) gmalloc (n);
2296 }
2297
2298 /* Free things allocated by alloc_set_hash_table.  */
2299
2300 static void
2301 free_set_hash_table ()
2302 {
2303   free (set_hash_table);
2304 }
2305
2306 /* Compute the hash table for doing copy/const propagation.  */
2307
2308 static void
2309 compute_set_hash_table ()
2310 {
2311   /* Initialize count of number of entries in hash table.  */
2312   n_sets = 0;
2313   bzero ((char *) set_hash_table, set_hash_table_size * sizeof (struct expr *));
2314
2315   compute_hash_table (1);
2316 }
2317
2318 /* Allocate space for the expression hash table.
2319    N_INSNS is the number of instructions in the function.
2320    It is used to determine the number of buckets to use.  */
2321
2322 static void
2323 alloc_expr_hash_table (n_insns)
2324      int n_insns;
2325 {
2326   int n;
2327
2328   expr_hash_table_size = n_insns / 2;
2329   /* Make sure the amount is usable.  */
2330   if (expr_hash_table_size < 11)
2331     expr_hash_table_size = 11;
2332   /* Attempt to maintain efficient use of hash table.
2333      Making it an odd number is simplest for now.
2334      ??? Later take some measurements.  */
2335   expr_hash_table_size |= 1;
2336   n = expr_hash_table_size * sizeof (struct expr *);
2337   expr_hash_table = (struct expr **) gmalloc (n);
2338 }
2339
2340 /* Free things allocated by alloc_expr_hash_table.  */
2341
2342 static void
2343 free_expr_hash_table ()
2344 {
2345   free (expr_hash_table);
2346 }
2347
2348 /* Compute the hash table for doing GCSE.  */
2349
2350 static void
2351 compute_expr_hash_table ()
2352 {
2353   /* Initialize count of number of entries in hash table.  */
2354   n_exprs = 0;
2355   bzero ((char *) expr_hash_table, expr_hash_table_size * sizeof (struct expr *));
2356
2357   compute_hash_table (0);
2358 }
2359 \f
2360 /* Expression tracking support.  */
2361
2362 /* Lookup pattern PAT in the expression table.
2363    The result is a pointer to the table entry, or NULL if not found.  */
2364
2365 static struct expr *
2366 lookup_expr (pat)
2367      rtx pat;
2368 {
2369   int do_not_record_p;
2370   unsigned int hash = hash_expr (pat, GET_MODE (pat), &do_not_record_p,
2371                                  expr_hash_table_size);
2372   struct expr *expr;
2373
2374   if (do_not_record_p)
2375     return NULL;
2376
2377   expr = expr_hash_table[hash];
2378
2379   while (expr && ! expr_equiv_p (expr->expr, pat))
2380     expr = expr->next_same_hash;
2381
2382   return expr;
2383 }
2384
2385 /* Lookup REGNO in the set table.
2386    If PAT is non-NULL look for the entry that matches it, otherwise return
2387    the first entry for REGNO.
2388    The result is a pointer to the table entry, or NULL if not found.  */
2389
2390 static struct expr *
2391 lookup_set (regno, pat)
2392      int regno;
2393      rtx pat;
2394 {
2395   unsigned int hash = hash_set (regno, set_hash_table_size);
2396   struct expr *expr;
2397
2398   expr = set_hash_table[hash];
2399
2400   if (pat)
2401     {
2402       while (expr && ! expr_equiv_p (expr->expr, pat))
2403         expr = expr->next_same_hash;
2404     }
2405   else
2406     {
2407       while (expr && REGNO (SET_DEST (expr->expr)) != regno)
2408         expr = expr->next_same_hash;
2409     }
2410
2411   return expr;
2412 }
2413
2414 /* Return the next entry for REGNO in list EXPR.  */
2415
2416 static struct expr *
2417 next_set (regno, expr)
2418      int regno;
2419      struct expr *expr;
2420 {
2421   do
2422     expr = expr->next_same_hash;
2423   while (expr && REGNO (SET_DEST (expr->expr)) != regno);
2424   return expr;
2425 }
2426
2427 /* Reset tables used to keep track of what's still available [since the
2428    start of the block].  */
2429
2430 static void
2431 reset_opr_set_tables ()
2432 {
2433   /* Maintain a bitmap of which regs have been set since beginning of
2434      the block.  */
2435   sbitmap_zero (reg_set_bitmap);
2436   /* Also keep a record of the last instruction to modify memory.
2437      For now this is very trivial, we only record whether any memory
2438      location has been modified.  */
2439   mem_last_set = 0;
2440 }
2441
2442 /* Return non-zero if the operands of X are not set before INSN in
2443    INSN's basic block.  */
2444
2445 static int
2446 oprs_not_set_p (x, insn)
2447      rtx x, insn;
2448 {
2449   int i;
2450   enum rtx_code code;
2451   const char *fmt;
2452
2453   /* repeat is used to turn tail-recursion into iteration.  */
2454 repeat:
2455
2456   if (x == 0)
2457     return 1;
2458
2459   code = GET_CODE (x);
2460   switch (code)
2461     {
2462     case PC:
2463     case CC0:
2464     case CONST:
2465     case CONST_INT:
2466     case CONST_DOUBLE:
2467     case SYMBOL_REF:
2468     case LABEL_REF:
2469     case ADDR_VEC:
2470     case ADDR_DIFF_VEC:
2471       return 1;
2472
2473     case MEM:
2474       if (mem_last_set != 0)
2475         return 0;
2476       x = XEXP (x, 0);
2477       goto repeat;
2478
2479     case REG:
2480       return ! TEST_BIT (reg_set_bitmap, REGNO (x));
2481
2482     default:
2483       break;
2484     }
2485
2486   fmt = GET_RTX_FORMAT (code);
2487   for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
2488     {
2489       if (fmt[i] == 'e')
2490         {
2491           int not_set_p;
2492           /* If we are about to do the last recursive call
2493              needed at this level, change it into iteration.
2494              This function is called enough to be worth it.  */
2495           if (i == 0)
2496             {
2497               x = XEXP (x, 0);
2498               goto repeat;
2499             }
2500           not_set_p = oprs_not_set_p (XEXP (x, i), insn);
2501           if (! not_set_p)
2502             return 0;
2503         }
2504       else if (fmt[i] == 'E')
2505         {
2506           int j;
2507           for (j = 0; j < XVECLEN (x, i); j++)
2508             {
2509               int not_set_p = oprs_not_set_p (XVECEXP (x, i, j), insn);
2510               if (! not_set_p)
2511                 return 0;
2512             }
2513         }
2514     }
2515
2516   return 1;
2517 }
2518
2519 /* Mark things set by a CALL.  */
2520
2521 static void
2522 mark_call (insn)
2523      rtx insn;
2524 {
2525   mem_last_set = INSN_CUID (insn);
2526 }
2527
2528 /* Mark things set by a SET.  */
2529
2530 static void
2531 mark_set (pat, insn)
2532      rtx pat, insn;
2533 {
2534   rtx dest = SET_DEST (pat);
2535
2536   while (GET_CODE (dest) == SUBREG
2537          || GET_CODE (dest) == ZERO_EXTRACT
2538          || GET_CODE (dest) == SIGN_EXTRACT
2539          || GET_CODE (dest) == STRICT_LOW_PART)
2540     dest = XEXP (dest, 0);
2541
2542   if (GET_CODE (dest) == REG)
2543     SET_BIT (reg_set_bitmap, REGNO (dest));
2544   else if (GET_CODE (dest) == MEM)
2545     mem_last_set = INSN_CUID (insn);
2546
2547   if (GET_CODE (SET_SRC (pat)) == CALL)
2548     mark_call (insn);
2549 }
2550
2551 /* Record things set by a CLOBBER.  */
2552
2553 static void
2554 mark_clobber (pat, insn)
2555      rtx pat, insn;
2556 {
2557   rtx clob = XEXP (pat, 0);
2558
2559   while (GET_CODE (clob) == SUBREG || GET_CODE (clob) == STRICT_LOW_PART)
2560     clob = XEXP (clob, 0);
2561
2562   if (GET_CODE (clob) == REG)
2563     SET_BIT (reg_set_bitmap, REGNO (clob));
2564   else
2565     mem_last_set = INSN_CUID (insn);
2566 }
2567
2568 /* Record things set by INSN.
2569    This data is used by oprs_not_set_p.  */
2570
2571 static void
2572 mark_oprs_set (insn)
2573      rtx insn;
2574 {
2575   rtx pat = PATTERN (insn);
2576
2577   if (GET_CODE (pat) == SET)
2578     mark_set (pat, insn);
2579   else if (GET_CODE (pat) == PARALLEL)
2580     {
2581       int i;
2582
2583       for (i = 0; i < XVECLEN (pat, 0); i++)
2584         {
2585           rtx x = XVECEXP (pat, 0, i);
2586
2587           if (GET_CODE (x) == SET)
2588             mark_set (x, insn);
2589           else if (GET_CODE (x) == CLOBBER)
2590             mark_clobber (x, insn);
2591           else if (GET_CODE (x) == CALL)
2592             mark_call (insn);
2593         }
2594     }
2595   else if (GET_CODE (pat) == CLOBBER)
2596     mark_clobber (pat, insn);
2597   else if (GET_CODE (pat) == CALL)
2598     mark_call (insn);
2599 }
2600
2601 \f
2602 /* Classic GCSE reaching definition support.  */
2603
2604 /* Allocate reaching def variables.  */
2605
2606 static void
2607 alloc_rd_mem (n_blocks, n_insns)
2608      int n_blocks, n_insns;
2609 {
2610   rd_kill = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2611   sbitmap_vector_zero (rd_kill, n_basic_blocks);
2612
2613   rd_gen = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2614   sbitmap_vector_zero (rd_gen, n_basic_blocks);
2615
2616   reaching_defs = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2617   sbitmap_vector_zero (reaching_defs, n_basic_blocks);
2618
2619   rd_out = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2620   sbitmap_vector_zero (rd_out, n_basic_blocks);
2621 }
2622
2623 /* Free reaching def variables.  */
2624
2625 static void
2626 free_rd_mem ()
2627 {
2628   free (rd_kill);
2629   free (rd_gen);
2630   free (reaching_defs);
2631   free (rd_out);
2632 }
2633
2634 /* Add INSN to the kills of BB.
2635    REGNO, set in BB, is killed by INSN.  */
2636
2637 static void
2638 handle_rd_kill_set (insn, regno, bb)
2639      rtx insn;
2640      int regno, bb;
2641 {
2642   struct reg_set *this_reg = reg_set_table[regno];
2643
2644   while (this_reg)
2645     {
2646       if (BLOCK_NUM (this_reg->insn) != BLOCK_NUM (insn))
2647         SET_BIT (rd_kill[bb], INSN_CUID (this_reg->insn));
2648       this_reg = this_reg->next;
2649     }
2650 }
2651
2652 /* Compute the set of kill's for reaching definitions.  */
2653
2654 static void
2655 compute_kill_rd ()
2656 {
2657   int bb,cuid;
2658
2659   /* For each block
2660        For each set bit in `gen' of the block (i.e each insn which
2661            generates a definition in the block)
2662          Call the reg set by the insn corresponding to that bit regx
2663          Look at the linked list starting at reg_set_table[regx]
2664          For each setting of regx in the linked list, which is not in
2665              this block
2666            Set the bit in `kill' corresponding to that insn
2667     */
2668
2669   for (bb = 0; bb < n_basic_blocks; bb++)
2670     {
2671       for (cuid = 0; cuid < max_cuid; cuid++)
2672         {
2673           if (TEST_BIT (rd_gen[bb], cuid))
2674             {
2675               rtx insn = CUID_INSN (cuid);
2676               rtx pat = PATTERN (insn);
2677
2678               if (GET_CODE (insn) == CALL_INSN)
2679                 {
2680                   int regno;
2681
2682                   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2683                     {
2684                       if ((call_used_regs[regno]
2685                            && regno != STACK_POINTER_REGNUM
2686 #if HARD_FRAME_POINTER_REGNUM != FRAME_POINTER_REGNUM
2687                            && regno != HARD_FRAME_POINTER_REGNUM
2688 #endif
2689 #if ARG_POINTER_REGNUM != FRAME_POINTER_REGNUM
2690                            && ! (regno == ARG_POINTER_REGNUM
2691                                  && fixed_regs[regno])
2692 #endif
2693 #if defined (PIC_OFFSET_TABLE_REGNUM) && !defined (PIC_OFFSET_TABLE_REG_CALL_CLOBBERED)
2694                            && ! (regno == PIC_OFFSET_TABLE_REGNUM && flag_pic)
2695 #endif
2696                            && regno != FRAME_POINTER_REGNUM)
2697                           || global_regs[regno])
2698                         handle_rd_kill_set (insn, regno, bb);
2699                     }
2700                 }
2701
2702               if (GET_CODE (pat) == PARALLEL)
2703                 {
2704                   int i;
2705
2706                   /* We work backwards because ... */
2707                   for (i = XVECLEN (pat, 0) - 1; i >= 0; i--)
2708                     {
2709                       enum rtx_code code = GET_CODE (XVECEXP (pat, 0, i));
2710                       if ((code == SET || code == CLOBBER)
2711                           && GET_CODE (XEXP (XVECEXP (pat, 0, i), 0)) == REG)
2712                         handle_rd_kill_set (insn,
2713                                             REGNO (XEXP (XVECEXP (pat, 0, i), 0)),
2714                                             bb);
2715                     }
2716                 }
2717               else if (GET_CODE (pat) == SET)
2718                 {
2719                   if (GET_CODE (SET_DEST (pat)) == REG)
2720                     {
2721                       /* Each setting of this register outside of this block
2722                          must be marked in the set of kills in this block.  */
2723                       handle_rd_kill_set (insn, REGNO (SET_DEST (pat)), bb);
2724                     }
2725                 }
2726               /* FIXME: CLOBBER? */
2727             }
2728         }
2729     }
2730 }
2731
2732 /* Compute the reaching definitions as in
2733    Compilers Principles, Techniques, and Tools. Aho, Sethi, Ullman,
2734    Chapter 10.  It is the same algorithm as used for computing available
2735    expressions but applied to the gens and kills of reaching definitions.  */
2736
2737 static void
2738 compute_rd ()
2739 {
2740   int bb, changed, passes;
2741
2742   for (bb = 0; bb < n_basic_blocks; bb++)
2743     sbitmap_copy (rd_out[bb] /*dst*/, rd_gen[bb] /*src*/);
2744
2745   passes = 0;
2746   changed = 1;
2747   while (changed)
2748     {
2749       changed = 0;
2750       for (bb = 0; bb < n_basic_blocks; bb++)
2751         {
2752           sbitmap_union_of_preds (reaching_defs[bb], rd_out, bb);
2753           changed |= sbitmap_union_of_diff (rd_out[bb], rd_gen[bb],
2754                                             reaching_defs[bb], rd_kill[bb]);
2755         }
2756       passes++;
2757     }
2758
2759   if (gcse_file)
2760     fprintf (gcse_file, "reaching def computation: %d passes\n", passes);
2761 }
2762 \f
2763 /* Classic GCSE available expression support.  */
2764
2765 /* Allocate memory for available expression computation.  */
2766
2767 static void
2768 alloc_avail_expr_mem (n_blocks, n_exprs)
2769      int n_blocks, n_exprs;
2770 {
2771   ae_kill = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2772   sbitmap_vector_zero (ae_kill, n_basic_blocks);
2773
2774   ae_gen = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2775   sbitmap_vector_zero (ae_gen, n_basic_blocks);
2776
2777   ae_in = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2778   sbitmap_vector_zero (ae_in, n_basic_blocks);
2779
2780   ae_out = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2781   sbitmap_vector_zero (ae_out, n_basic_blocks);
2782
2783   u_bitmap = (sbitmap) sbitmap_alloc (n_exprs);
2784   sbitmap_ones (u_bitmap);
2785 }
2786
2787 static void
2788 free_avail_expr_mem ()
2789 {
2790   free (ae_kill);
2791   free (ae_gen);
2792   free (ae_in);
2793   free (ae_out);
2794   free (u_bitmap);
2795 }
2796
2797 /* Compute the set of available expressions generated in each basic block.  */
2798
2799 static void
2800 compute_ae_gen ()
2801 {
2802   int i;
2803
2804   /* For each recorded occurrence of each expression, set ae_gen[bb][expr].
2805      This is all we have to do because an expression is not recorded if it
2806      is not available, and the only expressions we want to work with are the
2807      ones that are recorded.  */
2808
2809   for (i = 0; i < expr_hash_table_size; i++)
2810     {
2811       struct expr *expr = expr_hash_table[i];
2812       while (expr != NULL)
2813         {
2814           struct occr *occr = expr->avail_occr;
2815           while (occr != NULL)
2816             {
2817               SET_BIT (ae_gen[BLOCK_NUM (occr->insn)], expr->bitmap_index);
2818               occr = occr->next;
2819             }
2820           expr = expr->next_same_hash;
2821         }
2822     }
2823 }
2824
2825 /* Return non-zero if expression X is killed in BB.  */
2826
2827 static int
2828 expr_killed_p (x, bb)
2829      rtx x;
2830      int bb;
2831 {
2832   int i;
2833   enum rtx_code code;
2834   const char *fmt;
2835
2836   /* repeat is used to turn tail-recursion into iteration.  */
2837  repeat:
2838
2839   if (x == 0)
2840     return 1;
2841
2842   code = GET_CODE (x);
2843   switch (code)
2844     {
2845     case REG:
2846       return TEST_BIT (reg_set_in_block[bb], REGNO (x));
2847
2848     case MEM:
2849       if (mem_set_in_block[bb])
2850         return 1;
2851       x = XEXP (x, 0);
2852       goto repeat;
2853
2854     case PC:
2855     case CC0: /*FIXME*/
2856     case CONST:
2857     case CONST_INT:
2858     case CONST_DOUBLE:
2859     case SYMBOL_REF:
2860     case LABEL_REF:
2861     case ADDR_VEC:
2862     case ADDR_DIFF_VEC:
2863       return 0;
2864
2865     default:
2866       break;
2867     }
2868
2869   i = GET_RTX_LENGTH (code) - 1;
2870   fmt = GET_RTX_FORMAT (code);
2871   for (; i >= 0; i--)
2872     {
2873       if (fmt[i] == 'e')
2874         {
2875           rtx tem = XEXP (x, i);
2876
2877           /* If we are about to do the last recursive call
2878              needed at this level, change it into iteration.
2879              This function is called enough to be worth it.  */
2880           if (i == 0)
2881             {
2882               x = tem;
2883               goto repeat;
2884             }
2885           if (expr_killed_p (tem, bb))
2886             return 1;
2887         }
2888       else if (fmt[i] == 'E')
2889         {
2890           int j;
2891           for (j = 0; j < XVECLEN (x, i); j++)
2892             {
2893               if (expr_killed_p (XVECEXP (x, i, j), bb))
2894                 return 1;
2895             }
2896         }
2897     }
2898
2899   return 0;
2900 }
2901
2902 /* Compute the set of available expressions killed in each basic block.  */
2903
2904 static void
2905 compute_ae_kill (ae_gen, ae_kill)
2906      sbitmap *ae_gen, *ae_kill;
2907 {
2908   int bb,i;
2909
2910   for (bb = 0; bb < n_basic_blocks; bb++)
2911     {
2912       for (i = 0; i < expr_hash_table_size; i++)
2913         {
2914           struct expr *expr = expr_hash_table[i];
2915
2916           for ( ; expr != NULL; expr = expr->next_same_hash)
2917             {
2918               /* Skip EXPR if generated in this block.  */
2919               if (TEST_BIT (ae_gen[bb], expr->bitmap_index))
2920                 continue;
2921
2922               if (expr_killed_p (expr->expr, bb))
2923                 SET_BIT (ae_kill[bb], expr->bitmap_index);
2924             }
2925         }
2926     }
2927 }
2928 \f
2929 /* Actually perform the Classic GCSE optimizations.  */
2930
2931 /* Return non-zero if occurrence OCCR of expression EXPR reaches block BB.
2932
2933    CHECK_SELF_LOOP is non-zero if we should consider a block reaching itself
2934    as a positive reach.  We want to do this when there are two computations
2935    of the expression in the block.
2936
2937    VISITED is a pointer to a working buffer for tracking which BB's have
2938    been visited.  It is NULL for the top-level call.
2939
2940    We treat reaching expressions that go through blocks containing the same
2941    reaching expression as "not reaching".  E.g. if EXPR is generated in blocks
2942    2 and 3, INSN is in block 4, and 2->3->4, we treat the expression in block
2943    2 as not reaching.  The intent is to improve the probability of finding
2944    only one reaching expression and to reduce register lifetimes by picking
2945    the closest such expression.  */
2946
2947 static int
2948 expr_reaches_here_p_work (occr, expr, bb, check_self_loop, visited)
2949      struct occr *occr;
2950      struct expr *expr;
2951      int bb;
2952      int check_self_loop;
2953      char *visited;
2954 {
2955   edge pred;
2956
2957   for (pred = BASIC_BLOCK(bb)->pred; pred != NULL; pred = pred->pred_next)
2958     {
2959       int pred_bb = pred->src->index;
2960
2961       if (visited[pred_bb])
2962         {
2963           /* This predecessor has already been visited.
2964              Nothing to do.  */
2965           ;
2966         }
2967       else if (pred_bb == bb)
2968         {
2969           /* BB loops on itself.  */
2970           if (check_self_loop
2971               && TEST_BIT (ae_gen[pred_bb], expr->bitmap_index)
2972               && BLOCK_NUM (occr->insn) == pred_bb)
2973             return 1;
2974           visited[pred_bb] = 1;
2975         }
2976       /* Ignore this predecessor if it kills the expression.  */
2977       else if (TEST_BIT (ae_kill[pred_bb], expr->bitmap_index))
2978         visited[pred_bb] = 1;
2979       /* Does this predecessor generate this expression?  */
2980       else if (TEST_BIT (ae_gen[pred_bb], expr->bitmap_index))
2981         {
2982           /* Is this the occurrence we're looking for?
2983              Note that there's only one generating occurrence per block
2984              so we just need to check the block number.  */
2985           if (BLOCK_NUM (occr->insn) == pred_bb)
2986             return 1;
2987           visited[pred_bb] = 1;
2988         }
2989       /* Neither gen nor kill.  */
2990       else
2991         {
2992           visited[pred_bb] = 1;
2993           if (expr_reaches_here_p_work (occr, expr, pred_bb, check_self_loop,
2994               visited))
2995             return 1;
2996         }
2997     }
2998
2999   /* All paths have been checked.  */
3000   return 0;
3001 }
3002
3003 /* This wrapper for expr_reaches_here_p_work() is to ensure that any
3004    memory allocated for that function is returned. */
3005
3006 static int
3007 expr_reaches_here_p (occr, expr, bb, check_self_loop)
3008      struct occr *occr;
3009      struct expr *expr;
3010      int bb;
3011      int check_self_loop;
3012 {
3013   int rval;
3014   char * visited = (char *) xcalloc (n_basic_blocks, 1);
3015
3016   rval = expr_reaches_here_p_work(occr, expr, bb, check_self_loop, visited);
3017
3018   free (visited);
3019
3020   return (rval);
3021 }
3022
3023 /* Return the instruction that computes EXPR that reaches INSN's basic block.
3024    If there is more than one such instruction, return NULL.
3025
3026    Called only by handle_avail_expr.  */
3027
3028 static rtx
3029 computing_insn (expr, insn)
3030      struct expr *expr;
3031      rtx insn;
3032 {
3033   int bb = BLOCK_NUM (insn);
3034
3035   if (expr->avail_occr->next == NULL)
3036     {
3037       if (BLOCK_NUM (expr->avail_occr->insn) == bb)
3038         {
3039           /* The available expression is actually itself
3040              (i.e. a loop in the flow graph) so do nothing.  */
3041           return NULL;
3042         }
3043       /* (FIXME) Case that we found a pattern that was created by
3044          a substitution that took place.  */
3045       return expr->avail_occr->insn;
3046     }
3047   else
3048     {
3049       /* Pattern is computed more than once.
3050          Search backwards from this insn to see how many of these
3051          computations actually reach this insn.  */
3052       struct occr *occr;
3053       rtx insn_computes_expr = NULL;
3054       int can_reach = 0;
3055
3056       for (occr = expr->avail_occr; occr != NULL; occr = occr->next)
3057         {
3058           if (BLOCK_NUM (occr->insn) == bb)
3059             {
3060               /* The expression is generated in this block.
3061                  The only time we care about this is when the expression
3062                  is generated later in the block [and thus there's a loop].
3063                  We let the normal cse pass handle the other cases.  */
3064               if (INSN_CUID (insn) < INSN_CUID (occr->insn))
3065                 {
3066                   if (expr_reaches_here_p (occr, expr, bb, 1))
3067                     {
3068                       can_reach++;
3069                       if (can_reach > 1)
3070                         return NULL;
3071                       insn_computes_expr = occr->insn;
3072                     }
3073                 }
3074             }
3075           else /* Computation of the pattern outside this block.  */
3076             {
3077               if (expr_reaches_here_p (occr, expr, bb, 0))
3078                 {
3079                   can_reach++;
3080                   if (can_reach > 1)
3081                     return NULL;
3082                   insn_computes_expr = occr->insn;
3083                 }
3084             }
3085         }
3086
3087       if (insn_computes_expr == NULL)
3088         abort ();
3089       return insn_computes_expr;
3090     }
3091 }
3092
3093 /* Return non-zero if the definition in DEF_INSN can reach INSN.
3094    Only called by can_disregard_other_sets.  */
3095
3096 static int
3097 def_reaches_here_p (insn, def_insn)
3098      rtx insn, def_insn;
3099 {
3100   rtx reg;
3101
3102   if (TEST_BIT (reaching_defs[BLOCK_NUM (insn)], INSN_CUID (def_insn)))
3103     return 1;
3104
3105   if (BLOCK_NUM (insn) == BLOCK_NUM (def_insn))
3106     {
3107       if (INSN_CUID (def_insn) < INSN_CUID (insn))
3108         {
3109           if (GET_CODE (PATTERN (def_insn)) == PARALLEL)
3110             return 1;
3111           if (GET_CODE (PATTERN (def_insn)) == CLOBBER)
3112             reg = XEXP (PATTERN (def_insn), 0);
3113           else if (GET_CODE (PATTERN (def_insn)) == SET)
3114             reg = SET_DEST (PATTERN (def_insn));
3115           else
3116             abort ();
3117           return ! reg_set_between_p (reg, NEXT_INSN (def_insn), insn);
3118         }
3119       else
3120         return 0;
3121     }
3122
3123   return 0;
3124 }
3125
3126 /* Return non-zero if *ADDR_THIS_REG can only have one value at INSN.
3127    The value returned is the number of definitions that reach INSN.
3128    Returning a value of zero means that [maybe] more than one definition
3129    reaches INSN and the caller can't perform whatever optimization it is
3130    trying.  i.e. it is always safe to return zero.  */
3131
3132 static int
3133 can_disregard_other_sets (addr_this_reg, insn, for_combine)
3134      struct reg_set **addr_this_reg;
3135      rtx insn;
3136      int for_combine;
3137 {
3138   int number_of_reaching_defs = 0;
3139   struct reg_set *this_reg = *addr_this_reg;
3140
3141   while (this_reg)
3142     {
3143       if (def_reaches_here_p (insn, this_reg->insn))
3144         {
3145           number_of_reaching_defs++;
3146           /* Ignore parallels for now.  */
3147           if (GET_CODE (PATTERN (this_reg->insn)) == PARALLEL)
3148             return 0;
3149           if (!for_combine
3150               && (GET_CODE (PATTERN (this_reg->insn)) == CLOBBER
3151                   || ! rtx_equal_p (SET_SRC (PATTERN (this_reg->insn)),
3152                                     SET_SRC (PATTERN (insn)))))
3153             {
3154               /* A setting of the reg to a different value reaches INSN.  */
3155               return 0;
3156             }
3157           if (number_of_reaching_defs > 1)
3158             {
3159               /* If in this setting the value the register is being
3160                  set to is equal to the previous value the register
3161                  was set to and this setting reaches the insn we are
3162                  trying to do the substitution on then we are ok.  */
3163
3164               if (GET_CODE (PATTERN (this_reg->insn)) == CLOBBER)
3165                 return 0;
3166               if (! rtx_equal_p (SET_SRC (PATTERN (this_reg->insn)),
3167                                  SET_SRC (PATTERN (insn))))
3168                 return 0;
3169             }
3170           *addr_this_reg = this_reg;
3171         }
3172
3173       /* prev_this_reg = this_reg; */
3174       this_reg = this_reg->next;
3175     }
3176
3177   return number_of_reaching_defs;
3178 }
3179
3180 /* Expression computed by insn is available and the substitution is legal,
3181    so try to perform the substitution.
3182
3183    The result is non-zero if any changes were made.  */
3184
3185 static int
3186 handle_avail_expr (insn, expr)
3187      rtx insn;
3188      struct expr *expr;
3189 {
3190   rtx pat, insn_computes_expr;
3191   rtx to;
3192   struct reg_set *this_reg;
3193   int found_setting, use_src;
3194   int changed = 0;
3195
3196   /* We only handle the case where one computation of the expression
3197      reaches this instruction.  */
3198   insn_computes_expr = computing_insn (expr, insn);
3199   if (insn_computes_expr == NULL)
3200     return 0;
3201
3202   found_setting = 0;
3203   use_src = 0;
3204
3205   /* At this point we know only one computation of EXPR outside of this
3206      block reaches this insn.  Now try to find a register that the
3207      expression is computed into.  */
3208
3209   if (GET_CODE (SET_SRC (PATTERN (insn_computes_expr))) == REG)
3210     {
3211       /* This is the case when the available expression that reaches
3212          here has already been handled as an available expression.  */
3213       int regnum_for_replacing = REGNO (SET_SRC (PATTERN (insn_computes_expr)));
3214       /* If the register was created by GCSE we can't use `reg_set_table',
3215          however we know it's set only once.  */
3216       if (regnum_for_replacing >= max_gcse_regno
3217           /* If the register the expression is computed into is set only once,
3218              or only one set reaches this insn, we can use it.  */
3219           || (((this_reg = reg_set_table[regnum_for_replacing]),
3220                this_reg->next == NULL)
3221               || can_disregard_other_sets (&this_reg, insn, 0)))
3222        {
3223          use_src = 1;
3224          found_setting = 1;
3225        }
3226     }
3227
3228   if (!found_setting)
3229     {
3230       int regnum_for_replacing = REGNO (SET_DEST (PATTERN (insn_computes_expr)));
3231       /* This shouldn't happen.  */
3232       if (regnum_for_replacing >= max_gcse_regno)
3233         abort ();
3234       this_reg = reg_set_table[regnum_for_replacing];
3235       /* If the register the expression is computed into is set only once,
3236          or only one set reaches this insn, use it.  */
3237       if (this_reg->next == NULL
3238           || can_disregard_other_sets (&this_reg, insn, 0))
3239         found_setting = 1;
3240     }
3241
3242   if (found_setting)
3243     {
3244       pat = PATTERN (insn);
3245       if (use_src)
3246         to = SET_SRC (PATTERN (insn_computes_expr));
3247       else
3248         to = SET_DEST (PATTERN (insn_computes_expr));
3249       changed = validate_change (insn, &SET_SRC (pat), to, 0);
3250
3251       /* We should be able to ignore the return code from validate_change but
3252          to play it safe we check.  */
3253       if (changed)
3254         {
3255           gcse_subst_count++;
3256           if (gcse_file != NULL)
3257             {
3258               fprintf (gcse_file, "GCSE: Replacing the source in insn %d with reg %d %s insn %d\n",
3259                        INSN_UID (insn), REGNO (to),
3260                        use_src ? "from" : "set in",
3261                        INSN_UID (insn_computes_expr));
3262             }
3263
3264         }
3265     }
3266   /* The register that the expr is computed into is set more than once.  */
3267   else if (1 /*expensive_op(this_pattrn->op) && do_expensive_gcse)*/)
3268     {
3269       /* Insert an insn after insnx that copies the reg set in insnx
3270          into a new pseudo register call this new register REGN.
3271          From insnb until end of basic block or until REGB is set
3272          replace all uses of REGB with REGN.  */
3273       rtx new_insn;
3274
3275       to = gen_reg_rtx (GET_MODE (SET_DEST (PATTERN (insn_computes_expr))));
3276
3277       /* Generate the new insn.  */
3278       /* ??? If the change fails, we return 0, even though we created
3279          an insn.  I think this is ok.  */
3280       new_insn
3281         = emit_insn_after (gen_rtx_SET (VOIDmode, to,
3282                                         SET_DEST (PATTERN (insn_computes_expr))),
3283                                   insn_computes_expr);
3284       /* Keep block number table up to date.  */
3285       set_block_num (new_insn, BLOCK_NUM (insn_computes_expr));
3286       /* Keep register set table up to date.  */
3287       record_one_set (REGNO (to), new_insn);
3288
3289       gcse_create_count++;
3290       if (gcse_file != NULL)
3291         {
3292           fprintf (gcse_file, "GCSE: Creating insn %d to copy value of reg %d, computed in insn %d,\n",
3293                    INSN_UID (NEXT_INSN (insn_computes_expr)),
3294                    REGNO (SET_SRC (PATTERN (NEXT_INSN (insn_computes_expr)))),
3295                    INSN_UID (insn_computes_expr));
3296           fprintf (gcse_file, "      into newly allocated reg %d\n", REGNO (to));
3297         }
3298
3299       pat = PATTERN (insn);
3300
3301       /* Do register replacement for INSN.  */
3302       changed = validate_change (insn, &SET_SRC (pat),
3303                                  SET_DEST (PATTERN (NEXT_INSN (insn_computes_expr))),
3304                                  0);
3305
3306       /* We should be able to ignore the return code from validate_change but
3307          to play it safe we check.  */
3308       if (changed)
3309         {
3310           gcse_subst_count++;
3311           if (gcse_file != NULL)
3312             {
3313               fprintf (gcse_file, "GCSE: Replacing the source in insn %d with reg %d set in insn %d\n",
3314                        INSN_UID (insn),
3315                        REGNO (SET_DEST (PATTERN (NEXT_INSN (insn_computes_expr)))),
3316                        INSN_UID (insn_computes_expr));
3317             }
3318
3319         }
3320     }
3321
3322   return changed;
3323 }
3324
3325 /* Perform classic GCSE.
3326    This is called by one_classic_gcse_pass after all the dataflow analysis
3327    has been done.
3328
3329    The result is non-zero if a change was made.  */
3330
3331 static int
3332 classic_gcse ()
3333 {
3334   int bb, changed;
3335   rtx insn;
3336
3337   /* Note we start at block 1.  */
3338
3339   changed = 0;
3340   for (bb = 1; bb < n_basic_blocks; bb++)
3341     {
3342       /* Reset tables used to keep track of what's still valid [since the
3343          start of the block].  */
3344       reset_opr_set_tables ();
3345
3346       for (insn = BLOCK_HEAD (bb);
3347            insn != NULL && insn != NEXT_INSN (BLOCK_END (bb));
3348            insn = NEXT_INSN (insn))
3349         {
3350           /* Is insn of form (set (pseudo-reg) ...)?  */
3351
3352           if (GET_CODE (insn) == INSN
3353               && GET_CODE (PATTERN (insn)) == SET
3354               && GET_CODE (SET_DEST (PATTERN (insn))) == REG
3355               && REGNO (SET_DEST (PATTERN (insn))) >= FIRST_PSEUDO_REGISTER)
3356             {
3357               rtx pat = PATTERN (insn);
3358               rtx src = SET_SRC (pat);
3359               struct expr *expr;
3360
3361               if (want_to_gcse_p (src)
3362                   /* Is the expression recorded?  */
3363                   && ((expr = lookup_expr (src)) != NULL)
3364                   /* Is the expression available [at the start of the
3365                      block]?  */
3366                   && TEST_BIT (ae_in[bb], expr->bitmap_index)
3367                   /* Are the operands unchanged since the start of the
3368                      block?  */
3369                   && oprs_not_set_p (src, insn))
3370                 changed |= handle_avail_expr (insn, expr);
3371             }
3372
3373           /* Keep track of everything modified by this insn.  */
3374           /* ??? Need to be careful w.r.t. mods done to INSN.  */
3375           if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
3376             mark_oprs_set (insn);
3377         }
3378     }
3379
3380   return changed;
3381 }
3382
3383 /* Top level routine to perform one classic GCSE pass.
3384
3385    Return non-zero if a change was made.  */
3386
3387 static int
3388 one_classic_gcse_pass (pass)
3389      int pass;
3390 {
3391   int changed = 0;
3392
3393   gcse_subst_count = 0;
3394   gcse_create_count = 0;
3395
3396   alloc_expr_hash_table (max_cuid);
3397   alloc_rd_mem (n_basic_blocks, max_cuid);
3398   compute_expr_hash_table ();
3399   if (gcse_file)
3400     dump_hash_table (gcse_file, "Expression", expr_hash_table,
3401                      expr_hash_table_size, n_exprs);
3402   if (n_exprs > 0)
3403     {
3404       compute_kill_rd ();
3405       compute_rd ();
3406       alloc_avail_expr_mem (n_basic_blocks, n_exprs);
3407       compute_ae_gen ();
3408       compute_ae_kill (ae_gen, ae_kill);
3409       compute_available (ae_gen, ae_kill, ae_out, ae_in);
3410       changed = classic_gcse ();
3411       free_avail_expr_mem ();
3412     }
3413   free_rd_mem ();
3414   free_expr_hash_table ();
3415
3416   if (gcse_file)
3417     {
3418       fprintf (gcse_file, "\n");
3419       fprintf (gcse_file, "GCSE of %s, pass %d: %d bytes needed, %d substs, %d insns created\n",
3420                current_function_name, pass,
3421                bytes_used, gcse_subst_count, gcse_create_count);
3422     }
3423
3424   return changed;
3425 }
3426 \f
3427 /* Compute copy/constant propagation working variables.  */
3428
3429 /* Local properties of assignments.  */
3430
3431 static sbitmap *cprop_pavloc;
3432 static sbitmap *cprop_absaltered;
3433
3434 /* Global properties of assignments (computed from the local properties).  */
3435
3436 static sbitmap *cprop_avin;
3437 static sbitmap *cprop_avout;
3438
3439 /* Allocate vars used for copy/const propagation.
3440    N_BLOCKS is the number of basic blocks.
3441    N_SETS is the number of sets.  */
3442
3443 static void
3444 alloc_cprop_mem (n_blocks, n_sets)
3445      int n_blocks, n_sets;
3446 {
3447   cprop_pavloc = sbitmap_vector_alloc (n_blocks, n_sets);
3448   cprop_absaltered = sbitmap_vector_alloc (n_blocks, n_sets);
3449
3450   cprop_avin = sbitmap_vector_alloc (n_blocks, n_sets);
3451   cprop_avout = sbitmap_vector_alloc (n_blocks, n_sets);
3452 }
3453
3454 /* Free vars used by copy/const propagation.  */
3455
3456 static void
3457 free_cprop_mem ()
3458 {
3459   free (cprop_pavloc);
3460   free (cprop_absaltered);
3461   free (cprop_avin);
3462   free (cprop_avout);
3463 }
3464
3465 /* For each block, compute whether X is transparent.
3466    X is either an expression or an assignment [though we don't care which,
3467    for this context an assignment is treated as an expression].
3468    For each block where an element of X is modified, set (SET_P == 1) or reset
3469    (SET_P == 0) the INDX bit in BMAP.  */
3470
3471 static void
3472 compute_transp (x, indx, bmap, set_p)
3473      rtx x;
3474      int indx;
3475      sbitmap *bmap;
3476      int set_p;
3477 {
3478   int bb,i;
3479   enum rtx_code code;
3480   const char *fmt;
3481
3482   /* repeat is used to turn tail-recursion into iteration.  */
3483  repeat:
3484
3485   if (x == 0)
3486     return;
3487
3488   code = GET_CODE (x);
3489   switch (code)
3490     {
3491     case REG:
3492       {
3493         reg_set *r;
3494         int regno = REGNO (x);
3495
3496         if (set_p)
3497           {
3498             if (regno < FIRST_PSEUDO_REGISTER)
3499               {
3500                 for (bb = 0; bb < n_basic_blocks; bb++)
3501                   if (TEST_BIT (reg_set_in_block[bb], regno))
3502                     SET_BIT (bmap[bb], indx);
3503               }
3504             else
3505               {
3506                 for (r = reg_set_table[regno]; r != NULL; r = r->next)
3507                   {
3508                     bb = BLOCK_NUM (r->insn);
3509                     SET_BIT (bmap[bb], indx);
3510                   }
3511               }
3512           }
3513         else
3514           {
3515             if (regno < FIRST_PSEUDO_REGISTER)
3516               {
3517                 for (bb = 0; bb < n_basic_blocks; bb++)
3518                   if (TEST_BIT (reg_set_in_block[bb], regno))
3519                     RESET_BIT (bmap[bb], indx);
3520               }
3521             else
3522               {
3523                 for (r = reg_set_table[regno]; r != NULL; r = r->next)
3524                   {
3525                     bb = BLOCK_NUM (r->insn);
3526                     RESET_BIT (bmap[bb], indx);
3527                   }
3528               }
3529           }
3530         return;
3531       }
3532
3533     case MEM:
3534       if (set_p)
3535         {
3536           for (bb = 0; bb < n_basic_blocks; bb++)
3537             if (mem_set_in_block[bb])
3538               SET_BIT (bmap[bb], indx);
3539         }
3540       else
3541         {
3542           for (bb = 0; bb < n_basic_blocks; bb++)
3543             if (mem_set_in_block[bb])
3544               RESET_BIT (bmap[bb], indx);
3545         }
3546       x = XEXP (x, 0);
3547       goto repeat;
3548
3549     case PC:
3550     case CC0: /*FIXME*/
3551     case CONST:
3552     case CONST_INT:
3553     case CONST_DOUBLE:
3554     case SYMBOL_REF:
3555     case LABEL_REF:
3556     case ADDR_VEC:
3557     case ADDR_DIFF_VEC:
3558       return;
3559
3560     default:
3561       break;
3562     }
3563
3564   i = GET_RTX_LENGTH (code) - 1;
3565   fmt = GET_RTX_FORMAT (code);
3566   for (; i >= 0; i--)
3567     {
3568       if (fmt[i] == 'e')
3569         {
3570           rtx tem = XEXP (x, i);
3571
3572           /* If we are about to do the last recursive call
3573              needed at this level, change it into iteration.
3574              This function is called enough to be worth it.  */
3575           if (i == 0)
3576             {
3577               x = tem;
3578               goto repeat;
3579             }
3580           compute_transp (tem, indx, bmap, set_p);
3581         }
3582       else if (fmt[i] == 'E')
3583         {
3584           int j;
3585           for (j = 0; j < XVECLEN (x, i); j++)
3586             compute_transp (XVECEXP (x, i, j), indx, bmap, set_p);
3587         }
3588     }
3589 }
3590
3591 /* Top level routine to do the dataflow analysis needed by copy/const
3592    propagation.  */
3593
3594 static void
3595 compute_cprop_data ()
3596 {
3597   compute_local_properties (cprop_absaltered, cprop_pavloc, NULL, 1);
3598   compute_available (cprop_pavloc, cprop_absaltered,
3599                      cprop_avout, cprop_avin);
3600 }
3601 \f
3602 /* Copy/constant propagation.  */
3603
3604 /* Maximum number of register uses in an insn that we handle.  */
3605 #define MAX_USES 8
3606
3607 /* Table of uses found in an insn.
3608    Allocated statically to avoid alloc/free complexity and overhead.  */
3609 static struct reg_use reg_use_table[MAX_USES];
3610
3611 /* Index into `reg_use_table' while building it.  */
3612 static int reg_use_count;
3613
3614 /* Set up a list of register numbers used in INSN.
3615    The found uses are stored in `reg_use_table'.
3616    `reg_use_count' is initialized to zero before entry, and
3617    contains the number of uses in the table upon exit.
3618
3619    ??? If a register appears multiple times we will record it multiple
3620    times.  This doesn't hurt anything but it will slow things down.  */
3621
3622 static void
3623 find_used_regs (x)
3624      rtx x;
3625 {
3626   int i;
3627   enum rtx_code code;
3628   const char *fmt;
3629
3630   /* repeat is used to turn tail-recursion into iteration.  */
3631  repeat:
3632
3633   if (x == 0)
3634     return;
3635
3636   code = GET_CODE (x);
3637   switch (code)
3638     {
3639     case REG:
3640       if (reg_use_count == MAX_USES)
3641         return;
3642       reg_use_table[reg_use_count].reg_rtx = x;
3643       reg_use_count++;
3644       return;
3645
3646     case MEM:
3647       x = XEXP (x, 0);
3648       goto repeat;
3649
3650     case PC:
3651     case CC0:
3652     case CONST:
3653     case CONST_INT:
3654     case CONST_DOUBLE:
3655     case SYMBOL_REF:
3656     case LABEL_REF:
3657     case CLOBBER:
3658     case ADDR_VEC:
3659     case ADDR_DIFF_VEC:
3660     case ASM_INPUT: /*FIXME*/
3661       return;
3662
3663     case SET:
3664       if (GET_CODE (SET_DEST (x)) == MEM)
3665         find_used_regs (SET_DEST (x));
3666       x = SET_SRC (x);
3667       goto repeat;
3668
3669     default:
3670       break;
3671     }
3672
3673   /* Recursively scan the operands of this expression.  */
3674
3675   fmt = GET_RTX_FORMAT (code);
3676   for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
3677     {
3678       if (fmt[i] == 'e')
3679         {
3680           /* If we are about to do the last recursive call
3681              needed at this level, change it into iteration.
3682              This function is called enough to be worth it.  */
3683           if (i == 0)
3684             {
3685               x = XEXP (x, 0);
3686               goto repeat;
3687             }
3688           find_used_regs (XEXP (x, i));
3689         }
3690       else if (fmt[i] == 'E')
3691         {
3692           int j;
3693           for (j = 0; j < XVECLEN (x, i); j++)
3694             find_used_regs (XVECEXP (x, i, j));
3695         }
3696     }
3697 }
3698
3699 /* Try to replace all non-SET_DEST occurrences of FROM in INSN with TO.
3700    Returns non-zero is successful.  */
3701
3702 static int
3703 try_replace_reg (from, to, insn)
3704      rtx from, to, insn;
3705 {
3706   rtx note;
3707   rtx src;
3708   int success;
3709   rtx set;
3710
3711   note = find_reg_note (insn, REG_EQUAL, NULL_RTX);
3712
3713   if (!note)
3714     note = find_reg_note (insn, REG_EQUIV, NULL_RTX);
3715
3716   /* If this fails we could try to simplify the result of the
3717      replacement and attempt to recognize the simplified insn.
3718
3719      But we need a general simplify_rtx that doesn't have pass
3720      specific state variables.  I'm not aware of one at the moment.  */
3721
3722
3723   success = validate_replace_src (from, to, insn);
3724   set = single_set (insn);
3725
3726   /* We've failed to do replacement. Try to add REG_EQUAL note to not loose
3727      information.  */
3728   if (!success && !note)
3729     {
3730       if (!set)
3731         return 0;
3732       note = REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_EQUAL,
3733                                                    copy_rtx (SET_SRC (set)),
3734                                                    REG_NOTES (insn));
3735     }
3736
3737   /* Always do the replacement in REQ_EQUAL and REG_EQUIV notes.  Also
3738      try to simplify them.  */
3739   if (note)
3740     {
3741       rtx simplified;
3742       src = XEXP (note, 0);
3743       replace_rtx (src, from, to);
3744
3745       /* Try to simplify resulting note. */
3746       simplified = simplify_rtx (src);
3747       if (simplified)
3748         {
3749           src = simplified;
3750           XEXP (note, 0) = src;
3751         }
3752
3753       /* REG_EQUAL may get simplified into register.
3754          We don't allow that. Remove that note. This code ought
3755          not to hapen, because previous code ought to syntetize
3756          reg-reg move, but be on the safe side.  */
3757       else if (REG_P (src))
3758         remove_note (insn, note);
3759     }
3760   return success;
3761 }
3762 /* Find a set of REGNO that is available on entry to INSN's block.
3763    Returns NULL if not found.  */
3764
3765 static struct expr *
3766 find_avail_set (regno, insn)
3767      int regno;
3768      rtx insn;
3769 {
3770   /* SET1 contains the last set found that can be returned to the caller for
3771      use in a substitution.  */
3772   struct expr *set1 = 0;
3773
3774   /* Loops are not possible here.  To get a loop we would need two sets
3775      available at the start of the block containing INSN.  ie we would
3776      need two sets like this available at the start of the block:
3777
3778        (set (reg X) (reg Y))
3779        (set (reg Y) (reg X))
3780
3781      This can not happen since the set of (reg Y) would have killed the
3782      set of (reg X) making it unavailable at the start of this block.  */
3783   while (1)
3784      {
3785       rtx src;
3786       struct expr *set = lookup_set (regno, NULL_RTX);
3787
3788       /* Find a set that is available at the start of the block
3789          which contains INSN.  */
3790       while (set)
3791         {
3792           if (TEST_BIT (cprop_avin[BLOCK_NUM (insn)], set->bitmap_index))
3793             break;
3794           set = next_set (regno, set);
3795         }
3796
3797       /* If no available set was found we've reached the end of the
3798          (possibly empty) copy chain.  */
3799       if (set == 0)
3800         break;
3801
3802       if (GET_CODE (set->expr) != SET)
3803         abort ();
3804
3805       src = SET_SRC (set->expr);
3806
3807       /* We know the set is available.
3808          Now check that SRC is ANTLOC (i.e. none of the source operands
3809          have changed since the start of the block).
3810
3811          If the source operand changed, we may still use it for the next
3812          iteration of this loop, but we may not use it for substitutions.  */
3813       if (CONSTANT_P (src) || oprs_not_set_p (src, insn))
3814         set1 = set;
3815
3816       /* If the source of the set is anything except a register, then
3817          we have reached the end of the copy chain.  */
3818       if (GET_CODE (src) != REG)
3819         break;
3820
3821       /* Follow the copy chain, ie start another iteration of the loop
3822          and see if we have an available copy into SRC.  */
3823       regno = REGNO (src);
3824      }
3825
3826   /* SET1 holds the last set that was available and anticipatable at
3827      INSN.  */
3828   return set1;
3829 }
3830
3831 /* Subroutine of cprop_insn that tries to propagate constants into
3832    JUMP_INSNS.  INSN must be a conditional jump; COPY is a copy of it
3833    that we can use for substitutions.
3834    REG_USED is the use we will try to replace, SRC is the constant we
3835    will try to substitute for it.
3836    Returns nonzero if a change was made.  */
3837 static int
3838 cprop_jump (insn, copy, reg_used, src)
3839      rtx insn, copy;
3840      struct reg_use *reg_used;
3841      rtx src;
3842 {
3843   rtx set = PATTERN (copy);
3844   rtx temp;
3845
3846   /* Replace the register with the appropriate constant.  */
3847   replace_rtx (SET_SRC (set), reg_used->reg_rtx, src);
3848
3849   temp = simplify_ternary_operation (GET_CODE (SET_SRC (set)),
3850                                      GET_MODE (SET_SRC (set)),
3851                                      GET_MODE (XEXP (SET_SRC (set), 0)),
3852                                      XEXP (SET_SRC (set), 0),
3853                                      XEXP (SET_SRC (set), 1),
3854                                      XEXP (SET_SRC (set), 2));
3855
3856   /* If no simplification can be made, then try the next
3857      register.  */
3858   if (temp == 0)
3859     return 0;
3860
3861   SET_SRC (set) = temp;
3862
3863   /* That may have changed the structure of TEMP, so
3864      force it to be rerecognized if it has not turned
3865      into a nop or unconditional jump.  */
3866
3867   INSN_CODE (copy) = -1;
3868   if ((SET_DEST (set) == pc_rtx
3869        && (SET_SRC (set) == pc_rtx
3870            || GET_CODE (SET_SRC (set)) == LABEL_REF))
3871       || recog (PATTERN (copy), copy, NULL) >= 0)
3872     {
3873       /* This has either become an unconditional jump
3874          or a nop-jump.  We'd like to delete nop jumps
3875          here, but doing so confuses gcse.  So we just
3876          make the replacement and let later passes
3877          sort things out.  */
3878       PATTERN (insn) = set;
3879       INSN_CODE (insn) = -1;
3880
3881       /* One less use of the label this insn used to jump to
3882          if we turned this into a NOP jump.  */
3883       if (SET_SRC (set) == pc_rtx && JUMP_LABEL (insn) != 0)
3884         --LABEL_NUSES (JUMP_LABEL (insn));
3885
3886       /* If this has turned into an unconditional jump,
3887          then put a barrier after it so that the unreachable
3888          code will be deleted.  */
3889       if (GET_CODE (SET_SRC (set)) == LABEL_REF)
3890         emit_barrier_after (insn);
3891
3892       run_jump_opt_after_gcse = 1;
3893
3894       const_prop_count++;
3895       if (gcse_file != NULL)
3896         {
3897           int regno = REGNO (reg_used->reg_rtx);
3898           fprintf (gcse_file, "CONST-PROP: Replacing reg %d in insn %d with constant ",
3899                    regno, INSN_UID (insn));
3900           print_rtl (gcse_file, src);
3901           fprintf (gcse_file, "\n");
3902         }
3903       return 1;
3904     }
3905   return 0;
3906 }
3907
3908 #ifdef HAVE_cc0
3909 /* Subroutine of cprop_insn that tries to propagate constants into
3910    JUMP_INSNS for machines that have CC0.  INSN is a single set that
3911    stores into CC0; the insn following it is a conditional jump.
3912    REG_USED is the use we will try to replace, SRC is the constant we
3913    will try to substitute for it.
3914    Returns nonzero if a change was made.  */
3915 static int
3916 cprop_cc0_jump (insn, reg_used, src)
3917      rtx insn;
3918      struct reg_use *reg_used;
3919      rtx src;
3920 {
3921   rtx jump = NEXT_INSN (insn);
3922   rtx copy = copy_rtx (jump);
3923   rtx set = PATTERN (copy);
3924
3925   /* We need to copy the source of the cc0 setter, as cprop_jump is going to
3926      substitute into it.  */
3927   replace_rtx (SET_SRC (set), cc0_rtx, copy_rtx (SET_SRC (PATTERN (insn))));
3928   if (! cprop_jump (jump, copy, reg_used, src))
3929     return 0;
3930
3931   /* If we succeeded, delete the cc0 setter.  */
3932   PUT_CODE (insn, NOTE);
3933   NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
3934   NOTE_SOURCE_FILE (insn) = 0;
3935   return 1;
3936  }
3937 #endif
3938
3939 /* Perform constant and copy propagation on INSN.
3940    The result is non-zero if a change was made.  */
3941
3942 static int
3943 cprop_insn (insn, alter_jumps)
3944      rtx insn;
3945      int alter_jumps;
3946 {
3947   struct reg_use *reg_used;
3948   int changed = 0;
3949   rtx note;
3950
3951   /* Only propagate into SETs.  Note that a conditional jump is a
3952      SET with pc_rtx as the destination.  */
3953   if ((GET_CODE (insn) != INSN
3954        && GET_CODE (insn) != JUMP_INSN)
3955       || GET_CODE (PATTERN (insn)) != SET)
3956     return 0;
3957
3958   reg_use_count = 0;
3959   find_used_regs (PATTERN (insn));
3960
3961   note = find_reg_note (insn, REG_EQUIV, NULL_RTX);
3962   if (!note)
3963     note = find_reg_note (insn, REG_EQUAL, NULL_RTX);
3964
3965   /* We may win even when propagating constants into notes. */
3966   if (note)
3967     find_used_regs (XEXP (note, 0));
3968
3969   reg_used = &reg_use_table[0];
3970   for ( ; reg_use_count > 0; reg_used++, reg_use_count--)
3971     {
3972       rtx pat, src;
3973       struct expr *set;
3974       int regno = REGNO (reg_used->reg_rtx);
3975
3976       /* Ignore registers created by GCSE.
3977          We do this because ... */
3978       if (regno >= max_gcse_regno)
3979         continue;
3980
3981       /* If the register has already been set in this block, there's
3982          nothing we can do.  */
3983       if (! oprs_not_set_p (reg_used->reg_rtx, insn))
3984         continue;
3985
3986       /* Find an assignment that sets reg_used and is available
3987          at the start of the block.  */
3988       set = find_avail_set (regno, insn);
3989       if (! set)
3990         continue;
3991
3992       pat = set->expr;
3993       /* ??? We might be able to handle PARALLELs.  Later.  */
3994       if (GET_CODE (pat) != SET)
3995         abort ();
3996       src = SET_SRC (pat);
3997
3998       /* Constant propagation.  */
3999       if (GET_CODE (src) == CONST_INT || GET_CODE (src) == CONST_DOUBLE
4000           || GET_CODE (src) == SYMBOL_REF)
4001         {
4002           /* Handle normal insns first.  */
4003           if (GET_CODE (insn) == INSN
4004               && try_replace_reg (reg_used->reg_rtx, src, insn))
4005             {
4006               changed = 1;
4007               const_prop_count++;
4008               if (gcse_file != NULL)
4009                 {
4010                   fprintf (gcse_file, "CONST-PROP: Replacing reg %d in insn %d with constant ",
4011                            regno, INSN_UID (insn));
4012                   print_rtl (gcse_file, src);
4013                   fprintf (gcse_file, "\n");
4014                 }
4015
4016               /* The original insn setting reg_used may or may not now be
4017                  deletable.  We leave the deletion to flow.  */
4018             }
4019
4020           /* Try to propagate a CONST_INT into a conditional jump.
4021              We're pretty specific about what we will handle in this
4022              code, we can extend this as necessary over time.
4023
4024              Right now the insn in question must look like
4025              (set (pc) (if_then_else ...))  */
4026           else if (alter_jumps
4027                    && GET_CODE (insn) == JUMP_INSN
4028                    && condjump_p (insn)
4029                    && ! simplejump_p (insn))
4030             changed |= cprop_jump (insn, copy_rtx (insn), reg_used, src);
4031 #ifdef HAVE_cc0
4032           /* Similar code for machines that use a pair of CC0 setter and
4033              conditional jump insn.  */
4034           else if (alter_jumps
4035                    && GET_CODE (PATTERN (insn)) == SET
4036                    && SET_DEST (PATTERN (insn)) == cc0_rtx
4037                    && GET_CODE (NEXT_INSN (insn)) == JUMP_INSN
4038                    && condjump_p (NEXT_INSN (insn))
4039                    && ! simplejump_p (NEXT_INSN (insn)))
4040             changed |= cprop_cc0_jump (insn, reg_used, src);
4041 #endif
4042         }
4043       else if (GET_CODE (src) == REG
4044                && REGNO (src) >= FIRST_PSEUDO_REGISTER
4045                && REGNO (src) != regno)
4046         {
4047           if (try_replace_reg (reg_used->reg_rtx, src, insn))
4048             {
4049               changed = 1;
4050               copy_prop_count++;
4051               if (gcse_file != NULL)
4052                 {
4053                   fprintf (gcse_file, "COPY-PROP: Replacing reg %d in insn %d with reg %d\n",
4054                            regno, INSN_UID (insn), REGNO (src));
4055                 }
4056
4057               /* The original insn setting reg_used may or may not now be
4058                  deletable.  We leave the deletion to flow.  */
4059               /* FIXME: If it turns out that the insn isn't deletable,
4060                  then we may have unnecessarily extended register lifetimes
4061                  and made things worse.  */
4062             }
4063         }
4064     }
4065
4066   return changed;
4067 }
4068
4069 /* Forward propagate copies.
4070    This includes copies and constants.
4071    Return non-zero if a change was made.  */
4072
4073 static int
4074 cprop (alter_jumps)
4075      int alter_jumps;
4076 {
4077   int bb, changed;
4078   rtx insn;
4079
4080   /* Note we start at block 1.  */
4081
4082   changed = 0;
4083   for (bb = 1; bb < n_basic_blocks; bb++)
4084     {
4085       /* Reset tables used to keep track of what's still valid [since the
4086          start of the block].  */
4087       reset_opr_set_tables ();
4088
4089       for (insn = BLOCK_HEAD (bb);
4090            insn != NULL && insn != NEXT_INSN (BLOCK_END (bb));
4091            insn = NEXT_INSN (insn))
4092         {
4093           if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
4094             {
4095               changed |= cprop_insn (insn, alter_jumps);
4096
4097               /* Keep track of everything modified by this insn.  */
4098               /* ??? Need to be careful w.r.t. mods done to INSN.  Don't
4099                  call mark_oprs_set if we turned the insn into a NOTE.  */
4100               if (GET_CODE (insn) != NOTE)
4101                 mark_oprs_set (insn);
4102             }
4103         }
4104     }
4105
4106   if (gcse_file != NULL)
4107     fprintf (gcse_file, "\n");
4108
4109   return changed;
4110 }
4111
4112 /* Perform one copy/constant propagation pass.
4113    F is the first insn in the function.
4114    PASS is the pass count.  */
4115
4116 static int
4117 one_cprop_pass (pass, alter_jumps)
4118      int pass;
4119      int alter_jumps;
4120 {
4121   int changed = 0;
4122
4123   const_prop_count = 0;
4124   copy_prop_count = 0;
4125
4126   alloc_set_hash_table (max_cuid);
4127   compute_set_hash_table ();
4128   if (gcse_file)
4129     dump_hash_table (gcse_file, "SET", set_hash_table, set_hash_table_size,
4130                      n_sets);
4131   if (n_sets > 0)
4132     {
4133       alloc_cprop_mem (n_basic_blocks, n_sets);
4134       compute_cprop_data ();
4135       changed = cprop (alter_jumps);
4136       free_cprop_mem ();
4137     }
4138   free_set_hash_table ();
4139
4140   if (gcse_file)
4141     {
4142       fprintf (gcse_file, "CPROP of %s, pass %d: %d bytes needed, %d const props, %d copy props\n",
4143                current_function_name, pass,
4144                bytes_used, const_prop_count, copy_prop_count);
4145       fprintf (gcse_file, "\n");
4146     }
4147
4148   return changed;
4149 }
4150 \f
4151 /* Compute PRE+LCM working variables.  */
4152
4153 /* Local properties of expressions.  */
4154 /* Nonzero for expressions that are transparent in the block.  */
4155 static sbitmap *transp;
4156
4157 /* Nonzero for expressions that are transparent at the end of the block.
4158    This is only zero for expressions killed by abnormal critical edge
4159    created by a calls.  */
4160 static sbitmap *transpout;
4161
4162 /* Nonzero for expressions that are computed (available) in the block.  */
4163 static sbitmap *comp;
4164
4165 /* Nonzero for expressions that are locally anticipatable in the block.  */
4166 static sbitmap *antloc;
4167
4168 /* Nonzero for expressions where this block is an optimal computation
4169    point.  */
4170 static sbitmap *pre_optimal;
4171
4172 /* Nonzero for expressions which are redundant in a particular block.  */
4173 static sbitmap *pre_redundant;
4174
4175 /* Nonzero for expressions which should be inserted on a specific edge.  */
4176 static sbitmap *pre_insert_map;
4177
4178 /* Nonzero for expressions which should be deleted in a specific block.  */
4179 static sbitmap *pre_delete_map;
4180
4181 /* Contains the edge_list returned by pre_edge_lcm.  */
4182 static struct edge_list *edge_list;
4183
4184 static sbitmap *temp_bitmap;
4185
4186 /* Redundant insns.  */
4187 static sbitmap pre_redundant_insns;
4188
4189 /* Allocate vars used for PRE analysis.  */
4190
4191 static void
4192 alloc_pre_mem (n_blocks, n_exprs)
4193      int n_blocks, n_exprs;
4194 {
4195   transp = sbitmap_vector_alloc (n_blocks, n_exprs);
4196   comp = sbitmap_vector_alloc (n_blocks, n_exprs);
4197   antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
4198   temp_bitmap = sbitmap_vector_alloc (n_blocks, n_exprs);
4199
4200   pre_optimal = NULL;
4201   pre_redundant = NULL;
4202   pre_insert_map = NULL;
4203   pre_delete_map = NULL;
4204   ae_in = NULL;
4205   ae_out = NULL;
4206   u_bitmap = NULL;
4207   transpout = sbitmap_vector_alloc (n_blocks, n_exprs);
4208   ae_kill = sbitmap_vector_alloc (n_blocks, n_exprs);
4209   /* pre_insert and pre_delete are allocated later.  */
4210 }
4211
4212 /* Free vars used for PRE analysis.  */
4213
4214 static void
4215 free_pre_mem ()
4216 {
4217   free (transp);
4218   free (comp);
4219   free (antloc);
4220   free (temp_bitmap);
4221
4222   if (pre_optimal)
4223     free (pre_optimal);
4224   if (pre_redundant)
4225     free (pre_redundant);
4226   if (pre_insert_map)
4227     free (pre_insert_map);
4228   if (pre_delete_map)
4229     free (pre_delete_map);
4230   if (transpout)
4231     free (transpout);
4232
4233   if (ae_in)
4234     free (ae_in);
4235   if (ae_out)
4236     free (ae_out);
4237   if (ae_kill)
4238     free (ae_kill);
4239   if (u_bitmap)
4240     free (u_bitmap);
4241
4242   transp = comp = antloc = NULL;
4243   pre_optimal = pre_redundant = pre_insert_map = pre_delete_map = NULL;
4244   transpout = ae_in = ae_out = ae_kill = NULL;
4245   u_bitmap = NULL;
4246
4247 }
4248
4249 /* Top level routine to do the dataflow analysis needed by PRE.  */
4250
4251 static void
4252 compute_pre_data ()
4253 {
4254   compute_local_properties (transp, comp, antloc, 0);
4255   compute_transpout ();
4256   sbitmap_vector_zero (ae_kill, n_basic_blocks);
4257   compute_ae_kill (comp, ae_kill);
4258   edge_list = pre_edge_lcm (gcse_file, n_exprs, transp, comp, antloc,
4259                             ae_kill, &pre_insert_map, &pre_delete_map);
4260 }
4261
4262 \f
4263 /* PRE utilities */
4264
4265 /* Return non-zero if an occurrence of expression EXPR in OCCR_BB would reach
4266    block BB.
4267
4268    VISITED is a pointer to a working buffer for tracking which BB's have
4269    been visited.  It is NULL for the top-level call.
4270
4271    We treat reaching expressions that go through blocks containing the same
4272    reaching expression as "not reaching".  E.g. if EXPR is generated in blocks
4273    2 and 3, INSN is in block 4, and 2->3->4, we treat the expression in block
4274    2 as not reaching.  The intent is to improve the probability of finding
4275    only one reaching expression and to reduce register lifetimes by picking
4276    the closest such expression.  */
4277
4278 static int
4279 pre_expr_reaches_here_p_work (occr_bb, expr, bb, visited)
4280      int occr_bb;
4281      struct expr *expr;
4282      int bb;
4283      char *visited;
4284 {
4285   edge pred;
4286
4287   for (pred = BASIC_BLOCK (bb)->pred; pred != NULL; pred = pred->pred_next)
4288     {
4289       int pred_bb = pred->src->index;
4290
4291       if (pred->src == ENTRY_BLOCK_PTR
4292           /* Has predecessor has already been visited?  */
4293           || visited[pred_bb])
4294         {
4295           /* Nothing to do.  */
4296         }
4297       /* Does this predecessor generate this expression?  */
4298       else if (TEST_BIT (comp[pred_bb], expr->bitmap_index))
4299         {
4300           /* Is this the occurrence we're looking for?
4301              Note that there's only one generating occurrence per block
4302              so we just need to check the block number.  */
4303           if (occr_bb == pred_bb)
4304             return 1;
4305           visited[pred_bb] = 1;
4306         }
4307       /* Ignore this predecessor if it kills the expression.  */
4308       else if (! TEST_BIT (transp[pred_bb], expr->bitmap_index))
4309         visited[pred_bb] = 1;
4310       /* Neither gen nor kill.  */
4311       else
4312         {
4313           visited[pred_bb] = 1;
4314           if (pre_expr_reaches_here_p_work (occr_bb, expr, pred_bb, visited))
4315             return 1;
4316         }
4317     }
4318
4319   /* All paths have been checked.  */
4320   return 0;
4321 }
4322
4323 /* The wrapper for pre_expr_reaches_here_work that ensures that any
4324    memory allocated for that function is returned. */
4325
4326 static int
4327 pre_expr_reaches_here_p (occr_bb, expr, bb)
4328      int occr_bb;
4329      struct expr *expr;
4330      int bb;
4331 {
4332   int rval;
4333   char * visited = (char *) xcalloc (n_basic_blocks, 1);
4334
4335   rval = pre_expr_reaches_here_p_work(occr_bb, expr, bb, visited);
4336
4337   free (visited);
4338
4339   return (rval);
4340 }
4341 \f
4342
4343 /* Given an expr, generate RTL which we can insert at the end of a BB,
4344    or on an edge.  Set the block number of any insns generated to
4345    the value of BB.  */
4346
4347 static rtx
4348 process_insert_insn (expr)
4349      struct expr *expr;
4350 {
4351   rtx reg = expr->reaching_reg;
4352   rtx pat, copied_expr;
4353   rtx first_new_insn;
4354
4355   start_sequence ();
4356   copied_expr = copy_rtx (expr->expr);
4357   emit_move_insn (reg, copied_expr);
4358   first_new_insn = get_insns ();
4359   pat = gen_sequence ();
4360   end_sequence ();
4361
4362   return pat;
4363 }
4364
4365 /* Add EXPR to the end of basic block BB.
4366
4367    This is used by both the PRE and code hoisting.
4368
4369    For PRE, we want to verify that the expr is either transparent
4370    or locally anticipatable in the target block.  This check makes
4371    no sense for code hoisting.  */
4372
4373 static void
4374 insert_insn_end_bb (expr, bb, pre)
4375      struct expr *expr;
4376      int bb;
4377      int pre;
4378 {
4379   rtx insn = BLOCK_END (bb);
4380   rtx new_insn;
4381   rtx reg = expr->reaching_reg;
4382   int regno = REGNO (reg);
4383   rtx pat;
4384
4385   pat = process_insert_insn (expr);
4386
4387   /* If the last insn is a jump, insert EXPR in front [taking care to
4388      handle cc0, etc. properly].  */
4389
4390   if (GET_CODE (insn) == JUMP_INSN)
4391     {
4392 #ifdef HAVE_cc0
4393       rtx note;
4394 #endif
4395
4396       /* If this is a jump table, then we can't insert stuff here.  Since
4397          we know the previous real insn must be the tablejump, we insert
4398          the new instruction just before the tablejump.  */
4399       if (GET_CODE (PATTERN (insn)) == ADDR_VEC
4400           || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
4401         insn = prev_real_insn (insn);
4402
4403 #ifdef HAVE_cc0
4404       /* FIXME: 'twould be nice to call prev_cc0_setter here but it aborts
4405          if cc0 isn't set.  */
4406       note = find_reg_note (insn, REG_CC_SETTER, NULL_RTX);
4407       if (note)
4408         insn = XEXP (note, 0);
4409       else
4410         {
4411           rtx maybe_cc0_setter = prev_nonnote_insn (insn);
4412           if (maybe_cc0_setter
4413               && GET_RTX_CLASS (GET_CODE (maybe_cc0_setter)) == 'i'
4414               && sets_cc0_p (PATTERN (maybe_cc0_setter)))
4415             insn = maybe_cc0_setter;
4416         }
4417 #endif
4418       /* FIXME: What if something in cc0/jump uses value set in new insn?  */
4419       new_insn = emit_block_insn_before (pat, insn, BASIC_BLOCK (bb));
4420     }
4421   /* Likewise if the last insn is a call, as will happen in the presence
4422      of exception handling.  */
4423   else if (GET_CODE (insn) == CALL_INSN)
4424     {
4425       HARD_REG_SET parm_regs;
4426       int nparm_regs;
4427       rtx p;
4428
4429       /* Keeping in mind SMALL_REGISTER_CLASSES and parameters in registers,
4430          we search backward and place the instructions before the first
4431          parameter is loaded.  Do this for everyone for consistency and a
4432          presumtion that we'll get better code elsewhere as well.  */
4433
4434       /* It should always be the case that we can put these instructions
4435          anywhere in the basic block with performing PRE optimizations.
4436          Check this.  */
4437       if (pre
4438           && !TEST_BIT (antloc[bb], expr->bitmap_index)
4439           && !TEST_BIT (transp[bb], expr->bitmap_index))
4440         abort ();
4441
4442       /* Since different machines initialize their parameter registers
4443          in different orders, assume nothing.  Collect the set of all
4444          parameter registers.  */
4445       CLEAR_HARD_REG_SET (parm_regs);
4446       nparm_regs = 0;
4447       for (p = CALL_INSN_FUNCTION_USAGE (insn); p ; p = XEXP (p, 1))
4448         if (GET_CODE (XEXP (p, 0)) == USE
4449             && GET_CODE (XEXP (XEXP (p, 0), 0)) == REG)
4450           {
4451             int regno = REGNO (XEXP (XEXP (p, 0), 0));
4452             if (regno >= FIRST_PSEUDO_REGISTER)
4453               abort ();
4454             SET_HARD_REG_BIT (parm_regs, regno);
4455             nparm_regs++;
4456           }
4457
4458       /* Search backward for the first set of a register in this set.  */
4459       while (nparm_regs && BLOCK_HEAD (bb) != insn)
4460         {
4461           insn = PREV_INSN (insn);
4462           p = single_set (insn);
4463           if (p && GET_CODE (SET_DEST (p)) == REG
4464               && REGNO (SET_DEST (p)) < FIRST_PSEUDO_REGISTER
4465               && TEST_HARD_REG_BIT (parm_regs, REGNO (SET_DEST (p))))
4466             {
4467               CLEAR_HARD_REG_BIT (parm_regs, REGNO (SET_DEST (p)));
4468               nparm_regs--;
4469             }
4470         }
4471
4472       /* If we found all the parameter loads, then we want to insert
4473          before the first parameter load.
4474
4475          If we did not find all the parameter loads, then we might have
4476          stopped on the head of the block, which could be a CODE_LABEL.
4477          If we inserted before the CODE_LABEL, then we would be putting
4478          the insn in the wrong basic block.  In that case, put the insn
4479          after the CODE_LABEL.  Also, respect NOTE_INSN_BASIC_BLOCK.  */
4480       if (GET_CODE (insn) == CODE_LABEL)
4481         insn = NEXT_INSN (insn);
4482       if (GET_CODE (insn) == NOTE
4483           && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
4484         insn = NEXT_INSN (insn);
4485       new_insn = emit_block_insn_before (pat, insn, BASIC_BLOCK (bb));
4486     }
4487   else
4488     {
4489       new_insn = emit_insn_after (pat, insn);
4490       BLOCK_END (bb) = new_insn;
4491     }
4492
4493   /* Keep block number table up to date.
4494      Note, PAT could be a multiple insn sequence, we have to make
4495      sure that each insn in the sequence is handled.  */
4496   if (GET_CODE (pat) == SEQUENCE)
4497     {
4498       int i;
4499
4500       for (i = 0; i < XVECLEN (pat, 0); i++)
4501         {
4502           rtx insn = XVECEXP (pat, 0, i);
4503           set_block_num (insn, bb);
4504           if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
4505             add_label_notes (PATTERN (insn), new_insn);
4506           note_stores (PATTERN (insn), record_set_info, insn);
4507         }
4508     }
4509   else
4510     {
4511       add_label_notes (SET_SRC (pat), new_insn);
4512       set_block_num (new_insn, bb);
4513       /* Keep register set table up to date.  */
4514       record_one_set (regno, new_insn);
4515     }
4516
4517   gcse_create_count++;
4518
4519   if (gcse_file)
4520     {
4521       fprintf (gcse_file, "PRE/HOIST: end of bb %d, insn %d, copying expression %d to reg %d\n",
4522                bb, INSN_UID (new_insn), expr->bitmap_index, regno);
4523     }
4524 }
4525
4526 /* Insert partially redundant expressions on edges in the CFG to make
4527    the expressions fully redundant.  */
4528
4529 static int
4530 pre_edge_insert (edge_list, index_map)
4531      struct edge_list *edge_list;
4532      struct expr **index_map;
4533 {
4534   int e, i, num_edges, set_size, did_insert = 0;
4535   sbitmap *inserted;
4536
4537   /* Where PRE_INSERT_MAP is nonzero, we add the expression on that edge
4538      if it reaches any of the deleted expressions.  */
4539
4540   set_size = pre_insert_map[0]->size;
4541   num_edges = NUM_EDGES (edge_list);
4542   inserted = sbitmap_vector_alloc (num_edges, n_exprs);
4543   sbitmap_vector_zero (inserted, num_edges);
4544
4545   for (e = 0; e < num_edges; e++)
4546     {
4547       int indx;
4548       basic_block pred = INDEX_EDGE_PRED_BB (edge_list, e);
4549       int bb = pred->index;
4550
4551       for (i = indx = 0; i < set_size; i++, indx += SBITMAP_ELT_BITS)
4552         {
4553           SBITMAP_ELT_TYPE insert = pre_insert_map[e]->elms[i];
4554           int j;
4555
4556           for (j = indx; insert && j < n_exprs; j++, insert >>= 1)
4557             {
4558               if ((insert & 1) != 0 && index_map[j]->reaching_reg != NULL_RTX)
4559                 {
4560                   struct expr *expr = index_map[j];
4561                   struct occr *occr;
4562
4563                   /* Now look at each deleted occurence of this expression.  */
4564                   for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
4565                     {
4566                       if (! occr->deleted_p)
4567                         continue;
4568
4569                       /* Insert this expression on this edge if if it would
4570                          reach the deleted occurence in BB.  */
4571                       if (!TEST_BIT (inserted[e], j))
4572                         {
4573                           rtx insn;
4574                           edge eg = INDEX_EDGE (edge_list, e);
4575                           /* We can't insert anything on an abnormal
4576                              and critical edge, so we insert the
4577                              insn at the end of the previous block. There
4578                              are several alternatives detailed in
4579                              Morgans book P277 (sec 10.5) for handling
4580                              this situation.  This one is easiest for now.  */
4581
4582                           if ((eg->flags & EDGE_ABNORMAL) == EDGE_ABNORMAL)
4583                             {
4584                               insert_insn_end_bb (index_map[j], bb, 0);
4585                             }
4586                           else
4587                             {
4588                               insn = process_insert_insn (index_map[j]);
4589                               insert_insn_on_edge (insn, eg);
4590                             }
4591                           if (gcse_file)
4592                             {
4593                               fprintf (gcse_file,
4594                                        "PRE/HOIST: edge (%d,%d), copy expression %d\n",
4595                                         bb,
4596                                         INDEX_EDGE_SUCC_BB (edge_list, e)->index, expr->bitmap_index);
4597                             }
4598                           SET_BIT (inserted[e], j);
4599                           did_insert = 1;
4600                           gcse_create_count++;
4601                         }
4602                     }
4603                 }
4604             }
4605         }
4606     }
4607
4608   /* Clean up.  */
4609   free (inserted);
4610
4611   return did_insert;
4612 }
4613
4614 /* Copy the result of INSN to REG.
4615    INDX is the expression number.  */
4616
4617 static void
4618 pre_insert_copy_insn (expr, insn)
4619      struct expr *expr;
4620      rtx insn;
4621 {
4622   rtx reg = expr->reaching_reg;
4623   int regno = REGNO (reg);
4624   int indx = expr->bitmap_index;
4625   rtx set = single_set (insn);
4626   rtx new_insn;
4627   int bb = BLOCK_NUM (insn);
4628
4629   if (!set)
4630     abort ();
4631   new_insn = emit_insn_after (gen_rtx_SET (VOIDmode, reg, SET_DEST (set)),
4632                               insn);
4633   /* Keep block number table up to date.  */
4634   set_block_num (new_insn, bb);
4635   /* Keep register set table up to date.  */
4636   record_one_set (regno, new_insn);
4637   if (insn == BLOCK_END (bb))
4638     BLOCK_END (bb) = new_insn;
4639
4640   gcse_create_count++;
4641
4642   if (gcse_file)
4643     fprintf (gcse_file,
4644              "PRE: bb %d, insn %d, copy expression %d in insn %d to reg %d\n",
4645               BLOCK_NUM (insn), INSN_UID (new_insn), indx,
4646               INSN_UID (insn), regno);
4647 }
4648
4649 /* Copy available expressions that reach the redundant expression
4650    to `reaching_reg'.  */
4651
4652 static void
4653 pre_insert_copies ()
4654 {
4655   int i;
4656
4657   /* For each available expression in the table, copy the result to
4658      `reaching_reg' if the expression reaches a deleted one.
4659
4660      ??? The current algorithm is rather brute force.
4661      Need to do some profiling.  */
4662
4663   for (i = 0; i < expr_hash_table_size; i++)
4664     {
4665       struct expr *expr;
4666
4667       for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
4668         {
4669           struct occr *occr;
4670
4671           /* If the basic block isn't reachable, PPOUT will be TRUE.
4672              However, we don't want to insert a copy here because the
4673              expression may not really be redundant.  So only insert
4674              an insn if the expression was deleted.
4675              This test also avoids further processing if the expression
4676              wasn't deleted anywhere.  */
4677           if (expr->reaching_reg == NULL)
4678             continue;
4679
4680           for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
4681             {
4682               struct occr *avail;
4683
4684               if (! occr->deleted_p)
4685                 continue;
4686
4687               for (avail = expr->avail_occr; avail != NULL; avail = avail->next)
4688                 {
4689                   rtx insn = avail->insn;
4690
4691                   /* No need to handle this one if handled already.  */
4692                   if (avail->copied_p)
4693                     continue;
4694                   /* Don't handle this one if it's a redundant one.  */
4695                   if (TEST_BIT (pre_redundant_insns, INSN_CUID (insn)))
4696                     continue;
4697                   /* Or if the expression doesn't reach the deleted one.  */
4698                   if (! pre_expr_reaches_here_p (BLOCK_NUM (avail->insn), expr,
4699                                                  BLOCK_NUM (occr->insn)))
4700                     continue;
4701
4702                   /* Copy the result of avail to reaching_reg.  */
4703                   pre_insert_copy_insn (expr, insn);
4704                   avail->copied_p = 1;
4705                 }
4706             }
4707         }
4708     }
4709 }
4710
4711 /* Delete redundant computations.
4712    Deletion is done by changing the insn to copy the `reaching_reg' of
4713    the expression into the result of the SET.  It is left to later passes
4714    (cprop, cse2, flow, combine, regmove) to propagate the copy or eliminate it.
4715
4716    Returns non-zero if a change is made.  */
4717
4718 static int
4719 pre_delete ()
4720 {
4721   int i, bb, changed;
4722
4723   /* Compute the expressions which are redundant and need to be replaced by
4724      copies from the reaching reg to the target reg.  */
4725   for (bb = 0; bb < n_basic_blocks; bb++)
4726     sbitmap_copy (temp_bitmap[bb], pre_delete_map[bb]);
4727
4728   changed = 0;
4729   for (i = 0; i < expr_hash_table_size; i++)
4730     {
4731       struct expr *expr;
4732
4733       for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
4734         {
4735           struct occr *occr;
4736           int indx = expr->bitmap_index;
4737
4738           /* We only need to search antic_occr since we require
4739              ANTLOC != 0.  */
4740
4741           for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
4742             {
4743               rtx insn = occr->insn;
4744               rtx set;
4745               int bb = BLOCK_NUM (insn);
4746
4747               if (TEST_BIT (temp_bitmap[bb], indx))
4748                 {
4749                   set = single_set (insn);
4750                   if (! set)
4751                     abort ();
4752
4753                   /* Create a pseudo-reg to store the result of reaching
4754                      expressions into.  Get the mode for the new pseudo
4755                      from the mode of the original destination pseudo.  */
4756                   if (expr->reaching_reg == NULL)
4757                     expr->reaching_reg
4758                       = gen_reg_rtx (GET_MODE (SET_DEST (set)));
4759
4760                   /* In theory this should never fail since we're creating
4761                      a reg->reg copy.
4762
4763                      However, on the x86 some of the movXX patterns actually
4764                      contain clobbers of scratch regs.  This may cause the
4765                      insn created by validate_change to not match any pattern
4766                      and thus cause validate_change to fail.   */
4767                   if (validate_change (insn, &SET_SRC (set),
4768                                        expr->reaching_reg, 0))
4769                     {
4770                       occr->deleted_p = 1;
4771                       SET_BIT (pre_redundant_insns, INSN_CUID (insn));
4772                       changed = 1;
4773                       gcse_subst_count++;
4774                     }
4775
4776                   if (gcse_file)
4777                     {
4778                       fprintf (gcse_file,
4779                                "PRE: redundant insn %d (expression %d) in bb %d, reaching reg is %d\n",
4780                                INSN_UID (insn), indx, bb, REGNO (expr->reaching_reg));
4781                     }
4782                 }
4783             }
4784         }
4785     }
4786
4787   return changed;
4788 }
4789
4790 /* Perform GCSE optimizations using PRE.
4791    This is called by one_pre_gcse_pass after all the dataflow analysis
4792    has been done.
4793
4794    This is based on the original Morel-Renvoise paper Fred Chow's thesis,
4795    and lazy code motion from Knoop, Ruthing and Steffen as described in
4796    Advanced Compiler Design and Implementation.
4797
4798    ??? A new pseudo reg is created to hold the reaching expression.
4799    The nice thing about the classical approach is that it would try to
4800    use an existing reg.  If the register can't be adequately optimized
4801    [i.e. we introduce reload problems], one could add a pass here to
4802    propagate the new register through the block.
4803
4804    ??? We don't handle single sets in PARALLELs because we're [currently]
4805    not able to copy the rest of the parallel when we insert copies to create
4806    full redundancies from partial redundancies.  However, there's no reason
4807    why we can't handle PARALLELs in the cases where there are no partial
4808    redundancies.  */
4809
4810 static int
4811 pre_gcse ()
4812 {
4813   int i, did_insert;
4814   int changed;
4815   struct expr **index_map;
4816
4817   /* Compute a mapping from expression number (`bitmap_index') to
4818      hash table entry.  */
4819
4820   index_map = (struct expr **) xcalloc (n_exprs, sizeof (struct expr *));
4821   for (i = 0; i < expr_hash_table_size; i++)
4822     {
4823       struct expr *expr;
4824
4825       for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
4826         index_map[expr->bitmap_index] = expr;
4827     }
4828
4829   /* Reset bitmap used to track which insns are redundant.  */
4830   pre_redundant_insns = sbitmap_alloc (max_cuid);
4831   sbitmap_zero (pre_redundant_insns);
4832
4833   /* Delete the redundant insns first so that
4834      - we know what register to use for the new insns and for the other
4835        ones with reaching expressions
4836      - we know which insns are redundant when we go to create copies  */
4837   changed = pre_delete ();
4838
4839   did_insert = pre_edge_insert (edge_list, index_map);
4840   /* In other places with reaching expressions, copy the expression to the
4841      specially allocated pseudo-reg that reaches the redundant expr.  */
4842   pre_insert_copies ();
4843   if (did_insert)
4844     {
4845       commit_edge_insertions ();
4846       changed = 1;
4847     }
4848
4849   free (index_map);
4850   free (pre_redundant_insns);
4851
4852   return changed;
4853 }
4854
4855 /* Top level routine to perform one PRE GCSE pass.
4856
4857    Return non-zero if a change was made.  */
4858
4859 static int
4860 one_pre_gcse_pass (pass)
4861      int pass;
4862 {
4863   int changed = 0;
4864
4865   gcse_subst_count = 0;
4866   gcse_create_count = 0;
4867
4868   alloc_expr_hash_table (max_cuid);
4869   add_noreturn_fake_exit_edges ();
4870   compute_expr_hash_table ();
4871   if (gcse_file)
4872     dump_hash_table (gcse_file, "Expression", expr_hash_table,
4873                      expr_hash_table_size, n_exprs);
4874   if (n_exprs > 0)
4875     {
4876       alloc_pre_mem (n_basic_blocks, n_exprs);
4877       compute_pre_data ();
4878       changed |= pre_gcse ();
4879       free_edge_list (edge_list);
4880       free_pre_mem ();
4881     }
4882   remove_fake_edges ();
4883   free_expr_hash_table ();
4884
4885   if (gcse_file)
4886     {
4887       fprintf (gcse_file, "\n");
4888       fprintf (gcse_file, "PRE GCSE of %s, pass %d: %d bytes needed, %d substs, %d insns created\n",
4889                current_function_name, pass,
4890                bytes_used, gcse_subst_count, gcse_create_count);
4891     }
4892
4893   return changed;
4894 }
4895 \f
4896 /* If X contains any LABEL_REF's, add REG_LABEL notes for them to INSN.
4897    We have to add REG_LABEL notes, because the following loop optimization
4898    pass requires them.  */
4899
4900 /* ??? This is very similar to the loop.c add_label_notes function.  We
4901    could probably share code here.  */
4902
4903 /* ??? If there was a jump optimization pass after gcse and before loop,
4904    then we would not need to do this here, because jump would add the
4905    necessary REG_LABEL notes.  */
4906
4907 static void
4908 add_label_notes (x, insn)
4909      rtx x;
4910      rtx insn;
4911 {
4912   enum rtx_code code = GET_CODE (x);
4913   int i, j;
4914   const char *fmt;
4915
4916   if (code == LABEL_REF && !LABEL_REF_NONLOCAL_P (x))
4917     {
4918       /* This code used to ignore labels that referred to dispatch tables to
4919          avoid flow generating (slighly) worse code.
4920
4921          We no longer ignore such label references (see LABEL_REF handling in
4922          mark_jump_label for additional information).  */
4923       REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_LABEL, XEXP (x, 0),
4924                                             REG_NOTES (insn));
4925       return;
4926     }
4927
4928   fmt = GET_RTX_FORMAT (code);
4929   for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
4930     {
4931       if (fmt[i] == 'e')
4932         add_label_notes (XEXP (x, i), insn);
4933       else if (fmt[i] == 'E')
4934         for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4935           add_label_notes (XVECEXP (x, i, j), insn);
4936     }
4937 }
4938
4939 /* Compute transparent outgoing information for each block.
4940
4941    An expression is transparent to an edge unless it is killed by
4942    the edge itself.  This can only happen with abnormal control flow,
4943    when the edge is traversed through a call.  This happens with
4944    non-local labels and exceptions.
4945
4946    This would not be necessary if we split the edge.  While this is
4947    normally impossible for abnormal critical edges, with some effort
4948    it should be possible with exception handling, since we still have
4949    control over which handler should be invoked.  But due to increased
4950    EH table sizes, this may not be worthwhile.  */
4951
4952 static void
4953 compute_transpout ()
4954 {
4955   int bb;
4956
4957   sbitmap_vector_ones (transpout, n_basic_blocks);
4958
4959   for (bb = 0; bb < n_basic_blocks; ++bb)
4960     {
4961       int i;
4962
4963       /* Note that flow inserted a nop a the end of basic blocks that
4964          end in call instructions for reasons other than abnormal
4965          control flow.  */
4966       if (GET_CODE (BLOCK_END (bb)) != CALL_INSN)
4967         continue;
4968
4969       for (i = 0; i < expr_hash_table_size; i++)
4970         {
4971           struct expr *expr;
4972           for (expr = expr_hash_table[i]; expr ; expr = expr->next_same_hash)
4973             if (GET_CODE (expr->expr) == MEM)
4974               {
4975                 rtx addr = XEXP (expr->expr, 0);
4976
4977                 if (GET_CODE (addr) == SYMBOL_REF
4978                     && CONSTANT_POOL_ADDRESS_P (addr))
4979                   continue;
4980
4981                 /* ??? Optimally, we would use interprocedural alias
4982                    analysis to determine if this mem is actually killed
4983                    by this call.  */
4984                 RESET_BIT (transpout[bb], expr->bitmap_index);
4985               }
4986         }
4987     }
4988 }
4989
4990 /* Removal of useless null pointer checks */
4991
4992 /* Called via note_stores.  X is set by SETTER.  If X is a register we must
4993    invalidate nonnull_local and set nonnull_killed.  DATA is really a
4994    `null_pointer_info *'.
4995
4996    We ignore hard registers.  */
4997 static void
4998 invalidate_nonnull_info (x, setter, data)
4999      rtx x;
5000      rtx setter ATTRIBUTE_UNUSED;
5001      void *data;
5002 {
5003   int offset, regno;
5004   struct null_pointer_info* npi = (struct null_pointer_info *) data;
5005
5006   offset = 0;
5007   while (GET_CODE (x) == SUBREG)
5008     x = SUBREG_REG (x);
5009
5010   /* Ignore anything that is not a register or is a hard register.  */
5011   if (GET_CODE (x) != REG
5012       || REGNO (x) < npi->min_reg
5013       || REGNO (x) >= npi->max_reg)
5014     return;
5015
5016   regno = REGNO (x) - npi->min_reg;
5017
5018   RESET_BIT (npi->nonnull_local[npi->current_block], regno);
5019   SET_BIT (npi->nonnull_killed[npi->current_block], regno);
5020 }
5021
5022 /* Do null-pointer check elimination for the registers indicated in
5023    NPI.  NONNULL_AVIN and NONNULL_AVOUT are pre-allocated sbitmaps;
5024    they are not our responsibility to free.  */
5025
5026 static void
5027 delete_null_pointer_checks_1 (block_reg, nonnull_avin, nonnull_avout, npi)
5028      int *block_reg;
5029      sbitmap *nonnull_avin;
5030      sbitmap *nonnull_avout;
5031      struct null_pointer_info *npi;
5032 {
5033   int bb;
5034   int current_block;
5035   sbitmap *nonnull_local = npi->nonnull_local;
5036   sbitmap *nonnull_killed = npi->nonnull_killed;
5037
5038   /* Compute local properties, nonnull and killed.  A register will have
5039      the nonnull property if at the end of the current block its value is
5040      known to be nonnull.  The killed property indicates that somewhere in
5041      the block any information we had about the register is killed.
5042
5043      Note that a register can have both properties in a single block.  That
5044      indicates that it's killed, then later in the block a new value is
5045      computed.  */
5046   sbitmap_vector_zero (nonnull_local, n_basic_blocks);
5047   sbitmap_vector_zero (nonnull_killed, n_basic_blocks);
5048   for (current_block = 0; current_block < n_basic_blocks; current_block++)
5049     {
5050       rtx insn, stop_insn;
5051
5052       /* Set the current block for invalidate_nonnull_info.  */
5053       npi->current_block = current_block;
5054
5055       /* Scan each insn in the basic block looking for memory references and
5056          register sets.  */
5057       stop_insn = NEXT_INSN (BLOCK_END (current_block));
5058       for (insn = BLOCK_HEAD (current_block);
5059            insn != stop_insn;
5060            insn = NEXT_INSN (insn))
5061         {
5062           rtx set;
5063           rtx reg;
5064
5065           /* Ignore anything that is not a normal insn.  */
5066           if (GET_RTX_CLASS (GET_CODE (insn)) != 'i')
5067             continue;
5068
5069           /* Basically ignore anything that is not a simple SET.  We do have
5070              to make sure to invalidate nonnull_local and set nonnull_killed
5071              for such insns though.  */
5072           set = single_set (insn);
5073           if (!set)
5074             {
5075               note_stores (PATTERN (insn), invalidate_nonnull_info, npi);
5076               continue;
5077             }
5078
5079           /* See if we've got a useable memory load.  We handle it first
5080              in case it uses its address register as a dest (which kills
5081              the nonnull property).  */
5082           if (GET_CODE (SET_SRC (set)) == MEM
5083               && GET_CODE ((reg = XEXP (SET_SRC (set), 0))) == REG
5084               && REGNO (reg) >= npi->min_reg
5085               && REGNO (reg) < npi->max_reg)
5086             SET_BIT (nonnull_local[current_block],
5087                      REGNO (reg) - npi->min_reg);
5088
5089           /* Now invalidate stuff clobbered by this insn.  */
5090           note_stores (PATTERN (insn), invalidate_nonnull_info, npi);
5091
5092           /* And handle stores, we do these last since any sets in INSN can
5093              not kill the nonnull property if it is derived from a MEM
5094              appearing in a SET_DEST.  */
5095           if (GET_CODE (SET_DEST (set)) == MEM
5096               && GET_CODE ((reg = XEXP (SET_DEST (set), 0))) == REG
5097               && REGNO (reg) >= npi->min_reg
5098               && REGNO (reg) < npi->max_reg)
5099             SET_BIT (nonnull_local[current_block],
5100                      REGNO (reg) - npi->min_reg);
5101         }
5102     }
5103
5104   /* Now compute global properties based on the local properties.   This
5105      is a classic global availablity algorithm.  */
5106   compute_available (nonnull_local, nonnull_killed,
5107                      nonnull_avout, nonnull_avin);
5108
5109   /* Now look at each bb and see if it ends with a compare of a value
5110      against zero.  */
5111   for (bb = 0; bb < n_basic_blocks; bb++)
5112     {
5113       rtx last_insn = BLOCK_END (bb);
5114       rtx condition, earliest;
5115       int compare_and_branch;
5116
5117       /* Since MIN_REG is always at least FIRST_PSEUDO_REGISTER, and
5118          since BLOCK_REG[BB] is zero if this block did not end with a
5119          comparison against zero, this condition works.  */
5120       if (block_reg[bb] < npi->min_reg
5121           || block_reg[bb] >= npi->max_reg)
5122         continue;
5123
5124       /* LAST_INSN is a conditional jump.  Get its condition.  */
5125       condition = get_condition (last_insn, &earliest);
5126
5127       /* If we can't determine the condition then skip.  */
5128       if (! condition)
5129         continue;
5130
5131       /* Is the register known to have a nonzero value?  */
5132       if (!TEST_BIT (nonnull_avout[bb], block_reg[bb] - npi->min_reg))
5133         continue;
5134
5135       /* Try to compute whether the compare/branch at the loop end is one or
5136          two instructions.  */
5137       if (earliest == last_insn)
5138         compare_and_branch = 1;
5139       else if (earliest == prev_nonnote_insn (last_insn))
5140         compare_and_branch = 2;
5141       else
5142         continue;
5143
5144       /* We know the register in this comparison is nonnull at exit from
5145          this block.  We can optimize this comparison.  */
5146       if (GET_CODE (condition) == NE)
5147         {
5148           rtx new_jump;
5149
5150           new_jump = emit_jump_insn_before (gen_jump (JUMP_LABEL (last_insn)),
5151                                             last_insn);
5152           JUMP_LABEL (new_jump) = JUMP_LABEL (last_insn);
5153           LABEL_NUSES (JUMP_LABEL (new_jump))++;
5154           emit_barrier_after (new_jump);
5155         }
5156       delete_insn (last_insn);
5157       if (compare_and_branch == 2)
5158         delete_insn (earliest);
5159
5160       /* Don't check this block again.  (Note that BLOCK_END is
5161          invalid here; we deleted the last instruction in the
5162          block.)  */
5163       block_reg[bb] = 0;
5164     }
5165 }
5166
5167 /* Find EQ/NE comparisons against zero which can be (indirectly) evaluated
5168    at compile time.
5169
5170    This is conceptually similar to global constant/copy propagation and
5171    classic global CSE (it even uses the same dataflow equations as cprop).
5172
5173    If a register is used as memory address with the form (mem (reg)), then we
5174    know that REG can not be zero at that point in the program.  Any instruction
5175    which sets REG "kills" this property.
5176
5177    So, if every path leading to a conditional branch has an available memory
5178    reference of that form, then we know the register can not have the value
5179    zero at the conditional branch.
5180
5181    So we merely need to compute the local properies and propagate that data
5182    around the cfg, then optimize where possible.
5183
5184    We run this pass two times.  Once before CSE, then again after CSE.  This
5185    has proven to be the most profitable approach.  It is rare for new
5186    optimization opportunities of this nature to appear after the first CSE
5187    pass.
5188
5189    This could probably be integrated with global cprop with a little work.  */
5190
5191 void
5192 delete_null_pointer_checks (f)
5193      rtx f;
5194 {
5195   sbitmap *nonnull_avin, *nonnull_avout;
5196   int *block_reg;
5197   int bb;
5198   int reg;
5199   int regs_per_pass;
5200   int max_reg;
5201   struct null_pointer_info npi;
5202
5203   /* First break the program into basic blocks.  */
5204   find_basic_blocks (f, max_reg_num (), NULL, 1);
5205
5206   /* If we have only a single block, then there's nothing to do.  */
5207   if (n_basic_blocks <= 1)
5208     {
5209       /* Free storage allocated by find_basic_blocks.  */
5210       free_basic_block_vars (0);
5211       return;
5212     }
5213
5214   /* Trying to perform global optimizations on flow graphs which have
5215      a high connectivity will take a long time and is unlikely to be
5216      particularly useful.
5217
5218      In normal circumstances a cfg should have about twice has many edges
5219      as blocks.  But we do not want to punish small functions which have
5220      a couple switch statements.  So we require a relatively large number
5221      of basic blocks and the ratio of edges to blocks to be high.  */
5222   if (n_basic_blocks > 1000 && n_edges / n_basic_blocks >= 20)
5223     {
5224       /* Free storage allocated by find_basic_blocks.  */
5225       free_basic_block_vars (0);
5226       return;
5227     }
5228
5229   /* We need four bitmaps, each with a bit for each register in each
5230      basic block.  */
5231   max_reg = max_reg_num ();
5232   regs_per_pass = get_bitmap_width (4, n_basic_blocks, max_reg);
5233
5234   /* Allocate bitmaps to hold local and global properties.  */
5235   npi.nonnull_local = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);
5236   npi.nonnull_killed = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);
5237   nonnull_avin = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);
5238   nonnull_avout = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);
5239
5240   /* Go through the basic blocks, seeing whether or not each block
5241      ends with a conditional branch whose condition is a comparison
5242      against zero.  Record the register compared in BLOCK_REG.  */
5243   block_reg = (int *) xcalloc (n_basic_blocks, sizeof (int));
5244   for (bb = 0; bb < n_basic_blocks; bb++)
5245     {
5246       rtx last_insn = BLOCK_END (bb);
5247       rtx condition, earliest, reg;
5248
5249       /* We only want conditional branches.  */
5250       if (GET_CODE (last_insn) != JUMP_INSN
5251           || !condjump_p (last_insn)
5252           || simplejump_p (last_insn))
5253         continue;
5254
5255       /* LAST_INSN is a conditional jump.  Get its condition.  */
5256       condition = get_condition (last_insn, &earliest);
5257
5258       /* If we were unable to get the condition, or it is not a equality
5259          comparison against zero then there's nothing we can do.  */
5260       if (!condition
5261           || (GET_CODE (condition) != NE && GET_CODE (condition) != EQ)
5262           || GET_CODE (XEXP (condition, 1)) != CONST_INT
5263           || (XEXP (condition, 1)
5264               != CONST0_RTX (GET_MODE (XEXP (condition, 0)))))
5265         continue;
5266
5267       /* We must be checking a register against zero.  */
5268       reg = XEXP (condition, 0);
5269       if (GET_CODE (reg) != REG)
5270         continue;
5271
5272       block_reg[bb] = REGNO (reg);
5273     }
5274
5275   /* Go through the algorithm for each block of registers.  */
5276   for (reg = FIRST_PSEUDO_REGISTER; reg < max_reg; reg += regs_per_pass)
5277     {
5278       npi.min_reg = reg;
5279       npi.max_reg = MIN (reg + regs_per_pass, max_reg);
5280       delete_null_pointer_checks_1 (block_reg, nonnull_avin,
5281                                     nonnull_avout, &npi);
5282     }
5283
5284   /* Free storage allocated by find_basic_blocks.  */
5285   free_basic_block_vars (0);
5286
5287   /* Free the table of registers compared at the end of every block.  */
5288   free (block_reg);
5289
5290   /* Free bitmaps.  */
5291   free (npi.nonnull_local);
5292   free (npi.nonnull_killed);
5293   free (nonnull_avin);
5294   free (nonnull_avout);
5295 }
5296
5297 /* Code Hoisting variables and subroutines.  */
5298
5299 /* Very busy expressions.  */
5300 static sbitmap *hoist_vbein;
5301 static sbitmap *hoist_vbeout;
5302
5303 /* Hoistable expressions.  */
5304 static sbitmap *hoist_exprs;
5305
5306 /* Dominator bitmaps.  */
5307 static sbitmap *dominators;
5308
5309 /* ??? We could compute post dominators and run this algorithm in
5310    reverse to to perform tail merging, doing so would probably be
5311    more effective than the tail merging code in jump.c.
5312
5313    It's unclear if tail merging could be run in parallel with
5314    code hoisting.  It would be nice.  */
5315
5316 /* Allocate vars used for code hoisting analysis.  */
5317
5318 static void
5319 alloc_code_hoist_mem (n_blocks, n_exprs)
5320      int n_blocks, n_exprs;
5321 {
5322   antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
5323   transp = sbitmap_vector_alloc (n_blocks, n_exprs);
5324   comp = sbitmap_vector_alloc (n_blocks, n_exprs);
5325
5326   hoist_vbein = sbitmap_vector_alloc (n_blocks, n_exprs);
5327   hoist_vbeout = sbitmap_vector_alloc (n_blocks, n_exprs);
5328   hoist_exprs = sbitmap_vector_alloc (n_blocks, n_exprs);
5329   transpout = sbitmap_vector_alloc (n_blocks, n_exprs);
5330
5331   dominators = sbitmap_vector_alloc (n_blocks, n_blocks);
5332 }
5333
5334 /* Free vars used for code hoisting analysis.  */
5335
5336 static void
5337 free_code_hoist_mem ()
5338 {
5339   free (antloc);
5340   free (transp);
5341   free (comp);
5342
5343   free (hoist_vbein);
5344   free (hoist_vbeout);
5345   free (hoist_exprs);
5346   free (transpout);
5347
5348   free (dominators);
5349 }
5350
5351 /* Compute the very busy expressions at entry/exit from each block.
5352
5353    An expression is very busy if all paths from a given point
5354    compute the expression.  */
5355
5356 static void
5357 compute_code_hoist_vbeinout ()
5358 {
5359   int bb, changed, passes;
5360
5361   sbitmap_vector_zero (hoist_vbeout, n_basic_blocks);
5362   sbitmap_vector_zero (hoist_vbein, n_basic_blocks);
5363
5364   passes = 0;
5365   changed = 1;
5366   while (changed)
5367     {
5368       changed = 0;
5369       /* We scan the blocks in the reverse order to speed up
5370          the convergence.  */
5371       for (bb = n_basic_blocks - 1; bb >= 0; bb--)
5372         {
5373           changed |= sbitmap_a_or_b_and_c (hoist_vbein[bb], antloc[bb],
5374                                            hoist_vbeout[bb], transp[bb]);
5375           if (bb != n_basic_blocks - 1)
5376             sbitmap_intersection_of_succs (hoist_vbeout[bb], hoist_vbein, bb);
5377         }
5378       passes++;
5379     }
5380
5381   if (gcse_file)
5382     fprintf (gcse_file, "hoisting vbeinout computation: %d passes\n", passes);
5383 }
5384
5385 /* Top level routine to do the dataflow analysis needed by code hoisting.  */
5386
5387 static void
5388 compute_code_hoist_data ()
5389 {
5390   compute_local_properties (transp, comp, antloc, 0);
5391   compute_transpout ();
5392   compute_code_hoist_vbeinout ();
5393   compute_flow_dominators (dominators, NULL);
5394   if (gcse_file)
5395     fprintf (gcse_file, "\n");
5396 }
5397
5398 /* Determine if the expression identified by EXPR_INDEX would
5399    reach BB unimpared if it was placed at the end of EXPR_BB.
5400
5401    It's unclear exactly what Muchnick meant by "unimpared".  It seems
5402    to me that the expression must either be computed or transparent in
5403    *every* block in the path(s) from EXPR_BB to BB.  Any other definition
5404    would allow the expression to be hoisted out of loops, even if
5405    the expression wasn't a loop invariant.
5406
5407    Contrast this to reachability for PRE where an expression is
5408    considered reachable if *any* path reaches instead of *all*
5409    paths.  */
5410
5411 static int
5412 hoist_expr_reaches_here_p (expr_bb, expr_index, bb, visited)
5413      int expr_bb;
5414      int expr_index;
5415      int bb;
5416      char *visited;
5417 {
5418   edge pred;
5419   int visited_allocated_locally = 0;
5420
5421
5422   if (visited == NULL)
5423     {
5424        visited_allocated_locally = 1;
5425        visited = xcalloc (n_basic_blocks, 1);
5426     }
5427
5428   visited[expr_bb] = 1;
5429   for (pred = BASIC_BLOCK (bb)->pred; pred != NULL; pred = pred->pred_next)
5430     {
5431       int pred_bb = pred->src->index;
5432
5433       if (pred->src == ENTRY_BLOCK_PTR)
5434         break;
5435       else if (visited[pred_bb])
5436         continue;
5437       /* Does this predecessor generate this expression?  */
5438       else if (TEST_BIT (comp[pred_bb], expr_index))
5439         break;
5440       else if (! TEST_BIT (transp[pred_bb], expr_index))
5441         break;
5442       /* Not killed.  */
5443       else
5444         {
5445           visited[pred_bb] = 1;
5446           if (! hoist_expr_reaches_here_p (expr_bb, expr_index,
5447                                            pred_bb, visited))
5448             break;
5449         }
5450     }
5451   if (visited_allocated_locally)
5452     free (visited);
5453   return (pred == NULL);
5454 }
5455 \f
5456 /* Actually perform code hoisting.  */
5457 static void
5458 hoist_code ()
5459 {
5460   int bb, dominated, i;
5461   struct expr **index_map;
5462
5463   sbitmap_vector_zero (hoist_exprs, n_basic_blocks);
5464
5465   /* Compute a mapping from expression number (`bitmap_index') to
5466      hash table entry.  */
5467
5468   index_map = (struct expr **) xcalloc (n_exprs, sizeof (struct expr *));
5469   for (i = 0; i < expr_hash_table_size; i++)
5470     {
5471       struct expr *expr;
5472
5473       for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
5474         index_map[expr->bitmap_index] = expr;
5475     }
5476
5477   /* Walk over each basic block looking for potentially hoistable
5478      expressions, nothing gets hoisted from the entry block.  */
5479   for (bb = 0; bb < n_basic_blocks; bb++)
5480     {
5481       int found = 0;
5482       int insn_inserted_p;
5483
5484       /* Examine each expression that is very busy at the exit of this
5485          block.  These are the potentially hoistable expressions.  */
5486       for (i = 0; i < hoist_vbeout[bb]->n_bits; i++)
5487         {
5488           int hoistable = 0;
5489           if (TEST_BIT (hoist_vbeout[bb], i)
5490               && TEST_BIT (transpout[bb], i))
5491             {
5492               /* We've found a potentially hoistable expression, now
5493                  we look at every block BB dominates to see if it
5494                  computes the expression.  */
5495               for (dominated = 0; dominated < n_basic_blocks; dominated++)
5496                 {
5497                   /* Ignore self dominance.  */
5498                   if (bb == dominated
5499                       || ! TEST_BIT (dominators[dominated], bb))
5500                     continue;
5501
5502                   /* We've found a dominated block, now see if it computes
5503                      the busy expression and whether or not moving that
5504                      expression to the "beginning" of that block is safe.  */
5505                   if (!TEST_BIT (antloc[dominated], i))
5506                     continue;
5507
5508                   /* Note if the expression would reach the dominated block
5509                      unimpared if it was placed at the end of BB.
5510
5511                      Keep track of how many times this expression is hoistable
5512                      from a dominated block into BB.  */
5513                   if (hoist_expr_reaches_here_p (bb, i, dominated, NULL))
5514                     hoistable++;
5515                 }
5516
5517               /* If we found more than one hoistable occurence of this
5518                  expression, then note it in the bitmap of expressions to
5519                  hoist.  It makes no sense to hoist things which are computed
5520                  in only one BB, and doing so tends to pessimize register
5521                  allocation.  One could increase this value to try harder
5522                  to avoid any possible code expansion due to register
5523                  allocation issues; however experiments have shown that
5524                  the vast majority of hoistable expressions are only movable
5525                  from two successors, so raising this threshhold is likely
5526                  to nullify any benefit we get from code hoisting.  */
5527               if (hoistable > 1)
5528                 {
5529                   SET_BIT (hoist_exprs[bb], i);
5530                   found = 1;
5531                 }
5532             }
5533         }
5534
5535       /* If we found nothing to hoist, then quit now.  */
5536       if (! found)
5537         continue;
5538
5539       /* Loop over all the hoistable expressions.  */
5540       for (i = 0; i < hoist_exprs[bb]->n_bits; i++)
5541         {
5542           /* We want to insert the expression into BB only once, so
5543              note when we've inserted it.  */
5544           insn_inserted_p = 0;
5545
5546           /* These tests should be the same as the tests above.  */
5547           if (TEST_BIT (hoist_vbeout[bb], i))
5548             {
5549               /* We've found a potentially hoistable expression, now
5550                  we look at every block BB dominates to see if it
5551                  computes the expression.  */
5552               for (dominated = 0; dominated < n_basic_blocks; dominated++)
5553                 {
5554                   /* Ignore self dominance.  */
5555                   if (bb == dominated
5556                       || ! TEST_BIT (dominators[dominated], bb))
5557                     continue;
5558
5559                   /* We've found a dominated block, now see if it computes
5560                      the busy expression and whether or not moving that
5561                      expression to the "beginning" of that block is safe.  */
5562                   if (!TEST_BIT (antloc[dominated], i))
5563                     continue;
5564
5565                   /* The expression is computed in the dominated block and
5566                      it would be safe to compute it at the start of the
5567                      dominated block.  Now we have to determine if the
5568                      expresion would reach the dominated block if it was
5569                      placed at the end of BB.  */
5570                   if (hoist_expr_reaches_here_p (bb, i, dominated, NULL))
5571                     {
5572                       struct expr *expr = index_map[i];
5573                       struct occr *occr = expr->antic_occr;
5574                       rtx insn;
5575                       rtx set;
5576
5577
5578                       /* Find the right occurence of this expression.  */
5579                       while (BLOCK_NUM (occr->insn) != dominated && occr)
5580                         occr = occr->next;
5581
5582                       /* Should never happen.  */
5583                       if (!occr)
5584                         abort ();
5585
5586                       insn = occr->insn;
5587
5588                       set = single_set (insn);
5589                       if (! set)
5590                         abort ();
5591
5592                       /* Create a pseudo-reg to store the result of reaching
5593                          expressions into.  Get the mode for the new pseudo
5594                          from the mode of the original destination pseudo.  */
5595                       if (expr->reaching_reg == NULL)
5596                         expr->reaching_reg
5597                           = gen_reg_rtx (GET_MODE (SET_DEST (set)));
5598
5599                       /* In theory this should never fail since we're creating
5600                          a reg->reg copy.
5601
5602                          However, on the x86 some of the movXX patterns actually
5603                          contain clobbers of scratch regs.  This may cause the
5604                          insn created by validate_change to not match any
5605                          pattern and thus cause validate_change to fail.   */
5606                       if (validate_change (insn, &SET_SRC (set),
5607                                            expr->reaching_reg, 0))
5608                         {
5609                           occr->deleted_p = 1;
5610                           if (!insn_inserted_p)
5611                             {
5612                               insert_insn_end_bb (index_map[i], bb, 0);
5613                               insn_inserted_p = 1;
5614                             }
5615                         }
5616                     }
5617                 }
5618             }
5619         }
5620     }
5621     free (index_map);
5622 }
5623
5624 /* Top level routine to perform one code hoisting (aka unification) pass
5625
5626    Return non-zero if a change was made.  */
5627
5628 static int
5629 one_code_hoisting_pass ()
5630 {
5631   int changed = 0;
5632
5633   alloc_expr_hash_table (max_cuid);
5634   compute_expr_hash_table ();
5635   if (gcse_file)
5636     dump_hash_table (gcse_file, "Code Hosting Expressions", expr_hash_table,
5637                      expr_hash_table_size, n_exprs);
5638   if (n_exprs > 0)
5639     {
5640       alloc_code_hoist_mem (n_basic_blocks, n_exprs);
5641       compute_code_hoist_data ();
5642       hoist_code ();
5643       free_code_hoist_mem ();
5644     }
5645   free_expr_hash_table ();
5646
5647   return changed;
5648 }