gcc/gcse.c

   1 /* Global common subexpression elimination/Partial redundancy elimination
   2    and global constant/copy propagation for GNU compiler.
   3    Copyright (C) 1997, 1998, 1999 Free Software Foundation, Inc.
   4
   5 This file is part of GNU CC.
   6
   7 GNU CC is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 2, or (at your option)
  10 any later version.
  11
  12 GNU CC is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GNU CC; see the file COPYING.  If not, write to
  19 the Free Software Foundation, 59 Temple Place - Suite 330,
  20 Boston, MA 02111-1307, USA.  */
  21
  22 /* TODO
  23    - reordering of memory allocation and freeing to be more space efficient
  24    - do rough calc of how many regs are needed in each block, and a rough
  25      calc of how many regs are available in each class and use that to
  26      throttle back the code in cases where RTX_COST is minimal.
  27    - dead store elimination
  28    - a store to the same address as a load does not kill the load if the
  29      source of the store is also the destination of the load.  Handling this
  30      allows more load motion, particularly out of loops.
  31    - ability to realloc sbitmap vectors would allow one initial computation
  32      of reg_set_in_block with only subsequent additions, rather than
  33      recomputing it for each pass
  34
  35 */
  36
  37 /* References searched while implementing this.
  38
  39    Compilers Principles, Techniques and Tools
  40    Aho, Sethi, Ullman
  41    Addison-Wesley, 1988
  42
  43    Global Optimization by Suppression of Partial Redundancies
  44    E. Morel, C. Renvoise
  45    communications of the acm, Vol. 22, Num. 2, Feb. 1979
  46
  47    A Portable Machine-Independent Global Optimizer - Design and Measurements
  48    Frederick Chow
  49    Stanford Ph.D. thesis, Dec. 1983
  50
  51    A Fast Algorithm for Code Movement Optimization
  52    D.M. Dhamdhere
  53    SIGPLAN Notices, Vol. 23, Num. 10, Oct. 1988
  54
  55    A Solution to a Problem with Morel and Renvoise's
  56    Global Optimization by Suppression of Partial Redundancies
  57    K-H Drechsler, M.P. Stadel
  58    ACM TOPLAS, Vol. 10, Num. 4, Oct. 1988
  59
  60    Practical Adaptation of the Global Optimization
  61    Algorithm of Morel and Renvoise
  62    D.M. Dhamdhere
  63    ACM TOPLAS, Vol. 13, Num. 2. Apr. 1991
  64
  65    Efficiently Computing Static Single Assignment Form and the Control
  66    Dependence Graph
  67    R. Cytron, J. Ferrante, B.K. Rosen, M.N. Wegman, and F.K. Zadeck
  68    ACM TOPLAS, Vol. 13, Num. 4, Oct. 1991
  69
  70    Lazy Code Motion
  71    J. Knoop, O. Ruthing, B. Steffen
  72    ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI
  73
  74    What's In a Region?  Or Computing Control Dependence Regions in Near-Linear
  75    Time for Reducible Flow Control
  76    Thomas Ball
  77    ACM Letters on Programming Languages and Systems,
  78    Vol. 2, Num. 1-4, Mar-Dec 1993
  79
  80    An Efficient Representation for Sparse Sets
  81    Preston Briggs, Linda Torczon
  82    ACM Letters on Programming Languages and Systems,
  83    Vol. 2, Num. 1-4, Mar-Dec 1993
  84
  85    A Variation of Knoop, Ruthing, and Steffen's Lazy Code Motion
  86    K-H Drechsler, M.P. Stadel
  87    ACM SIGPLAN Notices, Vol. 28, Num. 5, May 1993
  88
  89    Partial Dead Code Elimination
  90    J. Knoop, O. Ruthing, B. Steffen
  91    ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
  92
  93    Effective Partial Redundancy Elimination
  94    P. Briggs, K.D. Cooper
  95    ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
  96
  97    The Program Structure Tree: Computing Control Regions in Linear Time
  98    R. Johnson, D. Pearson, K. Pingali
  99    ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
 100
 101    Optimal Code Motion: Theory and Practice
 102    J. Knoop, O. Ruthing, B. Steffen
 103    ACM TOPLAS, Vol. 16, Num. 4, Jul. 1994
 104
 105    The power of assignment motion
 106    J. Knoop, O. Ruthing, B. Steffen
 107    ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI
 108
 109    Global code motion / global value numbering
 110    C. Click
 111    ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI
 112
 113    Value Driven Redundancy Elimination
 114    L.T. Simpson
 115    Rice University Ph.D. thesis, Apr. 1996
 116
 117    Value Numbering
 118    L.T. Simpson
 119    Massively Scalar Compiler Project, Rice University, Sep. 1996
 120
 121    High Performance Compilers for Parallel Computing
 122    Michael Wolfe
 123    Addison-Wesley, 1996
 124
 125    Advanced Compiler Design and Implementation
 126    Steven Muchnick
 127    Morgan Kaufmann, 1997
 128
 129    Building an Optimizing Compiler
 130    Robert Morgan
 131    Digital Press, 1998
 132
 133    People wishing to speed up the code here should read:
 134      Elimination Algorithms for Data Flow Analysis
 135      B.G. Ryder, M.C. Paull
 136      ACM Computing Surveys, Vol. 18, Num. 3, Sep. 1986
 137
 138      How to Analyze Large Programs Efficiently and Informatively
 139      D.M. Dhamdhere, B.K. Rosen, F.K. Zadeck
 140      ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI
 141
 142    People wishing to do something different can find various possibilities
 143    in the above papers and elsewhere.
 144 */
 145
 146 #include "config.h"
 147 #include "system.h"
 148 #include "toplev.h"
 149
 150 #include "rtl.h"
 151 #include "tm_p.h"
 152 #include "regs.h"
 153 #include "hard-reg-set.h"
 154 #include "flags.h"
 155 #include "real.h"
 156 #include "insn-config.h"
 157 #include "recog.h"
 158 #include "basic-block.h"
 159 #include "output.h"
 160 #include "function.h"
 161 #include "expr.h"
 162
 163 #include "obstack.h"
 164 #define obstack_chunk_alloc gmalloc
 165 #define obstack_chunk_free free
 166
 167 /* Maximum number of passes to perform.  */
 168 #define MAX_PASSES 1
 169
 170 /* Propagate flow information through back edges and thus enable PRE's
 171    moving loop invariant calculations out of loops.
 172
 173    Originally this tended to create worse overall code, but several
 174    improvements during the development of PRE seem to have made following
 175    back edges generally a win.
 176
 177    Note much of the loop invariant code motion done here would normally
 178    be done by loop.c, which has more heuristics for when to move invariants
 179    out of loops.  At some point we might need to move some of those
 180    heuristics into gcse.c.  */
 181 #define FOLLOW_BACK_EDGES 1
 182
 183 /* We support GCSE via Partial Redundancy Elimination.  PRE optimizations
 184    are a superset of those done by GCSE.
 185
 186    We perform the following steps:
 187
 188    1) Compute basic block information.
 189
 190    2) Compute table of places where registers are set.
 191
 192    3) Perform copy/constant propagation.
 193
 194    4) Perform global cse.
 195
 196    5) Perform another pass of copy/constant propagation.
 197
 198    Two passes of copy/constant propagation are done because the first one
 199    enables more GCSE and the second one helps to clean up the copies that
 200    GCSE creates.  This is needed more for PRE than for Classic because Classic
 201    GCSE will try to use an existing register containing the common
 202    subexpression rather than create a new one.  This is harder to do for PRE
 203    because of the code motion (which Classic GCSE doesn't do).
 204
 205    Expressions we are interested in GCSE-ing are of the form
 206    (set (pseudo-reg) (expression)).
 207    Function want_to_gcse_p says what these are.
 208
 209    PRE handles moving invariant expressions out of loops (by treating them as
 210    partially redundant).
 211
 212    Eventually it would be nice to replace cse.c/gcse.c with SSA (static single
 213    assignment) based GVN (global value numbering).  L. T. Simpson's paper
 214    (Rice University) on value numbering is a useful reference for this.
 215
 216    **********************
 217
 218    We used to support multiple passes but there are diminishing returns in
 219    doing so.  The first pass usually makes 90% of the changes that are doable.
 220    A second pass can make a few more changes made possible by the first pass.
 221    Experiments show any further passes don't make enough changes to justify
 222    the expense.
 223
 224    A study of spec92 using an unlimited number of passes:
 225    [1 pass] = 1208 substitutions, [2] = 577, [3] = 202, [4] = 192, [5] = 83,
 226    [6] = 34, [7] = 17, [8] = 9, [9] = 4, [10] = 4, [11] = 2,
 227    [12] = 2, [13] = 1, [15] = 1, [16] = 2, [41] = 1
 228
 229    It was found doing copy propagation between each pass enables further
 230    substitutions.
 231
 232    PRE is quite expensive in complicated functions because the DFA can take
 233    awhile to converge.  Hence we only perform one pass.  Macro MAX_PASSES can
 234    be modified if one wants to experiment.
 235
 236    **********************
 237
 238    The steps for PRE are:
 239
 240    1) Build the hash table of expressions we wish to GCSE (expr_hash_table).
 241
 242    2) Perform the data flow analysis for PRE.
 243
 244    3) Delete the redundant instructions
 245
 246    4) Insert the required copies [if any] that make the partially
 247       redundant instructions fully redundant.
 248
 249    5) For other reaching expressions, insert an instruction to copy the value
 250       to a newly created pseudo that will reach the redundant instruction.
 251
 252    The deletion is done first so that when we do insertions we
 253    know which pseudo reg to use.
 254
 255    Various papers have argued that PRE DFA is expensive (O(n^2)) and others
 256    argue it is not.  The number of iterations for the algorithm to converge
 257    is typically 2-4 so I don't view it as that expensive (relatively speaking).
 258
 259    PRE GCSE depends heavily on the second CSE pass to clean up the copies
 260    we create.  To make an expression reach the place where it's redundant,
 261    the result of the expression is copied to a new register, and the redundant
 262    expression is deleted by replacing it with this new register.  Classic GCSE
 263    doesn't have this problem as much as it computes the reaching defs of
 264    each register in each block and thus can try to use an existing register.
 265
 266    **********************
 267
 268    A fair bit of simplicity is created by creating small functions for simple
 269    tasks, even when the function is only called in one place.  This may
 270    measurably slow things down [or may not] by creating more function call
 271    overhead than is necessary.  The source is laid out so that it's trivial
 272    to make the affected functions inline so that one can measure what speed
 273    up, if any, can be achieved, and maybe later when things settle things can
 274    be rearranged.
 275
 276    Help stamp out big monolithic functions!  */
 277 \f
 278 /* GCSE global vars.  */
 279
 280 /* -dG dump file.  */
 281 static FILE *gcse_file;
 282
 283 /* Note whether or not we should run jump optimization after gcse.  We
 284    want to do this for two cases.
 285
 286     * If we changed any jumps via cprop.
 287
 288     * If we added any labels via edge splitting.  */
 289
 290 static int run_jump_opt_after_gcse;
 291
 292 /* Bitmaps are normally not included in debugging dumps.
 293    However it's useful to be able to print them from GDB.
 294    We could create special functions for this, but it's simpler to
 295    just allow passing stderr to the dump_foo fns.  Since stderr can
 296    be a macro, we store a copy here.  */
 297 static FILE *debug_stderr;
 298
 299 /* An obstack for our working variables.  */
 300 static struct obstack gcse_obstack;
 301
 302 /* Non-zero for each mode that supports (set (reg) (reg)).
 303    This is trivially true for integer and floating point values.
 304    It may or may not be true for condition codes.  */
 305 static char can_copy_p[(int) NUM_MACHINE_MODES];
 306
 307 /* Non-zero if can_copy_p has been initialized.  */
 308 static int can_copy_init_p;
 309
 310 struct reg_use {
 311   rtx reg_rtx;
 312 };
 313
 314 /* Hash table of expressions.  */
 315
 316 struct expr
 317 {
 318   /* The expression (SET_SRC for expressions, PATTERN for assignments).  */
 319   rtx expr;
 320   /* Index in the available expression bitmaps.  */
 321   int bitmap_index;
 322   /* Next entry with the same hash.  */
 323   struct expr *next_same_hash;
 324   /* List of anticipatable occurrences in basic blocks in the function.
 325      An "anticipatable occurrence" is one that is the first occurrence in the
 326      basic block, the operands are not modified in the basic block prior
 327      to the occurrence and the output is not used between the start of
 328      the block and the occurrence.  */
 329   struct occr *antic_occr;
 330   /* List of available occurrence in basic blocks in the function.
 331      An "available occurrence" is one that is the last occurrence in the
 332      basic block and the operands are not modified by following statements in
 333      the basic block [including this insn].  */
 334   struct occr *avail_occr;
 335   /* Non-null if the computation is PRE redundant.
 336      The value is the newly created pseudo-reg to record a copy of the
 337      expression in all the places that reach the redundant copy.  */
 338   rtx reaching_reg;
 339 };
 340
 341 /* Occurrence of an expression.
 342    There is one per basic block.  If a pattern appears more than once the
 343    last appearance is used [or first for anticipatable expressions].  */
 344
 345 struct occr
 346 {
 347   /* Next occurrence of this expression.  */
 348   struct occr *next;
 349   /* The insn that computes the expression.  */
 350   rtx insn;
 351   /* Non-zero if this [anticipatable] occurrence has been deleted.  */
 352   char deleted_p;
 353   /* Non-zero if this [available] occurrence has been copied to
 354      reaching_reg.  */
 355   /* ??? This is mutually exclusive with deleted_p, so they could share
 356      the same byte.  */
 357   char copied_p;
 358 };
 359
 360 /* Expression and copy propagation hash tables.
 361    Each hash table is an array of buckets.
 362    ??? It is known that if it were an array of entries, structure elements
 363    `next_same_hash' and `bitmap_index' wouldn't be necessary.  However, it is
 364    not clear whether in the final analysis a sufficient amount of memory would
 365    be saved as the size of the available expression bitmaps would be larger
 366    [one could build a mapping table without holes afterwards though].
 367    Someday I'll perform the computation and figure it out.
 368 */
 369
 370 /* Total size of the expression hash table, in elements.  */
 371 static int expr_hash_table_size;
 372 /* The table itself.
 373    This is an array of `expr_hash_table_size' elements.  */
 374 static struct expr **expr_hash_table;
 375
 376 /* Total size of the copy propagation hash table, in elements.  */
 377 static int set_hash_table_size;
 378 /* The table itself.
 379    This is an array of `set_hash_table_size' elements.  */
 380 static struct expr **set_hash_table;
 381
 382 /* Mapping of uids to cuids.
 383    Only real insns get cuids.  */
 384 static int *uid_cuid;
 385
 386 /* Highest UID in UID_CUID.  */
 387 static int max_uid;
 388
 389 /* Get the cuid of an insn.  */
 390 #define INSN_CUID(INSN) (uid_cuid[INSN_UID (INSN)])
 391
 392 /* Number of cuids.  */
 393 static int max_cuid;
 394
 395 /* Mapping of cuids to insns.  */
 396 static rtx *cuid_insn;
 397
 398 /* Get insn from cuid.  */
 399 #define CUID_INSN(CUID) (cuid_insn[CUID])
 400
 401 /* Maximum register number in function prior to doing gcse + 1.
 402    Registers created during this pass have regno >= max_gcse_regno.
 403    This is named with "gcse" to not collide with global of same name.  */
 404 static int max_gcse_regno;
 405
 406 /* Maximum number of cse-able expressions found.  */
 407 static int n_exprs;
 408 /* Maximum number of assignments for copy propagation found.  */
 409 static int n_sets;
 410
 411 /* Table of registers that are modified.
 412    For each register, each element is a list of places where the pseudo-reg
 413    is set.
 414
 415    For simplicity, GCSE is done on sets of pseudo-regs only.  PRE GCSE only
 416    requires knowledge of which blocks kill which regs [and thus could use
 417    a bitmap instead of the lists `reg_set_table' uses].
 418
 419    `reg_set_table' and could be turned into an array of bitmaps
 420    (num-bbs x num-regs)
 421    [however perhaps it may be useful to keep the data as is].
 422    One advantage of recording things this way is that `reg_set_table' is
 423    fairly sparse with respect to pseudo regs but for hard regs could be
 424    fairly dense [relatively speaking].
 425    And recording sets of pseudo-regs in lists speeds
 426    up functions like compute_transp since in the case of pseudo-regs we only
 427    need to iterate over the number of times a pseudo-reg is set, not over the
 428    number of basic blocks [clearly there is a bit of a slow down in the cases
 429    where a pseudo is set more than once in a block, however it is believed
 430    that the net effect is to speed things up].  This isn't done for hard-regs
 431    because recording call-clobbered hard-regs in `reg_set_table' at each
 432    function call can consume a fair bit of memory, and iterating over hard-regs
 433    stored this way in compute_transp will be more expensive.  */
 434
 435 typedef struct reg_set {
 436   /* The next setting of this register.  */
 437   struct reg_set *next;
 438   /* The insn where it was set.  */
 439   rtx insn;
 440 } reg_set;
 441 static reg_set **reg_set_table;
 442 /* Size of `reg_set_table'.
 443    The table starts out at max_gcse_regno + slop, and is enlarged as
 444    necessary.  */
 445 static int reg_set_table_size;
 446 /* Amount to grow `reg_set_table' by when it's full.  */
 447 #define REG_SET_TABLE_SLOP 100
 448
 449 /* Bitmap containing one bit for each register in the program.
 450    Used when performing GCSE to track which registers have been set since
 451    the start of the basic block.  */
 452 static sbitmap reg_set_bitmap;
 453
 454 /* For each block, a bitmap of registers set in the block.
 455    This is used by expr_killed_p and compute_transp.
 456    It is computed during hash table computation and not by compute_sets
 457    as it includes registers added since the last pass (or between cprop and
 458    gcse) and it's currently not easy to realloc sbitmap vectors.  */
 459 static sbitmap *reg_set_in_block;
 460
 461 /* For each block, non-zero if memory is set in that block.
 462    This is computed during hash table computation and is used by
 463    expr_killed_p and compute_transp.
 464    ??? Handling of memory is very simple, we don't make any attempt
 465    to optimize things (later).
 466    ??? This can be computed by compute_sets since the information
 467    doesn't change.  */
 468 static char *mem_set_in_block;
 469
 470 /* Various variables for statistics gathering.  */
 471
 472 /* Memory used in a pass.
 473    This isn't intended to be absolutely precise.  Its intent is only
 474    to keep an eye on memory usage.  */
 475 static int bytes_used;
 476 /* GCSE substitutions made.  */
 477 static int gcse_subst_count;
 478 /* Number of copy instructions created.  */
 479 static int gcse_create_count;
 480 /* Number of constants propagated.  */
 481 static int const_prop_count;
 482 /* Number of copys propagated.  */
 483 static int copy_prop_count;
 484 \f
 485 /* These variables are used by classic GCSE.
 486    Normally they'd be defined a bit later, but `rd_gen' needs to
 487    be declared sooner.  */
 488
 489 /* A bitmap of all ones for implementing the algorithm for available
 490    expressions and reaching definitions.  */
 491 /* ??? Available expression bitmaps have a different size than reaching
 492    definition bitmaps.  This should be the larger of the two, however, it
 493    is not currently used for reaching definitions.  */
 494 static sbitmap u_bitmap;
 495
 496 /* Each block has a bitmap of each type.
 497    The length of each blocks bitmap is:
 498
 499        max_cuid  - for reaching definitions
 500        n_exprs - for available expressions
 501
 502    Thus we view the bitmaps as 2 dimensional arrays.  i.e.
 503    rd_kill[block_num][cuid_num]
 504    ae_kill[block_num][expr_num]
 505 */
 506
 507 /* For reaching defs */
 508 static sbitmap *rd_kill, *rd_gen, *reaching_defs, *rd_out;
 509
 510 /* for available exprs */
 511 static sbitmap *ae_kill, *ae_gen, *ae_in, *ae_out;
 512
 513 /* Objects of this type are passed around by the null-pointer check
 514    removal routines.  */
 515 struct null_pointer_info {
 516   /* The basic block being processed.  */
 517   int current_block;
 518   /* The first register to be handled in this pass.  */
 519   int min_reg;
 520   /* One greater than the last register to be handled in this pass.  */
 521   int max_reg;
 522   sbitmap *nonnull_local;
 523   sbitmap *nonnull_killed;
 524 };
 525 \f
 526 static void compute_can_copy      PROTO ((void));
 527
 528 static char *gmalloc              PROTO ((unsigned int));
 529 static char *grealloc            PROTO ((char *, unsigned int));
 530 static char *gcse_alloc        PROTO ((unsigned long));
 531 static void alloc_gcse_mem          PROTO ((rtx));
 532 static void free_gcse_mem            PROTO ((void));
 533 static void alloc_reg_set_mem    PROTO ((int));
 534 static void free_reg_set_mem      PROTO ((void));
 535 static int get_bitmap_width           PROTO ((int, int, int));
 536 static void record_one_set          PROTO ((int, rtx));
 537 static void record_set_info        PROTO ((rtx, rtx, void *));
 538 static void compute_sets              PROTO ((rtx));
 539
 540 static void hash_scan_insn          PROTO ((rtx, int, int));
 541 static void hash_scan_set            PROTO ((rtx, rtx, int));
 542 static void hash_scan_clobber    PROTO ((rtx, rtx));
 543 static void hash_scan_call          PROTO ((rtx, rtx));
 544 static int want_to_gcse_p            PROTO ((rtx));
 545 static int oprs_unchanged_p        PROTO ((rtx, rtx, int));
 546 static int oprs_anticipatable_p       PROTO ((rtx, rtx));
 547 static int oprs_available_p        PROTO ((rtx, rtx));
 548 static void insert_expr_in_table      PROTO ((rtx, enum machine_mode,
 549                                               rtx, int, int));
 550 static void insert_set_in_table       PROTO ((rtx, rtx));
 551 static unsigned int hash_expr    PROTO ((rtx, enum machine_mode,
 552                                          int *, int));
 553 static unsigned int hash_expr_1       PROTO ((rtx, enum machine_mode, int *));
 554 static unsigned int hash_set      PROTO ((int, int));
 555 static int expr_equiv_p        PROTO ((rtx, rtx));
 556 static void record_last_reg_set_info  PROTO ((rtx, int));
 557 static void record_last_mem_set_info  PROTO ((rtx));
 558 static void record_last_set_info      PROTO ((rtx, rtx, void *));
 559 static void compute_hash_table  PROTO ((int));
 560 static void alloc_set_hash_table      PROTO ((int));
 561 static void free_set_hash_table       PROTO ((void));
 562 static void compute_set_hash_table    PROTO ((void));
 563 static void alloc_expr_hash_table     PROTO ((int));
 564 static void free_expr_hash_table      PROTO ((void));
 565 static void compute_expr_hash_table   PROTO ((void));
 566 static void dump_hash_table        PROTO ((FILE *, const char *, struct expr **,
 567                                            int, int));
 568 static struct expr *lookup_expr       PROTO ((rtx));
 569 static struct expr *lookup_set  PROTO ((int, rtx));
 570 static struct expr *next_set      PROTO ((int, struct expr *));
 571 static void reset_opr_set_tables      PROTO ((void));
 572 static int oprs_not_set_p            PROTO ((rtx, rtx));
 573 static void mark_call            PROTO ((rtx));
 574 static void mark_set              PROTO ((rtx, rtx));
 575 static void mark_clobber              PROTO ((rtx, rtx));
 576 static void mark_oprs_set            PROTO ((rtx));
 577
 578 static void alloc_cprop_mem        PROTO ((int, int));
 579 static void free_cprop_mem          PROTO ((void));
 580 static void compute_transp          PROTO ((rtx, int, sbitmap *, int));
 581 static void compute_transpout       PROTO ((void));
 582 static void compute_local_properties  PROTO ((sbitmap *, sbitmap *,
 583                                               sbitmap *, int));
 584 static void compute_cprop_data  PROTO ((void));
 585 static void find_used_regs          PROTO ((rtx));
 586 static int try_replace_reg          PROTO ((rtx, rtx, rtx));
 587 static struct expr *find_avail_set    PROTO ((int, rtx));
 588 static int cprop_jump                   PROTO((rtx, rtx, struct reg_use *, rtx));
 589 #ifdef HAVE_cc0
 590 static int cprop_cc0_jump               PROTO((rtx, struct reg_use *, rtx));
 591 #endif
 592 static int cprop_insn            PROTO ((rtx, int));
 593 static int cprop                      PROTO ((int));
 594 static int one_cprop_pass            PROTO ((int, int));
 595
 596 static void alloc_pre_mem            PROTO ((int, int));
 597 static void free_pre_mem              PROTO ((void));
 598 static void compute_pre_data      PROTO ((void));
 599 static int pre_expr_reaches_here_p    PROTO ((int, struct expr *,
 600                                               int, int));
 601 static void insert_insn_end_bb  PROTO ((struct expr *, int, int));
 602 static void pre_insert_copy_insn      PROTO ((struct expr *, rtx));
 603 static void pre_insert_copies    PROTO ((void));
 604 static int pre_delete            PROTO ((void));
 605 static int pre_gcse                PROTO ((void));
 606 static int one_pre_gcse_pass      PROTO ((int));
 607
 608 static void add_label_notes           PROTO ((rtx, rtx));
 609
 610 static void alloc_code_hoist_mem        PROTO ((int, int));
 611 static void free_code_hoist_mem         PROTO ((void));
 612 static void compute_code_hoist_vbeinout PROTO ((void));
 613 static void compute_code_hoist_data     PROTO ((void));
 614 static int hoist_expr_reaches_here_p    PROTO ((int, int, int, char *));
 615 static void hoist_code                  PROTO ((void));
 616 static int one_code_hoisting_pass       PROTO ((void));
 617
 618 static void alloc_rd_mem              PROTO ((int, int));
 619 static void free_rd_mem        PROTO ((void));
 620 static void handle_rd_kill_set  PROTO ((rtx, int, int));
 621 static void compute_kill_rd        PROTO ((void));
 622 static void compute_rd          PROTO ((void));
 623 static void alloc_avail_expr_mem      PROTO ((int, int));
 624 static void free_avail_expr_mem       PROTO ((void));
 625 static void compute_ae_gen          PROTO ((void));
 626 static int expr_killed_p              PROTO ((rtx, int));
 627 static void compute_ae_kill        PROTO ((sbitmap *, sbitmap *));
 628 static int expr_reaches_here_p  PROTO ((struct occr *, struct expr *,
 629                                               int, int));
 630 static rtx computing_insn            PROTO ((struct expr *, rtx));
 631 static int def_reaches_here_p    PROTO ((rtx, rtx));
 632 static int can_disregard_other_sets   PROTO ((struct reg_set **, rtx, int));
 633 static int handle_avail_expr      PROTO ((rtx, struct expr *));
 634 static int classic_gcse        PROTO ((void));
 635 static int one_classic_gcse_pass      PROTO ((int));
 636 static void invalidate_nonnull_info     PROTO ((rtx, rtx, void *));
 637 static void delete_null_pointer_checks_1 PROTO ((int_list_ptr *, int *,
 638                                                  sbitmap *, sbitmap *,
 639                                                  struct null_pointer_info *));
 640 static rtx process_insert_insn  PROTO ((struct expr *));
 641 static int pre_edge_insert      PROTO ((struct edge_list *, struct expr **));
 642 static int expr_reaches_here_p_work     PROTO ((struct occr *, struct expr *, int, int, char *));
 643 static int pre_expr_reaches_here_p_work PROTO ((int, struct expr *, int, int, char *));
 644 \f
 645 /* Entry point for global common subexpression elimination.
 646    F is the first instruction in the function.  */
 647
 648 int
 649 gcse_main (f, file)
 650      rtx f;
 651      FILE *file;
 652 {
 653   int changed, pass;
 654   /* Bytes used at start of pass.  */
 655   int initial_bytes_used;
 656   /* Maximum number of bytes used by a pass.  */
 657   int max_pass_bytes;
 658   /* Point to release obstack data from for each pass.  */
 659   char *gcse_obstack_bottom;
 660
 661   /* We do not construct an accurate cfg in functions which call
 662      setjmp, so just punt to be safe.  */
 663   if (current_function_calls_setjmp)
 664     return 0;
 665
 666   /* Assume that we do not need to run jump optimizations after gcse.  */
 667   run_jump_opt_after_gcse = 0;
 668
 669   /* For calling dump_foo fns from gdb.  */
 670   debug_stderr = stderr;
 671   gcse_file = file;
 672
 673   /* Identify the basic block information for this function, including
 674      successors and predecessors.  */
 675   max_gcse_regno = max_reg_num ();
 676   find_basic_blocks (f, max_gcse_regno, file, 1);
 677
 678   if (file)
 679     dump_flow_info (file);
 680
 681   /* Return if there's nothing to do.  */
 682   if (n_basic_blocks <= 1)
 683     {
 684       /* Free storage allocated by find_basic_blocks.  */
 685       free_basic_block_vars (0);
 686       return 0;
 687     }
 688
 689   /* Trying to perform global optimizations on flow graphs which have
 690      a high connectivity will take a long time and is unlikely to be
 691      particularly useful.
 692
 693      In normal circumstances a cfg should have about twice has many edges
 694      as blocks.  But we do not want to punish small functions which have
 695      a couple switch statements.  So we require a relatively large number
 696      of basic blocks and the ratio of edges to blocks to be high.  */
 697   if (n_basic_blocks > 1000 && n_edges / n_basic_blocks >= 20)
 698     {
 699       /* Free storage allocated by find_basic_blocks.  */
 700       free_basic_block_vars (0);
 701       return 0;
 702     }
 703
 704   /* See what modes support reg/reg copy operations.  */
 705   if (! can_copy_init_p)
 706     {
 707       compute_can_copy ();
 708       can_copy_init_p = 1;
 709     }
 710
 711   gcc_obstack_init (&gcse_obstack);
 712   bytes_used = 0;
 713
 714   /* Record where pseudo-registers are set.
 715      This data is kept accurate during each pass.
 716      ??? We could also record hard-reg information here
 717      [since it's unchanging], however it is currently done during
 718      hash table computation.
 719
 720      It may be tempting to compute MEM set information here too, but MEM
 721      sets will be subject to code motion one day and thus we need to compute
 722      information about memory sets when we build the hash tables.  */
 723
 724   alloc_reg_set_mem (max_gcse_regno);
 725   compute_sets (f);
 726
 727   pass = 0;
 728   initial_bytes_used = bytes_used;
 729   max_pass_bytes = 0;
 730   gcse_obstack_bottom = gcse_alloc (1);
 731   changed = 1;
 732   while (changed && pass < MAX_PASSES)
 733     {
 734       changed = 0;
 735       if (file)
 736         fprintf (file, "GCSE pass %d\n\n", pass + 1);
 737
 738       /* Initialize bytes_used to the space for the pred/succ lists,
 739          and the reg_set_table data.  */
 740       bytes_used = initial_bytes_used;
 741
 742       /* Each pass may create new registers, so recalculate each time.  */
 743       max_gcse_regno = max_reg_num ();
 744
 745       alloc_gcse_mem (f);
 746
 747       /* Don't allow constant propagation to modify jumps
 748          during this pass.  */
 749       changed = one_cprop_pass (pass + 1, 0);
 750
 751       if (optimize_size)
 752         changed |= one_classic_gcse_pass (pass + 1);
 753       else
 754         {
 755           changed |= one_pre_gcse_pass (pass + 1);
 756           free_reg_set_mem ();
 757           alloc_reg_set_mem (max_reg_num ());
 758           compute_sets (f);
 759           run_jump_opt_after_gcse = 1;
 760         }
 761
 762       if (max_pass_bytes < bytes_used)
 763         max_pass_bytes = bytes_used;
 764
 765       /* Free up memory, then reallocate for code hoisting.  We can
 766          not re-use the existing allocated memory because the tables
 767          will not have info for the insns or registers created by
 768          partial redundancy elimination.  */
 769       free_gcse_mem ();
 770
 771       /* It does not make sense to run code hoisting unless we optimizing
 772          for code size -- it rarely makes programs faster, and can make
 773          them bigger if we did partial redundancy elimination (when optimizing
 774          for space, we use a classic gcse algorithm instead of partial
 775          redundancy algorithms).  */
 776       if (optimize_size)
 777         {
 778           max_gcse_regno = max_reg_num ();
 779           alloc_gcse_mem (f);
 780           changed |= one_code_hoisting_pass ();
 781           free_gcse_mem ();
 782
 783           if (max_pass_bytes < bytes_used)
 784             max_pass_bytes = bytes_used;
 785         }
 786
 787       if (file)
 788         {
 789           fprintf (file, "\n");
 790           fflush (file);
 791         }
 792       obstack_free (&gcse_obstack, gcse_obstack_bottom);
 793       pass++;
 794     }
 795
 796   /* Do one last pass of copy propagation, including cprop into
 797      conditional jumps.  */
 798
 799   max_gcse_regno = max_reg_num ();
 800   alloc_gcse_mem (f);
 801   /* This time, go ahead and allow cprop to alter jumps.  */
 802   one_cprop_pass (pass + 1, 1);
 803   free_gcse_mem ();
 804
 805   if (file)
 806     {
 807       fprintf (file, "GCSE of %s: %d basic blocks, ",
 808                current_function_name, n_basic_blocks);
 809       fprintf (file, "%d pass%s, %d bytes\n\n",
 810                pass, pass > 1 ? "es" : "", max_pass_bytes);
 811     }
 812
 813   /* Free our obstack.  */
 814   obstack_free (&gcse_obstack, NULL_PTR);
 815   /* Free reg_set_table.  */
 816   free_reg_set_mem ();
 817   /* Free storage used to record predecessor/successor data.  */
 818   free_bb_mem ();
 819   /* Free storage allocated by find_basic_blocks.  */
 820   free_basic_block_vars (0);
 821   return run_jump_opt_after_gcse;
 822 }
 823 \f
 824 /* Misc. utilities.  */
 825
 826 /* Compute which modes support reg/reg copy operations.  */
 827
 828 static void
 829 compute_can_copy ()
 830 {
 831   int i;
 832 #ifndef AVOID_CCMODE_COPIES
 833   rtx reg,insn;
 834 #endif
 835   char *free_point = (char *) oballoc (1);
 836
 837   bzero (can_copy_p, NUM_MACHINE_MODES);
 838
 839   start_sequence ();
 840   for (i = 0; i < NUM_MACHINE_MODES; i++)
 841     {
 842       switch (GET_MODE_CLASS (i))
 843         {
 844         case MODE_CC :
 845 #ifdef AVOID_CCMODE_COPIES
 846           can_copy_p[i] = 0;
 847 #else
 848           reg = gen_rtx_REG ((enum machine_mode) i, LAST_VIRTUAL_REGISTER + 1);
 849           insn = emit_insn (gen_rtx_SET (VOIDmode, reg, reg));
 850           if (recog (PATTERN (insn), insn, NULL_PTR) >= 0)
 851             can_copy_p[i] = 1;
 852 #endif
 853           break;
 854         default :
 855           can_copy_p[i] = 1;
 856           break;
 857         }
 858     }
 859   end_sequence ();
 860
 861   /* Free the objects we just allocated.  */
 862   obfree (free_point);
 863 }
 864 \f
 865 /* Cover function to xmalloc to record bytes allocated.  */
 866
 867 static char *
 868 gmalloc (size)
 869      unsigned int size;
 870 {
 871   bytes_used += size;
 872   return xmalloc (size);
 873 }
 874
 875 /* Cover function to xrealloc.
 876    We don't record the additional size since we don't know it.
 877    It won't affect memory usage stats much anyway.  */
 878
 879 static char *
 880 grealloc (ptr, size)
 881      char *ptr;
 882      unsigned int size;
 883 {
 884   return xrealloc (ptr, size);
 885 }
 886
 887 /* Cover function to obstack_alloc.
 888    We don't need to record the bytes allocated here since
 889    obstack_chunk_alloc is set to gmalloc.  */
 890
 891 static char *
 892 gcse_alloc (size)
 893      unsigned long size;
 894 {
 895   return (char *) obstack_alloc (&gcse_obstack, size);
 896 }
 897
 898 /* Allocate memory for the cuid mapping array,
 899    and reg/memory set tracking tables.
 900
 901    This is called at the start of each pass.  */
 902
 903 static void
 904 alloc_gcse_mem (f)
 905      rtx f;
 906 {
 907   int i,n;
 908   rtx insn;
 909
 910   /* Find the largest UID and create a mapping from UIDs to CUIDs.
 911      CUIDs are like UIDs except they increase monotonically, have no gaps,
 912      and only apply to real insns.  */
 913
 914   max_uid = get_max_uid ();
 915   n = (max_uid + 1) * sizeof (int);
 916   uid_cuid = (int *) gmalloc (n);
 917   bzero ((char *) uid_cuid, n);
 918   for (insn = f, i = 0; insn; insn = NEXT_INSN (insn))
 919     {
 920       if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
 921         INSN_CUID (insn) = i++;
 922       else
 923         INSN_CUID (insn) = i;
 924     }
 925
 926   /* Create a table mapping cuids to insns.  */
 927
 928   max_cuid = i;
 929   n = (max_cuid + 1) * sizeof (rtx);
 930   cuid_insn = (rtx *) gmalloc (n);
 931   bzero ((char *) cuid_insn, n);
 932   for (insn = f, i = 0; insn; insn = NEXT_INSN (insn))
 933     {
 934       if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
 935         {
 936           CUID_INSN (i) = insn;
 937           i++;
 938         }
 939     }
 940
 941   /* Allocate vars to track sets of regs.  */
 942
 943   reg_set_bitmap = (sbitmap) sbitmap_alloc (max_gcse_regno);
 944
 945   /* Allocate vars to track sets of regs, memory per block.  */
 946
 947   reg_set_in_block = (sbitmap *) sbitmap_vector_alloc (n_basic_blocks,
 948                                                        max_gcse_regno);
 949   mem_set_in_block = (char *) gmalloc (n_basic_blocks);
 950 }
 951
 952 /* Free memory allocated by alloc_gcse_mem.  */
 953
 954 static void
 955 free_gcse_mem ()
 956 {
 957   free (uid_cuid);
 958   free (cuid_insn);
 959
 960   free (reg_set_bitmap);
 961
 962   free (reg_set_in_block);
 963   free (mem_set_in_block);
 964 }
 965
 966 /* Many of the global optimization algorithms work by solving dataflow
 967    equations for various expressions.  Initially, some local value is
 968    computed for each expression in each block.  Then, the values
 969    across the various blocks are combined (by following flow graph
 970    edges) to arrive at global values.  Conceptually, each set of
 971    equations is independent.  We may therefore solve all the equations
 972    in parallel, solve them one at a time, or pick any intermediate
 973    approach.
 974
 975    When you're going to need N two-dimensional bitmaps, each X (say,
 976    the number of blocks) by Y (say, the number of expressions), call
 977    this function.  It's not important what X and Y represent; only
 978    that Y correspond to the things that can be done in parallel.  This
 979    function will return an appropriate chunking factor C; you should
 980    solve C sets of equations in parallel.  By going through this
 981    function, we can easily trade space against time; by solving fewer
 982    equations in parallel we use less space.  */
 983
 984 static int
 985 get_bitmap_width (n, x, y)
 986      int n;
 987      int x;
 988      int y;
 989 {
 990   /* It's not really worth figuring out *exactly* how much memory will
 991      be used by a particular choice.  The important thing is to get
 992      something approximately right.  */
 993   size_t max_bitmap_memory = 10 * 1024 * 1024;
 994
 995   /* The number of bytes we'd use for a single column of minimum
 996      width.  */
 997   size_t column_size = n * x * sizeof (SBITMAP_ELT_TYPE);
 998
 999   /* Often, it's reasonable just to solve all the equations in
1000      parallel.  */
1001   if (column_size * SBITMAP_SET_SIZE (y) <= max_bitmap_memory)
1002     return y;
1003
1004   /* Otherwise, pick the largest width we can, without going over the
1005      limit.  */
1006   return SBITMAP_ELT_BITS * ((max_bitmap_memory + column_size - 1)
1007                              / column_size);
1008 }
1009
1010 \f
1011 /* Compute the local properties of each recorded expression.
1012    Local properties are those that are defined by the block, irrespective
1013    of other blocks.
1014
1015    An expression is transparent in a block if its operands are not modified
1016    in the block.
1017
1018    An expression is computed (locally available) in a block if it is computed
1019    at least once and expression would contain the same value if the
1020    computation was moved to the end of the block.
1021
1022    An expression is locally anticipatable in a block if it is computed at
1023    least once and expression would contain the same value if the computation
1024    was moved to the beginning of the block.
1025
1026    We call this routine for cprop, pre and code hoisting.  They all
1027    compute basically the same information and thus can easily share
1028    this code.
1029
1030    TRANSP, COMP, and ANTLOC are destination sbitmaps for recording
1031    local properties.  If NULL, then it is not necessary to compute
1032    or record that particular property.
1033
1034    SETP controls which hash table to look at.  If zero, this routine
1035    looks at the expr hash table; if nonzero this routine looks at
1036    the set hash table.  Additionally, TRANSP is computed as ~TRANSP,
1037    since this is really cprop's ABSALTERED.  */
1038
1039 static void
1040 compute_local_properties (transp, comp, antloc, setp)
1041      sbitmap *transp;
1042      sbitmap *comp;
1043      sbitmap *antloc;
1044      int setp;
1045 {
1046   int i, hash_table_size;
1047   struct expr **hash_table;
1048
1049   /* Initialize any bitmaps that were passed in.  */
1050   if (transp)
1051     {
1052       if (setp)
1053         sbitmap_vector_zero (transp, n_basic_blocks);
1054       else
1055         sbitmap_vector_ones (transp, n_basic_blocks);
1056     }
1057   if (comp)
1058     sbitmap_vector_zero (comp, n_basic_blocks);
1059   if (antloc)
1060     sbitmap_vector_zero (antloc, n_basic_blocks);
1061
1062   /* We use the same code for cprop, pre and hoisting.  For cprop
1063      we care about the set hash table, for pre and hoisting we
1064      care about the expr hash table.  */
1065   hash_table_size = setp ? set_hash_table_size : expr_hash_table_size;
1066   hash_table = setp ? set_hash_table : expr_hash_table;
1067
1068   for (i = 0; i < hash_table_size; i++)
1069     {
1070       struct expr *expr;
1071
1072       for (expr = hash_table[i]; expr != NULL; expr = expr->next_same_hash)
1073         {
1074           struct occr *occr;
1075           int indx = expr->bitmap_index;
1076
1077           /* The expression is transparent in this block if it is not killed.
1078              We start by assuming all are transparent [none are killed], and
1079              then reset the bits for those that are.  */
1080
1081           if (transp)
1082             compute_transp (expr->expr, indx, transp, setp);
1083
1084           /* The occurrences recorded in antic_occr are exactly those that
1085              we want to set to non-zero in ANTLOC.  */
1086
1087           if (antloc)
1088             {
1089               for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
1090                 {
1091                   int bb = BLOCK_NUM (occr->insn);
1092                   SET_BIT (antloc[bb], indx);
1093
1094                   /* While we're scanning the table, this is a good place to
1095                      initialize this.  */
1096                   occr->deleted_p = 0;
1097                 }
1098             }
1099
1100           /* The occurrences recorded in avail_occr are exactly those that
1101              we want to set to non-zero in COMP.  */
1102           if (comp)
1103             {
1104
1105               for (occr = expr->avail_occr; occr != NULL; occr = occr->next)
1106                 {
1107                   int bb = BLOCK_NUM (occr->insn);
1108                   SET_BIT (comp[bb], indx);
1109
1110                   /* While we're scanning the table, this is a good place to
1111                      initialize this.  */
1112                   occr->copied_p = 0;
1113                 }
1114             }
1115
1116           /* While we're scanning the table, this is a good place to
1117              initialize this.  */
1118           expr->reaching_reg = 0;
1119         }
1120     }
1121 }
1122
1123 \f
1124 /* Register set information.
1125
1126    `reg_set_table' records where each register is set or otherwise
1127    modified.  */
1128
1129 static struct obstack reg_set_obstack;
1130
1131 static void
1132 alloc_reg_set_mem (n_regs)
1133      int n_regs;
1134 {
1135   int n;
1136
1137   reg_set_table_size = n_regs + REG_SET_TABLE_SLOP;
1138   n = reg_set_table_size * sizeof (struct reg_set *);
1139   reg_set_table = (struct reg_set **) gmalloc (n);
1140   bzero ((char *) reg_set_table, n);
1141
1142   gcc_obstack_init (&reg_set_obstack);
1143 }
1144
1145 static void
1146 free_reg_set_mem ()
1147 {
1148   free (reg_set_table);
1149   obstack_free (&reg_set_obstack, NULL_PTR);
1150 }
1151
1152 /* Record REGNO in the reg_set table.  */
1153
1154 static void
1155 record_one_set (regno, insn)
1156      int regno;
1157      rtx insn;
1158 {
1159   /* allocate a new reg_set element and link it onto the list */
1160   struct reg_set *new_reg_info, *reg_info_ptr1, *reg_info_ptr2;
1161
1162   /* If the table isn't big enough, enlarge it.  */
1163   if (regno >= reg_set_table_size)
1164     {
1165       int new_size = regno + REG_SET_TABLE_SLOP;
1166       reg_set_table = (struct reg_set **)
1167         grealloc ((char *) reg_set_table,
1168                   new_size * sizeof (struct reg_set *));
1169       bzero ((char *) (reg_set_table + reg_set_table_size),
1170              (new_size - reg_set_table_size) * sizeof (struct reg_set *));
1171       reg_set_table_size = new_size;
1172     }
1173
1174   new_reg_info = (struct reg_set *) obstack_alloc (&reg_set_obstack,
1175                                                    sizeof (struct reg_set));
1176   bytes_used += sizeof (struct reg_set);
1177   new_reg_info->insn = insn;
1178   new_reg_info->next = NULL;
1179   if (reg_set_table[regno] == NULL)
1180     reg_set_table[regno] = new_reg_info;
1181   else
1182     {
1183       reg_info_ptr1 = reg_info_ptr2 = reg_set_table[regno];
1184       /* ??? One could keep a "last" pointer to speed this up.  */
1185       while (reg_info_ptr1 != NULL)
1186         {
1187           reg_info_ptr2 = reg_info_ptr1;
1188           reg_info_ptr1 = reg_info_ptr1->next;
1189         }
1190       reg_info_ptr2->next = new_reg_info;
1191     }
1192 }
1193
1194 /* Called from compute_sets via note_stores to handle one
1195    SET or CLOBBER in an insn.  The DATA is really the instruction
1196    in which the SET is occurring.  */
1197
1198 static void
1199 record_set_info (dest, setter, data)
1200      rtx dest, setter ATTRIBUTE_UNUSED;
1201      void *data;
1202 {
1203   rtx record_set_insn = (rtx) data;
1204
1205   if (GET_CODE (dest) == SUBREG)
1206     dest = SUBREG_REG (dest);
1207
1208   if (GET_CODE (dest) == REG)
1209     {
1210       if (REGNO (dest) >= FIRST_PSEUDO_REGISTER)
1211         record_one_set (REGNO (dest), record_set_insn);
1212     }
1213 }
1214
1215 /* Scan the function and record each set of each pseudo-register.
1216
1217    This is called once, at the start of the gcse pass.
1218    See the comments for `reg_set_table' for further docs.  */
1219
1220 static void
1221 compute_sets (f)
1222      rtx f;
1223 {
1224   rtx insn = f;
1225
1226   while (insn)
1227     {
1228       if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
1229         note_stores (PATTERN (insn), record_set_info, insn);
1230       insn = NEXT_INSN (insn);
1231     }
1232 }
1233 \f
1234 /* Hash table support.  */
1235
1236 #define NEVER_SET -1
1237
1238 /* For each register, the cuid of the first/last insn in the block to set it,
1239    or -1 if not set.  */
1240 static int *reg_first_set;
1241 static int *reg_last_set;
1242
1243 /* While computing "first/last set" info, this is the CUID of first/last insn
1244    to set memory or -1 if not set.  `mem_last_set' is also used when
1245    performing GCSE to record whether memory has been set since the beginning
1246    of the block.
1247    Note that handling of memory is very simple, we don't make any attempt
1248    to optimize things (later).  */
1249 static int mem_first_set;
1250 static int mem_last_set;
1251
1252 /* Perform a quick check whether X, the source of a set, is something
1253    we want to consider for GCSE.  */
1254
1255 static int
1256 want_to_gcse_p (x)
1257      rtx x;
1258 {
1259   enum rtx_code code = GET_CODE (x);
1260
1261   switch (code)
1262     {
1263     case REG:
1264     case SUBREG:
1265     case CONST_INT:
1266     case CONST_DOUBLE:
1267     case CALL:
1268       return 0;
1269
1270     default:
1271       break;
1272     }
1273
1274   return 1;
1275 }
1276
1277 /* Return non-zero if the operands of expression X are unchanged from the
1278    start of INSN's basic block up to but not including INSN (if AVAIL_P == 0),
1279    or from INSN to the end of INSN's basic block (if AVAIL_P != 0).  */
1280
1281 static int
1282 oprs_unchanged_p (x, insn, avail_p)
1283      rtx x, insn;
1284      int avail_p;
1285 {
1286   int i;
1287   enum rtx_code code;
1288   const char *fmt;
1289
1290   /* repeat is used to turn tail-recursion into iteration.  */
1291  repeat:
1292
1293   if (x == 0)
1294     return 1;
1295
1296   code = GET_CODE (x);
1297   switch (code)
1298     {
1299     case REG:
1300       if (avail_p)
1301         return (reg_last_set[REGNO (x)] == NEVER_SET
1302                 || reg_last_set[REGNO (x)] < INSN_CUID (insn));
1303       else
1304         return (reg_first_set[REGNO (x)] == NEVER_SET
1305                 || reg_first_set[REGNO (x)] >= INSN_CUID (insn));
1306
1307     case MEM:
1308       if (avail_p)
1309         {
1310           if (mem_last_set != NEVER_SET
1311               && mem_last_set >= INSN_CUID (insn))
1312             return 0;
1313         }
1314       else
1315         {
1316           if (mem_first_set != NEVER_SET
1317               && mem_first_set < INSN_CUID (insn))
1318             return 0;
1319         }
1320       x = XEXP (x, 0);
1321       goto repeat;
1322
1323     case PRE_DEC:
1324     case PRE_INC:
1325     case POST_DEC:
1326     case POST_INC:
1327       return 0;
1328
1329     case PC:
1330     case CC0: /*FIXME*/
1331     case CONST:
1332     case CONST_INT:
1333     case CONST_DOUBLE:
1334     case SYMBOL_REF:
1335     case LABEL_REF:
1336     case ADDR_VEC:
1337     case ADDR_DIFF_VEC:
1338       return 1;
1339
1340     default:
1341       break;
1342     }
1343
1344   i = GET_RTX_LENGTH (code) - 1;
1345   fmt = GET_RTX_FORMAT (code);
1346   for (; i >= 0; i--)
1347     {
1348       if (fmt[i] == 'e')
1349         {
1350           rtx tem = XEXP (x, i);
1351
1352           /* If we are about to do the last recursive call
1353              needed at this level, change it into iteration.
1354              This function is called enough to be worth it.  */
1355           if (i == 0)
1356             {
1357               x = tem;
1358               goto repeat;
1359             }
1360           if (! oprs_unchanged_p (tem, insn, avail_p))
1361             return 0;
1362         }
1363       else if (fmt[i] == 'E')
1364         {
1365           int j;
1366           for (j = 0; j < XVECLEN (x, i); j++)
1367             {
1368               if (! oprs_unchanged_p (XVECEXP (x, i, j), insn, avail_p))
1369                 return 0;
1370             }
1371         }
1372     }
1373
1374   return 1;
1375 }
1376
1377 /* Return non-zero if the operands of expression X are unchanged from
1378    the start of INSN's basic block up to but not including INSN.  */
1379
1380 static int
1381 oprs_anticipatable_p (x, insn)
1382      rtx x, insn;
1383 {
1384   return oprs_unchanged_p (x, insn, 0);
1385 }
1386
1387 /* Return non-zero if the operands of expression X are unchanged from
1388    INSN to the end of INSN's basic block.  */
1389
1390 static int
1391 oprs_available_p (x, insn)
1392      rtx x, insn;
1393 {
1394   return oprs_unchanged_p (x, insn, 1);
1395 }
1396
1397 /* Hash expression X.
1398    MODE is only used if X is a CONST_INT.
1399    A boolean indicating if a volatile operand is found or if the expression
1400    contains something we don't want to insert in the table is stored in
1401    DO_NOT_RECORD_P.
1402
1403    ??? One might want to merge this with canon_hash.  Later.  */
1404
1405 static unsigned int
1406 hash_expr (x, mode, do_not_record_p, hash_table_size)
1407      rtx x;
1408      enum machine_mode mode;
1409      int *do_not_record_p;
1410      int hash_table_size;
1411 {
1412   unsigned int hash;
1413
1414   *do_not_record_p = 0;
1415
1416   hash = hash_expr_1 (x, mode, do_not_record_p);
1417   return hash % hash_table_size;
1418 }
1419
1420 /* Subroutine of hash_expr to do the actual work.  */
1421
1422 static unsigned int
1423 hash_expr_1 (x, mode, do_not_record_p)
1424      rtx x;
1425      enum machine_mode mode;
1426      int *do_not_record_p;
1427 {
1428   int i, j;
1429   unsigned hash = 0;
1430   enum rtx_code code;
1431   const char *fmt;
1432
1433   /* repeat is used to turn tail-recursion into iteration.  */
1434  repeat:
1435
1436   if (x == 0)
1437     return hash;
1438
1439   code = GET_CODE (x);
1440   switch (code)
1441     {
1442     case REG:
1443       {
1444         register int regno = REGNO (x);
1445         hash += ((unsigned) REG << 7) + regno;
1446         return hash;
1447       }
1448
1449     case CONST_INT:
1450       {
1451         unsigned HOST_WIDE_INT tem = INTVAL (x);
1452         hash += ((unsigned) CONST_INT << 7) + (unsigned) mode + tem;
1453         return hash;
1454       }
1455
1456     case CONST_DOUBLE:
1457       /* This is like the general case, except that it only counts
1458          the integers representing the constant.  */
1459       hash += (unsigned) code + (unsigned) GET_MODE (x);
1460       if (GET_MODE (x) != VOIDmode)
1461         for (i = 2; i < GET_RTX_LENGTH (CONST_DOUBLE); i++)
1462           {
1463             unsigned tem = XWINT (x, i);
1464             hash += tem;
1465           }
1466       else
1467         hash += ((unsigned) CONST_DOUBLE_LOW (x)
1468                  + (unsigned) CONST_DOUBLE_HIGH (x));
1469       return hash;
1470
1471       /* Assume there is only one rtx object for any given label.  */
1472     case LABEL_REF:
1473       /* We don't hash on the address of the CODE_LABEL to avoid bootstrap
1474          differences and differences between each stage's debugging dumps.  */
1475       hash += ((unsigned) LABEL_REF << 7) + CODE_LABEL_NUMBER (XEXP (x, 0));
1476       return hash;
1477
1478     case SYMBOL_REF:
1479       {
1480         /* Don't hash on the symbol's address to avoid bootstrap differences.
1481            Different hash values may cause expressions to be recorded in
1482            different orders and thus different registers to be used in the
1483            final assembler.  This also avoids differences in the dump files
1484            between various stages.  */
1485         unsigned int h = 0;
1486         unsigned char *p = (unsigned char *) XSTR (x, 0);
1487         while (*p)
1488           h += (h << 7) + *p++; /* ??? revisit */
1489         hash += ((unsigned) SYMBOL_REF << 7) + h;
1490         return hash;
1491       }
1492
1493     case MEM:
1494       if (MEM_VOLATILE_P (x))
1495         {
1496           *do_not_record_p = 1;
1497           return 0;
1498         }
1499       hash += (unsigned) MEM;
1500       hash += MEM_ALIAS_SET (x);
1501       x = XEXP (x, 0);
1502       goto repeat;
1503
1504     case PRE_DEC:
1505     case PRE_INC:
1506     case POST_DEC:
1507     case POST_INC:
1508     case PC:
1509     case CC0:
1510     case CALL:
1511     case UNSPEC_VOLATILE:
1512       *do_not_record_p = 1;
1513       return 0;
1514
1515     case ASM_OPERANDS:
1516       if (MEM_VOLATILE_P (x))
1517         {
1518           *do_not_record_p = 1;
1519           return 0;
1520         }
1521
1522     default:
1523       break;
1524     }
1525
1526   i = GET_RTX_LENGTH (code) - 1;
1527   hash += (unsigned) code + (unsigned) GET_MODE (x);
1528   fmt = GET_RTX_FORMAT (code);
1529   for (; i >= 0; i--)
1530     {
1531       if (fmt[i] == 'e')
1532         {
1533           rtx tem = XEXP (x, i);
1534
1535           /* If we are about to do the last recursive call
1536              needed at this level, change it into iteration.
1537              This function is called enough to be worth it.  */
1538           if (i == 0)
1539             {
1540               x = tem;
1541               goto repeat;
1542             }
1543           hash += hash_expr_1 (tem, 0, do_not_record_p);
1544           if (*do_not_record_p)
1545             return 0;
1546         }
1547       else if (fmt[i] == 'E')
1548         for (j = 0; j < XVECLEN (x, i); j++)
1549           {
1550             hash += hash_expr_1 (XVECEXP (x, i, j), 0, do_not_record_p);
1551             if (*do_not_record_p)
1552               return 0;
1553           }
1554       else if (fmt[i] == 's')
1555         {
1556           register unsigned char *p = (unsigned char *) XSTR (x, i);
1557           if (p)
1558             while (*p)
1559               hash += *p++;
1560         }
1561       else if (fmt[i] == 'i')
1562         {
1563           register unsigned tem = XINT (x, i);
1564           hash += tem;
1565         }
1566       else
1567         abort ();
1568     }
1569
1570   return hash;
1571 }
1572
1573 /* Hash a set of register REGNO.
1574
1575    Sets are hashed on the register that is set.
1576    This simplifies the PRE copy propagation code.
1577
1578    ??? May need to make things more elaborate.  Later, as necessary.  */
1579
1580 static unsigned int
1581 hash_set (regno, hash_table_size)
1582      int regno;
1583      int hash_table_size;
1584 {
1585   unsigned int hash;
1586
1587   hash = regno;
1588   return hash % hash_table_size;
1589 }
1590
1591 /* Return non-zero if exp1 is equivalent to exp2.
1592    ??? Borrowed from cse.c.  Might want to remerge with cse.c.  Later.  */
1593
1594 static int
1595 expr_equiv_p (x, y)
1596      rtx x, y;
1597 {
1598   register int i, j;
1599   register enum rtx_code code;
1600   register const char *fmt;
1601
1602   if (x == y)
1603     return 1;
1604   if (x == 0 || y == 0)
1605     return x == y;
1606
1607   code = GET_CODE (x);
1608   if (code != GET_CODE (y))
1609     return 0;
1610
1611   /* (MULT:SI x y) and (MULT:HI x y) are NOT equivalent.  */
1612   if (GET_MODE (x) != GET_MODE (y))
1613     return 0;
1614
1615   switch (code)
1616     {
1617     case PC:
1618     case CC0:
1619       return x == y;
1620
1621     case CONST_INT:
1622       return INTVAL (x) == INTVAL (y);
1623
1624     case LABEL_REF:
1625       return XEXP (x, 0) == XEXP (y, 0);
1626
1627     case SYMBOL_REF:
1628       return XSTR (x, 0) == XSTR (y, 0);
1629
1630     case REG:
1631       return REGNO (x) == REGNO (y);
1632
1633     case MEM:
1634       /* Can't merge two expressions in different alias sets, since we can
1635          decide that the expression is transparent in a block when it isn't,
1636          due to it being set with the different alias set.  */
1637       if (MEM_ALIAS_SET (x) != MEM_ALIAS_SET (y))
1638         return 0;
1639       break;
1640
1641     /*  For commutative operations, check both orders.  */
1642     case PLUS:
1643     case MULT:
1644     case AND:
1645     case IOR:
1646     case XOR:
1647     case NE:
1648     case EQ:
1649       return ((expr_equiv_p (XEXP (x, 0), XEXP (y, 0))
1650                && expr_equiv_p (XEXP (x, 1), XEXP (y, 1)))
1651               || (expr_equiv_p (XEXP (x, 0), XEXP (y, 1))
1652                   && expr_equiv_p (XEXP (x, 1), XEXP (y, 0))));
1653
1654     default:
1655       break;
1656     }
1657
1658   /* Compare the elements.  If any pair of corresponding elements
1659      fail to match, return 0 for the whole thing.  */
1660
1661   fmt = GET_RTX_FORMAT (code);
1662   for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
1663     {
1664       switch (fmt[i])
1665         {
1666         case 'e':
1667           if (! expr_equiv_p (XEXP (x, i), XEXP (y, i)))
1668             return 0;
1669           break;
1670
1671         case 'E':
1672           if (XVECLEN (x, i) != XVECLEN (y, i))
1673             return 0;
1674           for (j = 0; j < XVECLEN (x, i); j++)
1675             if (! expr_equiv_p (XVECEXP (x, i, j), XVECEXP (y, i, j)))
1676               return 0;
1677           break;
1678
1679         case 's':
1680           if (strcmp (XSTR (x, i), XSTR (y, i)))
1681             return 0;
1682           break;
1683
1684         case 'i':
1685           if (XINT (x, i) != XINT (y, i))
1686             return 0;
1687           break;
1688
1689         case 'w':
1690           if (XWINT (x, i) != XWINT (y, i))
1691             return 0;
1692         break;
1693
1694         case '0':
1695           break;
1696
1697         default:
1698           abort ();
1699         }
1700       }
1701
1702   return 1;
1703 }
1704
1705 /* Insert expression X in INSN in the hash table.
1706    If it is already present, record it as the last occurrence in INSN's
1707    basic block.
1708
1709    MODE is the mode of the value X is being stored into.
1710    It is only used if X is a CONST_INT.
1711
1712    ANTIC_P is non-zero if X is an anticipatable expression.
1713    AVAIL_P is non-zero if X is an available expression.  */
1714
1715 static void
1716 insert_expr_in_table (x, mode, insn, antic_p, avail_p)
1717      rtx x;
1718      enum machine_mode mode;
1719      rtx insn;
1720      int antic_p, avail_p;
1721 {
1722   int found, do_not_record_p;
1723   unsigned int hash;
1724   struct expr *cur_expr, *last_expr = NULL;
1725   struct occr *antic_occr, *avail_occr;
1726   struct occr *last_occr = NULL;
1727
1728   hash = hash_expr (x, mode, &do_not_record_p, expr_hash_table_size);
1729
1730   /* Do not insert expression in table if it contains volatile operands,
1731      or if hash_expr determines the expression is something we don't want
1732      to or can't handle.  */
1733   if (do_not_record_p)
1734     return;
1735
1736   cur_expr = expr_hash_table[hash];
1737   found = 0;
1738
1739   while (cur_expr && ! (found = expr_equiv_p (cur_expr->expr, x)))
1740     {
1741       /* If the expression isn't found, save a pointer to the end of
1742          the list.  */
1743       last_expr = cur_expr;
1744       cur_expr = cur_expr->next_same_hash;
1745     }
1746
1747   if (! found)
1748     {
1749       cur_expr = (struct expr *) gcse_alloc (sizeof (struct expr));
1750       bytes_used += sizeof (struct expr);
1751       if (expr_hash_table[hash] == NULL)
1752         {
1753           /* This is the first pattern that hashed to this index.  */
1754           expr_hash_table[hash] = cur_expr;
1755         }
1756       else
1757         {
1758           /* Add EXPR to end of this hash chain.  */
1759           last_expr->next_same_hash = cur_expr;
1760         }
1761       /* Set the fields of the expr element.  */
1762       cur_expr->expr = x;
1763       cur_expr->bitmap_index = n_exprs++;
1764       cur_expr->next_same_hash = NULL;
1765       cur_expr->antic_occr = NULL;
1766       cur_expr->avail_occr = NULL;
1767     }
1768
1769   /* Now record the occurrence(s).  */
1770
1771   if (antic_p)
1772     {
1773       antic_occr = cur_expr->antic_occr;
1774
1775       /* Search for another occurrence in the same basic block.  */
1776       while (antic_occr && BLOCK_NUM (antic_occr->insn) != BLOCK_NUM (insn))
1777         {
1778           /* If an occurrence isn't found, save a pointer to the end of
1779              the list.  */
1780           last_occr = antic_occr;
1781           antic_occr = antic_occr->next;
1782         }
1783
1784       if (antic_occr)
1785         {
1786           /* Found another instance of the expression in the same basic block.
1787              Prefer the currently recorded one.  We want the first one in the
1788              block and the block is scanned from start to end.  */
1789           ; /* nothing to do */
1790         }
1791       else
1792         {
1793           /* First occurrence of this expression in this basic block.  */
1794           antic_occr = (struct occr *) gcse_alloc (sizeof (struct occr));
1795           bytes_used += sizeof (struct occr);
1796           /* First occurrence of this expression in any block?  */
1797           if (cur_expr->antic_occr == NULL)
1798             cur_expr->antic_occr = antic_occr;
1799           else
1800             last_occr->next = antic_occr;
1801           antic_occr->insn = insn;
1802           antic_occr->next = NULL;
1803         }
1804     }
1805
1806   if (avail_p)
1807     {
1808       avail_occr = cur_expr->avail_occr;
1809
1810       /* Search for another occurrence in the same basic block.  */
1811       while (avail_occr && BLOCK_NUM (avail_occr->insn) != BLOCK_NUM (insn))
1812         {
1813           /* If an occurrence isn't found, save a pointer to the end of
1814              the list.  */
1815           last_occr = avail_occr;
1816           avail_occr = avail_occr->next;
1817         }
1818
1819       if (avail_occr)
1820         {
1821           /* Found another instance of the expression in the same basic block.
1822              Prefer this occurrence to the currently recorded one.  We want
1823              the last one in the block and the block is scanned from start
1824              to end.  */
1825           avail_occr->insn = insn;
1826         }
1827       else
1828         {
1829           /* First occurrence of this expression in this basic block.  */
1830           avail_occr = (struct occr *) gcse_alloc (sizeof (struct occr));
1831           bytes_used += sizeof (struct occr);
1832           /* First occurrence of this expression in any block?  */
1833           if (cur_expr->avail_occr == NULL)
1834             cur_expr->avail_occr = avail_occr;
1835           else
1836             last_occr->next = avail_occr;
1837           avail_occr->insn = insn;
1838           avail_occr->next = NULL;
1839         }
1840     }
1841 }
1842
1843 /* Insert pattern X in INSN in the hash table.
1844    X is a SET of a reg to either another reg or a constant.
1845    If it is already present, record it as the last occurrence in INSN's
1846    basic block.  */
1847
1848 static void
1849 insert_set_in_table (x, insn)
1850      rtx x;
1851      rtx insn;
1852 {
1853   int found;
1854   unsigned int hash;
1855   struct expr *cur_expr, *last_expr = NULL;
1856   struct occr *cur_occr, *last_occr = NULL;
1857
1858   if (GET_CODE (x) != SET
1859       || GET_CODE (SET_DEST (x)) != REG)
1860     abort ();
1861
1862   hash = hash_set (REGNO (SET_DEST (x)), set_hash_table_size);
1863
1864   cur_expr = set_hash_table[hash];
1865   found = 0;
1866
1867   while (cur_expr && ! (found = expr_equiv_p (cur_expr->expr, x)))
1868     {
1869       /* If the expression isn't found, save a pointer to the end of
1870          the list.  */
1871       last_expr = cur_expr;
1872       cur_expr = cur_expr->next_same_hash;
1873     }
1874
1875   if (! found)
1876     {
1877       cur_expr = (struct expr *) gcse_alloc (sizeof (struct expr));
1878       bytes_used += sizeof (struct expr);
1879       if (set_hash_table[hash] == NULL)
1880         {
1881           /* This is the first pattern that hashed to this index.  */
1882           set_hash_table[hash] = cur_expr;
1883         }
1884       else
1885         {
1886           /* Add EXPR to end of this hash chain.  */
1887           last_expr->next_same_hash = cur_expr;
1888         }
1889       /* Set the fields of the expr element.
1890          We must copy X because it can be modified when copy propagation is
1891          performed on its operands.  */
1892       /* ??? Should this go in a different obstack?  */
1893       cur_expr->expr = copy_rtx (x);
1894       cur_expr->bitmap_index = n_sets++;
1895       cur_expr->next_same_hash = NULL;
1896       cur_expr->antic_occr = NULL;
1897       cur_expr->avail_occr = NULL;
1898     }
1899
1900   /* Now record the occurrence.  */
1901
1902   cur_occr = cur_expr->avail_occr;
1903
1904   /* Search for another occurrence in the same basic block.  */
1905   while (cur_occr && BLOCK_NUM (cur_occr->insn) != BLOCK_NUM (insn))
1906     {
1907       /* If an occurrence isn't found, save a pointer to the end of
1908          the list.  */
1909       last_occr = cur_occr;
1910       cur_occr = cur_occr->next;
1911     }
1912
1913   if (cur_occr)
1914     {
1915       /* Found another instance of the expression in the same basic block.
1916          Prefer this occurrence to the currently recorded one.  We want
1917          the last one in the block and the block is scanned from start
1918          to end.  */
1919       cur_occr->insn = insn;
1920     }
1921   else
1922     {
1923       /* First occurrence of this expression in this basic block.  */
1924       cur_occr = (struct occr *) gcse_alloc (sizeof (struct occr));
1925       bytes_used += sizeof (struct occr);
1926       /* First occurrence of this expression in any block?  */
1927       if (cur_expr->avail_occr == NULL)
1928         cur_expr->avail_occr = cur_occr;
1929       else
1930         last_occr->next = cur_occr;
1931       cur_occr->insn = insn;
1932       cur_occr->next = NULL;
1933     }
1934 }
1935
1936 /* Scan pattern PAT of INSN and add an entry to the hash table.
1937    If SET_P is non-zero, this is for the assignment hash table,
1938    otherwise it is for the expression hash table.  */
1939
1940 static void
1941 hash_scan_set (pat, insn, set_p)
1942      rtx pat, insn;
1943      int set_p;
1944 {
1945   rtx src = SET_SRC (pat);
1946   rtx dest = SET_DEST (pat);
1947
1948   if (GET_CODE (src) == CALL)
1949     hash_scan_call (src, insn);
1950
1951   if (GET_CODE (dest) == REG)
1952     {
1953       int regno = REGNO (dest);
1954       rtx tmp;
1955
1956       /* Only record sets of pseudo-regs in the hash table.  */
1957       if (! set_p
1958           && regno >= FIRST_PSEUDO_REGISTER
1959           /* Don't GCSE something if we can't do a reg/reg copy.  */
1960           && can_copy_p [GET_MODE (dest)]
1961           /* Is SET_SRC something we want to gcse?  */
1962           && want_to_gcse_p (src))
1963         {
1964           /* An expression is not anticipatable if its operands are
1965              modified before this insn.  */
1966           int antic_p = oprs_anticipatable_p (src, insn);
1967           /* An expression is not available if its operands are
1968              subsequently modified, including this insn.  */
1969           int avail_p = oprs_available_p (src, insn);
1970           insert_expr_in_table (src, GET_MODE (dest), insn, antic_p, avail_p);
1971         }
1972       /* Record sets for constant/copy propagation.  */
1973       else if (set_p
1974                && regno >= FIRST_PSEUDO_REGISTER
1975                && ((GET_CODE (src) == REG
1976                     && REGNO (src) >= FIRST_PSEUDO_REGISTER
1977                     && can_copy_p [GET_MODE (dest)])
1978                    || GET_CODE (src) == CONST_INT
1979                    || GET_CODE (src) == SYMBOL_REF
1980                    || GET_CODE (src) == CONST_DOUBLE)
1981                /* A copy is not available if its src or dest is subsequently
1982                   modified.  Here we want to search from INSN+1 on, but
1983                   oprs_available_p searches from INSN on.  */
1984                && (insn == BLOCK_END (BLOCK_NUM (insn))
1985                    || ((tmp = next_nonnote_insn (insn)) != NULL_RTX
1986                        && oprs_available_p (pat, tmp))))
1987         insert_set_in_table (pat, insn);
1988     }
1989 }
1990
1991 static void
1992 hash_scan_clobber (x, insn)
1993      rtx x ATTRIBUTE_UNUSED, insn ATTRIBUTE_UNUSED;
1994 {
1995   /* Currently nothing to do.  */
1996 }
1997
1998 static void
1999 hash_scan_call (x, insn)
2000      rtx x ATTRIBUTE_UNUSED, insn ATTRIBUTE_UNUSED;
2001 {
2002   /* Currently nothing to do.  */
2003 }
2004
2005 /* Process INSN and add hash table entries as appropriate.
2006
2007    Only available expressions that set a single pseudo-reg are recorded.
2008
2009    Single sets in a PARALLEL could be handled, but it's an extra complication
2010    that isn't dealt with right now.  The trick is handling the CLOBBERs that
2011    are also in the PARALLEL.  Later.
2012
2013    If SET_P is non-zero, this is for the assignment hash table,
2014    otherwise it is for the expression hash table.
2015    If IN_LIBCALL_BLOCK nonzero, we are in a libcall block, and should
2016    not record any expressions.  */
2017
2018 static void
2019 hash_scan_insn (insn, set_p, in_libcall_block)
2020      rtx insn;
2021      int set_p;
2022      int in_libcall_block;
2023 {
2024   rtx pat = PATTERN (insn);
2025
2026   /* Pick out the sets of INSN and for other forms of instructions record
2027      what's been modified.  */
2028
2029   if (GET_CODE (pat) == SET && ! in_libcall_block)
2030     {
2031       /* Ignore obvious no-ops.  */
2032       if (SET_SRC (pat) != SET_DEST (pat))
2033         hash_scan_set (pat, insn, set_p);
2034     }
2035   else if (GET_CODE (pat) == PARALLEL)
2036     {
2037       int i;
2038
2039       for (i = 0; i < XVECLEN (pat, 0); i++)
2040         {
2041           rtx x = XVECEXP (pat, 0, i);
2042
2043           if (GET_CODE (x) == SET)
2044             {
2045               if (GET_CODE (SET_SRC (x)) == CALL)
2046                 hash_scan_call (SET_SRC (x), insn);
2047             }
2048           else if (GET_CODE (x) == CLOBBER)
2049             hash_scan_clobber (x, insn);
2050           else if (GET_CODE (x) == CALL)
2051             hash_scan_call (x, insn);
2052         }
2053     }
2054   else if (GET_CODE (pat) == CLOBBER)
2055     hash_scan_clobber (pat, insn);
2056   else if (GET_CODE (pat) == CALL)
2057     hash_scan_call (pat, insn);
2058 }
2059
2060 static void
2061 dump_hash_table (file, name, table, table_size, total_size)
2062      FILE *file;
2063      const char *name;
2064      struct expr **table;
2065      int table_size, total_size;
2066 {
2067   int i;
2068   /* Flattened out table, so it's printed in proper order.  */
2069   struct expr **flat_table;
2070   unsigned int *hash_val;
2071
2072   flat_table
2073     = (struct expr **) xcalloc (total_size, sizeof (struct expr *));
2074   hash_val = (unsigned int *) xmalloc (total_size * sizeof (unsigned int));
2075
2076   for (i = 0; i < table_size; i++)
2077     {
2078       struct expr *expr;
2079
2080       for (expr = table[i]; expr != NULL; expr = expr->next_same_hash)
2081         {
2082           flat_table[expr->bitmap_index] = expr;
2083           hash_val[expr->bitmap_index] = i;
2084         }
2085     }
2086
2087   fprintf (file, "%s hash table (%d buckets, %d entries)\n",
2088            name, table_size, total_size);
2089
2090   for (i = 0; i < total_size; i++)
2091     {
2092       struct expr *expr = flat_table[i];
2093
2094       fprintf (file, "Index %d (hash value %d)\n  ",
2095                expr->bitmap_index, hash_val[i]);
2096       print_rtl (file, expr->expr);
2097       fprintf (file, "\n");
2098     }
2099
2100   fprintf (file, "\n");
2101
2102   /* Clean up.  */
2103   free (flat_table);
2104   free (hash_val);
2105 }
2106
2107 /* Record register first/last/block set information for REGNO in INSN.
2108    reg_first_set records the first place in the block where the register
2109    is set and is used to compute "anticipatability".
2110    reg_last_set records the last place in the block where the register
2111    is set and is used to compute "availability".
2112    reg_set_in_block records whether the register is set in the block
2113    and is used to compute "transparency".  */
2114
2115 static void
2116 record_last_reg_set_info (insn, regno)
2117      rtx insn;
2118      int regno;
2119 {
2120   if (reg_first_set[regno] == NEVER_SET)
2121     reg_first_set[regno] = INSN_CUID (insn);
2122   reg_last_set[regno] = INSN_CUID (insn);
2123   SET_BIT (reg_set_in_block[BLOCK_NUM (insn)], regno);
2124 }
2125
2126 /* Record memory first/last/block set information for INSN.  */
2127
2128 static void
2129 record_last_mem_set_info (insn)
2130      rtx insn;
2131 {
2132   if (mem_first_set == NEVER_SET)
2133     mem_first_set = INSN_CUID (insn);
2134   mem_last_set = INSN_CUID (insn);
2135   mem_set_in_block[BLOCK_NUM (insn)] = 1;
2136 }
2137
2138 /* Called from compute_hash_table via note_stores to handle one
2139    SET or CLOBBER in an insn.  DATA is really the instruction in which
2140    the SET is taking place.  */
2141
2142 static void
2143 record_last_set_info (dest, setter, data)
2144      rtx dest, setter ATTRIBUTE_UNUSED;
2145      void *data;
2146 {
2147   rtx last_set_insn = (rtx) data;
2148
2149   if (GET_CODE (dest) == SUBREG)
2150     dest = SUBREG_REG (dest);
2151
2152   if (GET_CODE (dest) == REG)
2153     record_last_reg_set_info (last_set_insn, REGNO (dest));
2154   else if (GET_CODE (dest) == MEM
2155            /* Ignore pushes, they clobber nothing.  */
2156            && ! push_operand (dest, GET_MODE (dest)))
2157     record_last_mem_set_info (last_set_insn);
2158 }
2159
2160 /* Top level function to create an expression or assignment hash table.
2161
2162    Expression entries are placed in the hash table if
2163    - they are of the form (set (pseudo-reg) src),
2164    - src is something we want to perform GCSE on,
2165    - none of the operands are subsequently modified in the block
2166
2167    Assignment entries are placed in the hash table if
2168    - they are of the form (set (pseudo-reg) src),
2169    - src is something we want to perform const/copy propagation on,
2170    - none of the operands or target are subsequently modified in the block
2171    Currently src must be a pseudo-reg or a const_int.
2172
2173    F is the first insn.
2174    SET_P is non-zero for computing the assignment hash table.  */
2175
2176 static void
2177 compute_hash_table (set_p)
2178      int set_p;
2179 {
2180   int bb;
2181
2182   /* While we compute the hash table we also compute a bit array of which
2183      registers are set in which blocks.
2184      We also compute which blocks set memory, in the absence of aliasing
2185      support [which is TODO].
2186      ??? This isn't needed during const/copy propagation, but it's cheap to
2187      compute.  Later.  */
2188   sbitmap_vector_zero (reg_set_in_block, n_basic_blocks);
2189   bzero ((char *) mem_set_in_block, n_basic_blocks);
2190
2191   /* Some working arrays used to track first and last set in each block.  */
2192   /* ??? One could use alloca here, but at some size a threshold is crossed
2193      beyond which one should use malloc.  Are we at that threshold here?  */
2194   reg_first_set = (int *) gmalloc (max_gcse_regno * sizeof (int));
2195   reg_last_set = (int *) gmalloc (max_gcse_regno * sizeof (int));
2196
2197   for (bb = 0; bb < n_basic_blocks; bb++)
2198     {
2199       rtx insn;
2200       int regno;
2201       int in_libcall_block;
2202       int i;
2203
2204       /* First pass over the instructions records information used to
2205          determine when registers and memory are first and last set.
2206          ??? The mem_set_in_block and hard-reg reg_set_in_block computation
2207          could be moved to compute_sets since they currently don't change.  */
2208
2209       for (i = 0; i < max_gcse_regno; i++)
2210         reg_first_set[i] = reg_last_set[i] = NEVER_SET;
2211       mem_first_set = NEVER_SET;
2212       mem_last_set = NEVER_SET;
2213
2214       for (insn = BLOCK_HEAD (bb);
2215            insn && insn != NEXT_INSN (BLOCK_END (bb));
2216            insn = NEXT_INSN (insn))
2217         {
2218 #ifdef NON_SAVING_SETJMP
2219           if (NON_SAVING_SETJMP && GET_CODE (insn) == NOTE
2220               && NOTE_LINE_NUMBER (insn) == NOTE_INSN_SETJMP)
2221             {
2222               for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2223                 record_last_reg_set_info (insn, regno);
2224               continue;
2225             }
2226 #endif
2227
2228           if (GET_RTX_CLASS (GET_CODE (insn)) != 'i')
2229             continue;
2230
2231           if (GET_CODE (insn) == CALL_INSN)
2232             {
2233               for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2234                 if ((call_used_regs[regno]
2235                      && regno != STACK_POINTER_REGNUM
2236 #if HARD_FRAME_POINTER_REGNUM != FRAME_POINTER_REGNUM
2237                      && regno != HARD_FRAME_POINTER_REGNUM
2238 #endif
2239 #if ARG_POINTER_REGNUM != FRAME_POINTER_REGNUM
2240                      && ! (regno == ARG_POINTER_REGNUM && fixed_regs[regno])
2241 #endif
2242 #if defined (PIC_OFFSET_TABLE_REGNUM) && !defined (PIC_OFFSET_TABLE_REG_CALL_CLOBBERED)
2243                      && ! (regno == PIC_OFFSET_TABLE_REGNUM && flag_pic)
2244 #endif
2245
2246                      && regno != FRAME_POINTER_REGNUM)
2247                     || global_regs[regno])
2248                   record_last_reg_set_info (insn, regno);
2249               if (! CONST_CALL_P (insn))
2250                 record_last_mem_set_info (insn);
2251             }
2252
2253           note_stores (PATTERN (insn), record_last_set_info, insn);
2254         }
2255
2256       /* The next pass builds the hash table.  */
2257
2258       for (insn = BLOCK_HEAD (bb), in_libcall_block = 0;
2259            insn && insn != NEXT_INSN (BLOCK_END (bb));
2260            insn = NEXT_INSN (insn))
2261         {
2262           if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
2263             {
2264               if (find_reg_note (insn, REG_LIBCALL, NULL_RTX))
2265                 in_libcall_block = 1;
2266               else if (find_reg_note (insn, REG_RETVAL, NULL_RTX))
2267                 in_libcall_block = 0;
2268               hash_scan_insn (insn, set_p, in_libcall_block);
2269             }
2270         }
2271     }
2272
2273   free (reg_first_set);
2274   free (reg_last_set);
2275   /* Catch bugs early.  */
2276   reg_first_set = reg_last_set = 0;
2277 }
2278
2279 /* Allocate space for the set hash table.
2280    N_INSNS is the number of instructions in the function.
2281    It is used to determine the number of buckets to use.  */
2282
2283 static void
2284 alloc_set_hash_table (n_insns)
2285      int n_insns;
2286 {
2287   int n;
2288
2289   set_hash_table_size = n_insns / 4;
2290   if (set_hash_table_size < 11)
2291     set_hash_table_size = 11;
2292   /* Attempt to maintain efficient use of hash table.
2293      Making it an odd number is simplest for now.
2294      ??? Later take some measurements.  */
2295   set_hash_table_size |= 1;
2296   n = set_hash_table_size * sizeof (struct expr *);
2297   set_hash_table = (struct expr **) gmalloc (n);
2298 }
2299
2300 /* Free things allocated by alloc_set_hash_table.  */
2301
2302 static void
2303 free_set_hash_table ()
2304 {
2305   free (set_hash_table);
2306 }
2307
2308 /* Compute the hash table for doing copy/const propagation.  */
2309
2310 static void
2311 compute_set_hash_table ()
2312 {
2313   /* Initialize count of number of entries in hash table.  */
2314   n_sets = 0;
2315   bzero ((char *) set_hash_table, set_hash_table_size * sizeof (struct expr *));
2316
2317   compute_hash_table (1);
2318 }
2319
2320 /* Allocate space for the expression hash table.
2321    N_INSNS is the number of instructions in the function.
2322    It is used to determine the number of buckets to use.  */
2323
2324 static void
2325 alloc_expr_hash_table (n_insns)
2326      int n_insns;
2327 {
2328   int n;
2329
2330   expr_hash_table_size = n_insns / 2;
2331   /* Make sure the amount is usable.  */
2332   if (expr_hash_table_size < 11)
2333     expr_hash_table_size = 11;
2334   /* Attempt to maintain efficient use of hash table.
2335      Making it an odd number is simplest for now.
2336      ??? Later take some measurements.  */
2337   expr_hash_table_size |= 1;
2338   n = expr_hash_table_size * sizeof (struct expr *);
2339   expr_hash_table = (struct expr **) gmalloc (n);
2340 }
2341
2342 /* Free things allocated by alloc_expr_hash_table.  */
2343
2344 static void
2345 free_expr_hash_table ()
2346 {
2347   free (expr_hash_table);
2348 }
2349
2350 /* Compute the hash table for doing GCSE.  */
2351
2352 static void
2353 compute_expr_hash_table ()
2354 {
2355   /* Initialize count of number of entries in hash table.  */
2356   n_exprs = 0;
2357   bzero ((char *) expr_hash_table, expr_hash_table_size * sizeof (struct expr *));
2358
2359   compute_hash_table (0);
2360 }
2361 \f
2362 /* Expression tracking support.  */
2363
2364 /* Lookup pattern PAT in the expression table.
2365    The result is a pointer to the table entry, or NULL if not found.  */
2366
2367 static struct expr *
2368 lookup_expr (pat)
2369      rtx pat;
2370 {
2371   int do_not_record_p;
2372   unsigned int hash = hash_expr (pat, GET_MODE (pat), &do_not_record_p,
2373                                  expr_hash_table_size);
2374   struct expr *expr;
2375
2376   if (do_not_record_p)
2377     return NULL;
2378
2379   expr = expr_hash_table[hash];
2380
2381   while (expr && ! expr_equiv_p (expr->expr, pat))
2382     expr = expr->next_same_hash;
2383
2384   return expr;
2385 }
2386
2387 /* Lookup REGNO in the set table.
2388    If PAT is non-NULL look for the entry that matches it, otherwise return
2389    the first entry for REGNO.
2390    The result is a pointer to the table entry, or NULL if not found.  */
2391
2392 static struct expr *
2393 lookup_set (regno, pat)
2394      int regno;
2395      rtx pat;
2396 {
2397   unsigned int hash = hash_set (regno, set_hash_table_size);
2398   struct expr *expr;
2399
2400   expr = set_hash_table[hash];
2401
2402   if (pat)
2403     {
2404       while (expr && ! expr_equiv_p (expr->expr, pat))
2405         expr = expr->next_same_hash;
2406     }
2407   else
2408     {
2409       while (expr && REGNO (SET_DEST (expr->expr)) != regno)
2410         expr = expr->next_same_hash;
2411     }
2412
2413   return expr;
2414 }
2415
2416 /* Return the next entry for REGNO in list EXPR.  */
2417
2418 static struct expr *
2419 next_set (regno, expr)
2420      int regno;
2421      struct expr *expr;
2422 {
2423   do
2424     expr = expr->next_same_hash;
2425   while (expr && REGNO (SET_DEST (expr->expr)) != regno);
2426   return expr;
2427 }
2428
2429 /* Reset tables used to keep track of what's still available [since the
2430    start of the block].  */
2431
2432 static void
2433 reset_opr_set_tables ()
2434 {
2435   /* Maintain a bitmap of which regs have been set since beginning of
2436      the block.  */
2437   sbitmap_zero (reg_set_bitmap);
2438   /* Also keep a record of the last instruction to modify memory.
2439      For now this is very trivial, we only record whether any memory
2440      location has been modified.  */
2441   mem_last_set = 0;
2442 }
2443
2444 /* Return non-zero if the operands of X are not set before INSN in
2445    INSN's basic block.  */
2446
2447 static int
2448 oprs_not_set_p (x, insn)
2449      rtx x, insn;
2450 {
2451   int i;
2452   enum rtx_code code;
2453   const char *fmt;
2454
2455   /* repeat is used to turn tail-recursion into iteration.  */
2456 repeat:
2457
2458   if (x == 0)
2459     return 1;
2460
2461   code = GET_CODE (x);
2462   switch (code)
2463     {
2464     case PC:
2465     case CC0:
2466     case CONST:
2467     case CONST_INT:
2468     case CONST_DOUBLE:
2469     case SYMBOL_REF:
2470     case LABEL_REF:
2471     case ADDR_VEC:
2472     case ADDR_DIFF_VEC:
2473       return 1;
2474
2475     case MEM:
2476       if (mem_last_set != 0)
2477         return 0;
2478       x = XEXP (x, 0);
2479       goto repeat;
2480
2481     case REG:
2482       return ! TEST_BIT (reg_set_bitmap, REGNO (x));
2483
2484     default:
2485       break;
2486     }
2487
2488   fmt = GET_RTX_FORMAT (code);
2489   for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
2490     {
2491       if (fmt[i] == 'e')
2492         {
2493           int not_set_p;
2494           /* If we are about to do the last recursive call
2495              needed at this level, change it into iteration.
2496              This function is called enough to be worth it.  */
2497           if (i == 0)
2498             {
2499               x = XEXP (x, 0);
2500               goto repeat;
2501             }
2502           not_set_p = oprs_not_set_p (XEXP (x, i), insn);
2503           if (! not_set_p)
2504             return 0;
2505         }
2506       else if (fmt[i] == 'E')
2507         {
2508           int j;
2509           for (j = 0; j < XVECLEN (x, i); j++)
2510             {
2511               int not_set_p = oprs_not_set_p (XVECEXP (x, i, j), insn);
2512               if (! not_set_p)
2513                 return 0;
2514             }
2515         }
2516     }
2517
2518   return 1;
2519 }
2520
2521 /* Mark things set by a CALL.  */
2522
2523 static void
2524 mark_call (insn)
2525      rtx insn;
2526 {
2527   mem_last_set = INSN_CUID (insn);
2528 }
2529
2530 /* Mark things set by a SET.  */
2531
2532 static void
2533 mark_set (pat, insn)
2534      rtx pat, insn;
2535 {
2536   rtx dest = SET_DEST (pat);
2537
2538   while (GET_CODE (dest) == SUBREG
2539          || GET_CODE (dest) == ZERO_EXTRACT
2540          || GET_CODE (dest) == SIGN_EXTRACT
2541          || GET_CODE (dest) == STRICT_LOW_PART)
2542     dest = XEXP (dest, 0);
2543
2544   if (GET_CODE (dest) == REG)
2545     SET_BIT (reg_set_bitmap, REGNO (dest));
2546   else if (GET_CODE (dest) == MEM)
2547     mem_last_set = INSN_CUID (insn);
2548
2549   if (GET_CODE (SET_SRC (pat)) == CALL)
2550     mark_call (insn);
2551 }
2552
2553 /* Record things set by a CLOBBER.  */
2554
2555 static void
2556 mark_clobber (pat, insn)
2557      rtx pat, insn;
2558 {
2559   rtx clob = XEXP (pat, 0);
2560
2561   while (GET_CODE (clob) == SUBREG || GET_CODE (clob) == STRICT_LOW_PART)
2562     clob = XEXP (clob, 0);
2563
2564   if (GET_CODE (clob) == REG)
2565     SET_BIT (reg_set_bitmap, REGNO (clob));
2566   else
2567     mem_last_set = INSN_CUID (insn);
2568 }
2569
2570 /* Record things set by INSN.
2571    This data is used by oprs_not_set_p.  */
2572
2573 static void
2574 mark_oprs_set (insn)
2575      rtx insn;
2576 {
2577   rtx pat = PATTERN (insn);
2578
2579   if (GET_CODE (pat) == SET)
2580     mark_set (pat, insn);
2581   else if (GET_CODE (pat) == PARALLEL)
2582     {
2583       int i;
2584
2585       for (i = 0; i < XVECLEN (pat, 0); i++)
2586         {
2587           rtx x = XVECEXP (pat, 0, i);
2588
2589           if (GET_CODE (x) == SET)
2590             mark_set (x, insn);
2591           else if (GET_CODE (x) == CLOBBER)
2592             mark_clobber (x, insn);
2593           else if (GET_CODE (x) == CALL)
2594             mark_call (insn);
2595         }
2596     }
2597   else if (GET_CODE (pat) == CLOBBER)
2598     mark_clobber (pat, insn);
2599   else if (GET_CODE (pat) == CALL)
2600     mark_call (insn);
2601 }
2602
2603 \f
2604 /* Classic GCSE reaching definition support.  */
2605
2606 /* Allocate reaching def variables.  */
2607
2608 static void
2609 alloc_rd_mem (n_blocks, n_insns)
2610      int n_blocks, n_insns;
2611 {
2612   rd_kill = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2613   sbitmap_vector_zero (rd_kill, n_basic_blocks);
2614
2615   rd_gen = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2616   sbitmap_vector_zero (rd_gen, n_basic_blocks);
2617
2618   reaching_defs = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2619   sbitmap_vector_zero (reaching_defs, n_basic_blocks);
2620
2621   rd_out = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2622   sbitmap_vector_zero (rd_out, n_basic_blocks);
2623 }
2624
2625 /* Free reaching def variables.  */
2626
2627 static void
2628 free_rd_mem ()
2629 {
2630   free (rd_kill);
2631   free (rd_gen);
2632   free (reaching_defs);
2633   free (rd_out);
2634 }
2635
2636 /* Add INSN to the kills of BB.
2637    REGNO, set in BB, is killed by INSN.  */
2638
2639 static void
2640 handle_rd_kill_set (insn, regno, bb)
2641      rtx insn;
2642      int regno, bb;
2643 {
2644   struct reg_set *this_reg = reg_set_table[regno];
2645
2646   while (this_reg)
2647     {
2648       if (BLOCK_NUM (this_reg->insn) != BLOCK_NUM (insn))
2649         SET_BIT (rd_kill[bb], INSN_CUID (this_reg->insn));
2650       this_reg = this_reg->next;
2651     }
2652 }
2653
2654 /* Compute the set of kill's for reaching definitions.  */
2655
2656 static void
2657 compute_kill_rd ()
2658 {
2659   int bb,cuid;
2660
2661   /* For each block
2662        For each set bit in `gen' of the block (i.e each insn which
2663            generates a definition in the block)
2664          Call the reg set by the insn corresponding to that bit regx
2665          Look at the linked list starting at reg_set_table[regx]
2666          For each setting of regx in the linked list, which is not in
2667              this block
2668            Set the bit in `kill' corresponding to that insn
2669     */
2670
2671   for (bb = 0; bb < n_basic_blocks; bb++)
2672     {
2673       for (cuid = 0; cuid < max_cuid; cuid++)
2674         {
2675           if (TEST_BIT (rd_gen[bb], cuid))
2676             {
2677               rtx insn = CUID_INSN (cuid);
2678               rtx pat = PATTERN (insn);
2679
2680               if (GET_CODE (insn) == CALL_INSN)
2681                 {
2682                   int regno;
2683
2684                   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2685                     {
2686                       if ((call_used_regs[regno]
2687                            && regno != STACK_POINTER_REGNUM
2688 #if HARD_FRAME_POINTER_REGNUM != FRAME_POINTER_REGNUM
2689                            && regno != HARD_FRAME_POINTER_REGNUM
2690 #endif
2691 #if ARG_POINTER_REGNUM != FRAME_POINTER_REGNUM
2692                            && ! (regno == ARG_POINTER_REGNUM
2693                                  && fixed_regs[regno])
2694 #endif
2695 #if defined (PIC_OFFSET_TABLE_REGNUM) && !defined (PIC_OFFSET_TABLE_REG_CALL_CLOBBERED)
2696                            && ! (regno == PIC_OFFSET_TABLE_REGNUM && flag_pic)
2697 #endif
2698                            && regno != FRAME_POINTER_REGNUM)
2699                           || global_regs[regno])
2700                         handle_rd_kill_set (insn, regno, bb);
2701                     }
2702                 }
2703
2704               if (GET_CODE (pat) == PARALLEL)
2705                 {
2706                   int i;
2707
2708                   /* We work backwards because ... */
2709                   for (i = XVECLEN (pat, 0) - 1; i >= 0; i--)
2710                     {
2711                       enum rtx_code code = GET_CODE (XVECEXP (pat, 0, i));
2712                       if ((code == SET || code == CLOBBER)
2713                           && GET_CODE (XEXP (XVECEXP (pat, 0, i), 0)) == REG)
2714                         handle_rd_kill_set (insn,
2715                                             REGNO (XEXP (XVECEXP (pat, 0, i), 0)),
2716                                             bb);
2717                     }
2718                 }
2719               else if (GET_CODE (pat) == SET)
2720                 {
2721                   if (GET_CODE (SET_DEST (pat)) == REG)
2722                     {
2723                       /* Each setting of this register outside of this block
2724                          must be marked in the set of kills in this block.  */
2725                       handle_rd_kill_set (insn, REGNO (SET_DEST (pat)), bb);
2726                     }
2727                 }
2728               /* FIXME: CLOBBER? */
2729             }
2730         }
2731     }
2732 }
2733
2734 /* Compute the reaching definitions as in
2735    Compilers Principles, Techniques, and Tools. Aho, Sethi, Ullman,
2736    Chapter 10.  It is the same algorithm as used for computing available
2737    expressions but applied to the gens and kills of reaching definitions.  */
2738
2739 static void
2740 compute_rd ()
2741 {
2742   int bb, changed, passes;
2743
2744   for (bb = 0; bb < n_basic_blocks; bb++)
2745     sbitmap_copy (rd_out[bb] /*dst*/, rd_gen[bb] /*src*/);
2746
2747   passes = 0;
2748   changed = 1;
2749   while (changed)
2750     {
2751       changed = 0;
2752       for (bb = 0; bb < n_basic_blocks; bb++)
2753         {
2754           sbitmap_union_of_preds (reaching_defs[bb], rd_out, bb);
2755           changed |= sbitmap_union_of_diff (rd_out[bb], rd_gen[bb],
2756                                             reaching_defs[bb], rd_kill[bb]);
2757         }
2758       passes++;
2759     }
2760
2761   if (gcse_file)
2762     fprintf (gcse_file, "reaching def computation: %d passes\n", passes);
2763 }
2764 \f
2765 /* Classic GCSE available expression support.  */
2766
2767 /* Allocate memory for available expression computation.  */
2768
2769 static void
2770 alloc_avail_expr_mem (n_blocks, n_exprs)
2771      int n_blocks, n_exprs;
2772 {
2773   ae_kill = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2774   sbitmap_vector_zero (ae_kill, n_basic_blocks);
2775
2776   ae_gen = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2777   sbitmap_vector_zero (ae_gen, n_basic_blocks);
2778
2779   ae_in = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2780   sbitmap_vector_zero (ae_in, n_basic_blocks);
2781
2782   ae_out = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2783   sbitmap_vector_zero (ae_out, n_basic_blocks);
2784
2785   u_bitmap = (sbitmap) sbitmap_alloc (n_exprs);
2786   sbitmap_ones (u_bitmap);
2787 }
2788
2789 static void
2790 free_avail_expr_mem ()
2791 {
2792   free (ae_kill);
2793   free (ae_gen);
2794   free (ae_in);
2795   free (ae_out);
2796   free (u_bitmap);
2797 }
2798
2799 /* Compute the set of available expressions generated in each basic block.  */
2800
2801 static void
2802 compute_ae_gen ()
2803 {
2804   int i;
2805
2806   /* For each recorded occurrence of each expression, set ae_gen[bb][expr].
2807      This is all we have to do because an expression is not recorded if it
2808      is not available, and the only expressions we want to work with are the
2809      ones that are recorded.  */
2810
2811   for (i = 0; i < expr_hash_table_size; i++)
2812     {
2813       struct expr *expr = expr_hash_table[i];
2814       while (expr != NULL)
2815         {
2816           struct occr *occr = expr->avail_occr;
2817           while (occr != NULL)
2818             {
2819               SET_BIT (ae_gen[BLOCK_NUM (occr->insn)], expr->bitmap_index);
2820               occr = occr->next;
2821             }
2822           expr = expr->next_same_hash;
2823         }
2824     }
2825 }
2826
2827 /* Return non-zero if expression X is killed in BB.  */
2828
2829 static int
2830 expr_killed_p (x, bb)
2831      rtx x;
2832      int bb;
2833 {
2834   int i;
2835   enum rtx_code code;
2836   const char *fmt;
2837
2838   /* repeat is used to turn tail-recursion into iteration.  */
2839  repeat:
2840
2841   if (x == 0)
2842     return 1;
2843
2844   code = GET_CODE (x);
2845   switch (code)
2846     {
2847     case REG:
2848       return TEST_BIT (reg_set_in_block[bb], REGNO (x));
2849
2850     case MEM:
2851       if (mem_set_in_block[bb])
2852         return 1;
2853       x = XEXP (x, 0);
2854       goto repeat;
2855
2856     case PC:
2857     case CC0: /*FIXME*/
2858     case CONST:
2859     case CONST_INT:
2860     case CONST_DOUBLE:
2861     case SYMBOL_REF:
2862     case LABEL_REF:
2863     case ADDR_VEC:
2864     case ADDR_DIFF_VEC:
2865       return 0;
2866
2867     default:
2868       break;
2869     }
2870
2871   i = GET_RTX_LENGTH (code) - 1;
2872   fmt = GET_RTX_FORMAT (code);
2873   for (; i >= 0; i--)
2874     {
2875       if (fmt[i] == 'e')
2876         {
2877           rtx tem = XEXP (x, i);
2878
2879           /* If we are about to do the last recursive call
2880              needed at this level, change it into iteration.
2881              This function is called enough to be worth it.  */
2882           if (i == 0)
2883             {
2884               x = tem;
2885               goto repeat;
2886             }
2887           if (expr_killed_p (tem, bb))
2888             return 1;
2889         }
2890       else if (fmt[i] == 'E')
2891         {
2892           int j;
2893           for (j = 0; j < XVECLEN (x, i); j++)
2894             {
2895               if (expr_killed_p (XVECEXP (x, i, j), bb))
2896                 return 1;
2897             }
2898         }
2899     }
2900
2901   return 0;
2902 }
2903
2904 /* Compute the set of available expressions killed in each basic block.  */
2905
2906 static void
2907 compute_ae_kill (ae_gen, ae_kill)
2908      sbitmap *ae_gen, *ae_kill;
2909 {
2910   int bb,i;
2911
2912   for (bb = 0; bb < n_basic_blocks; bb++)
2913     {
2914       for (i = 0; i < expr_hash_table_size; i++)
2915         {
2916           struct expr *expr = expr_hash_table[i];
2917
2918           for ( ; expr != NULL; expr = expr->next_same_hash)
2919             {
2920               /* Skip EXPR if generated in this block.  */
2921               if (TEST_BIT (ae_gen[bb], expr->bitmap_index))
2922                 continue;
2923
2924               if (expr_killed_p (expr->expr, bb))
2925                 SET_BIT (ae_kill[bb], expr->bitmap_index);
2926             }
2927         }
2928     }
2929 }
2930 \f
2931 /* Actually perform the Classic GCSE optimizations.  */
2932
2933 /* Return non-zero if occurrence OCCR of expression EXPR reaches block BB.
2934
2935    CHECK_SELF_LOOP is non-zero if we should consider a block reaching itself
2936    as a positive reach.  We want to do this when there are two computations
2937    of the expression in the block.
2938
2939    VISITED is a pointer to a working buffer for tracking which BB's have
2940    been visited.  It is NULL for the top-level call.
2941
2942    We treat reaching expressions that go through blocks containing the same
2943    reaching expression as "not reaching".  E.g. if EXPR is generated in blocks
2944    2 and 3, INSN is in block 4, and 2->3->4, we treat the expression in block
2945    2 as not reaching.  The intent is to improve the probability of finding
2946    only one reaching expression and to reduce register lifetimes by picking
2947    the closest such expression.  */
2948
2949 static int
2950 expr_reaches_here_p_work (occr, expr, bb, check_self_loop, visited)
2951      struct occr *occr;
2952      struct expr *expr;
2953      int bb;
2954      int check_self_loop;
2955      char *visited;
2956 {
2957   edge pred;
2958
2959   for (pred = BASIC_BLOCK(bb)->pred; pred != NULL; pred = pred->pred_next)
2960     {
2961       int pred_bb = pred->src->index;
2962
2963       if (visited[pred_bb])
2964         {
2965           /* This predecessor has already been visited.
2966              Nothing to do.  */
2967           ;
2968         }
2969       else if (pred_bb == bb)
2970         {
2971           /* BB loops on itself.  */
2972           if (check_self_loop
2973               && TEST_BIT (ae_gen[pred_bb], expr->bitmap_index)
2974               && BLOCK_NUM (occr->insn) == pred_bb)
2975             return 1;
2976           visited[pred_bb] = 1;
2977         }
2978       /* Ignore this predecessor if it kills the expression.  */
2979       else if (TEST_BIT (ae_kill[pred_bb], expr->bitmap_index))
2980         visited[pred_bb] = 1;
2981       /* Does this predecessor generate this expression?  */
2982       else if (TEST_BIT (ae_gen[pred_bb], expr->bitmap_index))
2983         {
2984           /* Is this the occurrence we're looking for?
2985              Note that there's only one generating occurrence per block
2986              so we just need to check the block number.  */
2987           if (BLOCK_NUM (occr->insn) == pred_bb)
2988             return 1;
2989           visited[pred_bb] = 1;
2990         }
2991       /* Neither gen nor kill.  */
2992       else
2993         {
2994           visited[pred_bb] = 1;
2995           if (expr_reaches_here_p_work (occr, expr, pred_bb, check_self_loop,
2996               visited))
2997             return 1;
2998         }
2999     }
3000
3001   /* All paths have been checked.  */
3002   return 0;
3003 }
3004
3005 /* This wrapper for expr_reaches_here_p_work() is to ensure that any
3006    memory allocated for that function is returned. */
3007
3008 static int
3009 expr_reaches_here_p (occr, expr, bb, check_self_loop)
3010      struct occr *occr;
3011      struct expr *expr;
3012      int bb;
3013      int check_self_loop;
3014 {
3015   int rval;
3016   char * visited = (char *) xcalloc (n_basic_blocks, 1);
3017
3018   rval = expr_reaches_here_p_work(occr, expr, bb, check_self_loop, visited);
3019
3020   free (visited);
3021
3022   return (rval);
3023 }
3024
3025 /* Return the instruction that computes EXPR that reaches INSN's basic block.
3026    If there is more than one such instruction, return NULL.
3027
3028    Called only by handle_avail_expr.  */
3029
3030 static rtx
3031 computing_insn (expr, insn)
3032      struct expr *expr;
3033      rtx insn;
3034 {
3035   int bb = BLOCK_NUM (insn);
3036
3037   if (expr->avail_occr->next == NULL)
3038     {
3039       if (BLOCK_NUM (expr->avail_occr->insn) == bb)
3040         {
3041           /* The available expression is actually itself
3042              (i.e. a loop in the flow graph) so do nothing.  */
3043           return NULL;
3044         }
3045       /* (FIXME) Case that we found a pattern that was created by
3046          a substitution that took place.  */
3047       return expr->avail_occr->insn;
3048     }
3049   else
3050     {
3051       /* Pattern is computed more than once.
3052          Search backwards from this insn to see how many of these
3053          computations actually reach this insn.  */
3054       struct occr *occr;
3055       rtx insn_computes_expr = NULL;
3056       int can_reach = 0;
3057
3058       for (occr = expr->avail_occr; occr != NULL; occr = occr->next)
3059         {
3060           if (BLOCK_NUM (occr->insn) == bb)
3061             {
3062               /* The expression is generated in this block.
3063                  The only time we care about this is when the expression
3064                  is generated later in the block [and thus there's a loop].
3065                  We let the normal cse pass handle the other cases.  */
3066               if (INSN_CUID (insn) < INSN_CUID (occr->insn))
3067                 {
3068                   if (expr_reaches_here_p (occr, expr, bb, 1))
3069                     {
3070                       can_reach++;
3071                       if (can_reach > 1)
3072                         return NULL;
3073                       insn_computes_expr = occr->insn;
3074                     }
3075                 }
3076             }
3077           else /* Computation of the pattern outside this block.  */
3078             {
3079               if (expr_reaches_here_p (occr, expr, bb, 0))
3080                 {
3081                   can_reach++;
3082                   if (can_reach > 1)
3083                     return NULL;
3084                   insn_computes_expr = occr->insn;
3085                 }
3086             }
3087         }
3088
3089       if (insn_computes_expr == NULL)
3090         abort ();
3091       return insn_computes_expr;
3092     }
3093 }
3094
3095 /* Return non-zero if the definition in DEF_INSN can reach INSN.
3096    Only called by can_disregard_other_sets.  */
3097
3098 static int
3099 def_reaches_here_p (insn, def_insn)
3100      rtx insn, def_insn;
3101 {
3102   rtx reg;
3103
3104   if (TEST_BIT (reaching_defs[BLOCK_NUM (insn)], INSN_CUID (def_insn)))
3105     return 1;
3106
3107   if (BLOCK_NUM (insn) == BLOCK_NUM (def_insn))
3108     {
3109       if (INSN_CUID (def_insn) < INSN_CUID (insn))
3110         {
3111           if (GET_CODE (PATTERN (def_insn)) == PARALLEL)
3112             return 1;
3113           if (GET_CODE (PATTERN (def_insn)) == CLOBBER)
3114             reg = XEXP (PATTERN (def_insn), 0);
3115           else if (GET_CODE (PATTERN (def_insn)) == SET)
3116             reg = SET_DEST (PATTERN (def_insn));
3117           else
3118             abort ();
3119           return ! reg_set_between_p (reg, NEXT_INSN (def_insn), insn);
3120         }
3121       else
3122         return 0;
3123     }
3124
3125   return 0;
3126 }
3127
3128 /* Return non-zero if *ADDR_THIS_REG can only have one value at INSN.
3129    The value returned is the number of definitions that reach INSN.
3130    Returning a value of zero means that [maybe] more than one definition
3131    reaches INSN and the caller can't perform whatever optimization it is
3132    trying.  i.e. it is always safe to return zero.  */
3133
3134 static int
3135 can_disregard_other_sets (addr_this_reg, insn, for_combine)
3136      struct reg_set **addr_this_reg;
3137      rtx insn;
3138      int for_combine;
3139 {
3140   int number_of_reaching_defs = 0;
3141   struct reg_set *this_reg = *addr_this_reg;
3142
3143   while (this_reg)
3144     {
3145       if (def_reaches_here_p (insn, this_reg->insn))
3146         {
3147           number_of_reaching_defs++;
3148           /* Ignore parallels for now.  */
3149           if (GET_CODE (PATTERN (this_reg->insn)) == PARALLEL)
3150             return 0;
3151           if (!for_combine
3152               && (GET_CODE (PATTERN (this_reg->insn)) == CLOBBER
3153                   || ! rtx_equal_p (SET_SRC (PATTERN (this_reg->insn)),
3154                                     SET_SRC (PATTERN (insn)))))
3155             {
3156               /* A setting of the reg to a different value reaches INSN.  */
3157               return 0;
3158             }
3159           if (number_of_reaching_defs > 1)
3160             {
3161               /* If in this setting the value the register is being
3162                  set to is equal to the previous value the register
3163                  was set to and this setting reaches the insn we are
3164                  trying to do the substitution on then we are ok.  */
3165
3166               if (GET_CODE (PATTERN (this_reg->insn)) == CLOBBER)
3167                 return 0;
3168               if (! rtx_equal_p (SET_SRC (PATTERN (this_reg->insn)),
3169                                  SET_SRC (PATTERN (insn))))
3170                 return 0;
3171             }
3172           *addr_this_reg = this_reg;
3173         }
3174
3175       /* prev_this_reg = this_reg; */
3176       this_reg = this_reg->next;
3177     }
3178
3179   return number_of_reaching_defs;
3180 }
3181
3182 /* Expression computed by insn is available and the substitution is legal,
3183    so try to perform the substitution.
3184
3185    The result is non-zero if any changes were made.  */
3186
3187 static int
3188 handle_avail_expr (insn, expr)
3189      rtx insn;
3190      struct expr *expr;
3191 {
3192   rtx pat, insn_computes_expr;
3193   rtx to;
3194   struct reg_set *this_reg;
3195   int found_setting, use_src;
3196   int changed = 0;
3197
3198   /* We only handle the case where one computation of the expression
3199      reaches this instruction.  */
3200   insn_computes_expr = computing_insn (expr, insn);
3201   if (insn_computes_expr == NULL)
3202     return 0;
3203
3204   found_setting = 0;
3205   use_src = 0;
3206
3207   /* At this point we know only one computation of EXPR outside of this
3208      block reaches this insn.  Now try to find a register that the
3209      expression is computed into.  */
3210
3211   if (GET_CODE (SET_SRC (PATTERN (insn_computes_expr))) == REG)
3212     {
3213       /* This is the case when the available expression that reaches
3214          here has already been handled as an available expression.  */
3215       int regnum_for_replacing = REGNO (SET_SRC (PATTERN (insn_computes_expr)));
3216       /* If the register was created by GCSE we can't use `reg_set_table',
3217          however we know it's set only once.  */
3218       if (regnum_for_replacing >= max_gcse_regno
3219           /* If the register the expression is computed into is set only once,
3220              or only one set reaches this insn, we can use it.  */
3221           || (((this_reg = reg_set_table[regnum_for_replacing]),
3222                this_reg->next == NULL)
3223               || can_disregard_other_sets (&this_reg, insn, 0)))
3224        {
3225          use_src = 1;
3226          found_setting = 1;
3227        }
3228     }
3229
3230   if (!found_setting)
3231     {
3232       int regnum_for_replacing = REGNO (SET_DEST (PATTERN (insn_computes_expr)));
3233       /* This shouldn't happen.  */
3234       if (regnum_for_replacing >= max_gcse_regno)
3235         abort ();
3236       this_reg = reg_set_table[regnum_for_replacing];
3237       /* If the register the expression is computed into is set only once,
3238          or only one set reaches this insn, use it.  */
3239       if (this_reg->next == NULL
3240           || can_disregard_other_sets (&this_reg, insn, 0))
3241         found_setting = 1;
3242     }
3243
3244   if (found_setting)
3245     {
3246       pat = PATTERN (insn);
3247       if (use_src)
3248         to = SET_SRC (PATTERN (insn_computes_expr));
3249       else
3250         to = SET_DEST (PATTERN (insn_computes_expr));
3251       changed = validate_change (insn, &SET_SRC (pat), to, 0);
3252
3253       /* We should be able to ignore the return code from validate_change but
3254          to play it safe we check.  */
3255       if (changed)
3256         {
3257           gcse_subst_count++;
3258           if (gcse_file != NULL)
3259             {
3260               fprintf (gcse_file, "GCSE: Replacing the source in insn %d with reg %d %s insn %d\n",
3261                        INSN_UID (insn), REGNO (to),
3262                        use_src ? "from" : "set in",
3263                        INSN_UID (insn_computes_expr));
3264             }
3265
3266         }
3267     }
3268   /* The register that the expr is computed into is set more than once.  */
3269   else if (1 /*expensive_op(this_pattrn->op) && do_expensive_gcse)*/)
3270     {
3271       /* Insert an insn after insnx that copies the reg set in insnx
3272          into a new pseudo register call this new register REGN.
3273          From insnb until end of basic block or until REGB is set
3274          replace all uses of REGB with REGN.  */
3275       rtx new_insn;
3276
3277       to = gen_reg_rtx (GET_MODE (SET_DEST (PATTERN (insn_computes_expr))));
3278
3279       /* Generate the new insn.  */
3280       /* ??? If the change fails, we return 0, even though we created
3281          an insn.  I think this is ok.  */
3282       new_insn
3283         = emit_insn_after (gen_rtx_SET (VOIDmode, to,
3284                                         SET_DEST (PATTERN (insn_computes_expr))),
3285                                   insn_computes_expr);
3286       /* Keep block number table up to date.  */
3287       set_block_num (new_insn, BLOCK_NUM (insn_computes_expr));
3288       /* Keep register set table up to date.  */
3289       record_one_set (REGNO (to), new_insn);
3290
3291       gcse_create_count++;
3292       if (gcse_file != NULL)
3293         {
3294           fprintf (gcse_file, "GCSE: Creating insn %d to copy value of reg %d, computed in insn %d,\n",
3295                    INSN_UID (NEXT_INSN (insn_computes_expr)),
3296                    REGNO (SET_SRC (PATTERN (NEXT_INSN (insn_computes_expr)))),
3297                    INSN_UID (insn_computes_expr));
3298           fprintf (gcse_file, "      into newly allocated reg %d\n", REGNO (to));
3299         }
3300
3301       pat = PATTERN (insn);
3302
3303       /* Do register replacement for INSN.  */
3304       changed = validate_change (insn, &SET_SRC (pat),
3305                                  SET_DEST (PATTERN (NEXT_INSN (insn_computes_expr))),
3306                                  0);
3307
3308       /* We should be able to ignore the return code from validate_change but
3309          to play it safe we check.  */
3310       if (changed)
3311         {
3312           gcse_subst_count++;
3313           if (gcse_file != NULL)
3314             {
3315               fprintf (gcse_file, "GCSE: Replacing the source in insn %d with reg %d set in insn %d\n",
3316                        INSN_UID (insn),
3317                        REGNO (SET_DEST (PATTERN (NEXT_INSN (insn_computes_expr)))),
3318                        INSN_UID (insn_computes_expr));
3319             }
3320
3321         }
3322     }
3323
3324   return changed;
3325 }
3326
3327 /* Perform classic GCSE.
3328    This is called by one_classic_gcse_pass after all the dataflow analysis
3329    has been done.
3330
3331    The result is non-zero if a change was made.  */
3332
3333 static int
3334 classic_gcse ()
3335 {
3336   int bb, changed;
3337   rtx insn;
3338
3339   /* Note we start at block 1.  */
3340
3341   changed = 0;
3342   for (bb = 1; bb < n_basic_blocks; bb++)
3343     {
3344       /* Reset tables used to keep track of what's still valid [since the
3345          start of the block].  */
3346       reset_opr_set_tables ();
3347
3348       for (insn = BLOCK_HEAD (bb);
3349            insn != NULL && insn != NEXT_INSN (BLOCK_END (bb));
3350            insn = NEXT_INSN (insn))
3351         {
3352           /* Is insn of form (set (pseudo-reg) ...)?  */
3353
3354           if (GET_CODE (insn) == INSN
3355               && GET_CODE (PATTERN (insn)) == SET
3356               && GET_CODE (SET_DEST (PATTERN (insn))) == REG
3357               && REGNO (SET_DEST (PATTERN (insn))) >= FIRST_PSEUDO_REGISTER)
3358             {
3359               rtx pat = PATTERN (insn);
3360               rtx src = SET_SRC (pat);
3361               struct expr *expr;
3362
3363               if (want_to_gcse_p (src)
3364                   /* Is the expression recorded?  */
3365                   && ((expr = lookup_expr (src)) != NULL)
3366                   /* Is the expression available [at the start of the
3367                      block]?  */
3368                   && TEST_BIT (ae_in[bb], expr->bitmap_index)
3369                   /* Are the operands unchanged since the start of the
3370                      block?  */
3371                   && oprs_not_set_p (src, insn))
3372                 changed |= handle_avail_expr (insn, expr);
3373             }
3374
3375           /* Keep track of everything modified by this insn.  */
3376           /* ??? Need to be careful w.r.t. mods done to INSN.  */
3377           if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
3378             mark_oprs_set (insn);
3379         }
3380     }
3381
3382   return changed;
3383 }
3384
3385 /* Top level routine to perform one classic GCSE pass.
3386
3387    Return non-zero if a change was made.  */
3388
3389 static int
3390 one_classic_gcse_pass (pass)
3391      int pass;
3392 {
3393   int changed = 0;
3394
3395   gcse_subst_count = 0;
3396   gcse_create_count = 0;
3397
3398   alloc_expr_hash_table (max_cuid);
3399   alloc_rd_mem (n_basic_blocks, max_cuid);
3400   compute_expr_hash_table ();
3401   if (gcse_file)
3402     dump_hash_table (gcse_file, "Expression", expr_hash_table,
3403                      expr_hash_table_size, n_exprs);
3404   if (n_exprs > 0)
3405     {
3406       compute_kill_rd ();
3407       compute_rd ();
3408       alloc_avail_expr_mem (n_basic_blocks, n_exprs);
3409       compute_ae_gen ();
3410       compute_ae_kill (ae_gen, ae_kill);
3411       compute_available (ae_gen, ae_kill, ae_out, ae_in);
3412       changed = classic_gcse ();
3413       free_avail_expr_mem ();
3414     }
3415   free_rd_mem ();
3416   free_expr_hash_table ();
3417
3418   if (gcse_file)
3419     {
3420       fprintf (gcse_file, "\n");
3421       fprintf (gcse_file, "GCSE of %s, pass %d: %d bytes needed, %d substs, %d insns created\n",
3422                current_function_name, pass,
3423                bytes_used, gcse_subst_count, gcse_create_count);
3424     }
3425
3426   return changed;
3427 }
3428 \f
3429 /* Compute copy/constant propagation working variables.  */
3430
3431 /* Local properties of assignments.  */
3432
3433 static sbitmap *cprop_pavloc;
3434 static sbitmap *cprop_absaltered;
3435
3436 /* Global properties of assignments (computed from the local properties).  */
3437
3438 static sbitmap *cprop_avin;
3439 static sbitmap *cprop_avout;
3440
3441 /* Allocate vars used for copy/const propagation.
3442    N_BLOCKS is the number of basic blocks.
3443    N_SETS is the number of sets.  */
3444
3445 static void
3446 alloc_cprop_mem (n_blocks, n_sets)
3447      int n_blocks, n_sets;
3448 {
3449   cprop_pavloc = sbitmap_vector_alloc (n_blocks, n_sets);
3450   cprop_absaltered = sbitmap_vector_alloc (n_blocks, n_sets);
3451
3452   cprop_avin = sbitmap_vector_alloc (n_blocks, n_sets);
3453   cprop_avout = sbitmap_vector_alloc (n_blocks, n_sets);
3454 }
3455
3456 /* Free vars used by copy/const propagation.  */
3457
3458 static void
3459 free_cprop_mem ()
3460 {
3461   free (cprop_pavloc);
3462   free (cprop_absaltered);
3463   free (cprop_avin);
3464   free (cprop_avout);
3465 }
3466
3467 /* For each block, compute whether X is transparent.
3468    X is either an expression or an assignment [though we don't care which,
3469    for this context an assignment is treated as an expression].
3470    For each block where an element of X is modified, set (SET_P == 1) or reset
3471    (SET_P == 0) the INDX bit in BMAP.  */
3472
3473 static void
3474 compute_transp (x, indx, bmap, set_p)
3475      rtx x;
3476      int indx;
3477      sbitmap *bmap;
3478      int set_p;
3479 {
3480   int bb,i;
3481   enum rtx_code code;
3482   const char *fmt;
3483
3484   /* repeat is used to turn tail-recursion into iteration.  */
3485  repeat:
3486
3487   if (x == 0)
3488     return;
3489
3490   code = GET_CODE (x);
3491   switch (code)
3492     {
3493     case REG:
3494       {
3495         reg_set *r;
3496         int regno = REGNO (x);
3497
3498         if (set_p)
3499           {
3500             if (regno < FIRST_PSEUDO_REGISTER)
3501               {
3502                 for (bb = 0; bb < n_basic_blocks; bb++)
3503                   if (TEST_BIT (reg_set_in_block[bb], regno))
3504                     SET_BIT (bmap[bb], indx);
3505               }
3506             else
3507               {
3508                 for (r = reg_set_table[regno]; r != NULL; r = r->next)
3509                   {
3510                     bb = BLOCK_NUM (r->insn);
3511                     SET_BIT (bmap[bb], indx);
3512                   }
3513               }
3514           }
3515         else
3516           {
3517             if (regno < FIRST_PSEUDO_REGISTER)
3518               {
3519                 for (bb = 0; bb < n_basic_blocks; bb++)
3520                   if (TEST_BIT (reg_set_in_block[bb], regno))
3521                     RESET_BIT (bmap[bb], indx);
3522               }
3523             else
3524               {
3525                 for (r = reg_set_table[regno]; r != NULL; r = r->next)
3526                   {
3527                     bb = BLOCK_NUM (r->insn);
3528                     RESET_BIT (bmap[bb], indx);
3529                   }
3530               }
3531           }
3532         return;
3533       }
3534
3535     case MEM:
3536       if (set_p)
3537         {
3538           for (bb = 0; bb < n_basic_blocks; bb++)
3539             if (mem_set_in_block[bb])
3540               SET_BIT (bmap[bb], indx);
3541         }
3542       else
3543         {
3544           for (bb = 0; bb < n_basic_blocks; bb++)
3545             if (mem_set_in_block[bb])
3546               RESET_BIT (bmap[bb], indx);
3547         }
3548       x = XEXP (x, 0);
3549       goto repeat;
3550
3551     case PC:
3552     case CC0: /*FIXME*/
3553     case CONST:
3554     case CONST_INT:
3555     case CONST_DOUBLE:
3556     case SYMBOL_REF:
3557     case LABEL_REF:
3558     case ADDR_VEC:
3559     case ADDR_DIFF_VEC:
3560       return;
3561
3562     default:
3563       break;
3564     }
3565
3566   i = GET_RTX_LENGTH (code) - 1;
3567   fmt = GET_RTX_FORMAT (code);
3568   for (; i >= 0; i--)
3569     {
3570       if (fmt[i] == 'e')
3571         {
3572           rtx tem = XEXP (x, i);
3573
3574           /* If we are about to do the last recursive call
3575              needed at this level, change it into iteration.
3576              This function is called enough to be worth it.  */
3577           if (i == 0)
3578             {
3579               x = tem;
3580               goto repeat;
3581             }
3582           compute_transp (tem, indx, bmap, set_p);
3583         }
3584       else if (fmt[i] == 'E')
3585         {
3586           int j;
3587           for (j = 0; j < XVECLEN (x, i); j++)
3588             compute_transp (XVECEXP (x, i, j), indx, bmap, set_p);
3589         }
3590     }
3591 }
3592
3593 /* Top level routine to do the dataflow analysis needed by copy/const
3594    propagation.  */
3595
3596 static void
3597 compute_cprop_data ()
3598 {
3599   compute_local_properties (cprop_absaltered, cprop_pavloc, NULL, 1);
3600   compute_available (cprop_pavloc, cprop_absaltered,
3601                      cprop_avout, cprop_avin);
3602 }
3603 \f
3604 /* Copy/constant propagation.  */
3605
3606 /* Maximum number of register uses in an insn that we handle.  */
3607 #define MAX_USES 8
3608
3609 /* Table of uses found in an insn.
3610    Allocated statically to avoid alloc/free complexity and overhead.  */
3611 static struct reg_use reg_use_table[MAX_USES];
3612
3613 /* Index into `reg_use_table' while building it.  */
3614 static int reg_use_count;
3615
3616 /* Set up a list of register numbers used in INSN.
3617    The found uses are stored in `reg_use_table'.
3618    `reg_use_count' is initialized to zero before entry, and
3619    contains the number of uses in the table upon exit.
3620
3621    ??? If a register appears multiple times we will record it multiple
3622    times.  This doesn't hurt anything but it will slow things down.  */
3623
3624 static void
3625 find_used_regs (x)
3626      rtx x;
3627 {
3628   int i;
3629   enum rtx_code code;
3630   const char *fmt;
3631
3632   /* repeat is used to turn tail-recursion into iteration.  */
3633  repeat:
3634
3635   if (x == 0)
3636     return;
3637
3638   code = GET_CODE (x);
3639   switch (code)
3640     {
3641     case REG:
3642       if (reg_use_count == MAX_USES)
3643         return;
3644       reg_use_table[reg_use_count].reg_rtx = x;
3645       reg_use_count++;
3646       return;
3647
3648     case MEM:
3649       x = XEXP (x, 0);
3650       goto repeat;
3651
3652     case PC:
3653     case CC0:
3654     case CONST:
3655     case CONST_INT:
3656     case CONST_DOUBLE:
3657     case SYMBOL_REF:
3658     case LABEL_REF:
3659     case CLOBBER:
3660     case ADDR_VEC:
3661     case ADDR_DIFF_VEC:
3662     case ASM_INPUT: /*FIXME*/
3663       return;
3664
3665     case SET:
3666       if (GET_CODE (SET_DEST (x)) == MEM)
3667         find_used_regs (SET_DEST (x));
3668       x = SET_SRC (x);
3669       goto repeat;
3670
3671     default:
3672       break;
3673     }
3674
3675   /* Recursively scan the operands of this expression.  */
3676
3677   fmt = GET_RTX_FORMAT (code);
3678   for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
3679     {
3680       if (fmt[i] == 'e')
3681         {
3682           /* If we are about to do the last recursive call
3683              needed at this level, change it into iteration.
3684              This function is called enough to be worth it.  */
3685           if (i == 0)
3686             {
3687               x = XEXP (x, 0);
3688               goto repeat;
3689             }
3690           find_used_regs (XEXP (x, i));
3691         }
3692       else if (fmt[i] == 'E')
3693         {
3694           int j;
3695           for (j = 0; j < XVECLEN (x, i); j++)
3696             find_used_regs (XVECEXP (x, i, j));
3697         }
3698     }
3699 }
3700
3701 /* Try to replace all non-SET_DEST occurrences of FROM in INSN with TO.
3702    Returns non-zero is successful.  */
3703
3704 static int
3705 try_replace_reg (from, to, insn)
3706      rtx from, to, insn;
3707 {
3708   /* If this fails we could try to simplify the result of the
3709      replacement and attempt to recognize the simplified insn.
3710
3711      But we need a general simplify_rtx that doesn't have pass
3712      specific state variables.  I'm not aware of one at the moment.  */
3713   return validate_replace_src (from, to, insn);
3714 }
3715
3716 /* Find a set of REGNO that is available on entry to INSN's block.
3717    Returns NULL if not found.  */
3718
3719 static struct expr *
3720 find_avail_set (regno, insn)
3721      int regno;
3722      rtx insn;
3723 {
3724   /* SET1 contains the last set found that can be returned to the caller for
3725      use in a substitution.  */
3726   struct expr *set1 = 0;
3727
3728   /* Loops are not possible here.  To get a loop we would need two sets
3729      available at the start of the block containing INSN.  ie we would
3730      need two sets like this available at the start of the block:
3731
3732        (set (reg X) (reg Y))
3733        (set (reg Y) (reg X))
3734
3735      This can not happen since the set of (reg Y) would have killed the
3736      set of (reg X) making it unavailable at the start of this block.  */
3737   while (1)
3738      {
3739       rtx src;
3740       struct expr *set = lookup_set (regno, NULL_RTX);
3741
3742       /* Find a set that is available at the start of the block
3743          which contains INSN.  */
3744       while (set)
3745         {
3746           if (TEST_BIT (cprop_avin[BLOCK_NUM (insn)], set->bitmap_index))
3747             break;
3748           set = next_set (regno, set);
3749         }
3750
3751       /* If no available set was found we've reached the end of the
3752          (possibly empty) copy chain.  */
3753       if (set == 0)
3754         break;
3755
3756       if (GET_CODE (set->expr) != SET)
3757         abort ();
3758
3759       src = SET_SRC (set->expr);
3760
3761       /* We know the set is available.
3762          Now check that SRC is ANTLOC (i.e. none of the source operands
3763          have changed since the start of the block).
3764
3765          If the source operand changed, we may still use it for the next
3766          iteration of this loop, but we may not use it for substitutions.  */
3767       if (CONSTANT_P (src) || oprs_not_set_p (src, insn))
3768         set1 = set;
3769
3770       /* If the source of the set is anything except a register, then
3771          we have reached the end of the copy chain.  */
3772       if (GET_CODE (src) != REG)
3773         break;
3774
3775       /* Follow the copy chain, ie start another iteration of the loop
3776          and see if we have an available copy into SRC.  */
3777       regno = REGNO (src);
3778      }
3779
3780   /* SET1 holds the last set that was available and anticipatable at
3781      INSN.  */
3782   return set1;
3783 }
3784
3785 /* Subroutine of cprop_insn that tries to propagate constants into
3786    JUMP_INSNS.  INSN must be a conditional jump; COPY is a copy of it
3787    that we can use for substitutions.
3788    REG_USED is the use we will try to replace, SRC is the constant we
3789    will try to substitute for it.
3790    Returns nonzero if a change was made.  */
3791 static int
3792 cprop_jump (insn, copy, reg_used, src)
3793      rtx insn, copy;
3794      struct reg_use *reg_used;
3795      rtx src;
3796 {
3797   rtx set = PATTERN (copy);
3798   rtx temp;
3799
3800   /* Replace the register with the appropriate constant.  */
3801   replace_rtx (SET_SRC (set), reg_used->reg_rtx, src);
3802
3803   temp = simplify_ternary_operation (GET_CODE (SET_SRC (set)),
3804                                      GET_MODE (SET_SRC (set)),
3805                                      GET_MODE (XEXP (SET_SRC (set), 0)),
3806                                      XEXP (SET_SRC (set), 0),
3807                                      XEXP (SET_SRC (set), 1),
3808                                      XEXP (SET_SRC (set), 2));
3809
3810   /* If no simplification can be made, then try the next
3811      register.  */
3812   if (temp == 0)
3813     return 0;
3814
3815   SET_SRC (set) = temp;
3816
3817   /* That may have changed the structure of TEMP, so
3818      force it to be rerecognized if it has not turned
3819      into a nop or unconditional jump.  */
3820
3821   INSN_CODE (copy) = -1;
3822   if ((SET_DEST (set) == pc_rtx
3823        && (SET_SRC (set) == pc_rtx
3824            || GET_CODE (SET_SRC (set)) == LABEL_REF))
3825       || recog (PATTERN (copy), copy, NULL) >= 0)
3826     {
3827       /* This has either become an unconditional jump
3828          or a nop-jump.  We'd like to delete nop jumps
3829          here, but doing so confuses gcse.  So we just
3830          make the replacement and let later passes
3831          sort things out.  */
3832       PATTERN (insn) = set;
3833       INSN_CODE (insn) = -1;
3834
3835       /* One less use of the label this insn used to jump to
3836          if we turned this into a NOP jump.  */
3837       if (SET_SRC (set) == pc_rtx && JUMP_LABEL (insn) != 0)
3838         --LABEL_NUSES (JUMP_LABEL (insn));
3839
3840       /* If this has turned into an unconditional jump,
3841          then put a barrier after it so that the unreachable
3842          code will be deleted.  */
3843       if (GET_CODE (SET_SRC (set)) == LABEL_REF)
3844         emit_barrier_after (insn);
3845
3846       run_jump_opt_after_gcse = 1;
3847
3848       const_prop_count++;
3849       if (gcse_file != NULL)
3850         {
3851           int regno = REGNO (reg_used->reg_rtx);
3852           fprintf (gcse_file, "CONST-PROP: Replacing reg %d in insn %d with constant ",
3853                    regno, INSN_UID (insn));
3854           print_rtl (gcse_file, src);
3855           fprintf (gcse_file, "\n");
3856         }
3857       return 1;
3858     }
3859   return 0;
3860 }
3861
3862 #ifdef HAVE_cc0
3863 /* Subroutine of cprop_insn that tries to propagate constants into
3864    JUMP_INSNS for machines that have CC0.  INSN is a single set that
3865    stores into CC0; the insn following it is a conditional jump.
3866    REG_USED is the use we will try to replace, SRC is the constant we
3867    will try to substitute for it.
3868    Returns nonzero if a change was made.  */
3869 static int
3870 cprop_cc0_jump (insn, reg_used, src)
3871      rtx insn;
3872      struct reg_use *reg_used;
3873      rtx src;
3874 {
3875   rtx jump = NEXT_INSN (insn);
3876   rtx copy = copy_rtx (jump);
3877   rtx set = PATTERN (copy);
3878
3879   /* We need to copy the source of the cc0 setter, as cprop_jump is going to
3880      substitute into it.  */
3881   replace_rtx (SET_SRC (set), cc0_rtx, copy_rtx (SET_SRC (PATTERN (insn))));
3882   if (! cprop_jump (jump, copy, reg_used, src))
3883     return 0;
3884
3885   /* If we succeeded, delete the cc0 setter.  */
3886   PUT_CODE (insn, NOTE);
3887   NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
3888   NOTE_SOURCE_FILE (insn) = 0;
3889   return 1;
3890  }
3891 #endif
3892
3893 /* Perform constant and copy propagation on INSN.
3894    The result is non-zero if a change was made.  */
3895
3896 static int
3897 cprop_insn (insn, alter_jumps)
3898      rtx insn;
3899      int alter_jumps;
3900 {
3901   struct reg_use *reg_used;
3902   int changed = 0;
3903
3904   /* Only propagate into SETs.  Note that a conditional jump is a
3905      SET with pc_rtx as the destination.  */
3906   if ((GET_CODE (insn) != INSN
3907        && GET_CODE (insn) != JUMP_INSN)
3908       || GET_CODE (PATTERN (insn)) != SET)
3909     return 0;
3910
3911   reg_use_count = 0;
3912   find_used_regs (PATTERN (insn));
3913
3914   reg_used = &reg_use_table[0];
3915   for ( ; reg_use_count > 0; reg_used++, reg_use_count--)
3916     {
3917       rtx pat, src;
3918       struct expr *set;
3919       int regno = REGNO (reg_used->reg_rtx);
3920
3921       /* Ignore registers created by GCSE.
3922          We do this because ... */
3923       if (regno >= max_gcse_regno)
3924         continue;
3925
3926       /* If the register has already been set in this block, there's
3927          nothing we can do.  */
3928       if (! oprs_not_set_p (reg_used->reg_rtx, insn))
3929         continue;
3930
3931       /* Find an assignment that sets reg_used and is available
3932          at the start of the block.  */
3933       set = find_avail_set (regno, insn);
3934       if (! set)
3935         continue;
3936
3937       pat = set->expr;
3938       /* ??? We might be able to handle PARALLELs.  Later.  */
3939       if (GET_CODE (pat) != SET)
3940         abort ();
3941       src = SET_SRC (pat);
3942
3943       /* Constant propagation.  */
3944       if (GET_CODE (src) == CONST_INT || GET_CODE (src) == CONST_DOUBLE
3945           || GET_CODE (src) == SYMBOL_REF)
3946         {
3947           /* Handle normal insns first.  */
3948           if (GET_CODE (insn) == INSN
3949               && try_replace_reg (reg_used->reg_rtx, src, insn))
3950             {
3951               changed = 1;
3952               const_prop_count++;
3953               if (gcse_file != NULL)
3954                 {
3955                   fprintf (gcse_file, "CONST-PROP: Replacing reg %d in insn %d with constant ",
3956                            regno, INSN_UID (insn));
3957                   print_rtl (gcse_file, src);
3958                   fprintf (gcse_file, "\n");
3959                 }
3960
3961               /* The original insn setting reg_used may or may not now be
3962                  deletable.  We leave the deletion to flow.  */
3963             }
3964
3965           /* Try to propagate a CONST_INT into a conditional jump.
3966              We're pretty specific about what we will handle in this
3967              code, we can extend this as necessary over time.
3968
3969              Right now the insn in question must look like
3970              (set (pc) (if_then_else ...))  */
3971           else if (alter_jumps
3972                    && GET_CODE (insn) == JUMP_INSN
3973                    && condjump_p (insn)
3974                    && ! simplejump_p (insn))
3975             changed |= cprop_jump (insn, copy_rtx (insn), reg_used, src);
3976 #ifdef HAVE_cc0
3977           /* Similar code for machines that use a pair of CC0 setter and
3978              conditional jump insn.  */
3979           else if (alter_jumps
3980                    && GET_CODE (PATTERN (insn)) == SET
3981                    && SET_DEST (PATTERN (insn)) == cc0_rtx
3982                    && GET_CODE (NEXT_INSN (insn)) == JUMP_INSN
3983                    && condjump_p (NEXT_INSN (insn))
3984                    && ! simplejump_p (NEXT_INSN (insn)))
3985             changed |= cprop_cc0_jump (insn, reg_used, src);
3986 #endif
3987         }
3988       else if (GET_CODE (src) == REG
3989                && REGNO (src) >= FIRST_PSEUDO_REGISTER
3990                && REGNO (src) != regno)
3991         {
3992           if (try_replace_reg (reg_used->reg_rtx, src, insn))
3993             {
3994               changed = 1;
3995               copy_prop_count++;
3996               if (gcse_file != NULL)
3997                 {
3998                   fprintf (gcse_file, "COPY-PROP: Replacing reg %d in insn %d with reg %d\n",
3999                            regno, INSN_UID (insn), REGNO (src));
4000                 }
4001
4002               /* The original insn setting reg_used may or may not now be
4003                  deletable.  We leave the deletion to flow.  */
4004               /* FIXME: If it turns out that the insn isn't deletable,
4005                  then we may have unnecessarily extended register lifetimes
4006                  and made things worse.  */
4007             }
4008         }
4009     }
4010
4011   return changed;
4012 }
4013
4014 /* Forward propagate copies.
4015    This includes copies and constants.
4016    Return non-zero if a change was made.  */
4017
4018 static int
4019 cprop (alter_jumps)
4020      int alter_jumps;
4021 {
4022   int bb, changed;
4023   rtx insn;
4024
4025   /* Note we start at block 1.  */
4026
4027   changed = 0;
4028   for (bb = 1; bb < n_basic_blocks; bb++)
4029     {
4030       /* Reset tables used to keep track of what's still valid [since the
4031          start of the block].  */
4032       reset_opr_set_tables ();
4033
4034       for (insn = BLOCK_HEAD (bb);
4035            insn != NULL && insn != NEXT_INSN (BLOCK_END (bb));
4036            insn = NEXT_INSN (insn))
4037         {
4038           if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
4039             {
4040               changed |= cprop_insn (insn, alter_jumps);
4041
4042               /* Keep track of everything modified by this insn.  */
4043               /* ??? Need to be careful w.r.t. mods done to INSN.  Don't
4044                  call mark_oprs_set if we turned the insn into a NOTE.  */
4045               if (GET_CODE (insn) != NOTE)
4046                 mark_oprs_set (insn);
4047             }
4048         }
4049     }
4050
4051   if (gcse_file != NULL)
4052     fprintf (gcse_file, "\n");
4053
4054   return changed;
4055 }
4056
4057 /* Perform one copy/constant propagation pass.
4058    F is the first insn in the function.
4059    PASS is the pass count.  */
4060
4061 static int
4062 one_cprop_pass (pass, alter_jumps)
4063      int pass;
4064      int alter_jumps;
4065 {
4066   int changed = 0;
4067
4068   const_prop_count = 0;
4069   copy_prop_count = 0;
4070
4071   alloc_set_hash_table (max_cuid);
4072   compute_set_hash_table ();
4073   if (gcse_file)
4074     dump_hash_table (gcse_file, "SET", set_hash_table, set_hash_table_size,
4075                      n_sets);
4076   if (n_sets > 0)
4077     {
4078       alloc_cprop_mem (n_basic_blocks, n_sets);
4079       compute_cprop_data ();
4080       changed = cprop (alter_jumps);
4081       free_cprop_mem ();
4082     }
4083   free_set_hash_table ();
4084
4085   if (gcse_file)
4086     {
4087       fprintf (gcse_file, "CPROP of %s, pass %d: %d bytes needed, %d const props, %d copy props\n",
4088                current_function_name, pass,
4089                bytes_used, const_prop_count, copy_prop_count);
4090       fprintf (gcse_file, "\n");
4091     }
4092
4093   return changed;
4094 }
4095 \f
4096 /* Compute PRE+LCM working variables.  */
4097
4098 /* Local properties of expressions.  */
4099 /* Nonzero for expressions that are transparent in the block.  */
4100 static sbitmap *transp;
4101
4102 /* Nonzero for expressions that are transparent at the end of the block.
4103    This is only zero for expressions killed by abnormal critical edge
4104    created by a calls.  */
4105 static sbitmap *transpout;
4106
4107 /* Nonzero for expressions that are computed (available) in the block.  */
4108 static sbitmap *comp;
4109
4110 /* Nonzero for expressions that are locally anticipatable in the block.  */
4111 static sbitmap *antloc;
4112
4113 /* Nonzero for expressions where this block is an optimal computation
4114    point.  */
4115 static sbitmap *pre_optimal;
4116
4117 /* Nonzero for expressions which are redundant in a particular block.  */
4118 static sbitmap *pre_redundant;
4119
4120 /* Nonzero for expressions which should be inserted on a specific edge.  */
4121 static sbitmap *pre_insert_map;
4122
4123 /* Nonzero for expressions which should be deleted in a specific block.  */
4124 static sbitmap *pre_delete_map;
4125
4126 /* Contains the edge_list returned by pre_edge_lcm.  */
4127 static struct edge_list *edge_list;
4128
4129 static sbitmap *temp_bitmap;
4130
4131 /* Redundant insns.  */
4132 static sbitmap pre_redundant_insns;
4133
4134 /* Allocate vars used for PRE analysis.  */
4135
4136 static void
4137 alloc_pre_mem (n_blocks, n_exprs)
4138      int n_blocks, n_exprs;
4139 {
4140   transp = sbitmap_vector_alloc (n_blocks, n_exprs);
4141   comp = sbitmap_vector_alloc (n_blocks, n_exprs);
4142   antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
4143   temp_bitmap = sbitmap_vector_alloc (n_blocks, n_exprs);
4144
4145   pre_optimal = NULL;
4146   pre_redundant = NULL;
4147   pre_insert_map = NULL;
4148   pre_delete_map = NULL;
4149   ae_in = NULL;
4150   ae_out = NULL;
4151   u_bitmap = NULL;
4152   transpout = sbitmap_vector_alloc (n_blocks, n_exprs);
4153   ae_kill = sbitmap_vector_alloc (n_blocks, n_exprs);
4154   /* pre_insert and pre_delete are allocated later.  */
4155 }
4156
4157 /* Free vars used for PRE analysis.  */
4158
4159 static void
4160 free_pre_mem ()
4161 {
4162   free (transp);
4163   free (comp);
4164   free (antloc);
4165   free (temp_bitmap);
4166
4167   if (pre_optimal)
4168     free (pre_optimal);
4169   if (pre_redundant)
4170     free (pre_redundant);
4171   if (pre_insert_map)
4172     free (pre_insert_map);
4173   if (pre_delete_map)
4174     free (pre_delete_map);
4175   if (transpout)
4176     free (transpout);
4177
4178   if (ae_in)
4179     free (ae_in);
4180   if (ae_out)
4181     free (ae_out);
4182   if (ae_kill)
4183     free (ae_kill);
4184   if (u_bitmap)
4185     free (u_bitmap);
4186
4187   transp = comp = antloc = NULL;
4188   pre_optimal = pre_redundant = pre_insert_map = pre_delete_map = NULL;
4189   transpout = ae_in = ae_out = ae_kill = NULL;
4190   u_bitmap = NULL;
4191
4192 }
4193
4194 /* Top level routine to do the dataflow analysis needed by PRE.  */
4195
4196 static void
4197 compute_pre_data ()
4198 {
4199   compute_local_properties (transp, comp, antloc, 0);
4200   compute_transpout ();
4201   sbitmap_vector_zero (ae_kill, n_basic_blocks);
4202   compute_ae_kill (comp, ae_kill);
4203   edge_list = pre_edge_lcm (gcse_file, n_exprs, transp, comp, antloc,
4204                             ae_kill, &pre_insert_map, &pre_delete_map);
4205 }
4206
4207 \f
4208 /* PRE utilities */
4209
4210 /* Return non-zero if an occurrence of expression EXPR in OCCR_BB would reach
4211    block BB.
4212
4213    VISITED is a pointer to a working buffer for tracking which BB's have
4214    been visited.  It is NULL for the top-level call.
4215
4216    CHECK_PRE_COMP controls whether or not we check for a computation of
4217    EXPR in OCCR_BB.
4218
4219    We treat reaching expressions that go through blocks containing the same
4220    reaching expression as "not reaching".  E.g. if EXPR is generated in blocks
4221    2 and 3, INSN is in block 4, and 2->3->4, we treat the expression in block
4222    2 as not reaching.  The intent is to improve the probability of finding
4223    only one reaching expression and to reduce register lifetimes by picking
4224    the closest such expression.  */
4225
4226 static int
4227 pre_expr_reaches_here_p_work (occr_bb, expr, bb, check_pre_comp, visited)
4228      int occr_bb;
4229      struct expr *expr;
4230      int bb;
4231      int check_pre_comp;
4232      char *visited;
4233 {
4234   edge pred;
4235
4236   for (pred = BASIC_BLOCK (bb)->pred; pred != NULL; pred = pred->pred_next)
4237     {
4238       int pred_bb = pred->src->index;
4239
4240       if (pred->src == ENTRY_BLOCK_PTR
4241           /* Has predecessor has already been visited?  */
4242           || visited[pred_bb])
4243         {
4244           /* Nothing to do.  */
4245         }
4246       /* Does this predecessor generate this expression?  */
4247       else if ((!check_pre_comp && occr_bb == pred_bb)
4248                || TEST_BIT (comp[pred_bb], expr->bitmap_index))
4249         {
4250           /* Is this the occurrence we're looking for?
4251              Note that there's only one generating occurrence per block
4252              so we just need to check the block number.  */
4253           if (occr_bb == pred_bb)
4254             return 1;
4255           visited[pred_bb] = 1;
4256         }
4257       /* Ignore this predecessor if it kills the expression.  */
4258       else if (! TEST_BIT (transp[pred_bb], expr->bitmap_index))
4259         visited[pred_bb] = 1;
4260       /* Neither gen nor kill.  */
4261       else
4262         {
4263           visited[pred_bb] = 1;
4264           if (pre_expr_reaches_here_p_work (occr_bb, expr, pred_bb,
4265                                             check_pre_comp, visited))
4266             return 1;
4267         }
4268     }
4269
4270   /* All paths have been checked.  */
4271   return 0;
4272 }
4273
4274 /* The wrapper for pre_expr_reaches_here_work that ensures that any
4275    memory allocated for that function is returned. */
4276
4277 static int
4278 pre_expr_reaches_here_p (occr_bb, expr, bb, check_pre_comp)
4279      int occr_bb;
4280      struct expr *expr;
4281      int bb;
4282      int check_pre_comp;
4283 {
4284   int rval;
4285   char * visited = (char *) xcalloc (n_basic_blocks, 1);
4286
4287   rval = pre_expr_reaches_here_p_work(occr_bb, expr, bb, check_pre_comp,
4288                                       visited);
4289
4290   free (visited);
4291
4292   return (rval);
4293 }
4294 \f
4295
4296 /* Given an expr, generate RTL which we can insert at the end of a BB,
4297    or on an edge.  Set the block number of any insns generated to
4298    the value of BB.  */
4299
4300 static rtx
4301 process_insert_insn (expr)
4302      struct expr *expr;
4303 {
4304   rtx reg = expr->reaching_reg;
4305   rtx pat, copied_expr;
4306   rtx first_new_insn;
4307
4308   start_sequence ();
4309   copied_expr = copy_rtx (expr->expr);
4310   emit_move_insn (reg, copied_expr);
4311   first_new_insn = get_insns ();
4312   pat = gen_sequence ();
4313   end_sequence ();
4314
4315   return pat;
4316 }
4317
4318 /* Add EXPR to the end of basic block BB.
4319
4320    This is used by both the PRE and code hoisting.
4321
4322    For PRE, we want to verify that the expr is either transparent
4323    or locally anticipatable in the target block.  This check makes
4324    no sense for code hoisting.  */
4325
4326 static void
4327 insert_insn_end_bb (expr, bb, pre)
4328      struct expr *expr;
4329      int bb;
4330      int pre;
4331 {
4332   rtx insn = BLOCK_END (bb);
4333   rtx new_insn;
4334   rtx reg = expr->reaching_reg;
4335   int regno = REGNO (reg);
4336   rtx pat;
4337
4338   pat = process_insert_insn (expr);
4339
4340   /* If the last insn is a jump, insert EXPR in front [taking care to
4341      handle cc0, etc. properly].  */
4342
4343   if (GET_CODE (insn) == JUMP_INSN)
4344     {
4345 #ifdef HAVE_cc0
4346       rtx note;
4347 #endif
4348
4349       /* If this is a jump table, then we can't insert stuff here.  Since
4350          we know the previous real insn must be the tablejump, we insert
4351          the new instruction just before the tablejump.  */
4352       if (GET_CODE (PATTERN (insn)) == ADDR_VEC
4353           || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
4354         insn = prev_real_insn (insn);
4355
4356 #ifdef HAVE_cc0
4357       /* FIXME: 'twould be nice to call prev_cc0_setter here but it aborts
4358          if cc0 isn't set.  */
4359       note = find_reg_note (insn, REG_CC_SETTER, NULL_RTX);
4360       if (note)
4361         insn = XEXP (note, 0);
4362       else
4363         {
4364           rtx maybe_cc0_setter = prev_nonnote_insn (insn);
4365           if (maybe_cc0_setter
4366               && GET_RTX_CLASS (GET_CODE (maybe_cc0_setter)) == 'i'
4367               && sets_cc0_p (PATTERN (maybe_cc0_setter)))
4368             insn = maybe_cc0_setter;
4369         }
4370 #endif
4371       /* FIXME: What if something in cc0/jump uses value set in new insn?  */
4372       new_insn = emit_insn_before (pat, insn);
4373       if (BLOCK_HEAD (bb) == insn)
4374         BLOCK_HEAD (bb) = new_insn;
4375     }
4376   /* Likewise if the last insn is a call, as will happen in the presence
4377      of exception handling.  */
4378   else if (GET_CODE (insn) == CALL_INSN)
4379     {
4380       HARD_REG_SET parm_regs;
4381       int nparm_regs;
4382       rtx p;
4383
4384       /* Keeping in mind SMALL_REGISTER_CLASSES and parameters in registers,
4385          we search backward and place the instructions before the first
4386          parameter is loaded.  Do this for everyone for consistency and a
4387          presumtion that we'll get better code elsewhere as well.  */
4388
4389       /* It should always be the case that we can put these instructions
4390          anywhere in the basic block with performing PRE optimizations.
4391          Check this.  */
4392       if (pre
4393           && !TEST_BIT (antloc[bb], expr->bitmap_index)
4394           && !TEST_BIT (transp[bb], expr->bitmap_index))
4395         abort ();
4396
4397       /* Since different machines initialize their parameter registers
4398          in different orders, assume nothing.  Collect the set of all
4399          parameter registers.  */
4400       CLEAR_HARD_REG_SET (parm_regs);
4401       nparm_regs = 0;
4402       for (p = CALL_INSN_FUNCTION_USAGE (insn); p ; p = XEXP (p, 1))
4403         if (GET_CODE (XEXP (p, 0)) == USE
4404             && GET_CODE (XEXP (XEXP (p, 0), 0)) == REG)
4405           {
4406             int regno = REGNO (XEXP (XEXP (p, 0), 0));
4407             if (regno >= FIRST_PSEUDO_REGISTER)
4408               abort ();
4409             SET_HARD_REG_BIT (parm_regs, regno);
4410             nparm_regs++;
4411           }
4412
4413       /* Search backward for the first set of a register in this set.  */
4414       while (nparm_regs && BLOCK_HEAD (bb) != insn)
4415         {
4416           insn = PREV_INSN (insn);
4417           p = single_set (insn);
4418           if (p && GET_CODE (SET_DEST (p)) == REG
4419               && REGNO (SET_DEST (p)) < FIRST_PSEUDO_REGISTER
4420               && TEST_HARD_REG_BIT (parm_regs, REGNO (SET_DEST (p))))
4421             {
4422               CLEAR_HARD_REG_BIT (parm_regs, REGNO (SET_DEST (p)));
4423               nparm_regs--;
4424             }
4425         }
4426
4427       /* If we found all the parameter loads, then we want to insert
4428          before the first parameter load.
4429
4430          If we did not find all the parameter loads, then we might have
4431          stopped on the head of the block, which could be a CODE_LABEL.
4432          If we inserted before the CODE_LABEL, then we would be putting
4433          the insn in the wrong basic block.  In that case, put the insn
4434          after the CODE_LABEL.
4435
4436          ?!? Do we need to account for NOTE_INSN_BASIC_BLOCK here?  */
4437       if (GET_CODE (insn) != CODE_LABEL)
4438         {
4439           new_insn = emit_insn_before (pat, insn);
4440           if (BLOCK_HEAD (bb) == insn)
4441             BLOCK_HEAD (bb) = new_insn;
4442         }
4443       else
4444         {
4445           new_insn = emit_insn_after (pat, insn);
4446         }
4447     }
4448   else
4449     {
4450       new_insn = emit_insn_after (pat, insn);
4451       BLOCK_END (bb) = new_insn;
4452     }
4453
4454   /* Keep block number table up to date.
4455      Note, PAT could be a multiple insn sequence, we have to make
4456      sure that each insn in the sequence is handled.  */
4457   if (GET_CODE (pat) == SEQUENCE)
4458     {
4459       int i;
4460
4461       for (i = 0; i < XVECLEN (pat, 0); i++)
4462         {
4463           rtx insn = XVECEXP (pat, 0, i);
4464           set_block_num (insn, bb);
4465           if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
4466             add_label_notes (PATTERN (insn), new_insn);
4467           note_stores (PATTERN (insn), record_set_info, insn);
4468         }
4469     }
4470   else
4471     {
4472       add_label_notes (SET_SRC (pat), new_insn);
4473       set_block_num (new_insn, bb);
4474       /* Keep register set table up to date.  */
4475       record_one_set (regno, new_insn);
4476     }
4477
4478   gcse_create_count++;
4479
4480   if (gcse_file)
4481     {
4482       fprintf (gcse_file, "PRE/HOIST: end of bb %d, insn %d, copying expression %d to reg %d\n",
4483                bb, INSN_UID (new_insn), expr->bitmap_index, regno);
4484     }
4485 }
4486
4487 /* Insert partially redundant expressions on edges in the CFG to make
4488    the expressions fully redundant.  */
4489
4490 static int
4491 pre_edge_insert (edge_list, index_map)
4492      struct edge_list *edge_list;
4493      struct expr **index_map;
4494 {
4495   int e, i, num_edges, set_size, did_insert = 0;
4496   sbitmap *inserted;
4497
4498   /* Where PRE_INSERT_MAP is nonzero, we add the expression on that edge
4499      if it reaches any of the deleted expressions.  */
4500
4501   set_size = pre_insert_map[0]->size;
4502   num_edges = NUM_EDGES (edge_list);
4503   inserted = sbitmap_vector_alloc (num_edges, n_exprs);
4504   sbitmap_vector_zero (inserted, num_edges);
4505
4506   for (e = 0; e < num_edges; e++)
4507     {
4508       int indx;
4509       basic_block pred = INDEX_EDGE_PRED_BB (edge_list, e);
4510       int bb = pred->index;
4511
4512       for (i = indx = 0; i < set_size; i++, indx += SBITMAP_ELT_BITS)
4513         {
4514           SBITMAP_ELT_TYPE insert = pre_insert_map[e]->elms[i];
4515           int j;
4516
4517           for (j = indx; insert && j < n_exprs; j++, insert >>= 1)
4518             {
4519               if ((insert & 1) != 0 && index_map[j]->reaching_reg != NULL_RTX)
4520                 {
4521                   struct expr *expr = index_map[j];
4522                   struct occr *occr;
4523
4524                   /* Now look at each deleted occurence of this expression.  */
4525                   for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
4526                     {
4527                       if (! occr->deleted_p)
4528                         continue;
4529
4530                       /* Insert this expression on this edge if if it would
4531                          reach the deleted occurence in BB.  */
4532                       if (!TEST_BIT (inserted[e], j)
4533                           && (bb == ENTRY_BLOCK
4534                               || pre_expr_reaches_here_p (bb, expr,
4535                                                    BLOCK_NUM (occr->insn), 0)))
4536                         {
4537                           rtx insn;
4538                           edge eg = INDEX_EDGE (edge_list, e);
4539                           /* We can't insert anything on an abnormal
4540                              and critical edge, so we insert the
4541                              insn at the end of the previous block. There
4542                              are several alternatives detailed in
4543                              Morgans book P277 (sec 10.5) for handling
4544                              this situation.  This one is easiest for now.  */
4545
4546                           if ((eg->flags & EDGE_ABNORMAL) == EDGE_ABNORMAL)
4547                             {
4548                               insert_insn_end_bb (index_map[j], bb, 0);
4549                             }
4550                           else
4551                             {
4552                               insn = process_insert_insn (index_map[j]);
4553                               insert_insn_on_edge (insn, eg);
4554                             }
4555                           if (gcse_file)
4556                             {
4557                               fprintf (gcse_file,
4558                                        "PRE/HOIST: edge (%d,%d), copy expression %d\n",
4559                                         bb,
4560                                         INDEX_EDGE_SUCC_BB (edge_list, e)->index, expr->bitmap_index);
4561                             }
4562                           SET_BIT (inserted[e], j);
4563                           did_insert = 1;
4564                           gcse_create_count++;
4565                         }
4566                     }
4567                 }
4568             }
4569         }
4570     }
4571
4572   /* Clean up.  */
4573   free (inserted);
4574
4575   return did_insert;
4576 }
4577
4578 /* Copy the result of INSN to REG.
4579    INDX is the expression number.  */
4580
4581 static void
4582 pre_insert_copy_insn (expr, insn)
4583      struct expr *expr;
4584      rtx insn;
4585 {
4586   rtx reg = expr->reaching_reg;
4587   int regno = REGNO (reg);
4588   int indx = expr->bitmap_index;
4589   rtx set = single_set (insn);
4590   rtx new_insn;
4591   int bb = BLOCK_NUM (insn);
4592
4593   if (!set)
4594     abort ();
4595   new_insn = emit_insn_after (gen_rtx_SET (VOIDmode, reg, SET_DEST (set)),
4596                               insn);
4597   /* Keep block number table up to date.  */
4598   set_block_num (new_insn, bb);
4599   /* Keep register set table up to date.  */
4600   record_one_set (regno, new_insn);
4601   if (insn == BLOCK_END (bb))
4602     BLOCK_END (bb) = new_insn;
4603
4604   gcse_create_count++;
4605
4606   if (gcse_file)
4607     fprintf (gcse_file,
4608              "PRE: bb %d, insn %d, copy expression %d in insn %d to reg %d\n",
4609               BLOCK_NUM (insn), INSN_UID (new_insn), indx,
4610               INSN_UID (insn), regno);
4611 }
4612
4613 /* Copy available expressions that reach the redundant expression
4614    to `reaching_reg'.  */
4615
4616 static void
4617 pre_insert_copies ()
4618 {
4619   int i;
4620
4621   /* For each available expression in the table, copy the result to
4622      `reaching_reg' if the expression reaches a deleted one.
4623
4624      ??? The current algorithm is rather brute force.
4625      Need to do some profiling.  */
4626
4627   for (i = 0; i < expr_hash_table_size; i++)
4628     {
4629       struct expr *expr;
4630
4631       for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
4632         {
4633           struct occr *occr;
4634
4635           /* If the basic block isn't reachable, PPOUT will be TRUE.
4636              However, we don't want to insert a copy here because the
4637              expression may not really be redundant.  So only insert
4638              an insn if the expression was deleted.
4639              This test also avoids further processing if the expression
4640              wasn't deleted anywhere.  */
4641           if (expr->reaching_reg == NULL)
4642             continue;
4643
4644           for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
4645             {
4646               struct occr *avail;
4647
4648               if (! occr->deleted_p)
4649                 continue;
4650
4651               for (avail = expr->avail_occr; avail != NULL; avail = avail->next)
4652                 {
4653                   rtx insn = avail->insn;
4654
4655                   /* No need to handle this one if handled already.  */
4656                   if (avail->copied_p)
4657                     continue;
4658                   /* Don't handle this one if it's a redundant one.  */
4659                   if (TEST_BIT (pre_redundant_insns, INSN_CUID (insn)))
4660                     continue;
4661                   /* Or if the expression doesn't reach the deleted one.  */
4662                   if (! pre_expr_reaches_here_p (BLOCK_NUM (avail->insn), expr,
4663                                                  BLOCK_NUM (occr->insn),1))
4664                     continue;
4665
4666                   /* Copy the result of avail to reaching_reg.  */
4667                   pre_insert_copy_insn (expr, insn);
4668                   avail->copied_p = 1;
4669                 }
4670             }
4671         }
4672     }
4673 }
4674
4675 /* Delete redundant computations.
4676    Deletion is done by changing the insn to copy the `reaching_reg' of
4677    the expression into the result of the SET.  It is left to later passes
4678    (cprop, cse2, flow, combine, regmove) to propagate the copy or eliminate it.
4679
4680    Returns non-zero if a change is made.  */
4681
4682 static int
4683 pre_delete ()
4684 {
4685   int i, bb, changed;
4686
4687   /* Compute the expressions which are redundant and need to be replaced by
4688      copies from the reaching reg to the target reg.  */
4689   for (bb = 0; bb < n_basic_blocks; bb++)
4690     sbitmap_copy (temp_bitmap[bb], pre_delete_map[bb]);
4691
4692   changed = 0;
4693   for (i = 0; i < expr_hash_table_size; i++)
4694     {
4695       struct expr *expr;
4696
4697       for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
4698         {
4699           struct occr *occr;
4700           int indx = expr->bitmap_index;
4701
4702           /* We only need to search antic_occr since we require
4703              ANTLOC != 0.  */
4704
4705           for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
4706             {
4707               rtx insn = occr->insn;
4708               rtx set;
4709               int bb = BLOCK_NUM (insn);
4710
4711               if (TEST_BIT (temp_bitmap[bb], indx))
4712                 {
4713                   set = single_set (insn);
4714                   if (! set)
4715                     abort ();
4716
4717                   /* Create a pseudo-reg to store the result of reaching
4718                      expressions into.  Get the mode for the new pseudo
4719                      from the mode of the original destination pseudo.  */
4720                   if (expr->reaching_reg == NULL)
4721                     expr->reaching_reg
4722                       = gen_reg_rtx (GET_MODE (SET_DEST (set)));
4723
4724                   /* In theory this should never fail since we're creating
4725                      a reg->reg copy.
4726
4727                      However, on the x86 some of the movXX patterns actually
4728                      contain clobbers of scratch regs.  This may cause the
4729                      insn created by validate_change to not match any pattern
4730                      and thus cause validate_change to fail.   */
4731                   if (validate_change (insn, &SET_SRC (set),
4732                                        expr->reaching_reg, 0))
4733                     {
4734                       occr->deleted_p = 1;
4735                       SET_BIT (pre_redundant_insns, INSN_CUID (insn));
4736                       changed = 1;
4737                       gcse_subst_count++;
4738                     }
4739
4740                   if (gcse_file)
4741                     {
4742                       fprintf (gcse_file,
4743                                "PRE: redundant insn %d (expression %d) in bb %d, reaching reg is %d\n",
4744                                INSN_UID (insn), indx, bb, REGNO (expr->reaching_reg));
4745                     }
4746                 }
4747             }
4748         }
4749     }
4750
4751   return changed;
4752 }
4753
4754 /* Perform GCSE optimizations using PRE.
4755    This is called by one_pre_gcse_pass after all the dataflow analysis
4756    has been done.
4757
4758    This is based on the original Morel-Renvoise paper Fred Chow's thesis,
4759    and lazy code motion from Knoop, Ruthing and Steffen as described in
4760    Advanced Compiler Design and Implementation.
4761
4762    ??? A new pseudo reg is created to hold the reaching expression.
4763    The nice thing about the classical approach is that it would try to
4764    use an existing reg.  If the register can't be adequately optimized
4765    [i.e. we introduce reload problems], one could add a pass here to
4766    propagate the new register through the block.
4767
4768    ??? We don't handle single sets in PARALLELs because we're [currently]
4769    not able to copy the rest of the parallel when we insert copies to create
4770    full redundancies from partial redundancies.  However, there's no reason
4771    why we can't handle PARALLELs in the cases where there are no partial
4772    redundancies.  */
4773
4774 static int
4775 pre_gcse ()
4776 {
4777   int i, did_insert;
4778   int changed;
4779   struct expr **index_map;
4780
4781   /* Compute a mapping from expression number (`bitmap_index') to
4782      hash table entry.  */
4783
4784   index_map = xcalloc (n_exprs, sizeof (struct expr *));
4785   for (i = 0; i < expr_hash_table_size; i++)
4786     {
4787       struct expr *expr;
4788
4789       for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
4790         index_map[expr->bitmap_index] = expr;
4791     }
4792
4793   /* Reset bitmap used to track which insns are redundant.  */
4794   pre_redundant_insns = sbitmap_alloc (max_cuid);
4795   sbitmap_zero (pre_redundant_insns);
4796
4797   /* Delete the redundant insns first so that
4798      - we know what register to use for the new insns and for the other
4799        ones with reaching expressions
4800      - we know which insns are redundant when we go to create copies  */
4801   changed = pre_delete ();
4802
4803   did_insert = pre_edge_insert (edge_list, index_map);
4804   /* In other places with reaching expressions, copy the expression to the
4805      specially allocated pseudo-reg that reaches the redundant expr.  */
4806   pre_insert_copies ();
4807   if (did_insert)
4808     {
4809       commit_edge_insertions ();
4810       changed = 1;
4811     }
4812
4813   free (index_map);
4814   free (pre_redundant_insns);
4815
4816   return changed;
4817 }
4818
4819 /* Top level routine to perform one PRE GCSE pass.
4820
4821    Return non-zero if a change was made.  */
4822
4823 static int
4824 one_pre_gcse_pass (pass)
4825      int pass;
4826 {
4827   int changed = 0;
4828
4829   gcse_subst_count = 0;
4830   gcse_create_count = 0;
4831
4832   alloc_expr_hash_table (max_cuid);
4833   add_noreturn_fake_exit_edges ();
4834   compute_expr_hash_table ();
4835   if (gcse_file)
4836     dump_hash_table (gcse_file, "Expression", expr_hash_table,
4837                      expr_hash_table_size, n_exprs);
4838   if (n_exprs > 0)
4839     {
4840       alloc_pre_mem (n_basic_blocks, n_exprs);
4841       compute_pre_data ();
4842       changed |= pre_gcse ();
4843       free_edge_list (edge_list);
4844       free_pre_mem ();
4845     }
4846   remove_fake_edges ();
4847   free_expr_hash_table ();
4848
4849   if (gcse_file)
4850     {
4851       fprintf (gcse_file, "\n");
4852       fprintf (gcse_file, "PRE GCSE of %s, pass %d: %d bytes needed, %d substs, %d insns created\n",
4853                current_function_name, pass,
4854                bytes_used, gcse_subst_count, gcse_create_count);
4855     }
4856
4857   return changed;
4858 }
4859 \f
4860 /* If X contains any LABEL_REF's, add REG_LABEL notes for them to INSN.
4861    We have to add REG_LABEL notes, because the following loop optimization
4862    pass requires them.  */
4863
4864 /* ??? This is very similar to the loop.c add_label_notes function.  We
4865    could probably share code here.  */
4866
4867 /* ??? If there was a jump optimization pass after gcse and before loop,
4868    then we would not need to do this here, because jump would add the
4869    necessary REG_LABEL notes.  */
4870
4871 static void
4872 add_label_notes (x, insn)
4873      rtx x;
4874      rtx insn;
4875 {
4876   enum rtx_code code = GET_CODE (x);
4877   int i, j;
4878   const char *fmt;
4879
4880   if (code == LABEL_REF && !LABEL_REF_NONLOCAL_P (x))
4881     {
4882       /* This code used to ignore labels that referred to dispatch tables to
4883          avoid flow generating (slighly) worse code.
4884
4885          We no longer ignore such label references (see LABEL_REF handling in
4886          mark_jump_label for additional information).  */
4887       REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_LABEL, XEXP (x, 0),
4888                                             REG_NOTES (insn));
4889       return;
4890     }
4891
4892   fmt = GET_RTX_FORMAT (code);
4893   for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
4894     {
4895       if (fmt[i] == 'e')
4896         add_label_notes (XEXP (x, i), insn);
4897       else if (fmt[i] == 'E')
4898         for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4899           add_label_notes (XVECEXP (x, i, j), insn);
4900     }
4901 }
4902
4903 /* Compute transparent outgoing information for each block.
4904
4905    An expression is transparent to an edge unless it is killed by
4906    the edge itself.  This can only happen with abnormal control flow,
4907    when the edge is traversed through a call.  This happens with
4908    non-local labels and exceptions.
4909
4910    This would not be necessary if we split the edge.  While this is
4911    normally impossible for abnormal critical edges, with some effort
4912    it should be possible with exception handling, since we still have
4913    control over which handler should be invoked.  But due to increased
4914    EH table sizes, this may not be worthwhile.  */
4915
4916 static void
4917 compute_transpout ()
4918 {
4919   int bb;
4920
4921   sbitmap_vector_ones (transpout, n_basic_blocks);
4922
4923   for (bb = 0; bb < n_basic_blocks; ++bb)
4924     {
4925       int i;
4926
4927       /* Note that flow inserted a nop a the end of basic blocks that
4928          end in call instructions for reasons other than abnormal
4929          control flow.  */
4930       if (GET_CODE (BLOCK_END (bb)) != CALL_INSN)
4931         continue;
4932
4933       for (i = 0; i < expr_hash_table_size; i++)
4934         {
4935           struct expr *expr;
4936           for (expr = expr_hash_table[i]; expr ; expr = expr->next_same_hash)
4937             if (GET_CODE (expr->expr) == MEM)
4938               {
4939                 rtx addr = XEXP (expr->expr, 0);
4940
4941                 if (GET_CODE (addr) == SYMBOL_REF
4942                     && CONSTANT_POOL_ADDRESS_P (addr))
4943                   continue;
4944
4945                 /* ??? Optimally, we would use interprocedural alias
4946                    analysis to determine if this mem is actually killed
4947                    by this call.  */
4948                 RESET_BIT (transpout[bb], expr->bitmap_index);
4949               }
4950         }
4951     }
4952 }
4953
4954 /* Removal of useless null pointer checks */
4955
4956 /* Called via note_stores.  X is set by SETTER.  If X is a register we must
4957    invalidate nonnull_local and set nonnull_killed.  DATA is really a
4958    `null_pointer_info *'.
4959
4960    We ignore hard registers.  */
4961 static void
4962 invalidate_nonnull_info (x, setter, data)
4963      rtx x;
4964      rtx setter ATTRIBUTE_UNUSED;
4965      void *data;
4966 {
4967   int offset, regno;
4968   struct null_pointer_info* npi = (struct null_pointer_info *) data;
4969
4970   offset = 0;
4971   while (GET_CODE (x) == SUBREG)
4972     x = SUBREG_REG (x);
4973
4974   /* Ignore anything that is not a register or is a hard register.  */
4975   if (GET_CODE (x) != REG
4976       || REGNO (x) < npi->min_reg
4977       || REGNO (x) >= npi->max_reg)
4978     return;
4979
4980   regno = REGNO (x) - npi->min_reg;
4981
4982   RESET_BIT (npi->nonnull_local[npi->current_block], regno);
4983   SET_BIT (npi->nonnull_killed[npi->current_block], regno);
4984 }
4985
4986 /* Do null-pointer check elimination for the registers indicated in
4987    NPI.  NONNULL_AVIN and NONNULL_AVOUT are pre-allocated sbitmaps;
4988    they are not our responsibility to free.  */
4989
4990 static void
4991 delete_null_pointer_checks_1 (s_preds, block_reg, nonnull_avin,
4992                               nonnull_avout, npi)
4993      int_list_ptr *s_preds;
4994      int *block_reg;
4995      sbitmap *nonnull_avin;
4996      sbitmap *nonnull_avout;
4997      struct null_pointer_info *npi;
4998 {
4999   int bb;
5000   int current_block;
5001   sbitmap *nonnull_local = npi->nonnull_local;
5002   sbitmap *nonnull_killed = npi->nonnull_killed;
5003
5004   /* Compute local properties, nonnull and killed.  A register will have
5005      the nonnull property if at the end of the current block its value is
5006      known to be nonnull.  The killed property indicates that somewhere in
5007      the block any information we had about the register is killed.
5008
5009      Note that a register can have both properties in a single block.  That
5010      indicates that it's killed, then later in the block a new value is
5011      computed.  */
5012   sbitmap_vector_zero (nonnull_local, n_basic_blocks);
5013   sbitmap_vector_zero (nonnull_killed, n_basic_blocks);
5014   for (current_block = 0; current_block < n_basic_blocks; current_block++)
5015     {
5016       rtx insn, stop_insn;
5017
5018       /* Set the current block for invalidate_nonnull_info.  */
5019       npi->current_block = current_block;
5020
5021       /* Scan each insn in the basic block looking for memory references and
5022          register sets.  */
5023       stop_insn = NEXT_INSN (BLOCK_END (current_block));
5024       for (insn = BLOCK_HEAD (current_block);
5025            insn != stop_insn;
5026            insn = NEXT_INSN (insn))
5027         {
5028           rtx set;
5029           rtx reg;
5030
5031           /* Ignore anything that is not a normal insn.  */
5032           if (GET_RTX_CLASS (GET_CODE (insn)) != 'i')
5033             continue;
5034
5035           /* Basically ignore anything that is not a simple SET.  We do have
5036              to make sure to invalidate nonnull_local and set nonnull_killed
5037              for such insns though.  */
5038           set = single_set (insn);
5039           if (!set)
5040             {
5041               note_stores (PATTERN (insn), invalidate_nonnull_info, npi);
5042               continue;
5043             }
5044
5045           /* See if we've got a useable memory load.  We handle it first
5046              in case it uses its address register as a dest (which kills
5047              the nonnull property).  */
5048           if (GET_CODE (SET_SRC (set)) == MEM
5049               && GET_CODE ((reg = XEXP (SET_SRC (set), 0))) == REG
5050               && REGNO (reg) >= npi->min_reg
5051               && REGNO (reg) < npi->max_reg)
5052             SET_BIT (nonnull_local[current_block],
5053                      REGNO (reg) - npi->min_reg);
5054
5055           /* Now invalidate stuff clobbered by this insn.  */
5056           note_stores (PATTERN (insn), invalidate_nonnull_info, npi);
5057
5058           /* And handle stores, we do these last since any sets in INSN can
5059              not kill the nonnull property if it is derived from a MEM
5060              appearing in a SET_DEST.  */
5061           if (GET_CODE (SET_DEST (set)) == MEM
5062               && GET_CODE ((reg = XEXP (SET_DEST (set), 0))) == REG
5063               && REGNO (reg) >= npi->min_reg
5064               && REGNO (reg) < npi->max_reg)
5065             SET_BIT (nonnull_local[current_block],
5066                      REGNO (reg) - npi->min_reg);
5067         }
5068     }
5069
5070   /* Now compute global properties based on the local properties.   This
5071      is a classic global availablity algorithm.  */
5072   compute_available (nonnull_local, nonnull_killed,
5073                      nonnull_avout, nonnull_avin);
5074
5075   /* Now look at each bb and see if it ends with a compare of a value
5076      against zero.  */
5077   for (bb = 0; bb < n_basic_blocks; bb++)
5078     {
5079       rtx last_insn = BLOCK_END (bb);
5080       rtx condition, earliest;
5081       int compare_and_branch;
5082
5083       /* Since MIN_REG is always at least FIRST_PSEUDO_REGISTER, and
5084          since BLOCK_REG[BB] is zero if this block did not end with a
5085          comparison against zero, this condition works.  */
5086       if (block_reg[bb] < npi->min_reg
5087           || block_reg[bb] >= npi->max_reg)
5088         continue;
5089
5090       /* LAST_INSN is a conditional jump.  Get its condition.  */
5091       condition = get_condition (last_insn, &earliest);
5092
5093       /* Is the register known to have a nonzero value?  */
5094       if (!TEST_BIT (nonnull_avout[bb], block_reg[bb] - npi->min_reg))
5095         continue;
5096
5097       /* Try to compute whether the compare/branch at the loop end is one or
5098          two instructions.  */
5099       if (earliest == last_insn)
5100         compare_and_branch = 1;
5101       else if (earliest == prev_nonnote_insn (last_insn))
5102         compare_and_branch = 2;
5103       else
5104         continue;
5105
5106       /* We know the register in this comparison is nonnull at exit from
5107          this block.  We can optimize this comparison.  */
5108       if (GET_CODE (condition) == NE)
5109         {
5110           rtx new_jump;
5111
5112           new_jump = emit_jump_insn_before (gen_jump (JUMP_LABEL (last_insn)),
5113                                             last_insn);
5114           JUMP_LABEL (new_jump) = JUMP_LABEL (last_insn);
5115           LABEL_NUSES (JUMP_LABEL (new_jump))++;
5116           emit_barrier_after (new_jump);
5117         }
5118       delete_insn (last_insn);
5119       if (compare_and_branch == 2)
5120         delete_insn (earliest);
5121
5122       /* Don't check this block again.  (Note that BLOCK_END is
5123          invalid here; we deleted the last instruction in the
5124          block.)  */
5125       block_reg[bb] = 0;
5126     }
5127 }
5128
5129 /* Find EQ/NE comparisons against zero which can be (indirectly) evaluated
5130    at compile time.
5131
5132    This is conceptually similar to global constant/copy propagation and
5133    classic global CSE (it even uses the same dataflow equations as cprop).
5134
5135    If a register is used as memory address with the form (mem (reg)), then we
5136    know that REG can not be zero at that point in the program.  Any instruction
5137    which sets REG "kills" this property.
5138
5139    So, if every path leading to a conditional branch has an available memory
5140    reference of that form, then we know the register can not have the value
5141    zero at the conditional branch.
5142
5143    So we merely need to compute the local properies and propagate that data
5144    around the cfg, then optimize where possible.
5145
5146    We run this pass two times.  Once before CSE, then again after CSE.  This
5147    has proven to be the most profitable approach.  It is rare for new
5148    optimization opportunities of this nature to appear after the first CSE
5149    pass.
5150
5151    This could probably be integrated with global cprop with a little work.  */
5152
5153 void
5154 delete_null_pointer_checks (f)
5155      rtx f;
5156 {
5157   int_list_ptr *s_preds, *s_succs;
5158   int *num_preds, *num_succs;
5159   sbitmap *nonnull_avin, *nonnull_avout;
5160   int *block_reg;
5161   int bb;
5162   int reg;
5163   int regs_per_pass;
5164   int max_reg;
5165   struct null_pointer_info npi;
5166
5167   /* First break the program into basic blocks.  */
5168   find_basic_blocks (f, max_reg_num (), NULL, 1);
5169
5170   /* If we have only a single block, then there's nothing to do.  */
5171   if (n_basic_blocks <= 1)
5172     {
5173       /* Free storage allocated by find_basic_blocks.  */
5174       free_basic_block_vars (0);
5175       return;
5176     }
5177
5178   /* Trying to perform global optimizations on flow graphs which have
5179      a high connectivity will take a long time and is unlikely to be
5180      particularly useful.
5181
5182      In normal circumstances a cfg should have about twice has many edges
5183      as blocks.  But we do not want to punish small functions which have
5184      a couple switch statements.  So we require a relatively large number
5185      of basic blocks and the ratio of edges to blocks to be high.  */
5186   if (n_basic_blocks > 1000 && n_edges / n_basic_blocks >= 20)
5187     {
5188       /* Free storage allocated by find_basic_blocks.  */
5189       free_basic_block_vars (0);
5190       return;
5191     }
5192
5193   /* We need predecessor/successor lists as well as pred/succ counts for
5194      each basic block.  */
5195   s_preds = (int_list_ptr *) gmalloc (n_basic_blocks * sizeof (int_list_ptr));
5196   s_succs = (int_list_ptr *) gmalloc (n_basic_blocks * sizeof (int_list_ptr));
5197   num_preds = (int *) gmalloc (n_basic_blocks * sizeof (int));
5198   num_succs = (int *) gmalloc (n_basic_blocks * sizeof (int));
5199   compute_preds_succs (s_preds, s_succs, num_preds, num_succs);
5200
5201   /* We need four bitmaps, each with a bit for each register in each
5202      basic block.  */
5203   max_reg = max_reg_num ();
5204   regs_per_pass = get_bitmap_width (4, n_basic_blocks, max_reg);
5205
5206   /* Allocate bitmaps to hold local and global properties.  */
5207   npi.nonnull_local = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);
5208   npi.nonnull_killed = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);
5209   nonnull_avin = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);
5210   nonnull_avout = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);
5211
5212   /* Go through the basic blocks, seeing whether or not each block
5213      ends with a conditional branch whose condition is a comparison
5214      against zero.  Record the register compared in BLOCK_REG.  */
5215   block_reg = (int *) xcalloc (n_basic_blocks, sizeof (int));
5216   for (bb = 0; bb < n_basic_blocks; bb++)
5217     {
5218       rtx last_insn = BLOCK_END (bb);
5219       rtx condition, earliest, reg;
5220
5221       /* We only want conditional branches.  */
5222       if (GET_CODE (last_insn) != JUMP_INSN
5223           || !condjump_p (last_insn)
5224           || simplejump_p (last_insn))
5225         continue;
5226
5227       /* LAST_INSN is a conditional jump.  Get its condition.  */
5228       condition = get_condition (last_insn, &earliest);
5229
5230       /* If we were unable to get the condition, or it is not a equality
5231          comparison against zero then there's nothing we can do.  */
5232       if (!condition
5233           || (GET_CODE (condition) != NE && GET_CODE (condition) != EQ)
5234           || GET_CODE (XEXP (condition, 1)) != CONST_INT
5235           || (XEXP (condition, 1)
5236               != CONST0_RTX (GET_MODE (XEXP (condition, 0)))))
5237         continue;
5238
5239       /* We must be checking a register against zero.  */
5240       reg = XEXP (condition, 0);
5241       if (GET_CODE (reg) != REG)
5242         continue;
5243
5244       block_reg[bb] = REGNO (reg);
5245     }
5246
5247   /* Go through the algorithm for each block of registers.  */
5248   for (reg = FIRST_PSEUDO_REGISTER; reg < max_reg; reg += regs_per_pass)
5249     {
5250       npi.min_reg = reg;
5251       npi.max_reg = MIN (reg + regs_per_pass, max_reg);
5252       delete_null_pointer_checks_1 (s_preds, block_reg, nonnull_avin,
5253                                     nonnull_avout, &npi);
5254     }
5255
5256   /* Free storage allocated by find_basic_blocks.  */
5257   free_basic_block_vars (0);
5258
5259   /* Free our local predecessor/successor lists. */
5260   free (s_preds);
5261   free (s_succs);
5262   free (num_preds);
5263   free (num_succs);
5264
5265   /* Free the table of registers compared at the end of every block.  */
5266   free (block_reg);
5267
5268   /* Free bitmaps.  */
5269   free (npi.nonnull_local);
5270   free (npi.nonnull_killed);
5271   free (nonnull_avin);
5272   free (nonnull_avout);
5273 }
5274
5275 /* Code Hoisting variables and subroutines.  */
5276
5277 /* Very busy expressions.  */
5278 static sbitmap *hoist_vbein;
5279 static sbitmap *hoist_vbeout;
5280
5281 /* Hoistable expressions.  */
5282 static sbitmap *hoist_exprs;
5283
5284 /* Dominator bitmaps.  */
5285 static sbitmap *dominators;
5286
5287 /* ??? We could compute post dominators and run this algorithm in
5288    reverse to to perform tail merging, doing so would probably be
5289    more effective than the tail merging code in jump.c.
5290
5291    It's unclear if tail merging could be run in parallel with
5292    code hoisting.  It would be nice.  */
5293
5294 /* Allocate vars used for code hoisting analysis.  */
5295
5296 static void
5297 alloc_code_hoist_mem (n_blocks, n_exprs)
5298      int n_blocks, n_exprs;
5299 {
5300   antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
5301   transp = sbitmap_vector_alloc (n_blocks, n_exprs);
5302   comp = sbitmap_vector_alloc (n_blocks, n_exprs);
5303
5304   hoist_vbein = sbitmap_vector_alloc (n_blocks, n_exprs);
5305   hoist_vbeout = sbitmap_vector_alloc (n_blocks, n_exprs);
5306   hoist_exprs = sbitmap_vector_alloc (n_blocks, n_exprs);
5307   transpout = sbitmap_vector_alloc (n_blocks, n_exprs);
5308
5309   dominators = sbitmap_vector_alloc (n_blocks, n_blocks);
5310 }
5311
5312 /* Free vars used for code hoisting analysis.  */
5313
5314 static void
5315 free_code_hoist_mem ()
5316 {
5317   free (antloc);
5318   free (transp);
5319   free (comp);
5320
5321   free (hoist_vbein);
5322   free (hoist_vbeout);
5323   free (hoist_exprs);
5324   free (transpout);
5325
5326   free (dominators);
5327 }
5328
5329 /* Compute the very busy expressions at entry/exit from each block.
5330
5331    An expression is very busy if all paths from a given point
5332    compute the expression.  */
5333
5334 static void
5335 compute_code_hoist_vbeinout ()
5336 {
5337   int bb, changed, passes;
5338
5339   sbitmap_vector_zero (hoist_vbeout, n_basic_blocks);
5340   sbitmap_vector_zero (hoist_vbein, n_basic_blocks);
5341
5342   passes = 0;
5343   changed = 1;
5344   while (changed)
5345     {
5346       changed = 0;
5347       /* We scan the blocks in the reverse order to speed up
5348          the convergence.  */
5349       for (bb = n_basic_blocks - 1; bb >= 0; bb--)
5350         {
5351           changed |= sbitmap_a_or_b_and_c (hoist_vbein[bb], antloc[bb],
5352                                            hoist_vbeout[bb], transp[bb]);
5353           if (bb != n_basic_blocks - 1)
5354             sbitmap_intersection_of_succs (hoist_vbeout[bb], hoist_vbein, bb);
5355         }
5356       passes++;
5357     }
5358
5359   if (gcse_file)
5360     fprintf (gcse_file, "hoisting vbeinout computation: %d passes\n", passes);
5361 }
5362
5363 /* Top level routine to do the dataflow analysis needed by code hoisting.  */
5364
5365 static void
5366 compute_code_hoist_data ()
5367 {
5368   compute_local_properties (transp, comp, antloc, 0);
5369   compute_transpout ();
5370   compute_code_hoist_vbeinout ();
5371   compute_flow_dominators (dominators, NULL);
5372   if (gcse_file)
5373     fprintf (gcse_file, "\n");
5374 }
5375
5376 /* Determine if the expression identified by EXPR_INDEX would
5377    reach BB unimpared if it was placed at the end of EXPR_BB.
5378
5379    It's unclear exactly what Muchnick meant by "unimpared".  It seems
5380    to me that the expression must either be computed or transparent in
5381    *every* block in the path(s) from EXPR_BB to BB.  Any other definition
5382    would allow the expression to be hoisted out of loops, even if
5383    the expression wasn't a loop invariant.
5384
5385    Contrast this to reachability for PRE where an expression is
5386    considered reachable if *any* path reaches instead of *all*
5387    paths.  */
5388
5389 static int
5390 hoist_expr_reaches_here_p (expr_bb, expr_index, bb, visited)
5391      int expr_bb;
5392      int expr_index;
5393      int bb;
5394      char *visited;
5395 {
5396   edge pred;
5397   int visited_allocated_locally = 0;
5398
5399
5400   if (visited == NULL)
5401     {
5402        visited_allocated_locally = 1;
5403        visited = xcalloc (n_basic_blocks, 1);
5404     }
5405
5406   visited[expr_bb] = 1;
5407   for (pred = BASIC_BLOCK (bb)->pred; pred != NULL; pred = pred->pred_next)
5408     {
5409       int pred_bb = pred->src->index;
5410
5411       if (pred->src == ENTRY_BLOCK_PTR)
5412         break;
5413       else if (visited[pred_bb])
5414         continue;
5415       /* Does this predecessor generate this expression?  */
5416       else if (TEST_BIT (comp[pred_bb], expr_index))
5417         break;
5418       else if (! TEST_BIT (transp[pred_bb], expr_index))
5419         break;
5420       /* Not killed.  */
5421       else
5422         {
5423           visited[pred_bb] = 1;
5424           if (! hoist_expr_reaches_here_p (expr_bb, expr_index,
5425                                            pred_bb, visited))
5426             break;
5427         }
5428     }
5429   if (visited_allocated_locally)
5430     free (visited);
5431   return (pred == NULL);
5432 }
5433 \f
5434 /* Actually perform code hoisting.  */
5435 static void
5436 hoist_code ()
5437 {
5438   int bb, dominated, i;
5439   struct expr **index_map;
5440
5441   sbitmap_vector_zero (hoist_exprs, n_basic_blocks);
5442
5443   /* Compute a mapping from expression number (`bitmap_index') to
5444      hash table entry.  */
5445
5446   index_map = xcalloc (n_exprs, sizeof (struct expr *));
5447   for (i = 0; i < expr_hash_table_size; i++)
5448     {
5449       struct expr *expr;
5450
5451       for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
5452         index_map[expr->bitmap_index] = expr;
5453     }
5454
5455   /* Walk over each basic block looking for potentially hoistable
5456      expressions, nothing gets hoisted from the entry block.  */
5457   for (bb = 0; bb < n_basic_blocks; bb++)
5458     {
5459       int found = 0;
5460       int insn_inserted_p;
5461
5462       /* Examine each expression that is very busy at the exit of this
5463          block.  These are the potentially hoistable expressions.  */
5464       for (i = 0; i < hoist_vbeout[bb]->n_bits; i++)
5465         {
5466           int hoistable = 0;
5467           if (TEST_BIT (hoist_vbeout[bb], i)
5468               && TEST_BIT (transpout[bb], i))
5469             {
5470               /* We've found a potentially hoistable expression, now
5471                  we look at every block BB dominates to see if it
5472                  computes the expression.  */
5473               for (dominated = 0; dominated < n_basic_blocks; dominated++)
5474                 {
5475                   /* Ignore self dominance.  */
5476                   if (bb == dominated
5477                       || ! TEST_BIT (dominators[dominated], bb))
5478                     continue;
5479
5480                   /* We've found a dominated block, now see if it computes
5481                      the busy expression and whether or not moving that
5482                      expression to the "beginning" of that block is safe.  */
5483                   if (!TEST_BIT (antloc[dominated], i))
5484                     continue;
5485
5486                   /* Note if the expression would reach the dominated block
5487                      unimpared if it was placed at the end of BB.
5488
5489                      Keep track of how many times this expression is hoistable
5490                      from a dominated block into BB.  */
5491                   if (hoist_expr_reaches_here_p (bb, i, dominated, NULL))
5492                     hoistable++;
5493                 }
5494
5495               /* If we found more than one hoistable occurence of this
5496                  expression, then note it in the bitmap of expressions to
5497                  hoist.  It makes no sense to hoist things which are computed
5498                  in only one BB, and doing so tends to pessimize register
5499                  allocation.  One could increase this value to try harder
5500                  to avoid any possible code expansion due to register
5501                  allocation issues; however experiments have shown that
5502                  the vast majority of hoistable expressions are only movable
5503                  from two successors, so raising this threshhold is likely
5504                  to nullify any benefit we get from code hoisting.  */
5505               if (hoistable > 1)
5506                 {
5507                   SET_BIT (hoist_exprs[bb], i);
5508                   found = 1;
5509                 }
5510             }
5511         }
5512
5513       /* If we found nothing to hoist, then quit now.  */
5514       if (! found)
5515         continue;
5516
5517       /* Loop over all the hoistable expressions.  */
5518       for (i = 0; i < hoist_exprs[bb]->n_bits; i++)
5519         {
5520           /* We want to insert the expression into BB only once, so
5521              note when we've inserted it.  */
5522           insn_inserted_p = 0;
5523
5524           /* These tests should be the same as the tests above.  */
5525           if (TEST_BIT (hoist_vbeout[bb], i))
5526             {
5527               /* We've found a potentially hoistable expression, now
5528                  we look at every block BB dominates to see if it
5529                  computes the expression.  */
5530               for (dominated = 0; dominated < n_basic_blocks; dominated++)
5531                 {
5532                   /* Ignore self dominance.  */
5533                   if (bb == dominated
5534                       || ! TEST_BIT (dominators[dominated], bb))
5535                     continue;
5536
5537                   /* We've found a dominated block, now see if it computes
5538                      the busy expression and whether or not moving that
5539                      expression to the "beginning" of that block is safe.  */
5540                   if (!TEST_BIT (antloc[dominated], i))
5541                     continue;
5542
5543                   /* The expression is computed in the dominated block and
5544                      it would be safe to compute it at the start of the
5545                      dominated block.  Now we have to determine if the
5546                      expresion would reach the dominated block if it was
5547                      placed at the end of BB.  */
5548                   if (hoist_expr_reaches_here_p (bb, i, dominated, NULL))
5549                     {
5550                       struct expr *expr = index_map[i];
5551                       struct occr *occr = expr->antic_occr;
5552                       rtx insn;
5553                       rtx set;
5554
5555
5556                       /* Find the right occurence of this expression.  */
5557                       while (BLOCK_NUM (occr->insn) != dominated && occr)
5558                         occr = occr->next;
5559
5560                       /* Should never happen.  */
5561                       if (!occr)
5562                         abort ();
5563
5564                       insn = occr->insn;
5565
5566                       set = single_set (insn);
5567                       if (! set)
5568                         abort ();
5569
5570                       /* Create a pseudo-reg to store the result of reaching
5571                          expressions into.  Get the mode for the new pseudo
5572                          from the mode of the original destination pseudo.  */
5573                       if (expr->reaching_reg == NULL)
5574                         expr->reaching_reg
5575                           = gen_reg_rtx (GET_MODE (SET_DEST (set)));
5576
5577                       /* In theory this should never fail since we're creating
5578                          a reg->reg copy.
5579
5580                          However, on the x86 some of the movXX patterns actually
5581                          contain clobbers of scratch regs.  This may cause the
5582                          insn created by validate_change to not match any
5583                          pattern and thus cause validate_change to fail.   */
5584                       if (validate_change (insn, &SET_SRC (set),
5585                                            expr->reaching_reg, 0))
5586                         {
5587                           occr->deleted_p = 1;
5588                           if (!insn_inserted_p)
5589                             {
5590                               insert_insn_end_bb (index_map[i], bb, 0);
5591                               insn_inserted_p = 1;
5592                             }
5593                         }
5594                     }
5595                 }
5596             }
5597         }
5598     }
5599     free (index_map);
5600 }
5601
5602 /* Top level routine to perform one code hoisting (aka unification) pass
5603
5604    Return non-zero if a change was made.  */
5605
5606 static int
5607 one_code_hoisting_pass ()
5608 {
5609   int changed = 0;
5610
5611   alloc_expr_hash_table (max_cuid);
5612   compute_expr_hash_table ();
5613   if (gcse_file)
5614     dump_hash_table (gcse_file, "Code Hosting Expressions", expr_hash_table,
5615                      expr_hash_table_size, n_exprs);
5616   if (n_exprs > 0)
5617     {
5618       alloc_code_hoist_mem (n_basic_blocks, n_exprs);
5619       compute_code_hoist_data ();
5620       hoist_code ();
5621       free_code_hoist_mem ();
5622     }
5623   free_expr_hash_table ();
5624
5625   return changed;
5626 }