gcc/gcse.c

   1 /* Global common subexpression elimination/Partial redundancy elimination
   2    and global constant/copy propagation for GNU compiler.
   3    Copyright (C) 1997, 1998, 1999 Free Software Foundation, Inc.
   4
   5 This file is part of GNU CC.
   6
   7 GNU CC is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 2, or (at your option)
  10 any later version.
  11
  12 GNU CC is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GNU CC; see the file COPYING.  If not, write to
  19 the Free Software Foundation, 59 Temple Place - Suite 330,
  20 Boston, MA 02111-1307, USA.  */
  21
  22 /* TODO
  23    - reordering of memory allocation and freeing to be more space efficient
  24    - do rough calc of how many regs are needed in each block, and a rough
  25      calc of how many regs are available in each class and use that to
  26      throttle back the code in cases where RTX_COST is minimal.
  27    - dead store elimination
  28    - a store to the same address as a load does not kill the load if the
  29      source of the store is also the destination of the load.  Handling this
  30      allows more load motion, particularly out of loops.
  31    - ability to realloc sbitmap vectors would allow one initial computation
  32      of reg_set_in_block with only subsequent additions, rather than
  33      recomputing it for each pass
  34
  35 */
  36
  37 /* References searched while implementing this.
  38
  39    Compilers Principles, Techniques and Tools
  40    Aho, Sethi, Ullman
  41    Addison-Wesley, 1988
  42
  43    Global Optimization by Suppression of Partial Redundancies
  44    E. Morel, C. Renvoise
  45    communications of the acm, Vol. 22, Num. 2, Feb. 1979
  46
  47    A Portable Machine-Independent Global Optimizer - Design and Measurements
  48    Frederick Chow
  49    Stanford Ph.D. thesis, Dec. 1983
  50
  51    A Fast Algorithm for Code Movement Optimization
  52    D.M. Dhamdhere
  53    SIGPLAN Notices, Vol. 23, Num. 10, Oct. 1988
  54
  55    A Solution to a Problem with Morel and Renvoise's
  56    Global Optimization by Suppression of Partial Redundancies
  57    K-H Drechsler, M.P. Stadel
  58    ACM TOPLAS, Vol. 10, Num. 4, Oct. 1988
  59
  60    Practical Adaptation of the Global Optimization
  61    Algorithm of Morel and Renvoise
  62    D.M. Dhamdhere
  63    ACM TOPLAS, Vol. 13, Num. 2. Apr. 1991
  64
  65    Efficiently Computing Static Single Assignment Form and the Control
  66    Dependence Graph
  67    R. Cytron, J. Ferrante, B.K. Rosen, M.N. Wegman, and F.K. Zadeck
  68    ACM TOPLAS, Vol. 13, Num. 4, Oct. 1991
  69
  70    Lazy Code Motion
  71    J. Knoop, O. Ruthing, B. Steffen
  72    ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI
  73
  74    What's In a Region?  Or Computing Control Dependence Regions in Near-Linear
  75    Time for Reducible Flow Control
  76    Thomas Ball
  77    ACM Letters on Programming Languages and Systems,
  78    Vol. 2, Num. 1-4, Mar-Dec 1993
  79
  80    An Efficient Representation for Sparse Sets
  81    Preston Briggs, Linda Torczon
  82    ACM Letters on Programming Languages and Systems,
  83    Vol. 2, Num. 1-4, Mar-Dec 1993
  84
  85    A Variation of Knoop, Ruthing, and Steffen's Lazy Code Motion
  86    K-H Drechsler, M.P. Stadel
  87    ACM SIGPLAN Notices, Vol. 28, Num. 5, May 1993
  88
  89    Partial Dead Code Elimination
  90    J. Knoop, O. Ruthing, B. Steffen
  91    ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
  92
  93    Effective Partial Redundancy Elimination
  94    P. Briggs, K.D. Cooper
  95    ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
  96
  97    The Program Structure Tree: Computing Control Regions in Linear Time
  98    R. Johnson, D. Pearson, K. Pingali
  99    ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
 100
 101    Optimal Code Motion: Theory and Practice
 102    J. Knoop, O. Ruthing, B. Steffen
 103    ACM TOPLAS, Vol. 16, Num. 4, Jul. 1994
 104
 105    The power of assignment motion
 106    J. Knoop, O. Ruthing, B. Steffen
 107    ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI
 108
 109    Global code motion / global value numbering
 110    C. Click
 111    ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI
 112
 113    Value Driven Redundancy Elimination
 114    L.T. Simpson
 115    Rice University Ph.D. thesis, Apr. 1996
 116
 117    Value Numbering
 118    L.T. Simpson
 119    Massively Scalar Compiler Project, Rice University, Sep. 1996
 120
 121    High Performance Compilers for Parallel Computing
 122    Michael Wolfe
 123    Addison-Wesley, 1996
 124
 125    Advanced Compiler Design and Implementation
 126    Steven Muchnick
 127    Morgan Kaufmann, 1997
 128
 129    Building an Optimizing Compiler
 130    Robert Morgan
 131    Digital Press, 1998
 132
 133    People wishing to speed up the code here should read:
 134      Elimination Algorithms for Data Flow Analysis
 135      B.G. Ryder, M.C. Paull
 136      ACM Computing Surveys, Vol. 18, Num. 3, Sep. 1986
 137
 138      How to Analyze Large Programs Efficiently and Informatively
 139      D.M. Dhamdhere, B.K. Rosen, F.K. Zadeck
 140      ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI
 141
 142    People wishing to do something different can find various possibilities
 143    in the above papers and elsewhere.
 144 */
 145
 146 #include "config.h"
 147 #include "system.h"
 148 #include "toplev.h"
 149
 150 #include "rtl.h"
 151 #include "tm_p.h"
 152 #include "regs.h"
 153 #include "hard-reg-set.h"
 154 #include "flags.h"
 155 #include "real.h"
 156 #include "insn-config.h"
 157 #include "recog.h"
 158 #include "basic-block.h"
 159 #include "output.h"
 160 #include "function.h"
 161 #include "expr.h"
 162
 163 #include "obstack.h"
 164 #define obstack_chunk_alloc gmalloc
 165 #define obstack_chunk_free free
 166
 167 /* Maximum number of passes to perform.  */
 168 #define MAX_PASSES 1
 169
 170 /* Propagate flow information through back edges and thus enable PRE's
 171    moving loop invariant calculations out of loops.
 172
 173    Originally this tended to create worse overall code, but several
 174    improvements during the development of PRE seem to have made following
 175    back edges generally a win.
 176
 177    Note much of the loop invariant code motion done here would normally
 178    be done by loop.c, which has more heuristics for when to move invariants
 179    out of loops.  At some point we might need to move some of those
 180    heuristics into gcse.c.  */
 181 #define FOLLOW_BACK_EDGES 1
 182
 183 /* We support GCSE via Partial Redundancy Elimination.  PRE optimizations
 184    are a superset of those done by GCSE.
 185
 186    We perform the following steps:
 187
 188    1) Compute basic block information.
 189
 190    2) Compute table of places where registers are set.
 191
 192    3) Perform copy/constant propagation.
 193
 194    4) Perform global cse.
 195
 196    5) Perform another pass of copy/constant propagation.
 197
 198    Two passes of copy/constant propagation are done because the first one
 199    enables more GCSE and the second one helps to clean up the copies that
 200    GCSE creates.  This is needed more for PRE than for Classic because Classic
 201    GCSE will try to use an existing register containing the common
 202    subexpression rather than create a new one.  This is harder to do for PRE
 203    because of the code motion (which Classic GCSE doesn't do).
 204
 205    Expressions we are interested in GCSE-ing are of the form
 206    (set (pseudo-reg) (expression)).
 207    Function want_to_gcse_p says what these are.
 208
 209    PRE handles moving invariant expressions out of loops (by treating them as
 210    partially redundant).
 211
 212    Eventually it would be nice to replace cse.c/gcse.c with SSA (static single
 213    assignment) based GVN (global value numbering).  L. T. Simpson's paper
 214    (Rice University) on value numbering is a useful reference for this.
 215
 216    **********************
 217
 218    We used to support multiple passes but there are diminishing returns in
 219    doing so.  The first pass usually makes 90% of the changes that are doable.
 220    A second pass can make a few more changes made possible by the first pass.
 221    Experiments show any further passes don't make enough changes to justify
 222    the expense.
 223
 224    A study of spec92 using an unlimited number of passes:
 225    [1 pass] = 1208 substitutions, [2] = 577, [3] = 202, [4] = 192, [5] = 83,
 226    [6] = 34, [7] = 17, [8] = 9, [9] = 4, [10] = 4, [11] = 2,
 227    [12] = 2, [13] = 1, [15] = 1, [16] = 2, [41] = 1
 228
 229    It was found doing copy propagation between each pass enables further
 230    substitutions.
 231
 232    PRE is quite expensive in complicated functions because the DFA can take
 233    awhile to converge.  Hence we only perform one pass.  Macro MAX_PASSES can
 234    be modified if one wants to experiment.
 235
 236    **********************
 237
 238    The steps for PRE are:
 239
 240    1) Build the hash table of expressions we wish to GCSE (expr_hash_table).
 241
 242    2) Perform the data flow analysis for PRE.
 243
 244    3) Delete the redundant instructions
 245
 246    4) Insert the required copies [if any] that make the partially
 247       redundant instructions fully redundant.
 248
 249    5) For other reaching expressions, insert an instruction to copy the value
 250       to a newly created pseudo that will reach the redundant instruction.
 251
 252    The deletion is done first so that when we do insertions we
 253    know which pseudo reg to use.
 254
 255    Various papers have argued that PRE DFA is expensive (O(n^2)) and others
 256    argue it is not.  The number of iterations for the algorithm to converge
 257    is typically 2-4 so I don't view it as that expensive (relatively speaking).
 258
 259    PRE GCSE depends heavily on the second CSE pass to clean up the copies
 260    we create.  To make an expression reach the place where it's redundant,
 261    the result of the expression is copied to a new register, and the redundant
 262    expression is deleted by replacing it with this new register.  Classic GCSE
 263    doesn't have this problem as much as it computes the reaching defs of
 264    each register in each block and thus can try to use an existing register.
 265
 266    **********************
 267
 268    A fair bit of simplicity is created by creating small functions for simple
 269    tasks, even when the function is only called in one place.  This may
 270    measurably slow things down [or may not] by creating more function call
 271    overhead than is necessary.  The source is laid out so that it's trivial
 272    to make the affected functions inline so that one can measure what speed
 273    up, if any, can be achieved, and maybe later when things settle things can
 274    be rearranged.
 275
 276    Help stamp out big monolithic functions!  */
 277 \f
 278 /* GCSE global vars.  */
 279
 280 /* -dG dump file.  */
 281 static FILE *gcse_file;
 282
 283 /* Note whether or not we should run jump optimization after gcse.  We
 284    want to do this for two cases.
 285
 286     * If we changed any jumps via cprop.
 287
 288     * If we added any labels via edge splitting.  */
 289
 290 static int run_jump_opt_after_gcse;
 291
 292 /* Bitmaps are normally not included in debugging dumps.
 293    However it's useful to be able to print them from GDB.
 294    We could create special functions for this, but it's simpler to
 295    just allow passing stderr to the dump_foo fns.  Since stderr can
 296    be a macro, we store a copy here.  */
 297 static FILE *debug_stderr;
 298
 299 /* An obstack for our working variables.  */
 300 static struct obstack gcse_obstack;
 301
 302 /* Non-zero for each mode that supports (set (reg) (reg)).
 303    This is trivially true for integer and floating point values.
 304    It may or may not be true for condition codes.  */
 305 static char can_copy_p[(int) NUM_MACHINE_MODES];
 306
 307 /* Non-zero if can_copy_p has been initialized.  */
 308 static int can_copy_init_p;
 309
 310 struct reg_use {
 311   rtx reg_rtx;
 312 };
 313
 314 /* Hash table of expressions.  */
 315
 316 struct expr
 317 {
 318   /* The expression (SET_SRC for expressions, PATTERN for assignments).  */
 319   rtx expr;
 320   /* Index in the available expression bitmaps.  */
 321   int bitmap_index;
 322   /* Next entry with the same hash.  */
 323   struct expr *next_same_hash;
 324   /* List of anticipatable occurrences in basic blocks in the function.
 325      An "anticipatable occurrence" is one that is the first occurrence in the
 326      basic block, the operands are not modified in the basic block prior
 327      to the occurrence and the output is not used between the start of
 328      the block and the occurrence.  */
 329   struct occr *antic_occr;
 330   /* List of available occurrence in basic blocks in the function.
 331      An "available occurrence" is one that is the last occurrence in the
 332      basic block and the operands are not modified by following statements in
 333      the basic block [including this insn].  */
 334   struct occr *avail_occr;
 335   /* Non-null if the computation is PRE redundant.
 336      The value is the newly created pseudo-reg to record a copy of the
 337      expression in all the places that reach the redundant copy.  */
 338   rtx reaching_reg;
 339 };
 340
 341 /* Occurrence of an expression.
 342    There is one per basic block.  If a pattern appears more than once the
 343    last appearance is used [or first for anticipatable expressions].  */
 344
 345 struct occr
 346 {
 347   /* Next occurrence of this expression.  */
 348   struct occr *next;
 349   /* The insn that computes the expression.  */
 350   rtx insn;
 351   /* Non-zero if this [anticipatable] occurrence has been deleted.  */
 352   char deleted_p;
 353   /* Non-zero if this [available] occurrence has been copied to
 354      reaching_reg.  */
 355   /* ??? This is mutually exclusive with deleted_p, so they could share
 356      the same byte.  */
 357   char copied_p;
 358 };
 359
 360 /* Expression and copy propagation hash tables.
 361    Each hash table is an array of buckets.
 362    ??? It is known that if it were an array of entries, structure elements
 363    `next_same_hash' and `bitmap_index' wouldn't be necessary.  However, it is
 364    not clear whether in the final analysis a sufficient amount of memory would
 365    be saved as the size of the available expression bitmaps would be larger
 366    [one could build a mapping table without holes afterwards though].
 367    Someday I'll perform the computation and figure it out.
 368 */
 369
 370 /* Total size of the expression hash table, in elements.  */
 371 static int expr_hash_table_size;
 372 /* The table itself.
 373    This is an array of `expr_hash_table_size' elements.  */
 374 static struct expr **expr_hash_table;
 375
 376 /* Total size of the copy propagation hash table, in elements.  */
 377 static int set_hash_table_size;
 378 /* The table itself.
 379    This is an array of `set_hash_table_size' elements.  */
 380 static struct expr **set_hash_table;
 381
 382 /* Mapping of uids to cuids.
 383    Only real insns get cuids.  */
 384 static int *uid_cuid;
 385
 386 /* Highest UID in UID_CUID.  */
 387 static int max_uid;
 388
 389 /* Get the cuid of an insn.  */
 390 #define INSN_CUID(INSN) (uid_cuid[INSN_UID (INSN)])
 391
 392 /* Number of cuids.  */
 393 static int max_cuid;
 394
 395 /* Mapping of cuids to insns.  */
 396 static rtx *cuid_insn;
 397
 398 /* Get insn from cuid.  */
 399 #define CUID_INSN(CUID) (cuid_insn[CUID])
 400
 401 /* Maximum register number in function prior to doing gcse + 1.
 402    Registers created during this pass have regno >= max_gcse_regno.
 403    This is named with "gcse" to not collide with global of same name.  */
 404 static int max_gcse_regno;
 405
 406 /* Maximum number of cse-able expressions found.  */
 407 static int n_exprs;
 408 /* Maximum number of assignments for copy propagation found.  */
 409 static int n_sets;
 410
 411 /* Table of registers that are modified.
 412    For each register, each element is a list of places where the pseudo-reg
 413    is set.
 414
 415    For simplicity, GCSE is done on sets of pseudo-regs only.  PRE GCSE only
 416    requires knowledge of which blocks kill which regs [and thus could use
 417    a bitmap instead of the lists `reg_set_table' uses].
 418
 419    `reg_set_table' and could be turned into an array of bitmaps
 420    (num-bbs x num-regs)
 421    [however perhaps it may be useful to keep the data as is].
 422    One advantage of recording things this way is that `reg_set_table' is
 423    fairly sparse with respect to pseudo regs but for hard regs could be
 424    fairly dense [relatively speaking].
 425    And recording sets of pseudo-regs in lists speeds
 426    up functions like compute_transp since in the case of pseudo-regs we only
 427    need to iterate over the number of times a pseudo-reg is set, not over the
 428    number of basic blocks [clearly there is a bit of a slow down in the cases
 429    where a pseudo is set more than once in a block, however it is believed
 430    that the net effect is to speed things up].  This isn't done for hard-regs
 431    because recording call-clobbered hard-regs in `reg_set_table' at each
 432    function call can consume a fair bit of memory, and iterating over hard-regs
 433    stored this way in compute_transp will be more expensive.  */
 434
 435 typedef struct reg_set {
 436   /* The next setting of this register.  */
 437   struct reg_set *next;
 438   /* The insn where it was set.  */
 439   rtx insn;
 440 } reg_set;
 441 static reg_set **reg_set_table;
 442 /* Size of `reg_set_table'.
 443    The table starts out at max_gcse_regno + slop, and is enlarged as
 444    necessary.  */
 445 static int reg_set_table_size;
 446 /* Amount to grow `reg_set_table' by when it's full.  */
 447 #define REG_SET_TABLE_SLOP 100
 448
 449 /* Bitmap containing one bit for each register in the program.
 450    Used when performing GCSE to track which registers have been set since
 451    the start of the basic block.  */
 452 static sbitmap reg_set_bitmap;
 453
 454 /* For each block, a bitmap of registers set in the block.
 455    This is used by expr_killed_p and compute_transp.
 456    It is computed during hash table computation and not by compute_sets
 457    as it includes registers added since the last pass (or between cprop and
 458    gcse) and it's currently not easy to realloc sbitmap vectors.  */
 459 static sbitmap *reg_set_in_block;
 460
 461 /* For each block, non-zero if memory is set in that block.
 462    This is computed during hash table computation and is used by
 463    expr_killed_p and compute_transp.
 464    ??? Handling of memory is very simple, we don't make any attempt
 465    to optimize things (later).
 466    ??? This can be computed by compute_sets since the information
 467    doesn't change.  */
 468 static char *mem_set_in_block;
 469
 470 /* Various variables for statistics gathering.  */
 471
 472 /* Memory used in a pass.
 473    This isn't intended to be absolutely precise.  Its intent is only
 474    to keep an eye on memory usage.  */
 475 static int bytes_used;
 476 /* GCSE substitutions made.  */
 477 static int gcse_subst_count;
 478 /* Number of copy instructions created.  */
 479 static int gcse_create_count;
 480 /* Number of constants propagated.  */
 481 static int const_prop_count;
 482 /* Number of copys propagated.  */
 483 static int copy_prop_count;
 484 \f
 485 /* These variables are used by classic GCSE.
 486    Normally they'd be defined a bit later, but `rd_gen' needs to
 487    be declared sooner.  */
 488
 489 /* A bitmap of all ones for implementing the algorithm for available
 490    expressions and reaching definitions.  */
 491 /* ??? Available expression bitmaps have a different size than reaching
 492    definition bitmaps.  This should be the larger of the two, however, it
 493    is not currently used for reaching definitions.  */
 494 static sbitmap u_bitmap;
 495
 496 /* Each block has a bitmap of each type.
 497    The length of each blocks bitmap is:
 498
 499        max_cuid  - for reaching definitions
 500        n_exprs - for available expressions
 501
 502    Thus we view the bitmaps as 2 dimensional arrays.  i.e.
 503    rd_kill[block_num][cuid_num]
 504    ae_kill[block_num][expr_num]
 505 */
 506
 507 /* For reaching defs */
 508 static sbitmap *rd_kill, *rd_gen, *reaching_defs, *rd_out;
 509
 510 /* for available exprs */
 511 static sbitmap *ae_kill, *ae_gen, *ae_in, *ae_out;
 512
 513 /* Objects of this type are passed around by the null-pointer check
 514    removal routines.  */
 515 struct null_pointer_info {
 516   /* The basic block being processed.  */
 517   int current_block;
 518   /* The first register to be handled in this pass.  */
 519   int min_reg;
 520   /* One greater than the last register to be handled in this pass.  */
 521   int max_reg;
 522   sbitmap *nonnull_local;
 523   sbitmap *nonnull_killed;
 524 };
 525 \f
 526 static void compute_can_copy      PROTO ((void));
 527
 528 static char *gmalloc              PROTO ((unsigned int));
 529 static char *grealloc            PROTO ((char *, unsigned int));
 530 static char *gcse_alloc        PROTO ((unsigned long));
 531 static void alloc_gcse_mem          PROTO ((rtx));
 532 static void free_gcse_mem            PROTO ((void));
 533 static void alloc_reg_set_mem    PROTO ((int));
 534 static void free_reg_set_mem      PROTO ((void));
 535 static int get_bitmap_width           PROTO ((int, int, int));
 536 static void record_one_set          PROTO ((int, rtx));
 537 static void record_set_info        PROTO ((rtx, rtx, void *));
 538 static void compute_sets              PROTO ((rtx));
 539
 540 static void hash_scan_insn          PROTO ((rtx, int, int));
 541 static void hash_scan_set            PROTO ((rtx, rtx, int));
 542 static void hash_scan_clobber    PROTO ((rtx, rtx));
 543 static void hash_scan_call          PROTO ((rtx, rtx));
 544 static int want_to_gcse_p            PROTO ((rtx));
 545 static int oprs_unchanged_p        PROTO ((rtx, rtx, int));
 546 static int oprs_anticipatable_p       PROTO ((rtx, rtx));
 547 static int oprs_available_p        PROTO ((rtx, rtx));
 548 static void insert_expr_in_table      PROTO ((rtx, enum machine_mode,
 549                                               rtx, int, int));
 550 static void insert_set_in_table       PROTO ((rtx, rtx));
 551 static unsigned int hash_expr    PROTO ((rtx, enum machine_mode,
 552                                          int *, int));
 553 static unsigned int hash_expr_1       PROTO ((rtx, enum machine_mode, int *));
 554 static unsigned int hash_set      PROTO ((int, int));
 555 static int expr_equiv_p        PROTO ((rtx, rtx));
 556 static void record_last_reg_set_info  PROTO ((rtx, int));
 557 static void record_last_mem_set_info  PROTO ((rtx));
 558 static void record_last_set_info      PROTO ((rtx, rtx, void *));
 559 static void compute_hash_table  PROTO ((int));
 560 static void alloc_set_hash_table      PROTO ((int));
 561 static void free_set_hash_table       PROTO ((void));
 562 static void compute_set_hash_table    PROTO ((void));
 563 static void alloc_expr_hash_table     PROTO ((int));
 564 static void free_expr_hash_table      PROTO ((void));
 565 static void compute_expr_hash_table   PROTO ((void));
 566 static void dump_hash_table        PROTO ((FILE *, const char *, struct expr **,
 567                                            int, int));
 568 static struct expr *lookup_expr       PROTO ((rtx));
 569 static struct expr *lookup_set  PROTO ((int, rtx));
 570 static struct expr *next_set      PROTO ((int, struct expr *));
 571 static void reset_opr_set_tables      PROTO ((void));
 572 static int oprs_not_set_p            PROTO ((rtx, rtx));
 573 static void mark_call            PROTO ((rtx));
 574 static void mark_set              PROTO ((rtx, rtx));
 575 static void mark_clobber              PROTO ((rtx, rtx));
 576 static void mark_oprs_set            PROTO ((rtx));
 577
 578 static void alloc_cprop_mem        PROTO ((int, int));
 579 static void free_cprop_mem          PROTO ((void));
 580 static void compute_transp          PROTO ((rtx, int, sbitmap *, int));
 581 static void compute_transpout       PROTO ((void));
 582 static void compute_local_properties  PROTO ((sbitmap *, sbitmap *,
 583                                               sbitmap *, int));
 584 static void compute_cprop_avinout     PROTO ((void));
 585 static void compute_cprop_data  PROTO ((void));
 586 static void find_used_regs          PROTO ((rtx));
 587 static int try_replace_reg          PROTO ((rtx, rtx, rtx));
 588 static struct expr *find_avail_set    PROTO ((int, rtx));
 589 static int cprop_jump                   PROTO((rtx, rtx, struct reg_use *, rtx));
 590 #ifdef HAVE_cc0
 591 static int cprop_cc0_jump               PROTO((rtx, struct reg_use *, rtx));
 592 #endif
 593 static int cprop_insn            PROTO ((rtx, int));
 594 static int cprop                      PROTO ((int));
 595 static int one_cprop_pass            PROTO ((int, int));
 596
 597 static void alloc_pre_mem            PROTO ((int, int));
 598 static void free_pre_mem              PROTO ((void));
 599 static void compute_pre_data      PROTO ((void));
 600 static int pre_expr_reaches_here_p    PROTO ((int, struct expr *,
 601                                               int, int));
 602 static void insert_insn_end_bb  PROTO ((struct expr *, int, int));
 603 static void pre_insert_copy_insn      PROTO ((struct expr *, rtx));
 604 static void pre_insert_copies    PROTO ((void));
 605 static int pre_delete            PROTO ((void));
 606 static int pre_gcse                PROTO ((void));
 607 static int one_pre_gcse_pass      PROTO ((int));
 608
 609 static void add_label_notes           PROTO ((rtx, rtx));
 610
 611 static void alloc_code_hoist_mem        PROTO ((int, int));
 612 static void free_code_hoist_mem         PROTO ((void));
 613 static void compute_code_hoist_vbeinout PROTO ((void));
 614 static void compute_code_hoist_data     PROTO ((void));
 615 static int hoist_expr_reaches_here_p    PROTO ((int, int, int, char *));
 616 static void hoist_code                  PROTO ((void));
 617 static int one_code_hoisting_pass       PROTO ((void));
 618
 619 static void alloc_rd_mem              PROTO ((int, int));
 620 static void free_rd_mem        PROTO ((void));
 621 static void handle_rd_kill_set  PROTO ((rtx, int, int));
 622 static void compute_kill_rd        PROTO ((void));
 623 static void compute_rd          PROTO ((void));
 624 static void alloc_avail_expr_mem      PROTO ((int, int));
 625 static void free_avail_expr_mem       PROTO ((void));
 626 static void compute_ae_gen          PROTO ((void));
 627 static int expr_killed_p              PROTO ((rtx, int));
 628 static void compute_ae_kill        PROTO ((sbitmap *, sbitmap *));
 629 static int expr_reaches_here_p  PROTO ((struct occr *, struct expr *,
 630                                               int, int));
 631 static rtx computing_insn            PROTO ((struct expr *, rtx));
 632 static int def_reaches_here_p    PROTO ((rtx, rtx));
 633 static int can_disregard_other_sets   PROTO ((struct reg_set **, rtx, int));
 634 static int handle_avail_expr      PROTO ((rtx, struct expr *));
 635 static int classic_gcse        PROTO ((void));
 636 static int one_classic_gcse_pass      PROTO ((int));
 637 static void invalidate_nonnull_info     PROTO ((rtx, rtx, void *));
 638 static void delete_null_pointer_checks_1 PROTO ((int_list_ptr *, int *,
 639                                                  sbitmap *, sbitmap *,
 640                                                  struct null_pointer_info *));
 641 static rtx process_insert_insn  PROTO ((struct expr *));
 642 static int pre_edge_insert      PROTO ((struct edge_list *, struct expr **));
 643 static int expr_reaches_here_p_work     PROTO ((struct occr *, struct expr *, int, int, char *));
 644 static int pre_expr_reaches_here_p_work PROTO ((int, struct expr *, int, int, char *));
 645 \f
 646 /* Entry point for global common subexpression elimination.
 647    F is the first instruction in the function.  */
 648
 649 int
 650 gcse_main (f, file)
 651      rtx f;
 652      FILE *file;
 653 {
 654   int changed, pass;
 655   /* Bytes used at start of pass.  */
 656   int initial_bytes_used;
 657   /* Maximum number of bytes used by a pass.  */
 658   int max_pass_bytes;
 659   /* Point to release obstack data from for each pass.  */
 660   char *gcse_obstack_bottom;
 661
 662   /* We do not construct an accurate cfg in functions which call
 663      setjmp, so just punt to be safe.  */
 664   if (current_function_calls_setjmp)
 665     return 0;
 666
 667   /* Assume that we do not need to run jump optimizations after gcse.  */
 668   run_jump_opt_after_gcse = 0;
 669
 670   /* For calling dump_foo fns from gdb.  */
 671   debug_stderr = stderr;
 672   gcse_file = file;
 673
 674   /* Identify the basic block information for this function, including
 675      successors and predecessors.  */
 676   max_gcse_regno = max_reg_num ();
 677   find_basic_blocks (f, max_gcse_regno, file, 1);
 678
 679   if (file)
 680     dump_flow_info (file);
 681
 682   /* Return if there's nothing to do.  */
 683   if (n_basic_blocks <= 1)
 684     {
 685       /* Free storage allocated by find_basic_blocks.  */
 686       free_basic_block_vars (0);
 687       return 0;
 688     }
 689
 690   /* Trying to perform global optimizations on flow graphs which have
 691      a high connectivity will take a long time and is unlikely to be
 692      particularly useful.
 693
 694      In normal circumstances a cfg should have about twice has many edges
 695      as blocks.  But we do not want to punish small functions which have
 696      a couple switch statements.  So we require a relatively large number
 697      of basic blocks and the ratio of edges to blocks to be high.  */
 698   if (n_basic_blocks > 1000 && n_edges / n_basic_blocks >= 20)
 699     {
 700       /* Free storage allocated by find_basic_blocks.  */
 701       free_basic_block_vars (0);
 702       return 0;
 703     }
 704
 705   /* See what modes support reg/reg copy operations.  */
 706   if (! can_copy_init_p)
 707     {
 708       compute_can_copy ();
 709       can_copy_init_p = 1;
 710     }
 711
 712   gcc_obstack_init (&gcse_obstack);
 713   bytes_used = 0;
 714
 715   /* Record where pseudo-registers are set.
 716      This data is kept accurate during each pass.
 717      ??? We could also record hard-reg information here
 718      [since it's unchanging], however it is currently done during
 719      hash table computation.
 720
 721      It may be tempting to compute MEM set information here too, but MEM
 722      sets will be subject to code motion one day and thus we need to compute
 723      information about memory sets when we build the hash tables.  */
 724
 725   alloc_reg_set_mem (max_gcse_regno);
 726   compute_sets (f);
 727
 728   pass = 0;
 729   initial_bytes_used = bytes_used;
 730   max_pass_bytes = 0;
 731   gcse_obstack_bottom = gcse_alloc (1);
 732   changed = 1;
 733   while (changed && pass < MAX_PASSES)
 734     {
 735       changed = 0;
 736       if (file)
 737         fprintf (file, "GCSE pass %d\n\n", pass + 1);
 738
 739       /* Initialize bytes_used to the space for the pred/succ lists,
 740          and the reg_set_table data.  */
 741       bytes_used = initial_bytes_used;
 742
 743       /* Each pass may create new registers, so recalculate each time.  */
 744       max_gcse_regno = max_reg_num ();
 745
 746       alloc_gcse_mem (f);
 747
 748       /* Don't allow constant propagation to modify jumps
 749          during this pass.  */
 750       changed = one_cprop_pass (pass + 1, 0);
 751
 752       if (optimize_size)
 753         changed |= one_classic_gcse_pass (pass + 1);
 754       else
 755         {
 756           changed |= one_pre_gcse_pass (pass + 1);
 757           free_reg_set_mem ();
 758           alloc_reg_set_mem (max_reg_num ());
 759           compute_sets (f);
 760           run_jump_opt_after_gcse = 1;
 761         }
 762
 763       if (max_pass_bytes < bytes_used)
 764         max_pass_bytes = bytes_used;
 765
 766       /* Free up memory, then reallocate for code hoisting.  We can
 767          not re-use the existing allocated memory because the tables
 768          will not have info for the insns or registers created by
 769          partial redundancy elimination.  */
 770       free_gcse_mem ();
 771
 772       /* It does not make sense to run code hoisting unless we optimizing
 773          for code size -- it rarely makes programs faster, and can make
 774          them bigger if we did partial redundancy elimination (when optimizing
 775          for space, we use a classic gcse algorithm instead of partial
 776          redundancy algorithms).  */
 777       if (optimize_size)
 778         {
 779           max_gcse_regno = max_reg_num ();
 780           alloc_gcse_mem (f);
 781           changed |= one_code_hoisting_pass ();
 782           free_gcse_mem ();
 783
 784           if (max_pass_bytes < bytes_used)
 785             max_pass_bytes = bytes_used;
 786         }
 787
 788       if (file)
 789         {
 790           fprintf (file, "\n");
 791           fflush (file);
 792         }
 793       obstack_free (&gcse_obstack, gcse_obstack_bottom);
 794       pass++;
 795     }
 796
 797   /* Do one last pass of copy propagation, including cprop into
 798      conditional jumps.  */
 799
 800   max_gcse_regno = max_reg_num ();
 801   alloc_gcse_mem (f);
 802   /* This time, go ahead and allow cprop to alter jumps.  */
 803   one_cprop_pass (pass + 1, 1);
 804   free_gcse_mem ();
 805
 806   if (file)
 807     {
 808       fprintf (file, "GCSE of %s: %d basic blocks, ",
 809                current_function_name, n_basic_blocks);
 810       fprintf (file, "%d pass%s, %d bytes\n\n",
 811                pass, pass > 1 ? "es" : "", max_pass_bytes);
 812     }
 813
 814   /* Free our obstack.  */
 815   obstack_free (&gcse_obstack, NULL_PTR);
 816   /* Free reg_set_table.  */
 817   free_reg_set_mem ();
 818   /* Free storage used to record predecessor/successor data.  */
 819   free_bb_mem ();
 820   /* Free storage allocated by find_basic_blocks.  */
 821   free_basic_block_vars (0);
 822   return run_jump_opt_after_gcse;
 823 }
 824 \f
 825 /* Misc. utilities.  */
 826
 827 /* Compute which modes support reg/reg copy operations.  */
 828
 829 static void
 830 compute_can_copy ()
 831 {
 832   int i;
 833 #ifndef AVOID_CCMODE_COPIES
 834   rtx reg,insn;
 835 #endif
 836   char *free_point = (char *) oballoc (1);
 837
 838   bzero (can_copy_p, NUM_MACHINE_MODES);
 839
 840   start_sequence ();
 841   for (i = 0; i < NUM_MACHINE_MODES; i++)
 842     {
 843       switch (GET_MODE_CLASS (i))
 844         {
 845         case MODE_CC :
 846 #ifdef AVOID_CCMODE_COPIES
 847           can_copy_p[i] = 0;
 848 #else
 849           reg = gen_rtx_REG ((enum machine_mode) i, LAST_VIRTUAL_REGISTER + 1);
 850           insn = emit_insn (gen_rtx_SET (VOIDmode, reg, reg));
 851           if (recog (PATTERN (insn), insn, NULL_PTR) >= 0)
 852             can_copy_p[i] = 1;
 853 #endif
 854           break;
 855         default :
 856           can_copy_p[i] = 1;
 857           break;
 858         }
 859     }
 860   end_sequence ();
 861
 862   /* Free the objects we just allocated.  */
 863   obfree (free_point);
 864 }
 865 \f
 866 /* Cover function to xmalloc to record bytes allocated.  */
 867
 868 static char *
 869 gmalloc (size)
 870      unsigned int size;
 871 {
 872   bytes_used += size;
 873   return xmalloc (size);
 874 }
 875
 876 /* Cover function to xrealloc.
 877    We don't record the additional size since we don't know it.
 878    It won't affect memory usage stats much anyway.  */
 879
 880 static char *
 881 grealloc (ptr, size)
 882      char *ptr;
 883      unsigned int size;
 884 {
 885   return xrealloc (ptr, size);
 886 }
 887
 888 /* Cover function to obstack_alloc.
 889    We don't need to record the bytes allocated here since
 890    obstack_chunk_alloc is set to gmalloc.  */
 891
 892 static char *
 893 gcse_alloc (size)
 894      unsigned long size;
 895 {
 896   return (char *) obstack_alloc (&gcse_obstack, size);
 897 }
 898
 899 /* Allocate memory for the cuid mapping array,
 900    and reg/memory set tracking tables.
 901
 902    This is called at the start of each pass.  */
 903
 904 static void
 905 alloc_gcse_mem (f)
 906      rtx f;
 907 {
 908   int i,n;
 909   rtx insn;
 910
 911   /* Find the largest UID and create a mapping from UIDs to CUIDs.
 912      CUIDs are like UIDs except they increase monotonically, have no gaps,
 913      and only apply to real insns.  */
 914
 915   max_uid = get_max_uid ();
 916   n = (max_uid + 1) * sizeof (int);
 917   uid_cuid = (int *) gmalloc (n);
 918   bzero ((char *) uid_cuid, n);
 919   for (insn = f, i = 0; insn; insn = NEXT_INSN (insn))
 920     {
 921       if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
 922         INSN_CUID (insn) = i++;
 923       else
 924         INSN_CUID (insn) = i;
 925     }
 926
 927   /* Create a table mapping cuids to insns.  */
 928
 929   max_cuid = i;
 930   n = (max_cuid + 1) * sizeof (rtx);
 931   cuid_insn = (rtx *) gmalloc (n);
 932   bzero ((char *) cuid_insn, n);
 933   for (insn = f, i = 0; insn; insn = NEXT_INSN (insn))
 934     {
 935       if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
 936         {
 937           CUID_INSN (i) = insn;
 938           i++;
 939         }
 940     }
 941
 942   /* Allocate vars to track sets of regs.  */
 943
 944   reg_set_bitmap = (sbitmap) sbitmap_alloc (max_gcse_regno);
 945
 946   /* Allocate vars to track sets of regs, memory per block.  */
 947
 948   reg_set_in_block = (sbitmap *) sbitmap_vector_alloc (n_basic_blocks,
 949                                                        max_gcse_regno);
 950   mem_set_in_block = (char *) gmalloc (n_basic_blocks);
 951 }
 952
 953 /* Free memory allocated by alloc_gcse_mem.  */
 954
 955 static void
 956 free_gcse_mem ()
 957 {
 958   free (uid_cuid);
 959   free (cuid_insn);
 960
 961   free (reg_set_bitmap);
 962
 963   free (reg_set_in_block);
 964   free (mem_set_in_block);
 965 }
 966
 967 /* Many of the global optimization algorithms work by solving dataflow
 968    equations for various expressions.  Initially, some local value is
 969    computed for each expression in each block.  Then, the values
 970    across the various blocks are combined (by following flow graph
 971    edges) to arrive at global values.  Conceptually, each set of
 972    equations is independent.  We may therefore solve all the equations
 973    in parallel, solve them one at a time, or pick any intermediate
 974    approach.
 975
 976    When you're going to need N two-dimensional bitmaps, each X (say,
 977    the number of blocks) by Y (say, the number of expressions), call
 978    this function.  It's not important what X and Y represent; only
 979    that Y correspond to the things that can be done in parallel.  This
 980    function will return an appropriate chunking factor C; you should
 981    solve C sets of equations in parallel.  By going through this
 982    function, we can easily trade space against time; by solving fewer
 983    equations in parallel we use less space.  */
 984
 985 static int
 986 get_bitmap_width (n, x, y)
 987      int n;
 988      int x;
 989      int y;
 990 {
 991   /* It's not really worth figuring out *exactly* how much memory will
 992      be used by a particular choice.  The important thing is to get
 993      something approximately right.  */
 994   size_t max_bitmap_memory = 10 * 1024 * 1024;
 995
 996   /* The number of bytes we'd use for a single column of minimum
 997      width.  */
 998   size_t column_size = n * x * sizeof (SBITMAP_ELT_TYPE);
 999
1000   /* Often, it's reasonable just to solve all the equations in
1001      parallel.  */
1002   if (column_size * SBITMAP_SET_SIZE (y) <= max_bitmap_memory)
1003     return y;
1004
1005   /* Otherwise, pick the largest width we can, without going over the
1006      limit.  */
1007   return SBITMAP_ELT_BITS * ((max_bitmap_memory + column_size - 1)
1008                              / column_size);
1009 }
1010
1011 \f
1012 /* Compute the local properties of each recorded expression.
1013    Local properties are those that are defined by the block, irrespective
1014    of other blocks.
1015
1016    An expression is transparent in a block if its operands are not modified
1017    in the block.
1018
1019    An expression is computed (locally available) in a block if it is computed
1020    at least once and expression would contain the same value if the
1021    computation was moved to the end of the block.
1022
1023    An expression is locally anticipatable in a block if it is computed at
1024    least once and expression would contain the same value if the computation
1025    was moved to the beginning of the block.
1026
1027    We call this routine for cprop, pre and code hoisting.  They all
1028    compute basically the same information and thus can easily share
1029    this code.
1030
1031    TRANSP, COMP, and ANTLOC are destination sbitmaps for recording
1032    local properties.  If NULL, then it is not necessary to compute
1033    or record that particular property.
1034
1035    SETP controls which hash table to look at.  If zero, this routine
1036    looks at the expr hash table; if nonzero this routine looks at
1037    the set hash table.  Additionally, TRANSP is computed as ~TRANSP,
1038    since this is really cprop's ABSALTERED.  */
1039
1040 static void
1041 compute_local_properties (transp, comp, antloc, setp)
1042      sbitmap *transp;
1043      sbitmap *comp;
1044      sbitmap *antloc;
1045      int setp;
1046 {
1047   int i, hash_table_size;
1048   struct expr **hash_table;
1049
1050   /* Initialize any bitmaps that were passed in.  */
1051   if (transp)
1052     {
1053       if (setp)
1054         sbitmap_vector_zero (transp, n_basic_blocks);
1055       else
1056         sbitmap_vector_ones (transp, n_basic_blocks);
1057     }
1058   if (comp)
1059     sbitmap_vector_zero (comp, n_basic_blocks);
1060   if (antloc)
1061     sbitmap_vector_zero (antloc, n_basic_blocks);
1062
1063   /* We use the same code for cprop, pre and hoisting.  For cprop
1064      we care about the set hash table, for pre and hoisting we
1065      care about the expr hash table.  */
1066   hash_table_size = setp ? set_hash_table_size : expr_hash_table_size;
1067   hash_table = setp ? set_hash_table : expr_hash_table;
1068
1069   for (i = 0; i < hash_table_size; i++)
1070     {
1071       struct expr *expr;
1072
1073       for (expr = hash_table[i]; expr != NULL; expr = expr->next_same_hash)
1074         {
1075           struct occr *occr;
1076           int indx = expr->bitmap_index;
1077
1078           /* The expression is transparent in this block if it is not killed.
1079              We start by assuming all are transparent [none are killed], and
1080              then reset the bits for those that are.  */
1081
1082           if (transp)
1083             compute_transp (expr->expr, indx, transp, setp);
1084
1085           /* The occurrences recorded in antic_occr are exactly those that
1086              we want to set to non-zero in ANTLOC.  */
1087
1088           if (antloc)
1089             {
1090               for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
1091                 {
1092                   int bb = BLOCK_NUM (occr->insn);
1093                   SET_BIT (antloc[bb], indx);
1094
1095                   /* While we're scanning the table, this is a good place to
1096                      initialize this.  */
1097                   occr->deleted_p = 0;
1098                 }
1099             }
1100
1101           /* The occurrences recorded in avail_occr are exactly those that
1102              we want to set to non-zero in COMP.  */
1103           if (comp)
1104             {
1105
1106               for (occr = expr->avail_occr; occr != NULL; occr = occr->next)
1107                 {
1108                   int bb = BLOCK_NUM (occr->insn);
1109                   SET_BIT (comp[bb], indx);
1110
1111                   /* While we're scanning the table, this is a good place to
1112                      initialize this.  */
1113                   occr->copied_p = 0;
1114                 }
1115             }
1116
1117           /* While we're scanning the table, this is a good place to
1118              initialize this.  */
1119           expr->reaching_reg = 0;
1120         }
1121     }
1122 }
1123
1124 \f
1125 /* Register set information.
1126
1127    `reg_set_table' records where each register is set or otherwise
1128    modified.  */
1129
1130 static struct obstack reg_set_obstack;
1131
1132 static void
1133 alloc_reg_set_mem (n_regs)
1134      int n_regs;
1135 {
1136   int n;
1137
1138   reg_set_table_size = n_regs + REG_SET_TABLE_SLOP;
1139   n = reg_set_table_size * sizeof (struct reg_set *);
1140   reg_set_table = (struct reg_set **) gmalloc (n);
1141   bzero ((char *) reg_set_table, n);
1142
1143   gcc_obstack_init (&reg_set_obstack);
1144 }
1145
1146 static void
1147 free_reg_set_mem ()
1148 {
1149   free (reg_set_table);
1150   obstack_free (&reg_set_obstack, NULL_PTR);
1151 }
1152
1153 /* Record REGNO in the reg_set table.  */
1154
1155 static void
1156 record_one_set (regno, insn)
1157      int regno;
1158      rtx insn;
1159 {
1160   /* allocate a new reg_set element and link it onto the list */
1161   struct reg_set *new_reg_info, *reg_info_ptr1, *reg_info_ptr2;
1162
1163   /* If the table isn't big enough, enlarge it.  */
1164   if (regno >= reg_set_table_size)
1165     {
1166       int new_size = regno + REG_SET_TABLE_SLOP;
1167       reg_set_table = (struct reg_set **)
1168         grealloc ((char *) reg_set_table,
1169                   new_size * sizeof (struct reg_set *));
1170       bzero ((char *) (reg_set_table + reg_set_table_size),
1171              (new_size - reg_set_table_size) * sizeof (struct reg_set *));
1172       reg_set_table_size = new_size;
1173     }
1174
1175   new_reg_info = (struct reg_set *) obstack_alloc (&reg_set_obstack,
1176                                                    sizeof (struct reg_set));
1177   bytes_used += sizeof (struct reg_set);
1178   new_reg_info->insn = insn;
1179   new_reg_info->next = NULL;
1180   if (reg_set_table[regno] == NULL)
1181     reg_set_table[regno] = new_reg_info;
1182   else
1183     {
1184       reg_info_ptr1 = reg_info_ptr2 = reg_set_table[regno];
1185       /* ??? One could keep a "last" pointer to speed this up.  */
1186       while (reg_info_ptr1 != NULL)
1187         {
1188           reg_info_ptr2 = reg_info_ptr1;
1189           reg_info_ptr1 = reg_info_ptr1->next;
1190         }
1191       reg_info_ptr2->next = new_reg_info;
1192     }
1193 }
1194
1195 /* Called from compute_sets via note_stores to handle one
1196    SET or CLOBBER in an insn.  The DATA is really the instruction
1197    in which the SET is occurring.  */
1198
1199 static void
1200 record_set_info (dest, setter, data)
1201      rtx dest, setter ATTRIBUTE_UNUSED;
1202      void *data;
1203 {
1204   rtx record_set_insn = (rtx) data;
1205
1206   if (GET_CODE (dest) == SUBREG)
1207     dest = SUBREG_REG (dest);
1208
1209   if (GET_CODE (dest) == REG)
1210     {
1211       if (REGNO (dest) >= FIRST_PSEUDO_REGISTER)
1212         record_one_set (REGNO (dest), record_set_insn);
1213     }
1214 }
1215
1216 /* Scan the function and record each set of each pseudo-register.
1217
1218    This is called once, at the start of the gcse pass.
1219    See the comments for `reg_set_table' for further docs.  */
1220
1221 static void
1222 compute_sets (f)
1223      rtx f;
1224 {
1225   rtx insn = f;
1226
1227   while (insn)
1228     {
1229       if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
1230         note_stores (PATTERN (insn), record_set_info, insn);
1231       insn = NEXT_INSN (insn);
1232     }
1233 }
1234 \f
1235 /* Hash table support.  */
1236
1237 #define NEVER_SET -1
1238
1239 /* For each register, the cuid of the first/last insn in the block to set it,
1240    or -1 if not set.  */
1241 static int *reg_first_set;
1242 static int *reg_last_set;
1243
1244 /* While computing "first/last set" info, this is the CUID of first/last insn
1245    to set memory or -1 if not set.  `mem_last_set' is also used when
1246    performing GCSE to record whether memory has been set since the beginning
1247    of the block.
1248    Note that handling of memory is very simple, we don't make any attempt
1249    to optimize things (later).  */
1250 static int mem_first_set;
1251 static int mem_last_set;
1252
1253 /* Perform a quick check whether X, the source of a set, is something
1254    we want to consider for GCSE.  */
1255
1256 static int
1257 want_to_gcse_p (x)
1258      rtx x;
1259 {
1260   enum rtx_code code = GET_CODE (x);
1261
1262   switch (code)
1263     {
1264     case REG:
1265     case SUBREG:
1266     case CONST_INT:
1267     case CONST_DOUBLE:
1268     case CALL:
1269       return 0;
1270
1271     default:
1272       break;
1273     }
1274
1275   return 1;
1276 }
1277
1278 /* Return non-zero if the operands of expression X are unchanged from the
1279    start of INSN's basic block up to but not including INSN (if AVAIL_P == 0),
1280    or from INSN to the end of INSN's basic block (if AVAIL_P != 0).  */
1281
1282 static int
1283 oprs_unchanged_p (x, insn, avail_p)
1284      rtx x, insn;
1285      int avail_p;
1286 {
1287   int i;
1288   enum rtx_code code;
1289   const char *fmt;
1290
1291   /* repeat is used to turn tail-recursion into iteration.  */
1292  repeat:
1293
1294   if (x == 0)
1295     return 1;
1296
1297   code = GET_CODE (x);
1298   switch (code)
1299     {
1300     case REG:
1301       if (avail_p)
1302         return (reg_last_set[REGNO (x)] == NEVER_SET
1303                 || reg_last_set[REGNO (x)] < INSN_CUID (insn));
1304       else
1305         return (reg_first_set[REGNO (x)] == NEVER_SET
1306                 || reg_first_set[REGNO (x)] >= INSN_CUID (insn));
1307
1308     case MEM:
1309       if (avail_p)
1310         {
1311           if (mem_last_set != NEVER_SET
1312               && mem_last_set >= INSN_CUID (insn))
1313             return 0;
1314         }
1315       else
1316         {
1317           if (mem_first_set != NEVER_SET
1318               && mem_first_set < INSN_CUID (insn))
1319             return 0;
1320         }
1321       x = XEXP (x, 0);
1322       goto repeat;
1323
1324     case PRE_DEC:
1325     case PRE_INC:
1326     case POST_DEC:
1327     case POST_INC:
1328       return 0;
1329
1330     case PC:
1331     case CC0: /*FIXME*/
1332     case CONST:
1333     case CONST_INT:
1334     case CONST_DOUBLE:
1335     case SYMBOL_REF:
1336     case LABEL_REF:
1337     case ADDR_VEC:
1338     case ADDR_DIFF_VEC:
1339       return 1;
1340
1341     default:
1342       break;
1343     }
1344
1345   i = GET_RTX_LENGTH (code) - 1;
1346   fmt = GET_RTX_FORMAT (code);
1347   for (; i >= 0; i--)
1348     {
1349       if (fmt[i] == 'e')
1350         {
1351           rtx tem = XEXP (x, i);
1352
1353           /* If we are about to do the last recursive call
1354              needed at this level, change it into iteration.
1355              This function is called enough to be worth it.  */
1356           if (i == 0)
1357             {
1358               x = tem;
1359               goto repeat;
1360             }
1361           if (! oprs_unchanged_p (tem, insn, avail_p))
1362             return 0;
1363         }
1364       else if (fmt[i] == 'E')
1365         {
1366           int j;
1367           for (j = 0; j < XVECLEN (x, i); j++)
1368             {
1369               if (! oprs_unchanged_p (XVECEXP (x, i, j), insn, avail_p))
1370                 return 0;
1371             }
1372         }
1373     }
1374
1375   return 1;
1376 }
1377
1378 /* Return non-zero if the operands of expression X are unchanged from
1379    the start of INSN's basic block up to but not including INSN.  */
1380
1381 static int
1382 oprs_anticipatable_p (x, insn)
1383      rtx x, insn;
1384 {
1385   return oprs_unchanged_p (x, insn, 0);
1386 }
1387
1388 /* Return non-zero if the operands of expression X are unchanged from
1389    INSN to the end of INSN's basic block.  */
1390
1391 static int
1392 oprs_available_p (x, insn)
1393      rtx x, insn;
1394 {
1395   return oprs_unchanged_p (x, insn, 1);
1396 }
1397
1398 /* Hash expression X.
1399    MODE is only used if X is a CONST_INT.
1400    A boolean indicating if a volatile operand is found or if the expression
1401    contains something we don't want to insert in the table is stored in
1402    DO_NOT_RECORD_P.
1403
1404    ??? One might want to merge this with canon_hash.  Later.  */
1405
1406 static unsigned int
1407 hash_expr (x, mode, do_not_record_p, hash_table_size)
1408      rtx x;
1409      enum machine_mode mode;
1410      int *do_not_record_p;
1411      int hash_table_size;
1412 {
1413   unsigned int hash;
1414
1415   *do_not_record_p = 0;
1416
1417   hash = hash_expr_1 (x, mode, do_not_record_p);
1418   return hash % hash_table_size;
1419 }
1420
1421 /* Subroutine of hash_expr to do the actual work.  */
1422
1423 static unsigned int
1424 hash_expr_1 (x, mode, do_not_record_p)
1425      rtx x;
1426      enum machine_mode mode;
1427      int *do_not_record_p;
1428 {
1429   int i, j;
1430   unsigned hash = 0;
1431   enum rtx_code code;
1432   const char *fmt;
1433
1434   /* repeat is used to turn tail-recursion into iteration.  */
1435  repeat:
1436
1437   if (x == 0)
1438     return hash;
1439
1440   code = GET_CODE (x);
1441   switch (code)
1442     {
1443     case REG:
1444       {
1445         register int regno = REGNO (x);
1446         hash += ((unsigned) REG << 7) + regno;
1447         return hash;
1448       }
1449
1450     case CONST_INT:
1451       {
1452         unsigned HOST_WIDE_INT tem = INTVAL (x);
1453         hash += ((unsigned) CONST_INT << 7) + (unsigned) mode + tem;
1454         return hash;
1455       }
1456
1457     case CONST_DOUBLE:
1458       /* This is like the general case, except that it only counts
1459          the integers representing the constant.  */
1460       hash += (unsigned) code + (unsigned) GET_MODE (x);
1461       if (GET_MODE (x) != VOIDmode)
1462         for (i = 2; i < GET_RTX_LENGTH (CONST_DOUBLE); i++)
1463           {
1464             unsigned tem = XWINT (x, i);
1465             hash += tem;
1466           }
1467       else
1468         hash += ((unsigned) CONST_DOUBLE_LOW (x)
1469                  + (unsigned) CONST_DOUBLE_HIGH (x));
1470       return hash;
1471
1472       /* Assume there is only one rtx object for any given label.  */
1473     case LABEL_REF:
1474       /* We don't hash on the address of the CODE_LABEL to avoid bootstrap
1475          differences and differences between each stage's debugging dumps.  */
1476       hash += ((unsigned) LABEL_REF << 7) + CODE_LABEL_NUMBER (XEXP (x, 0));
1477       return hash;
1478
1479     case SYMBOL_REF:
1480       {
1481         /* Don't hash on the symbol's address to avoid bootstrap differences.
1482            Different hash values may cause expressions to be recorded in
1483            different orders and thus different registers to be used in the
1484            final assembler.  This also avoids differences in the dump files
1485            between various stages.  */
1486         unsigned int h = 0;
1487         unsigned char *p = (unsigned char *) XSTR (x, 0);
1488         while (*p)
1489           h += (h << 7) + *p++; /* ??? revisit */
1490         hash += ((unsigned) SYMBOL_REF << 7) + h;
1491         return hash;
1492       }
1493
1494     case MEM:
1495       if (MEM_VOLATILE_P (x))
1496         {
1497           *do_not_record_p = 1;
1498           return 0;
1499         }
1500       hash += (unsigned) MEM;
1501       hash += MEM_ALIAS_SET (x);
1502       x = XEXP (x, 0);
1503       goto repeat;
1504
1505     case PRE_DEC:
1506     case PRE_INC:
1507     case POST_DEC:
1508     case POST_INC:
1509     case PC:
1510     case CC0:
1511     case CALL:
1512     case UNSPEC_VOLATILE:
1513       *do_not_record_p = 1;
1514       return 0;
1515
1516     case ASM_OPERANDS:
1517       if (MEM_VOLATILE_P (x))
1518         {
1519           *do_not_record_p = 1;
1520           return 0;
1521         }
1522
1523     default:
1524       break;
1525     }
1526
1527   i = GET_RTX_LENGTH (code) - 1;
1528   hash += (unsigned) code + (unsigned) GET_MODE (x);
1529   fmt = GET_RTX_FORMAT (code);
1530   for (; i >= 0; i--)
1531     {
1532       if (fmt[i] == 'e')
1533         {
1534           rtx tem = XEXP (x, i);
1535
1536           /* If we are about to do the last recursive call
1537              needed at this level, change it into iteration.
1538              This function is called enough to be worth it.  */
1539           if (i == 0)
1540             {
1541               x = tem;
1542               goto repeat;
1543             }
1544           hash += hash_expr_1 (tem, 0, do_not_record_p);
1545           if (*do_not_record_p)
1546             return 0;
1547         }
1548       else if (fmt[i] == 'E')
1549         for (j = 0; j < XVECLEN (x, i); j++)
1550           {
1551             hash += hash_expr_1 (XVECEXP (x, i, j), 0, do_not_record_p);
1552             if (*do_not_record_p)
1553               return 0;
1554           }
1555       else if (fmt[i] == 's')
1556         {
1557           register unsigned char *p = (unsigned char *) XSTR (x, i);
1558           if (p)
1559             while (*p)
1560               hash += *p++;
1561         }
1562       else if (fmt[i] == 'i')
1563         {
1564           register unsigned tem = XINT (x, i);
1565           hash += tem;
1566         }
1567       else
1568         abort ();
1569     }
1570
1571   return hash;
1572 }
1573
1574 /* Hash a set of register REGNO.
1575
1576    Sets are hashed on the register that is set.
1577    This simplifies the PRE copy propagation code.
1578
1579    ??? May need to make things more elaborate.  Later, as necessary.  */
1580
1581 static unsigned int
1582 hash_set (regno, hash_table_size)
1583      int regno;
1584      int hash_table_size;
1585 {
1586   unsigned int hash;
1587
1588   hash = regno;
1589   return hash % hash_table_size;
1590 }
1591
1592 /* Return non-zero if exp1 is equivalent to exp2.
1593    ??? Borrowed from cse.c.  Might want to remerge with cse.c.  Later.  */
1594
1595 static int
1596 expr_equiv_p (x, y)
1597      rtx x, y;
1598 {
1599   register int i, j;
1600   register enum rtx_code code;
1601   register const char *fmt;
1602
1603   if (x == y)
1604     return 1;
1605   if (x == 0 || y == 0)
1606     return x == y;
1607
1608   code = GET_CODE (x);
1609   if (code != GET_CODE (y))
1610     return 0;
1611
1612   /* (MULT:SI x y) and (MULT:HI x y) are NOT equivalent.  */
1613   if (GET_MODE (x) != GET_MODE (y))
1614     return 0;
1615
1616   switch (code)
1617     {
1618     case PC:
1619     case CC0:
1620       return x == y;
1621
1622     case CONST_INT:
1623       return INTVAL (x) == INTVAL (y);
1624
1625     case LABEL_REF:
1626       return XEXP (x, 0) == XEXP (y, 0);
1627
1628     case SYMBOL_REF:
1629       return XSTR (x, 0) == XSTR (y, 0);
1630
1631     case REG:
1632       return REGNO (x) == REGNO (y);
1633
1634     case MEM:
1635       /* Can't merge two expressions in different alias sets, since we can
1636          decide that the expression is transparent in a block when it isn't,
1637          due to it being set with the different alias set.  */
1638       if (MEM_ALIAS_SET (x) != MEM_ALIAS_SET (y))
1639         return 0;
1640       break;
1641
1642     /*  For commutative operations, check both orders.  */
1643     case PLUS:
1644     case MULT:
1645     case AND:
1646     case IOR:
1647     case XOR:
1648     case NE:
1649     case EQ:
1650       return ((expr_equiv_p (XEXP (x, 0), XEXP (y, 0))
1651                && expr_equiv_p (XEXP (x, 1), XEXP (y, 1)))
1652               || (expr_equiv_p (XEXP (x, 0), XEXP (y, 1))
1653                   && expr_equiv_p (XEXP (x, 1), XEXP (y, 0))));
1654
1655     default:
1656       break;
1657     }
1658
1659   /* Compare the elements.  If any pair of corresponding elements
1660      fail to match, return 0 for the whole thing.  */
1661
1662   fmt = GET_RTX_FORMAT (code);
1663   for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
1664     {
1665       switch (fmt[i])
1666         {
1667         case 'e':
1668           if (! expr_equiv_p (XEXP (x, i), XEXP (y, i)))
1669             return 0;
1670           break;
1671
1672         case 'E':
1673           if (XVECLEN (x, i) != XVECLEN (y, i))
1674             return 0;
1675           for (j = 0; j < XVECLEN (x, i); j++)
1676             if (! expr_equiv_p (XVECEXP (x, i, j), XVECEXP (y, i, j)))
1677               return 0;
1678           break;
1679
1680         case 's':
1681           if (strcmp (XSTR (x, i), XSTR (y, i)))
1682             return 0;
1683           break;
1684
1685         case 'i':
1686           if (XINT (x, i) != XINT (y, i))
1687             return 0;
1688           break;
1689
1690         case 'w':
1691           if (XWINT (x, i) != XWINT (y, i))
1692             return 0;
1693         break;
1694
1695         case '0':
1696           break;
1697
1698         default:
1699           abort ();
1700         }
1701       }
1702
1703   return 1;
1704 }
1705
1706 /* Insert expression X in INSN in the hash table.
1707    If it is already present, record it as the last occurrence in INSN's
1708    basic block.
1709
1710    MODE is the mode of the value X is being stored into.
1711    It is only used if X is a CONST_INT.
1712
1713    ANTIC_P is non-zero if X is an anticipatable expression.
1714    AVAIL_P is non-zero if X is an available expression.  */
1715
1716 static void
1717 insert_expr_in_table (x, mode, insn, antic_p, avail_p)
1718      rtx x;
1719      enum machine_mode mode;
1720      rtx insn;
1721      int antic_p, avail_p;
1722 {
1723   int found, do_not_record_p;
1724   unsigned int hash;
1725   struct expr *cur_expr, *last_expr = NULL;
1726   struct occr *antic_occr, *avail_occr;
1727   struct occr *last_occr = NULL;
1728
1729   hash = hash_expr (x, mode, &do_not_record_p, expr_hash_table_size);
1730
1731   /* Do not insert expression in table if it contains volatile operands,
1732      or if hash_expr determines the expression is something we don't want
1733      to or can't handle.  */
1734   if (do_not_record_p)
1735     return;
1736
1737   cur_expr = expr_hash_table[hash];
1738   found = 0;
1739
1740   while (cur_expr && ! (found = expr_equiv_p (cur_expr->expr, x)))
1741     {
1742       /* If the expression isn't found, save a pointer to the end of
1743          the list.  */
1744       last_expr = cur_expr;
1745       cur_expr = cur_expr->next_same_hash;
1746     }
1747
1748   if (! found)
1749     {
1750       cur_expr = (struct expr *) gcse_alloc (sizeof (struct expr));
1751       bytes_used += sizeof (struct expr);
1752       if (expr_hash_table[hash] == NULL)
1753         {
1754           /* This is the first pattern that hashed to this index.  */
1755           expr_hash_table[hash] = cur_expr;
1756         }
1757       else
1758         {
1759           /* Add EXPR to end of this hash chain.  */
1760           last_expr->next_same_hash = cur_expr;
1761         }
1762       /* Set the fields of the expr element.  */
1763       cur_expr->expr = x;
1764       cur_expr->bitmap_index = n_exprs++;
1765       cur_expr->next_same_hash = NULL;
1766       cur_expr->antic_occr = NULL;
1767       cur_expr->avail_occr = NULL;
1768     }
1769
1770   /* Now record the occurrence(s).  */
1771
1772   if (antic_p)
1773     {
1774       antic_occr = cur_expr->antic_occr;
1775
1776       /* Search for another occurrence in the same basic block.  */
1777       while (antic_occr && BLOCK_NUM (antic_occr->insn) != BLOCK_NUM (insn))
1778         {
1779           /* If an occurrence isn't found, save a pointer to the end of
1780              the list.  */
1781           last_occr = antic_occr;
1782           antic_occr = antic_occr->next;
1783         }
1784
1785       if (antic_occr)
1786         {
1787           /* Found another instance of the expression in the same basic block.
1788              Prefer the currently recorded one.  We want the first one in the
1789              block and the block is scanned from start to end.  */
1790           ; /* nothing to do */
1791         }
1792       else
1793         {
1794           /* First occurrence of this expression in this basic block.  */
1795           antic_occr = (struct occr *) gcse_alloc (sizeof (struct occr));
1796           bytes_used += sizeof (struct occr);
1797           /* First occurrence of this expression in any block?  */
1798           if (cur_expr->antic_occr == NULL)
1799             cur_expr->antic_occr = antic_occr;
1800           else
1801             last_occr->next = antic_occr;
1802           antic_occr->insn = insn;
1803           antic_occr->next = NULL;
1804         }
1805     }
1806
1807   if (avail_p)
1808     {
1809       avail_occr = cur_expr->avail_occr;
1810
1811       /* Search for another occurrence in the same basic block.  */
1812       while (avail_occr && BLOCK_NUM (avail_occr->insn) != BLOCK_NUM (insn))
1813         {
1814           /* If an occurrence isn't found, save a pointer to the end of
1815              the list.  */
1816           last_occr = avail_occr;
1817           avail_occr = avail_occr->next;
1818         }
1819
1820       if (avail_occr)
1821         {
1822           /* Found another instance of the expression in the same basic block.
1823              Prefer this occurrence to the currently recorded one.  We want
1824              the last one in the block and the block is scanned from start
1825              to end.  */
1826           avail_occr->insn = insn;
1827         }
1828       else
1829         {
1830           /* First occurrence of this expression in this basic block.  */
1831           avail_occr = (struct occr *) gcse_alloc (sizeof (struct occr));
1832           bytes_used += sizeof (struct occr);
1833           /* First occurrence of this expression in any block?  */
1834           if (cur_expr->avail_occr == NULL)
1835             cur_expr->avail_occr = avail_occr;
1836           else
1837             last_occr->next = avail_occr;
1838           avail_occr->insn = insn;
1839           avail_occr->next = NULL;
1840         }
1841     }
1842 }
1843
1844 /* Insert pattern X in INSN in the hash table.
1845    X is a SET of a reg to either another reg or a constant.
1846    If it is already present, record it as the last occurrence in INSN's
1847    basic block.  */
1848
1849 static void
1850 insert_set_in_table (x, insn)
1851      rtx x;
1852      rtx insn;
1853 {
1854   int found;
1855   unsigned int hash;
1856   struct expr *cur_expr, *last_expr = NULL;
1857   struct occr *cur_occr, *last_occr = NULL;
1858
1859   if (GET_CODE (x) != SET
1860       || GET_CODE (SET_DEST (x)) != REG)
1861     abort ();
1862
1863   hash = hash_set (REGNO (SET_DEST (x)), set_hash_table_size);
1864
1865   cur_expr = set_hash_table[hash];
1866   found = 0;
1867
1868   while (cur_expr && ! (found = expr_equiv_p (cur_expr->expr, x)))
1869     {
1870       /* If the expression isn't found, save a pointer to the end of
1871          the list.  */
1872       last_expr = cur_expr;
1873       cur_expr = cur_expr->next_same_hash;
1874     }
1875
1876   if (! found)
1877     {
1878       cur_expr = (struct expr *) gcse_alloc (sizeof (struct expr));
1879       bytes_used += sizeof (struct expr);
1880       if (set_hash_table[hash] == NULL)
1881         {
1882           /* This is the first pattern that hashed to this index.  */
1883           set_hash_table[hash] = cur_expr;
1884         }
1885       else
1886         {
1887           /* Add EXPR to end of this hash chain.  */
1888           last_expr->next_same_hash = cur_expr;
1889         }
1890       /* Set the fields of the expr element.
1891          We must copy X because it can be modified when copy propagation is
1892          performed on its operands.  */
1893       /* ??? Should this go in a different obstack?  */
1894       cur_expr->expr = copy_rtx (x);
1895       cur_expr->bitmap_index = n_sets++;
1896       cur_expr->next_same_hash = NULL;
1897       cur_expr->antic_occr = NULL;
1898       cur_expr->avail_occr = NULL;
1899     }
1900
1901   /* Now record the occurrence.  */
1902
1903   cur_occr = cur_expr->avail_occr;
1904
1905   /* Search for another occurrence in the same basic block.  */
1906   while (cur_occr && BLOCK_NUM (cur_occr->insn) != BLOCK_NUM (insn))
1907     {
1908       /* If an occurrence isn't found, save a pointer to the end of
1909          the list.  */
1910       last_occr = cur_occr;
1911       cur_occr = cur_occr->next;
1912     }
1913
1914   if (cur_occr)
1915     {
1916       /* Found another instance of the expression in the same basic block.
1917          Prefer this occurrence to the currently recorded one.  We want
1918          the last one in the block and the block is scanned from start
1919          to end.  */
1920       cur_occr->insn = insn;
1921     }
1922   else
1923     {
1924       /* First occurrence of this expression in this basic block.  */
1925       cur_occr = (struct occr *) gcse_alloc (sizeof (struct occr));
1926       bytes_used += sizeof (struct occr);
1927       /* First occurrence of this expression in any block?  */
1928       if (cur_expr->avail_occr == NULL)
1929         cur_expr->avail_occr = cur_occr;
1930       else
1931         last_occr->next = cur_occr;
1932       cur_occr->insn = insn;
1933       cur_occr->next = NULL;
1934     }
1935 }
1936
1937 /* Scan pattern PAT of INSN and add an entry to the hash table.
1938    If SET_P is non-zero, this is for the assignment hash table,
1939    otherwise it is for the expression hash table.  */
1940
1941 static void
1942 hash_scan_set (pat, insn, set_p)
1943      rtx pat, insn;
1944      int set_p;
1945 {
1946   rtx src = SET_SRC (pat);
1947   rtx dest = SET_DEST (pat);
1948
1949   if (GET_CODE (src) == CALL)
1950     hash_scan_call (src, insn);
1951
1952   if (GET_CODE (dest) == REG)
1953     {
1954       int regno = REGNO (dest);
1955       rtx tmp;
1956
1957       /* Only record sets of pseudo-regs in the hash table.  */
1958       if (! set_p
1959           && regno >= FIRST_PSEUDO_REGISTER
1960           /* Don't GCSE something if we can't do a reg/reg copy.  */
1961           && can_copy_p [GET_MODE (dest)]
1962           /* Is SET_SRC something we want to gcse?  */
1963           && want_to_gcse_p (src))
1964         {
1965           /* An expression is not anticipatable if its operands are
1966              modified before this insn.  */
1967           int antic_p = oprs_anticipatable_p (src, insn);
1968           /* An expression is not available if its operands are
1969              subsequently modified, including this insn.  */
1970           int avail_p = oprs_available_p (src, insn);
1971           insert_expr_in_table (src, GET_MODE (dest), insn, antic_p, avail_p);
1972         }
1973       /* Record sets for constant/copy propagation.  */
1974       else if (set_p
1975                && regno >= FIRST_PSEUDO_REGISTER
1976                && ((GET_CODE (src) == REG
1977                     && REGNO (src) >= FIRST_PSEUDO_REGISTER
1978                     && can_copy_p [GET_MODE (dest)])
1979                    || GET_CODE (src) == CONST_INT
1980                    || GET_CODE (src) == SYMBOL_REF
1981                    || GET_CODE (src) == CONST_DOUBLE)
1982                /* A copy is not available if its src or dest is subsequently
1983                   modified.  Here we want to search from INSN+1 on, but
1984                   oprs_available_p searches from INSN on.  */
1985                && (insn == BLOCK_END (BLOCK_NUM (insn))
1986                    || ((tmp = next_nonnote_insn (insn)) != NULL_RTX
1987                        && oprs_available_p (pat, tmp))))
1988         insert_set_in_table (pat, insn);
1989     }
1990 }
1991
1992 static void
1993 hash_scan_clobber (x, insn)
1994      rtx x ATTRIBUTE_UNUSED, insn ATTRIBUTE_UNUSED;
1995 {
1996   /* Currently nothing to do.  */
1997 }
1998
1999 static void
2000 hash_scan_call (x, insn)
2001      rtx x ATTRIBUTE_UNUSED, insn ATTRIBUTE_UNUSED;
2002 {
2003   /* Currently nothing to do.  */
2004 }
2005
2006 /* Process INSN and add hash table entries as appropriate.
2007
2008    Only available expressions that set a single pseudo-reg are recorded.
2009
2010    Single sets in a PARALLEL could be handled, but it's an extra complication
2011    that isn't dealt with right now.  The trick is handling the CLOBBERs that
2012    are also in the PARALLEL.  Later.
2013
2014    If SET_P is non-zero, this is for the assignment hash table,
2015    otherwise it is for the expression hash table.
2016    If IN_LIBCALL_BLOCK nonzero, we are in a libcall block, and should
2017    not record any expressions.  */
2018
2019 static void
2020 hash_scan_insn (insn, set_p, in_libcall_block)
2021      rtx insn;
2022      int set_p;
2023      int in_libcall_block;
2024 {
2025   rtx pat = PATTERN (insn);
2026
2027   /* Pick out the sets of INSN and for other forms of instructions record
2028      what's been modified.  */
2029
2030   if (GET_CODE (pat) == SET && ! in_libcall_block)
2031     {
2032       /* Ignore obvious no-ops.  */
2033       if (SET_SRC (pat) != SET_DEST (pat))
2034         hash_scan_set (pat, insn, set_p);
2035     }
2036   else if (GET_CODE (pat) == PARALLEL)
2037     {
2038       int i;
2039
2040       for (i = 0; i < XVECLEN (pat, 0); i++)
2041         {
2042           rtx x = XVECEXP (pat, 0, i);
2043
2044           if (GET_CODE (x) == SET)
2045             {
2046               if (GET_CODE (SET_SRC (x)) == CALL)
2047                 hash_scan_call (SET_SRC (x), insn);
2048             }
2049           else if (GET_CODE (x) == CLOBBER)
2050             hash_scan_clobber (x, insn);
2051           else if (GET_CODE (x) == CALL)
2052             hash_scan_call (x, insn);
2053         }
2054     }
2055   else if (GET_CODE (pat) == CLOBBER)
2056     hash_scan_clobber (pat, insn);
2057   else if (GET_CODE (pat) == CALL)
2058     hash_scan_call (pat, insn);
2059 }
2060
2061 static void
2062 dump_hash_table (file, name, table, table_size, total_size)
2063      FILE *file;
2064      const char *name;
2065      struct expr **table;
2066      int table_size, total_size;
2067 {
2068   int i;
2069   /* Flattened out table, so it's printed in proper order.  */
2070   struct expr **flat_table = (struct expr **) alloca (total_size * sizeof (struct expr *));
2071   unsigned int *hash_val = (unsigned int *) alloca (total_size * sizeof (unsigned int));
2072
2073   bzero ((char *) flat_table, total_size * sizeof (struct expr *));
2074   for (i = 0; i < table_size; i++)
2075     {
2076       struct expr *expr;
2077
2078       for (expr = table[i]; expr != NULL; expr = expr->next_same_hash)
2079         {
2080           flat_table[expr->bitmap_index] = expr;
2081           hash_val[expr->bitmap_index] = i;
2082         }
2083     }
2084
2085   fprintf (file, "%s hash table (%d buckets, %d entries)\n",
2086            name, table_size, total_size);
2087
2088   for (i = 0; i < total_size; i++)
2089     {
2090       struct expr *expr = flat_table[i];
2091
2092       fprintf (file, "Index %d (hash value %d)\n  ",
2093                expr->bitmap_index, hash_val[i]);
2094       print_rtl (file, expr->expr);
2095       fprintf (file, "\n");
2096     }
2097
2098   fprintf (file, "\n");
2099 }
2100
2101 /* Record register first/last/block set information for REGNO in INSN.
2102    reg_first_set records the first place in the block where the register
2103    is set and is used to compute "anticipatability".
2104    reg_last_set records the last place in the block where the register
2105    is set and is used to compute "availability".
2106    reg_set_in_block records whether the register is set in the block
2107    and is used to compute "transparency".  */
2108
2109 static void
2110 record_last_reg_set_info (insn, regno)
2111      rtx insn;
2112      int regno;
2113 {
2114   if (reg_first_set[regno] == NEVER_SET)
2115     reg_first_set[regno] = INSN_CUID (insn);
2116   reg_last_set[regno] = INSN_CUID (insn);
2117   SET_BIT (reg_set_in_block[BLOCK_NUM (insn)], regno);
2118 }
2119
2120 /* Record memory first/last/block set information for INSN.  */
2121
2122 static void
2123 record_last_mem_set_info (insn)
2124      rtx insn;
2125 {
2126   if (mem_first_set == NEVER_SET)
2127     mem_first_set = INSN_CUID (insn);
2128   mem_last_set = INSN_CUID (insn);
2129   mem_set_in_block[BLOCK_NUM (insn)] = 1;
2130 }
2131
2132 /* Called from compute_hash_table via note_stores to handle one
2133    SET or CLOBBER in an insn.  DATA is really the instruction in which
2134    the SET is taking place.  */
2135
2136 static void
2137 record_last_set_info (dest, setter, data)
2138      rtx dest, setter ATTRIBUTE_UNUSED;
2139      void *data;
2140 {
2141   rtx last_set_insn = (rtx) data;
2142
2143   if (GET_CODE (dest) == SUBREG)
2144     dest = SUBREG_REG (dest);
2145
2146   if (GET_CODE (dest) == REG)
2147     record_last_reg_set_info (last_set_insn, REGNO (dest));
2148   else if (GET_CODE (dest) == MEM
2149            /* Ignore pushes, they clobber nothing.  */
2150            && ! push_operand (dest, GET_MODE (dest)))
2151     record_last_mem_set_info (last_set_insn);
2152 }
2153
2154 /* Top level function to create an expression or assignment hash table.
2155
2156    Expression entries are placed in the hash table if
2157    - they are of the form (set (pseudo-reg) src),
2158    - src is something we want to perform GCSE on,
2159    - none of the operands are subsequently modified in the block
2160
2161    Assignment entries are placed in the hash table if
2162    - they are of the form (set (pseudo-reg) src),
2163    - src is something we want to perform const/copy propagation on,
2164    - none of the operands or target are subsequently modified in the block
2165    Currently src must be a pseudo-reg or a const_int.
2166
2167    F is the first insn.
2168    SET_P is non-zero for computing the assignment hash table.  */
2169
2170 static void
2171 compute_hash_table (set_p)
2172      int set_p;
2173 {
2174   int bb;
2175
2176   /* While we compute the hash table we also compute a bit array of which
2177      registers are set in which blocks.
2178      We also compute which blocks set memory, in the absence of aliasing
2179      support [which is TODO].
2180      ??? This isn't needed during const/copy propagation, but it's cheap to
2181      compute.  Later.  */
2182   sbitmap_vector_zero (reg_set_in_block, n_basic_blocks);
2183   bzero ((char *) mem_set_in_block, n_basic_blocks);
2184
2185   /* Some working arrays used to track first and last set in each block.  */
2186   /* ??? One could use alloca here, but at some size a threshold is crossed
2187      beyond which one should use malloc.  Are we at that threshold here?  */
2188   reg_first_set = (int *) gmalloc (max_gcse_regno * sizeof (int));
2189   reg_last_set = (int *) gmalloc (max_gcse_regno * sizeof (int));
2190
2191   for (bb = 0; bb < n_basic_blocks; bb++)
2192     {
2193       rtx insn;
2194       int regno;
2195       int in_libcall_block;
2196       int i;
2197
2198       /* First pass over the instructions records information used to
2199          determine when registers and memory are first and last set.
2200          ??? The mem_set_in_block and hard-reg reg_set_in_block computation
2201          could be moved to compute_sets since they currently don't change.  */
2202
2203       for (i = 0; i < max_gcse_regno; i++)
2204         reg_first_set[i] = reg_last_set[i] = NEVER_SET;
2205       mem_first_set = NEVER_SET;
2206       mem_last_set = NEVER_SET;
2207
2208       for (insn = BLOCK_HEAD (bb);
2209            insn && insn != NEXT_INSN (BLOCK_END (bb));
2210            insn = NEXT_INSN (insn))
2211         {
2212 #ifdef NON_SAVING_SETJMP
2213           if (NON_SAVING_SETJMP && GET_CODE (insn) == NOTE
2214               && NOTE_LINE_NUMBER (insn) == NOTE_INSN_SETJMP)
2215             {
2216               for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2217                 record_last_reg_set_info (insn, regno);
2218               continue;
2219             }
2220 #endif
2221
2222           if (GET_RTX_CLASS (GET_CODE (insn)) != 'i')
2223             continue;
2224
2225           if (GET_CODE (insn) == CALL_INSN)
2226             {
2227               for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2228                 if ((call_used_regs[regno]
2229                      && regno != STACK_POINTER_REGNUM
2230 #if HARD_FRAME_POINTER_REGNUM != FRAME_POINTER_REGNUM
2231                      && regno != HARD_FRAME_POINTER_REGNUM
2232 #endif
2233 #if ARG_POINTER_REGNUM != FRAME_POINTER_REGNUM
2234                      && ! (regno == ARG_POINTER_REGNUM && fixed_regs[regno])
2235 #endif
2236 #if defined (PIC_OFFSET_TABLE_REGNUM) && !defined (PIC_OFFSET_TABLE_REG_CALL_CLOBBERED)
2237                      && ! (regno == PIC_OFFSET_TABLE_REGNUM && flag_pic)
2238 #endif
2239
2240                      && regno != FRAME_POINTER_REGNUM)
2241                     || global_regs[regno])
2242                   record_last_reg_set_info (insn, regno);
2243               if (! CONST_CALL_P (insn))
2244                 record_last_mem_set_info (insn);
2245             }
2246
2247           note_stores (PATTERN (insn), record_last_set_info, insn);
2248         }
2249
2250       /* The next pass builds the hash table.  */
2251
2252       for (insn = BLOCK_HEAD (bb), in_libcall_block = 0;
2253            insn && insn != NEXT_INSN (BLOCK_END (bb));
2254            insn = NEXT_INSN (insn))
2255         {
2256           if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
2257             {
2258               if (find_reg_note (insn, REG_LIBCALL, NULL_RTX))
2259                 in_libcall_block = 1;
2260               else if (find_reg_note (insn, REG_RETVAL, NULL_RTX))
2261                 in_libcall_block = 0;
2262               hash_scan_insn (insn, set_p, in_libcall_block);
2263             }
2264         }
2265     }
2266
2267   free (reg_first_set);
2268   free (reg_last_set);
2269   /* Catch bugs early.  */
2270   reg_first_set = reg_last_set = 0;
2271 }
2272
2273 /* Allocate space for the set hash table.
2274    N_INSNS is the number of instructions in the function.
2275    It is used to determine the number of buckets to use.  */
2276
2277 static void
2278 alloc_set_hash_table (n_insns)
2279      int n_insns;
2280 {
2281   int n;
2282
2283   set_hash_table_size = n_insns / 4;
2284   if (set_hash_table_size < 11)
2285     set_hash_table_size = 11;
2286   /* Attempt to maintain efficient use of hash table.
2287      Making it an odd number is simplest for now.
2288      ??? Later take some measurements.  */
2289   set_hash_table_size |= 1;
2290   n = set_hash_table_size * sizeof (struct expr *);
2291   set_hash_table = (struct expr **) gmalloc (n);
2292 }
2293
2294 /* Free things allocated by alloc_set_hash_table.  */
2295
2296 static void
2297 free_set_hash_table ()
2298 {
2299   free (set_hash_table);
2300 }
2301
2302 /* Compute the hash table for doing copy/const propagation.  */
2303
2304 static void
2305 compute_set_hash_table ()
2306 {
2307   /* Initialize count of number of entries in hash table.  */
2308   n_sets = 0;
2309   bzero ((char *) set_hash_table, set_hash_table_size * sizeof (struct expr *));
2310
2311   compute_hash_table (1);
2312 }
2313
2314 /* Allocate space for the expression hash table.
2315    N_INSNS is the number of instructions in the function.
2316    It is used to determine the number of buckets to use.  */
2317
2318 static void
2319 alloc_expr_hash_table (n_insns)
2320      int n_insns;
2321 {
2322   int n;
2323
2324   expr_hash_table_size = n_insns / 2;
2325   /* Make sure the amount is usable.  */
2326   if (expr_hash_table_size < 11)
2327     expr_hash_table_size = 11;
2328   /* Attempt to maintain efficient use of hash table.
2329      Making it an odd number is simplest for now.
2330      ??? Later take some measurements.  */
2331   expr_hash_table_size |= 1;
2332   n = expr_hash_table_size * sizeof (struct expr *);
2333   expr_hash_table = (struct expr **) gmalloc (n);
2334 }
2335
2336 /* Free things allocated by alloc_expr_hash_table.  */
2337
2338 static void
2339 free_expr_hash_table ()
2340 {
2341   free (expr_hash_table);
2342 }
2343
2344 /* Compute the hash table for doing GCSE.  */
2345
2346 static void
2347 compute_expr_hash_table ()
2348 {
2349   /* Initialize count of number of entries in hash table.  */
2350   n_exprs = 0;
2351   bzero ((char *) expr_hash_table, expr_hash_table_size * sizeof (struct expr *));
2352
2353   compute_hash_table (0);
2354 }
2355 \f
2356 /* Expression tracking support.  */
2357
2358 /* Lookup pattern PAT in the expression table.
2359    The result is a pointer to the table entry, or NULL if not found.  */
2360
2361 static struct expr *
2362 lookup_expr (pat)
2363      rtx pat;
2364 {
2365   int do_not_record_p;
2366   unsigned int hash = hash_expr (pat, GET_MODE (pat), &do_not_record_p,
2367                                  expr_hash_table_size);
2368   struct expr *expr;
2369
2370   if (do_not_record_p)
2371     return NULL;
2372
2373   expr = expr_hash_table[hash];
2374
2375   while (expr && ! expr_equiv_p (expr->expr, pat))
2376     expr = expr->next_same_hash;
2377
2378   return expr;
2379 }
2380
2381 /* Lookup REGNO in the set table.
2382    If PAT is non-NULL look for the entry that matches it, otherwise return
2383    the first entry for REGNO.
2384    The result is a pointer to the table entry, or NULL if not found.  */
2385
2386 static struct expr *
2387 lookup_set (regno, pat)
2388      int regno;
2389      rtx pat;
2390 {
2391   unsigned int hash = hash_set (regno, set_hash_table_size);
2392   struct expr *expr;
2393
2394   expr = set_hash_table[hash];
2395
2396   if (pat)
2397     {
2398       while (expr && ! expr_equiv_p (expr->expr, pat))
2399         expr = expr->next_same_hash;
2400     }
2401   else
2402     {
2403       while (expr && REGNO (SET_DEST (expr->expr)) != regno)
2404         expr = expr->next_same_hash;
2405     }
2406
2407   return expr;
2408 }
2409
2410 /* Return the next entry for REGNO in list EXPR.  */
2411
2412 static struct expr *
2413 next_set (regno, expr)
2414      int regno;
2415      struct expr *expr;
2416 {
2417   do
2418     expr = expr->next_same_hash;
2419   while (expr && REGNO (SET_DEST (expr->expr)) != regno);
2420   return expr;
2421 }
2422
2423 /* Reset tables used to keep track of what's still available [since the
2424    start of the block].  */
2425
2426 static void
2427 reset_opr_set_tables ()
2428 {
2429   /* Maintain a bitmap of which regs have been set since beginning of
2430      the block.  */
2431   sbitmap_zero (reg_set_bitmap);
2432   /* Also keep a record of the last instruction to modify memory.
2433      For now this is very trivial, we only record whether any memory
2434      location has been modified.  */
2435   mem_last_set = 0;
2436 }
2437
2438 /* Return non-zero if the operands of X are not set before INSN in
2439    INSN's basic block.  */
2440
2441 static int
2442 oprs_not_set_p (x, insn)
2443      rtx x, insn;
2444 {
2445   int i;
2446   enum rtx_code code;
2447   const char *fmt;
2448
2449   /* repeat is used to turn tail-recursion into iteration.  */
2450 repeat:
2451
2452   if (x == 0)
2453     return 1;
2454
2455   code = GET_CODE (x);
2456   switch (code)
2457     {
2458     case PC:
2459     case CC0:
2460     case CONST:
2461     case CONST_INT:
2462     case CONST_DOUBLE:
2463     case SYMBOL_REF:
2464     case LABEL_REF:
2465     case ADDR_VEC:
2466     case ADDR_DIFF_VEC:
2467       return 1;
2468
2469     case MEM:
2470       if (mem_last_set != 0)
2471         return 0;
2472       x = XEXP (x, 0);
2473       goto repeat;
2474
2475     case REG:
2476       return ! TEST_BIT (reg_set_bitmap, REGNO (x));
2477
2478     default:
2479       break;
2480     }
2481
2482   fmt = GET_RTX_FORMAT (code);
2483   for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
2484     {
2485       if (fmt[i] == 'e')
2486         {
2487           int not_set_p;
2488           /* If we are about to do the last recursive call
2489              needed at this level, change it into iteration.
2490              This function is called enough to be worth it.  */
2491           if (i == 0)
2492             {
2493               x = XEXP (x, 0);
2494               goto repeat;
2495             }
2496           not_set_p = oprs_not_set_p (XEXP (x, i), insn);
2497           if (! not_set_p)
2498             return 0;
2499         }
2500       else if (fmt[i] == 'E')
2501         {
2502           int j;
2503           for (j = 0; j < XVECLEN (x, i); j++)
2504             {
2505               int not_set_p = oprs_not_set_p (XVECEXP (x, i, j), insn);
2506               if (! not_set_p)
2507                 return 0;
2508             }
2509         }
2510     }
2511
2512   return 1;
2513 }
2514
2515 /* Mark things set by a CALL.  */
2516
2517 static void
2518 mark_call (insn)
2519      rtx insn;
2520 {
2521   mem_last_set = INSN_CUID (insn);
2522 }
2523
2524 /* Mark things set by a SET.  */
2525
2526 static void
2527 mark_set (pat, insn)
2528      rtx pat, insn;
2529 {
2530   rtx dest = SET_DEST (pat);
2531
2532   while (GET_CODE (dest) == SUBREG
2533          || GET_CODE (dest) == ZERO_EXTRACT
2534          || GET_CODE (dest) == SIGN_EXTRACT
2535          || GET_CODE (dest) == STRICT_LOW_PART)
2536     dest = XEXP (dest, 0);
2537
2538   if (GET_CODE (dest) == REG)
2539     SET_BIT (reg_set_bitmap, REGNO (dest));
2540   else if (GET_CODE (dest) == MEM)
2541     mem_last_set = INSN_CUID (insn);
2542
2543   if (GET_CODE (SET_SRC (pat)) == CALL)
2544     mark_call (insn);
2545 }
2546
2547 /* Record things set by a CLOBBER.  */
2548
2549 static void
2550 mark_clobber (pat, insn)
2551      rtx pat, insn;
2552 {
2553   rtx clob = XEXP (pat, 0);
2554
2555   while (GET_CODE (clob) == SUBREG || GET_CODE (clob) == STRICT_LOW_PART)
2556     clob = XEXP (clob, 0);
2557
2558   if (GET_CODE (clob) == REG)
2559     SET_BIT (reg_set_bitmap, REGNO (clob));
2560   else
2561     mem_last_set = INSN_CUID (insn);
2562 }
2563
2564 /* Record things set by INSN.
2565    This data is used by oprs_not_set_p.  */
2566
2567 static void
2568 mark_oprs_set (insn)
2569      rtx insn;
2570 {
2571   rtx pat = PATTERN (insn);
2572
2573   if (GET_CODE (pat) == SET)
2574     mark_set (pat, insn);
2575   else if (GET_CODE (pat) == PARALLEL)
2576     {
2577       int i;
2578
2579       for (i = 0; i < XVECLEN (pat, 0); i++)
2580         {
2581           rtx x = XVECEXP (pat, 0, i);
2582
2583           if (GET_CODE (x) == SET)
2584             mark_set (x, insn);
2585           else if (GET_CODE (x) == CLOBBER)
2586             mark_clobber (x, insn);
2587           else if (GET_CODE (x) == CALL)
2588             mark_call (insn);
2589         }
2590     }
2591   else if (GET_CODE (pat) == CLOBBER)
2592     mark_clobber (pat, insn);
2593   else if (GET_CODE (pat) == CALL)
2594     mark_call (insn);
2595 }
2596
2597 \f
2598 /* Classic GCSE reaching definition support.  */
2599
2600 /* Allocate reaching def variables.  */
2601
2602 static void
2603 alloc_rd_mem (n_blocks, n_insns)
2604      int n_blocks, n_insns;
2605 {
2606   rd_kill = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2607   sbitmap_vector_zero (rd_kill, n_basic_blocks);
2608
2609   rd_gen = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2610   sbitmap_vector_zero (rd_gen, n_basic_blocks);
2611
2612   reaching_defs = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2613   sbitmap_vector_zero (reaching_defs, n_basic_blocks);
2614
2615   rd_out = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2616   sbitmap_vector_zero (rd_out, n_basic_blocks);
2617 }
2618
2619 /* Free reaching def variables.  */
2620
2621 static void
2622 free_rd_mem ()
2623 {
2624   free (rd_kill);
2625   free (rd_gen);
2626   free (reaching_defs);
2627   free (rd_out);
2628 }
2629
2630 /* Add INSN to the kills of BB.
2631    REGNO, set in BB, is killed by INSN.  */
2632
2633 static void
2634 handle_rd_kill_set (insn, regno, bb)
2635      rtx insn;
2636      int regno, bb;
2637 {
2638   struct reg_set *this_reg = reg_set_table[regno];
2639
2640   while (this_reg)
2641     {
2642       if (BLOCK_NUM (this_reg->insn) != BLOCK_NUM (insn))
2643         SET_BIT (rd_kill[bb], INSN_CUID (this_reg->insn));
2644       this_reg = this_reg->next;
2645     }
2646 }
2647
2648 /* Compute the set of kill's for reaching definitions.  */
2649
2650 static void
2651 compute_kill_rd ()
2652 {
2653   int bb,cuid;
2654
2655   /* For each block
2656        For each set bit in `gen' of the block (i.e each insn which
2657            generates a definition in the block)
2658          Call the reg set by the insn corresponding to that bit regx
2659          Look at the linked list starting at reg_set_table[regx]
2660          For each setting of regx in the linked list, which is not in
2661              this block
2662            Set the bit in `kill' corresponding to that insn
2663     */
2664
2665   for (bb = 0; bb < n_basic_blocks; bb++)
2666     {
2667       for (cuid = 0; cuid < max_cuid; cuid++)
2668         {
2669           if (TEST_BIT (rd_gen[bb], cuid))
2670             {
2671               rtx insn = CUID_INSN (cuid);
2672               rtx pat = PATTERN (insn);
2673
2674               if (GET_CODE (insn) == CALL_INSN)
2675                 {
2676                   int regno;
2677
2678                   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2679                     {
2680                       if ((call_used_regs[regno]
2681                            && regno != STACK_POINTER_REGNUM
2682 #if HARD_FRAME_POINTER_REGNUM != FRAME_POINTER_REGNUM
2683                            && regno != HARD_FRAME_POINTER_REGNUM
2684 #endif
2685 #if ARG_POINTER_REGNUM != FRAME_POINTER_REGNUM
2686                            && ! (regno == ARG_POINTER_REGNUM
2687                                  && fixed_regs[regno])
2688 #endif
2689 #if defined (PIC_OFFSET_TABLE_REGNUM) && !defined (PIC_OFFSET_TABLE_REG_CALL_CLOBBERED)
2690                            && ! (regno == PIC_OFFSET_TABLE_REGNUM && flag_pic)
2691 #endif
2692                            && regno != FRAME_POINTER_REGNUM)
2693                           || global_regs[regno])
2694                         handle_rd_kill_set (insn, regno, bb);
2695                     }
2696                 }
2697
2698               if (GET_CODE (pat) == PARALLEL)
2699                 {
2700                   int i;
2701
2702                   /* We work backwards because ... */
2703                   for (i = XVECLEN (pat, 0) - 1; i >= 0; i--)
2704                     {
2705                       enum rtx_code code = GET_CODE (XVECEXP (pat, 0, i));
2706                       if ((code == SET || code == CLOBBER)
2707                           && GET_CODE (XEXP (XVECEXP (pat, 0, i), 0)) == REG)
2708                         handle_rd_kill_set (insn,
2709                                             REGNO (XEXP (XVECEXP (pat, 0, i), 0)),
2710                                             bb);
2711                     }
2712                 }
2713               else if (GET_CODE (pat) == SET)
2714                 {
2715                   if (GET_CODE (SET_DEST (pat)) == REG)
2716                     {
2717                       /* Each setting of this register outside of this block
2718                          must be marked in the set of kills in this block.  */
2719                       handle_rd_kill_set (insn, REGNO (SET_DEST (pat)), bb);
2720                     }
2721                 }
2722               /* FIXME: CLOBBER? */
2723             }
2724         }
2725     }
2726 }
2727
2728 /* Compute the reaching definitions as in
2729    Compilers Principles, Techniques, and Tools. Aho, Sethi, Ullman,
2730    Chapter 10.  It is the same algorithm as used for computing available
2731    expressions but applied to the gens and kills of reaching definitions.  */
2732
2733 static void
2734 compute_rd ()
2735 {
2736   int bb, changed, passes;
2737
2738   for (bb = 0; bb < n_basic_blocks; bb++)
2739     sbitmap_copy (rd_out[bb] /*dst*/, rd_gen[bb] /*src*/);
2740
2741   passes = 0;
2742   changed = 1;
2743   while (changed)
2744     {
2745       changed = 0;
2746       for (bb = 0; bb < n_basic_blocks; bb++)
2747         {
2748           sbitmap_union_of_preds (reaching_defs[bb], rd_out, bb);
2749           changed |= sbitmap_union_of_diff (rd_out[bb], rd_gen[bb],
2750                                             reaching_defs[bb], rd_kill[bb]);
2751         }
2752       passes++;
2753     }
2754
2755   if (gcse_file)
2756     fprintf (gcse_file, "reaching def computation: %d passes\n", passes);
2757 }
2758 \f
2759 /* Classic GCSE available expression support.  */
2760
2761 /* Allocate memory for available expression computation.  */
2762
2763 static void
2764 alloc_avail_expr_mem (n_blocks, n_exprs)
2765      int n_blocks, n_exprs;
2766 {
2767   ae_kill = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2768   sbitmap_vector_zero (ae_kill, n_basic_blocks);
2769
2770   ae_gen = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2771   sbitmap_vector_zero (ae_gen, n_basic_blocks);
2772
2773   ae_in = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2774   sbitmap_vector_zero (ae_in, n_basic_blocks);
2775
2776   ae_out = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2777   sbitmap_vector_zero (ae_out, n_basic_blocks);
2778
2779   u_bitmap = (sbitmap) sbitmap_alloc (n_exprs);
2780   sbitmap_ones (u_bitmap);
2781 }
2782
2783 static void
2784 free_avail_expr_mem ()
2785 {
2786   free (ae_kill);
2787   free (ae_gen);
2788   free (ae_in);
2789   free (ae_out);
2790   free (u_bitmap);
2791 }
2792
2793 /* Compute the set of available expressions generated in each basic block.  */
2794
2795 static void
2796 compute_ae_gen ()
2797 {
2798   int i;
2799
2800   /* For each recorded occurrence of each expression, set ae_gen[bb][expr].
2801      This is all we have to do because an expression is not recorded if it
2802      is not available, and the only expressions we want to work with are the
2803      ones that are recorded.  */
2804
2805   for (i = 0; i < expr_hash_table_size; i++)
2806     {
2807       struct expr *expr = expr_hash_table[i];
2808       while (expr != NULL)
2809         {
2810           struct occr *occr = expr->avail_occr;
2811           while (occr != NULL)
2812             {
2813               SET_BIT (ae_gen[BLOCK_NUM (occr->insn)], expr->bitmap_index);
2814               occr = occr->next;
2815             }
2816           expr = expr->next_same_hash;
2817         }
2818     }
2819 }
2820
2821 /* Return non-zero if expression X is killed in BB.  */
2822
2823 static int
2824 expr_killed_p (x, bb)
2825      rtx x;
2826      int bb;
2827 {
2828   int i;
2829   enum rtx_code code;
2830   const char *fmt;
2831
2832   /* repeat is used to turn tail-recursion into iteration.  */
2833  repeat:
2834
2835   if (x == 0)
2836     return 1;
2837
2838   code = GET_CODE (x);
2839   switch (code)
2840     {
2841     case REG:
2842       return TEST_BIT (reg_set_in_block[bb], REGNO (x));
2843
2844     case MEM:
2845       if (mem_set_in_block[bb])
2846         return 1;
2847       x = XEXP (x, 0);
2848       goto repeat;
2849
2850     case PC:
2851     case CC0: /*FIXME*/
2852     case CONST:
2853     case CONST_INT:
2854     case CONST_DOUBLE:
2855     case SYMBOL_REF:
2856     case LABEL_REF:
2857     case ADDR_VEC:
2858     case ADDR_DIFF_VEC:
2859       return 0;
2860
2861     default:
2862       break;
2863     }
2864
2865   i = GET_RTX_LENGTH (code) - 1;
2866   fmt = GET_RTX_FORMAT (code);
2867   for (; i >= 0; i--)
2868     {
2869       if (fmt[i] == 'e')
2870         {
2871           rtx tem = XEXP (x, i);
2872
2873           /* If we are about to do the last recursive call
2874              needed at this level, change it into iteration.
2875              This function is called enough to be worth it.  */
2876           if (i == 0)
2877             {
2878               x = tem;
2879               goto repeat;
2880             }
2881           if (expr_killed_p (tem, bb))
2882             return 1;
2883         }
2884       else if (fmt[i] == 'E')
2885         {
2886           int j;
2887           for (j = 0; j < XVECLEN (x, i); j++)
2888             {
2889               if (expr_killed_p (XVECEXP (x, i, j), bb))
2890                 return 1;
2891             }
2892         }
2893     }
2894
2895   return 0;
2896 }
2897
2898 /* Compute the set of available expressions killed in each basic block.  */
2899
2900 static void
2901 compute_ae_kill (ae_gen, ae_kill)
2902      sbitmap *ae_gen, *ae_kill;
2903 {
2904   int bb,i;
2905
2906   for (bb = 0; bb < n_basic_blocks; bb++)
2907     {
2908       for (i = 0; i < expr_hash_table_size; i++)
2909         {
2910           struct expr *expr = expr_hash_table[i];
2911
2912           for ( ; expr != NULL; expr = expr->next_same_hash)
2913             {
2914               /* Skip EXPR if generated in this block.  */
2915               if (TEST_BIT (ae_gen[bb], expr->bitmap_index))
2916                 continue;
2917
2918               if (expr_killed_p (expr->expr, bb))
2919                 SET_BIT (ae_kill[bb], expr->bitmap_index);
2920             }
2921         }
2922     }
2923 }
2924 \f
2925 /* Actually perform the Classic GCSE optimizations.  */
2926
2927 /* Return non-zero if occurrence OCCR of expression EXPR reaches block BB.
2928
2929    CHECK_SELF_LOOP is non-zero if we should consider a block reaching itself
2930    as a positive reach.  We want to do this when there are two computations
2931    of the expression in the block.
2932
2933    VISITED is a pointer to a working buffer for tracking which BB's have
2934    been visited.  It is NULL for the top-level call.
2935
2936    We treat reaching expressions that go through blocks containing the same
2937    reaching expression as "not reaching".  E.g. if EXPR is generated in blocks
2938    2 and 3, INSN is in block 4, and 2->3->4, we treat the expression in block
2939    2 as not reaching.  The intent is to improve the probability of finding
2940    only one reaching expression and to reduce register lifetimes by picking
2941    the closest such expression.  */
2942
2943 static int
2944 expr_reaches_here_p_work (occr, expr, bb, check_self_loop, visited)
2945      struct occr *occr;
2946      struct expr *expr;
2947      int bb;
2948      int check_self_loop;
2949      char *visited;
2950 {
2951   edge pred;
2952
2953   for (pred = BASIC_BLOCK(bb)->pred; pred != NULL; pred = pred->pred_next)
2954     {
2955       int pred_bb = pred->src->index;
2956
2957       if (visited[pred_bb])
2958         {
2959           /* This predecessor has already been visited.
2960              Nothing to do.  */
2961           ;
2962         }
2963       else if (pred_bb == bb)
2964         {
2965           /* BB loops on itself.  */
2966           if (check_self_loop
2967               && TEST_BIT (ae_gen[pred_bb], expr->bitmap_index)
2968               && BLOCK_NUM (occr->insn) == pred_bb)
2969             return 1;
2970           visited[pred_bb] = 1;
2971         }
2972       /* Ignore this predecessor if it kills the expression.  */
2973       else if (TEST_BIT (ae_kill[pred_bb], expr->bitmap_index))
2974         visited[pred_bb] = 1;
2975       /* Does this predecessor generate this expression?  */
2976       else if (TEST_BIT (ae_gen[pred_bb], expr->bitmap_index))
2977         {
2978           /* Is this the occurrence we're looking for?
2979              Note that there's only one generating occurrence per block
2980              so we just need to check the block number.  */
2981           if (BLOCK_NUM (occr->insn) == pred_bb)
2982             return 1;
2983           visited[pred_bb] = 1;
2984         }
2985       /* Neither gen nor kill.  */
2986       else
2987         {
2988           visited[pred_bb] = 1;
2989           if (expr_reaches_here_p_work (occr, expr, pred_bb, check_self_loop,
2990               visited))
2991             return 1;
2992         }
2993     }
2994
2995   /* All paths have been checked.  */
2996   return 0;
2997 }
2998
2999 /* This wrapper for expr_reaches_here_p_work() is to ensure that any
3000    memory allocated for that function is returned. */
3001
3002 static int
3003 expr_reaches_here_p (occr, expr, bb, check_self_loop)
3004      struct occr *occr;
3005      struct expr *expr;
3006      int bb;
3007      int check_self_loop;
3008 {
3009   int rval;
3010   char * visited = (char *) xcalloc (n_basic_blocks, 1);
3011
3012   rval = expr_reaches_here_p_work(occr, expr, bb, check_self_loop, visited);
3013
3014   free (visited);
3015
3016   return (rval);
3017 }
3018
3019 /* Return the instruction that computes EXPR that reaches INSN's basic block.
3020    If there is more than one such instruction, return NULL.
3021
3022    Called only by handle_avail_expr.  */
3023
3024 static rtx
3025 computing_insn (expr, insn)
3026      struct expr *expr;
3027      rtx insn;
3028 {
3029   int bb = BLOCK_NUM (insn);
3030
3031   if (expr->avail_occr->next == NULL)
3032     {
3033       if (BLOCK_NUM (expr->avail_occr->insn) == bb)
3034         {
3035           /* The available expression is actually itself
3036              (i.e. a loop in the flow graph) so do nothing.  */
3037           return NULL;
3038         }
3039       /* (FIXME) Case that we found a pattern that was created by
3040          a substitution that took place.  */
3041       return expr->avail_occr->insn;
3042     }
3043   else
3044     {
3045       /* Pattern is computed more than once.
3046          Search backwards from this insn to see how many of these
3047          computations actually reach this insn.  */
3048       struct occr *occr;
3049       rtx insn_computes_expr = NULL;
3050       int can_reach = 0;
3051
3052       for (occr = expr->avail_occr; occr != NULL; occr = occr->next)
3053         {
3054           if (BLOCK_NUM (occr->insn) == bb)
3055             {
3056               /* The expression is generated in this block.
3057                  The only time we care about this is when the expression
3058                  is generated later in the block [and thus there's a loop].
3059                  We let the normal cse pass handle the other cases.  */
3060               if (INSN_CUID (insn) < INSN_CUID (occr->insn))
3061                 {
3062                   if (expr_reaches_here_p (occr, expr, bb, 1))
3063                     {
3064                       can_reach++;
3065                       if (can_reach > 1)
3066                         return NULL;
3067                       insn_computes_expr = occr->insn;
3068                     }
3069                 }
3070             }
3071           else /* Computation of the pattern outside this block.  */
3072             {
3073               if (expr_reaches_here_p (occr, expr, bb, 0))
3074                 {
3075                   can_reach++;
3076                   if (can_reach > 1)
3077                     return NULL;
3078                   insn_computes_expr = occr->insn;
3079                 }
3080             }
3081         }
3082
3083       if (insn_computes_expr == NULL)
3084         abort ();
3085       return insn_computes_expr;
3086     }
3087 }
3088
3089 /* Return non-zero if the definition in DEF_INSN can reach INSN.
3090    Only called by can_disregard_other_sets.  */
3091
3092 static int
3093 def_reaches_here_p (insn, def_insn)
3094      rtx insn, def_insn;
3095 {
3096   rtx reg;
3097
3098   if (TEST_BIT (reaching_defs[BLOCK_NUM (insn)], INSN_CUID (def_insn)))
3099     return 1;
3100
3101   if (BLOCK_NUM (insn) == BLOCK_NUM (def_insn))
3102     {
3103       if (INSN_CUID (def_insn) < INSN_CUID (insn))
3104         {
3105           if (GET_CODE (PATTERN (def_insn)) == PARALLEL)
3106             return 1;
3107           if (GET_CODE (PATTERN (def_insn)) == CLOBBER)
3108             reg = XEXP (PATTERN (def_insn), 0);
3109           else if (GET_CODE (PATTERN (def_insn)) == SET)
3110             reg = SET_DEST (PATTERN (def_insn));
3111           else
3112             abort ();
3113           return ! reg_set_between_p (reg, NEXT_INSN (def_insn), insn);
3114         }
3115       else
3116         return 0;
3117     }
3118
3119   return 0;
3120 }
3121
3122 /* Return non-zero if *ADDR_THIS_REG can only have one value at INSN.
3123    The value returned is the number of definitions that reach INSN.
3124    Returning a value of zero means that [maybe] more than one definition
3125    reaches INSN and the caller can't perform whatever optimization it is
3126    trying.  i.e. it is always safe to return zero.  */
3127
3128 static int
3129 can_disregard_other_sets (addr_this_reg, insn, for_combine)
3130      struct reg_set **addr_this_reg;
3131      rtx insn;
3132      int for_combine;
3133 {
3134   int number_of_reaching_defs = 0;
3135   struct reg_set *this_reg = *addr_this_reg;
3136
3137   while (this_reg)
3138     {
3139       if (def_reaches_here_p (insn, this_reg->insn))
3140         {
3141           number_of_reaching_defs++;
3142           /* Ignore parallels for now.  */
3143           if (GET_CODE (PATTERN (this_reg->insn)) == PARALLEL)
3144             return 0;
3145           if (!for_combine
3146               && (GET_CODE (PATTERN (this_reg->insn)) == CLOBBER
3147                   || ! rtx_equal_p (SET_SRC (PATTERN (this_reg->insn)),
3148                                     SET_SRC (PATTERN (insn)))))
3149             {
3150               /* A setting of the reg to a different value reaches INSN.  */
3151               return 0;
3152             }
3153           if (number_of_reaching_defs > 1)
3154             {
3155               /* If in this setting the value the register is being
3156                  set to is equal to the previous value the register
3157                  was set to and this setting reaches the insn we are
3158                  trying to do the substitution on then we are ok.  */
3159
3160               if (GET_CODE (PATTERN (this_reg->insn)) == CLOBBER)
3161                 return 0;
3162               if (! rtx_equal_p (SET_SRC (PATTERN (this_reg->insn)),
3163                                  SET_SRC (PATTERN (insn))))
3164                 return 0;
3165             }
3166           *addr_this_reg = this_reg;
3167         }
3168
3169       /* prev_this_reg = this_reg; */
3170       this_reg = this_reg->next;
3171     }
3172
3173   return number_of_reaching_defs;
3174 }
3175
3176 /* Expression computed by insn is available and the substitution is legal,
3177    so try to perform the substitution.
3178
3179    The result is non-zero if any changes were made.  */
3180
3181 static int
3182 handle_avail_expr (insn, expr)
3183      rtx insn;
3184      struct expr *expr;
3185 {
3186   rtx pat, insn_computes_expr;
3187   rtx to;
3188   struct reg_set *this_reg;
3189   int found_setting, use_src;
3190   int changed = 0;
3191
3192   /* We only handle the case where one computation of the expression
3193      reaches this instruction.  */
3194   insn_computes_expr = computing_insn (expr, insn);
3195   if (insn_computes_expr == NULL)
3196     return 0;
3197
3198   found_setting = 0;
3199   use_src = 0;
3200
3201   /* At this point we know only one computation of EXPR outside of this
3202      block reaches this insn.  Now try to find a register that the
3203      expression is computed into.  */
3204
3205   if (GET_CODE (SET_SRC (PATTERN (insn_computes_expr))) == REG)
3206     {
3207       /* This is the case when the available expression that reaches
3208          here has already been handled as an available expression.  */
3209       int regnum_for_replacing = REGNO (SET_SRC (PATTERN (insn_computes_expr)));
3210       /* If the register was created by GCSE we can't use `reg_set_table',
3211          however we know it's set only once.  */
3212       if (regnum_for_replacing >= max_gcse_regno
3213           /* If the register the expression is computed into is set only once,
3214              or only one set reaches this insn, we can use it.  */
3215           || (((this_reg = reg_set_table[regnum_for_replacing]),
3216                this_reg->next == NULL)
3217               || can_disregard_other_sets (&this_reg, insn, 0)))
3218        {
3219          use_src = 1;
3220          found_setting = 1;
3221        }
3222     }
3223
3224   if (!found_setting)
3225     {
3226       int regnum_for_replacing = REGNO (SET_DEST (PATTERN (insn_computes_expr)));
3227       /* This shouldn't happen.  */
3228       if (regnum_for_replacing >= max_gcse_regno)
3229         abort ();
3230       this_reg = reg_set_table[regnum_for_replacing];
3231       /* If the register the expression is computed into is set only once,
3232          or only one set reaches this insn, use it.  */
3233       if (this_reg->next == NULL
3234           || can_disregard_other_sets (&this_reg, insn, 0))
3235         found_setting = 1;
3236     }
3237
3238   if (found_setting)
3239     {
3240       pat = PATTERN (insn);
3241       if (use_src)
3242         to = SET_SRC (PATTERN (insn_computes_expr));
3243       else
3244         to = SET_DEST (PATTERN (insn_computes_expr));
3245       changed = validate_change (insn, &SET_SRC (pat), to, 0);
3246
3247       /* We should be able to ignore the return code from validate_change but
3248          to play it safe we check.  */
3249       if (changed)
3250         {
3251           gcse_subst_count++;
3252           if (gcse_file != NULL)
3253             {
3254               fprintf (gcse_file, "GCSE: Replacing the source in insn %d with reg %d %s insn %d\n",
3255                        INSN_UID (insn), REGNO (to),
3256                        use_src ? "from" : "set in",
3257                        INSN_UID (insn_computes_expr));
3258             }
3259
3260         }
3261     }
3262   /* The register that the expr is computed into is set more than once.  */
3263   else if (1 /*expensive_op(this_pattrn->op) && do_expensive_gcse)*/)
3264     {
3265       /* Insert an insn after insnx that copies the reg set in insnx
3266          into a new pseudo register call this new register REGN.
3267          From insnb until end of basic block or until REGB is set
3268          replace all uses of REGB with REGN.  */
3269       rtx new_insn;
3270
3271       to = gen_reg_rtx (GET_MODE (SET_DEST (PATTERN (insn_computes_expr))));
3272
3273       /* Generate the new insn.  */
3274       /* ??? If the change fails, we return 0, even though we created
3275          an insn.  I think this is ok.  */
3276       new_insn
3277         = emit_insn_after (gen_rtx_SET (VOIDmode, to,
3278                                         SET_DEST (PATTERN (insn_computes_expr))),
3279                                   insn_computes_expr);
3280       /* Keep block number table up to date.  */
3281       set_block_num (new_insn, BLOCK_NUM (insn_computes_expr));
3282       /* Keep register set table up to date.  */
3283       record_one_set (REGNO (to), new_insn);
3284
3285       gcse_create_count++;
3286       if (gcse_file != NULL)
3287         {
3288           fprintf (gcse_file, "GCSE: Creating insn %d to copy value of reg %d, computed in insn %d,\n",
3289                    INSN_UID (NEXT_INSN (insn_computes_expr)),
3290                    REGNO (SET_SRC (PATTERN (NEXT_INSN (insn_computes_expr)))),
3291                    INSN_UID (insn_computes_expr));
3292           fprintf (gcse_file, "      into newly allocated reg %d\n", REGNO (to));
3293         }
3294
3295       pat = PATTERN (insn);
3296
3297       /* Do register replacement for INSN.  */
3298       changed = validate_change (insn, &SET_SRC (pat),
3299                                  SET_DEST (PATTERN (NEXT_INSN (insn_computes_expr))),
3300                                  0);
3301
3302       /* We should be able to ignore the return code from validate_change but
3303          to play it safe we check.  */
3304       if (changed)
3305         {
3306           gcse_subst_count++;
3307           if (gcse_file != NULL)
3308             {
3309               fprintf (gcse_file, "GCSE: Replacing the source in insn %d with reg %d set in insn %d\n",
3310                        INSN_UID (insn),
3311                        REGNO (SET_DEST (PATTERN (NEXT_INSN (insn_computes_expr)))),
3312                        INSN_UID (insn_computes_expr));
3313             }
3314
3315         }
3316     }
3317
3318   return changed;
3319 }
3320
3321 /* Perform classic GCSE.
3322    This is called by one_classic_gcse_pass after all the dataflow analysis
3323    has been done.
3324
3325    The result is non-zero if a change was made.  */
3326
3327 static int
3328 classic_gcse ()
3329 {
3330   int bb, changed;
3331   rtx insn;
3332
3333   /* Note we start at block 1.  */
3334
3335   changed = 0;
3336   for (bb = 1; bb < n_basic_blocks; bb++)
3337     {
3338       /* Reset tables used to keep track of what's still valid [since the
3339          start of the block].  */
3340       reset_opr_set_tables ();
3341
3342       for (insn = BLOCK_HEAD (bb);
3343            insn != NULL && insn != NEXT_INSN (BLOCK_END (bb));
3344            insn = NEXT_INSN (insn))
3345         {
3346           /* Is insn of form (set (pseudo-reg) ...)?  */
3347
3348           if (GET_CODE (insn) == INSN
3349               && GET_CODE (PATTERN (insn)) == SET
3350               && GET_CODE (SET_DEST (PATTERN (insn))) == REG
3351               && REGNO (SET_DEST (PATTERN (insn))) >= FIRST_PSEUDO_REGISTER)
3352             {
3353               rtx pat = PATTERN (insn);
3354               rtx src = SET_SRC (pat);
3355               struct expr *expr;
3356
3357               if (want_to_gcse_p (src)
3358                   /* Is the expression recorded?  */
3359                   && ((expr = lookup_expr (src)) != NULL)
3360                   /* Is the expression available [at the start of the
3361                      block]?  */
3362                   && TEST_BIT (ae_in[bb], expr->bitmap_index)
3363                   /* Are the operands unchanged since the start of the
3364                      block?  */
3365                   && oprs_not_set_p (src, insn))
3366                 changed |= handle_avail_expr (insn, expr);
3367             }
3368
3369           /* Keep track of everything modified by this insn.  */
3370           /* ??? Need to be careful w.r.t. mods done to INSN.  */
3371           if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
3372             mark_oprs_set (insn);
3373         }
3374     }
3375
3376   return changed;
3377 }
3378
3379 /* Top level routine to perform one classic GCSE pass.
3380
3381    Return non-zero if a change was made.  */
3382
3383 static int
3384 one_classic_gcse_pass (pass)
3385      int pass;
3386 {
3387   int changed = 0;
3388
3389   gcse_subst_count = 0;
3390   gcse_create_count = 0;
3391
3392   alloc_expr_hash_table (max_cuid);
3393   alloc_rd_mem (n_basic_blocks, max_cuid);
3394   compute_expr_hash_table ();
3395   if (gcse_file)
3396     dump_hash_table (gcse_file, "Expression", expr_hash_table,
3397                      expr_hash_table_size, n_exprs);
3398   if (n_exprs > 0)
3399     {
3400       int passes;
3401       compute_kill_rd ();
3402       compute_rd ();
3403       alloc_avail_expr_mem (n_basic_blocks, n_exprs);
3404       compute_ae_gen ();
3405       compute_ae_kill (ae_gen, ae_kill);
3406       passes = compute_available (ae_gen, ae_kill, ae_out, ae_in);
3407       if (gcse_file)
3408         fprintf (gcse_file, "avail expr computation: %d passes\n", passes);
3409       changed = classic_gcse ();
3410       free_avail_expr_mem ();
3411     }
3412   free_rd_mem ();
3413   free_expr_hash_table ();
3414
3415   if (gcse_file)
3416     {
3417       fprintf (gcse_file, "\n");
3418       fprintf (gcse_file, "GCSE of %s, pass %d: %d bytes needed, %d substs, %d insns created\n",
3419                current_function_name, pass,
3420                bytes_used, gcse_subst_count, gcse_create_count);
3421     }
3422
3423   return changed;
3424 }
3425 \f
3426 /* Compute copy/constant propagation working variables.  */
3427
3428 /* Local properties of assignments.  */
3429
3430 static sbitmap *cprop_pavloc;
3431 static sbitmap *cprop_absaltered;
3432
3433 /* Global properties of assignments (computed from the local properties).  */
3434
3435 static sbitmap *cprop_avin;
3436 static sbitmap *cprop_avout;
3437
3438 /* Allocate vars used for copy/const propagation.
3439    N_BLOCKS is the number of basic blocks.
3440    N_SETS is the number of sets.  */
3441
3442 static void
3443 alloc_cprop_mem (n_blocks, n_sets)
3444      int n_blocks, n_sets;
3445 {
3446   cprop_pavloc = sbitmap_vector_alloc (n_blocks, n_sets);
3447   cprop_absaltered = sbitmap_vector_alloc (n_blocks, n_sets);
3448
3449   cprop_avin = sbitmap_vector_alloc (n_blocks, n_sets);
3450   cprop_avout = sbitmap_vector_alloc (n_blocks, n_sets);
3451 }
3452
3453 /* Free vars used by copy/const propagation.  */
3454
3455 static void
3456 free_cprop_mem ()
3457 {
3458   free (cprop_pavloc);
3459   free (cprop_absaltered);
3460   free (cprop_avin);
3461   free (cprop_avout);
3462 }
3463
3464 /* For each block, compute whether X is transparent.
3465    X is either an expression or an assignment [though we don't care which,
3466    for this context an assignment is treated as an expression].
3467    For each block where an element of X is modified, set (SET_P == 1) or reset
3468    (SET_P == 0) the INDX bit in BMAP.  */
3469
3470 static void
3471 compute_transp (x, indx, bmap, set_p)
3472      rtx x;
3473      int indx;
3474      sbitmap *bmap;
3475      int set_p;
3476 {
3477   int bb,i;
3478   enum rtx_code code;
3479   const char *fmt;
3480
3481   /* repeat is used to turn tail-recursion into iteration.  */
3482  repeat:
3483
3484   if (x == 0)
3485     return;
3486
3487   code = GET_CODE (x);
3488   switch (code)
3489     {
3490     case REG:
3491       {
3492         reg_set *r;
3493         int regno = REGNO (x);
3494
3495         if (set_p)
3496           {
3497             if (regno < FIRST_PSEUDO_REGISTER)
3498               {
3499                 for (bb = 0; bb < n_basic_blocks; bb++)
3500                   if (TEST_BIT (reg_set_in_block[bb], regno))
3501                     SET_BIT (bmap[bb], indx);
3502               }
3503             else
3504               {
3505                 for (r = reg_set_table[regno]; r != NULL; r = r->next)
3506                   {
3507                     bb = BLOCK_NUM (r->insn);
3508                     SET_BIT (bmap[bb], indx);
3509                   }
3510               }
3511           }
3512         else
3513           {
3514             if (regno < FIRST_PSEUDO_REGISTER)
3515               {
3516                 for (bb = 0; bb < n_basic_blocks; bb++)
3517                   if (TEST_BIT (reg_set_in_block[bb], regno))
3518                     RESET_BIT (bmap[bb], indx);
3519               }
3520             else
3521               {
3522                 for (r = reg_set_table[regno]; r != NULL; r = r->next)
3523                   {
3524                     bb = BLOCK_NUM (r->insn);
3525                     RESET_BIT (bmap[bb], indx);
3526                   }
3527               }
3528           }
3529         return;
3530       }
3531
3532     case MEM:
3533       if (set_p)
3534         {
3535           for (bb = 0; bb < n_basic_blocks; bb++)
3536             if (mem_set_in_block[bb])
3537               SET_BIT (bmap[bb], indx);
3538         }
3539       else
3540         {
3541           for (bb = 0; bb < n_basic_blocks; bb++)
3542             if (mem_set_in_block[bb])
3543               RESET_BIT (bmap[bb], indx);
3544         }
3545       x = XEXP (x, 0);
3546       goto repeat;
3547
3548     case PC:
3549     case CC0: /*FIXME*/
3550     case CONST:
3551     case CONST_INT:
3552     case CONST_DOUBLE:
3553     case SYMBOL_REF:
3554     case LABEL_REF:
3555     case ADDR_VEC:
3556     case ADDR_DIFF_VEC:
3557       return;
3558
3559     default:
3560       break;
3561     }
3562
3563   i = GET_RTX_LENGTH (code) - 1;
3564   fmt = GET_RTX_FORMAT (code);
3565   for (; i >= 0; i--)
3566     {
3567       if (fmt[i] == 'e')
3568         {
3569           rtx tem = XEXP (x, i);
3570
3571           /* If we are about to do the last recursive call
3572              needed at this level, change it into iteration.
3573              This function is called enough to be worth it.  */
3574           if (i == 0)
3575             {
3576               x = tem;
3577               goto repeat;
3578             }
3579           compute_transp (tem, indx, bmap, set_p);
3580         }
3581       else if (fmt[i] == 'E')
3582         {
3583           int j;
3584           for (j = 0; j < XVECLEN (x, i); j++)
3585             compute_transp (XVECEXP (x, i, j), indx, bmap, set_p);
3586         }
3587     }
3588 }
3589
3590 /* Compute the available expressions at the start and end of each
3591    basic block for cprop.  This particular dataflow equation is
3592    used often enough that we might want to generalize it and make
3593    as a subroutine for other global optimizations that need available
3594    in/out information.  */
3595 static void
3596 compute_cprop_avinout ()
3597 {
3598   int bb, changed, passes;
3599
3600   sbitmap_zero (cprop_avin[0]);
3601   sbitmap_vector_ones (cprop_avout, n_basic_blocks);
3602
3603   passes = 0;
3604   changed = 1;
3605   while (changed)
3606     {
3607       changed = 0;
3608       for (bb = 0; bb < n_basic_blocks; bb++)
3609         {
3610           if (bb != 0)
3611             sbitmap_intersection_of_preds (cprop_avin[bb], cprop_avout, bb);
3612           changed |= sbitmap_union_of_diff (cprop_avout[bb],
3613                                             cprop_pavloc[bb],
3614                                             cprop_avin[bb],
3615                                             cprop_absaltered[bb]);
3616         }
3617       passes++;
3618     }
3619
3620   if (gcse_file)
3621     fprintf (gcse_file, "cprop avail expr computation: %d passes\n", passes);
3622 }
3623
3624 /* Top level routine to do the dataflow analysis needed by copy/const
3625    propagation.  */
3626
3627 static void
3628 compute_cprop_data ()
3629 {
3630   compute_local_properties (cprop_absaltered, cprop_pavloc, NULL, 1);
3631   compute_cprop_avinout ();
3632 }
3633 \f
3634 /* Copy/constant propagation.  */
3635
3636 /* Maximum number of register uses in an insn that we handle.  */
3637 #define MAX_USES 8
3638
3639 /* Table of uses found in an insn.
3640    Allocated statically to avoid alloc/free complexity and overhead.  */
3641 static struct reg_use reg_use_table[MAX_USES];
3642
3643 /* Index into `reg_use_table' while building it.  */
3644 static int reg_use_count;
3645
3646 /* Set up a list of register numbers used in INSN.
3647    The found uses are stored in `reg_use_table'.
3648    `reg_use_count' is initialized to zero before entry, and
3649    contains the number of uses in the table upon exit.
3650
3651    ??? If a register appears multiple times we will record it multiple
3652    times.  This doesn't hurt anything but it will slow things down.  */
3653
3654 static void
3655 find_used_regs (x)
3656      rtx x;
3657 {
3658   int i;
3659   enum rtx_code code;
3660   const char *fmt;
3661
3662   /* repeat is used to turn tail-recursion into iteration.  */
3663  repeat:
3664
3665   if (x == 0)
3666     return;
3667
3668   code = GET_CODE (x);
3669   switch (code)
3670     {
3671     case REG:
3672       if (reg_use_count == MAX_USES)
3673         return;
3674       reg_use_table[reg_use_count].reg_rtx = x;
3675       reg_use_count++;
3676       return;
3677
3678     case MEM:
3679       x = XEXP (x, 0);
3680       goto repeat;
3681
3682     case PC:
3683     case CC0:
3684     case CONST:
3685     case CONST_INT:
3686     case CONST_DOUBLE:
3687     case SYMBOL_REF:
3688     case LABEL_REF:
3689     case CLOBBER:
3690     case ADDR_VEC:
3691     case ADDR_DIFF_VEC:
3692     case ASM_INPUT: /*FIXME*/
3693       return;
3694
3695     case SET:
3696       if (GET_CODE (SET_DEST (x)) == MEM)
3697         find_used_regs (SET_DEST (x));
3698       x = SET_SRC (x);
3699       goto repeat;
3700
3701     default:
3702       break;
3703     }
3704
3705   /* Recursively scan the operands of this expression.  */
3706
3707   fmt = GET_RTX_FORMAT (code);
3708   for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
3709     {
3710       if (fmt[i] == 'e')
3711         {
3712           /* If we are about to do the last recursive call
3713              needed at this level, change it into iteration.
3714              This function is called enough to be worth it.  */
3715           if (i == 0)
3716             {
3717               x = XEXP (x, 0);
3718               goto repeat;
3719             }
3720           find_used_regs (XEXP (x, i));
3721         }
3722       else if (fmt[i] == 'E')
3723         {
3724           int j;
3725           for (j = 0; j < XVECLEN (x, i); j++)
3726             find_used_regs (XVECEXP (x, i, j));
3727         }
3728     }
3729 }
3730
3731 /* Try to replace all non-SET_DEST occurrences of FROM in INSN with TO.
3732    Returns non-zero is successful.  */
3733
3734 static int
3735 try_replace_reg (from, to, insn)
3736      rtx from, to, insn;
3737 {
3738   /* If this fails we could try to simplify the result of the
3739      replacement and attempt to recognize the simplified insn.
3740
3741      But we need a general simplify_rtx that doesn't have pass
3742      specific state variables.  I'm not aware of one at the moment.  */
3743   return validate_replace_src (from, to, insn);
3744 }
3745
3746 /* Find a set of REGNO that is available on entry to INSN's block.
3747    Returns NULL if not found.  */
3748
3749 static struct expr *
3750 find_avail_set (regno, insn)
3751      int regno;
3752      rtx insn;
3753 {
3754   /* SET1 contains the last set found that can be returned to the caller for
3755      use in a substitution.  */
3756   struct expr *set1 = 0;
3757
3758   /* Loops are not possible here.  To get a loop we would need two sets
3759      available at the start of the block containing INSN.  ie we would
3760      need two sets like this available at the start of the block:
3761
3762        (set (reg X) (reg Y))
3763        (set (reg Y) (reg X))
3764
3765      This can not happen since the set of (reg Y) would have killed the
3766      set of (reg X) making it unavailable at the start of this block.  */
3767   while (1)
3768      {
3769       rtx src;
3770       struct expr *set = lookup_set (regno, NULL_RTX);
3771
3772       /* Find a set that is available at the start of the block
3773          which contains INSN.  */
3774       while (set)
3775         {
3776           if (TEST_BIT (cprop_avin[BLOCK_NUM (insn)], set->bitmap_index))
3777             break;
3778           set = next_set (regno, set);
3779         }
3780
3781       /* If no available set was found we've reached the end of the
3782          (possibly empty) copy chain.  */
3783       if (set == 0)
3784         break;
3785
3786       if (GET_CODE (set->expr) != SET)
3787         abort ();
3788
3789       src = SET_SRC (set->expr);
3790
3791       /* We know the set is available.
3792          Now check that SRC is ANTLOC (i.e. none of the source operands
3793          have changed since the start of the block).
3794
3795          If the source operand changed, we may still use it for the next
3796          iteration of this loop, but we may not use it for substitutions.  */
3797       if (CONSTANT_P (src) || oprs_not_set_p (src, insn))
3798         set1 = set;
3799
3800       /* If the source of the set is anything except a register, then
3801          we have reached the end of the copy chain.  */
3802       if (GET_CODE (src) != REG)
3803         break;
3804
3805       /* Follow the copy chain, ie start another iteration of the loop
3806          and see if we have an available copy into SRC.  */
3807       regno = REGNO (src);
3808      }
3809
3810   /* SET1 holds the last set that was available and anticipatable at
3811      INSN.  */
3812   return set1;
3813 }
3814
3815 /* Subroutine of cprop_insn that tries to propagate constants into
3816    JUMP_INSNS.  INSN must be a conditional jump; COPY is a copy of it
3817    that we can use for substitutions.
3818    REG_USED is the use we will try to replace, SRC is the constant we
3819    will try to substitute for it.
3820    Returns nonzero if a change was made.  */
3821 static int
3822 cprop_jump (insn, copy, reg_used, src)
3823      rtx insn, copy;
3824      struct reg_use *reg_used;
3825      rtx src;
3826 {
3827   rtx set = PATTERN (copy);
3828   rtx temp;
3829
3830   /* Replace the register with the appropriate constant.  */
3831   replace_rtx (SET_SRC (set), reg_used->reg_rtx, src);
3832
3833   temp = simplify_ternary_operation (GET_CODE (SET_SRC (set)),
3834                                      GET_MODE (SET_SRC (set)),
3835                                      GET_MODE (XEXP (SET_SRC (set), 0)),
3836                                      XEXP (SET_SRC (set), 0),
3837                                      XEXP (SET_SRC (set), 1),
3838                                      XEXP (SET_SRC (set), 2));
3839
3840   /* If no simplification can be made, then try the next
3841      register.  */
3842   if (temp == 0)
3843     return 0;
3844
3845   SET_SRC (set) = temp;
3846
3847   /* That may have changed the structure of TEMP, so
3848      force it to be rerecognized if it has not turned
3849      into a nop or unconditional jump.  */
3850
3851   INSN_CODE (copy) = -1;
3852   if ((SET_DEST (set) == pc_rtx
3853        && (SET_SRC (set) == pc_rtx
3854            || GET_CODE (SET_SRC (set)) == LABEL_REF))
3855       || recog (PATTERN (copy), copy, NULL) >= 0)
3856     {
3857       /* This has either become an unconditional jump
3858          or a nop-jump.  We'd like to delete nop jumps
3859          here, but doing so confuses gcse.  So we just
3860          make the replacement and let later passes
3861          sort things out.  */
3862       PATTERN (insn) = set;
3863       INSN_CODE (insn) = -1;
3864
3865       /* One less use of the label this insn used to jump to
3866          if we turned this into a NOP jump.  */
3867       if (SET_SRC (set) == pc_rtx && JUMP_LABEL (insn) != 0)
3868         --LABEL_NUSES (JUMP_LABEL (insn));
3869
3870       /* If this has turned into an unconditional jump,
3871          then put a barrier after it so that the unreachable
3872          code will be deleted.  */
3873       if (GET_CODE (SET_SRC (set)) == LABEL_REF)
3874         emit_barrier_after (insn);
3875
3876       run_jump_opt_after_gcse = 1;
3877
3878       const_prop_count++;
3879       if (gcse_file != NULL)
3880         {
3881           int regno = REGNO (reg_used->reg_rtx);
3882           fprintf (gcse_file, "CONST-PROP: Replacing reg %d in insn %d with constant ",
3883                    regno, INSN_UID (insn));
3884           print_rtl (gcse_file, src);
3885           fprintf (gcse_file, "\n");
3886         }
3887       return 1;
3888     }
3889   return 0;
3890 }
3891
3892 #ifdef HAVE_cc0
3893 /* Subroutine of cprop_insn that tries to propagate constants into
3894    JUMP_INSNS for machines that have CC0.  INSN is a single set that
3895    stores into CC0; the insn following it is a conditional jump.
3896    REG_USED is the use we will try to replace, SRC is the constant we
3897    will try to substitute for it.
3898    Returns nonzero if a change was made.  */
3899 static int
3900 cprop_cc0_jump (insn, reg_used, src)
3901      rtx insn;
3902      struct reg_use *reg_used;
3903      rtx src;
3904 {
3905   rtx jump = NEXT_INSN (insn);
3906   rtx copy = copy_rtx (jump);
3907   rtx set = PATTERN (copy);
3908
3909   /* We need to copy the source of the cc0 setter, as cprop_jump is going to
3910      substitute into it.  */
3911   replace_rtx (SET_SRC (set), cc0_rtx, copy_rtx (SET_SRC (PATTERN (insn))));
3912   if (! cprop_jump (jump, copy, reg_used, src))
3913     return 0;
3914
3915   /* If we succeeded, delete the cc0 setter.  */
3916   PUT_CODE (insn, NOTE);
3917   NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
3918   NOTE_SOURCE_FILE (insn) = 0;
3919   return 1;
3920  }
3921 #endif
3922
3923 /* Perform constant and copy propagation on INSN.
3924    The result is non-zero if a change was made.  */
3925
3926 static int
3927 cprop_insn (insn, alter_jumps)
3928      rtx insn;
3929      int alter_jumps;
3930 {
3931   struct reg_use *reg_used;
3932   int changed = 0;
3933
3934   /* Only propagate into SETs.  Note that a conditional jump is a
3935      SET with pc_rtx as the destination.  */
3936   if ((GET_CODE (insn) != INSN
3937        && GET_CODE (insn) != JUMP_INSN)
3938       || GET_CODE (PATTERN (insn)) != SET)
3939     return 0;
3940
3941   reg_use_count = 0;
3942   find_used_regs (PATTERN (insn));
3943
3944   reg_used = &reg_use_table[0];
3945   for ( ; reg_use_count > 0; reg_used++, reg_use_count--)
3946     {
3947       rtx pat, src;
3948       struct expr *set;
3949       int regno = REGNO (reg_used->reg_rtx);
3950
3951       /* Ignore registers created by GCSE.
3952          We do this because ... */
3953       if (regno >= max_gcse_regno)
3954         continue;
3955
3956       /* If the register has already been set in this block, there's
3957          nothing we can do.  */
3958       if (! oprs_not_set_p (reg_used->reg_rtx, insn))
3959         continue;
3960
3961       /* Find an assignment that sets reg_used and is available
3962          at the start of the block.  */
3963       set = find_avail_set (regno, insn);
3964       if (! set)
3965         continue;
3966
3967       pat = set->expr;
3968       /* ??? We might be able to handle PARALLELs.  Later.  */
3969       if (GET_CODE (pat) != SET)
3970         abort ();
3971       src = SET_SRC (pat);
3972
3973       /* Constant propagation.  */
3974       if (GET_CODE (src) == CONST_INT || GET_CODE (src) == CONST_DOUBLE
3975           || GET_CODE (src) == SYMBOL_REF)
3976         {
3977           /* Handle normal insns first.  */
3978           if (GET_CODE (insn) == INSN
3979               && try_replace_reg (reg_used->reg_rtx, src, insn))
3980             {
3981               changed = 1;
3982               const_prop_count++;
3983               if (gcse_file != NULL)
3984                 {
3985                   fprintf (gcse_file, "CONST-PROP: Replacing reg %d in insn %d with constant ",
3986                            regno, INSN_UID (insn));
3987                   print_rtl (gcse_file, src);
3988                   fprintf (gcse_file, "\n");
3989                 }
3990
3991               /* The original insn setting reg_used may or may not now be
3992                  deletable.  We leave the deletion to flow.  */
3993             }
3994
3995           /* Try to propagate a CONST_INT into a conditional jump.
3996              We're pretty specific about what we will handle in this
3997              code, we can extend this as necessary over time.
3998
3999              Right now the insn in question must look like
4000              (set (pc) (if_then_else ...))  */
4001           else if (alter_jumps
4002                    && GET_CODE (insn) == JUMP_INSN
4003                    && condjump_p (insn)
4004                    && ! simplejump_p (insn))
4005             changed |= cprop_jump (insn, copy_rtx (insn), reg_used, src);
4006 #ifdef HAVE_cc0
4007           /* Similar code for machines that use a pair of CC0 setter and
4008              conditional jump insn.  */
4009           else if (alter_jumps
4010                    && GET_CODE (PATTERN (insn)) == SET
4011                    && SET_DEST (PATTERN (insn)) == cc0_rtx
4012                    && GET_CODE (NEXT_INSN (insn)) == JUMP_INSN
4013                    && condjump_p (NEXT_INSN (insn))
4014                    && ! simplejump_p (NEXT_INSN (insn)))
4015             changed |= cprop_cc0_jump (insn, reg_used, src);
4016 #endif
4017         }
4018       else if (GET_CODE (src) == REG
4019                && REGNO (src) >= FIRST_PSEUDO_REGISTER
4020                && REGNO (src) != regno)
4021         {
4022           if (try_replace_reg (reg_used->reg_rtx, src, insn))
4023             {
4024               changed = 1;
4025               copy_prop_count++;
4026               if (gcse_file != NULL)
4027                 {
4028                   fprintf (gcse_file, "COPY-PROP: Replacing reg %d in insn %d with reg %d\n",
4029                            regno, INSN_UID (insn), REGNO (src));
4030                 }
4031
4032               /* The original insn setting reg_used may or may not now be
4033                  deletable.  We leave the deletion to flow.  */
4034               /* FIXME: If it turns out that the insn isn't deletable,
4035                  then we may have unnecessarily extended register lifetimes
4036                  and made things worse.  */
4037             }
4038         }
4039     }
4040
4041   return changed;
4042 }
4043
4044 /* Forward propagate copies.
4045    This includes copies and constants.
4046    Return non-zero if a change was made.  */
4047
4048 static int
4049 cprop (alter_jumps)
4050      int alter_jumps;
4051 {
4052   int bb, changed;
4053   rtx insn;
4054
4055   /* Note we start at block 1.  */
4056
4057   changed = 0;
4058   for (bb = 1; bb < n_basic_blocks; bb++)
4059     {
4060       /* Reset tables used to keep track of what's still valid [since the
4061          start of the block].  */
4062       reset_opr_set_tables ();
4063
4064       for (insn = BLOCK_HEAD (bb);
4065            insn != NULL && insn != NEXT_INSN (BLOCK_END (bb));
4066            insn = NEXT_INSN (insn))
4067         {
4068           if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
4069             {
4070               changed |= cprop_insn (insn, alter_jumps);
4071
4072               /* Keep track of everything modified by this insn.  */
4073               /* ??? Need to be careful w.r.t. mods done to INSN.  Don't
4074                  call mark_oprs_set if we turned the insn into a NOTE.  */
4075               if (GET_CODE (insn) != NOTE)
4076                 mark_oprs_set (insn);
4077             }
4078         }
4079     }
4080
4081   if (gcse_file != NULL)
4082     fprintf (gcse_file, "\n");
4083
4084   return changed;
4085 }
4086
4087 /* Perform one copy/constant propagation pass.
4088    F is the first insn in the function.
4089    PASS is the pass count.  */
4090
4091 static int
4092 one_cprop_pass (pass, alter_jumps)
4093      int pass;
4094      int alter_jumps;
4095 {
4096   int changed = 0;
4097
4098   const_prop_count = 0;
4099   copy_prop_count = 0;
4100
4101   alloc_set_hash_table (max_cuid);
4102   compute_set_hash_table ();
4103   if (gcse_file)
4104     dump_hash_table (gcse_file, "SET", set_hash_table, set_hash_table_size,
4105                      n_sets);
4106   if (n_sets > 0)
4107     {
4108       alloc_cprop_mem (n_basic_blocks, n_sets);
4109       compute_cprop_data ();
4110       changed = cprop (alter_jumps);
4111       free_cprop_mem ();
4112     }
4113   free_set_hash_table ();
4114
4115   if (gcse_file)
4116     {
4117       fprintf (gcse_file, "CPROP of %s, pass %d: %d bytes needed, %d const props, %d copy props\n",
4118                current_function_name, pass,
4119                bytes_used, const_prop_count, copy_prop_count);
4120       fprintf (gcse_file, "\n");
4121     }
4122
4123   return changed;
4124 }
4125 \f
4126 /* Compute PRE+LCM working variables.  */
4127
4128 /* Local properties of expressions.  */
4129 /* Nonzero for expressions that are transparent in the block.  */
4130 static sbitmap *transp;
4131
4132 /* Nonzero for expressions that are transparent at the end of the block.
4133    This is only zero for expressions killed by abnormal critical edge
4134    created by a calls.  */
4135 static sbitmap *transpout;
4136
4137 /* Nonzero for expressions that are computed (available) in the block.  */
4138 static sbitmap *comp;
4139
4140 /* Nonzero for expressions that are locally anticipatable in the block.  */
4141 static sbitmap *antloc;
4142
4143 /* Nonzero for expressions where this block is an optimal computation
4144    point.  */
4145 static sbitmap *pre_optimal;
4146
4147 /* Nonzero for expressions which are redundant in a particular block.  */
4148 static sbitmap *pre_redundant;
4149
4150 /* Nonzero for expressions which should be inserted on a specific edge.  */
4151 static sbitmap *pre_insert_map;
4152
4153 /* Nonzero for expressions which should be deleted in a specific block.  */
4154 static sbitmap *pre_delete_map;
4155
4156 /* Contains the edge_list returned by pre_edge_lcm.  */
4157 static struct edge_list *edge_list;
4158
4159 static sbitmap *temp_bitmap;
4160
4161 /* Redundant insns.  */
4162 static sbitmap pre_redundant_insns;
4163
4164 /* Allocate vars used for PRE analysis.  */
4165
4166 static void
4167 alloc_pre_mem (n_blocks, n_exprs)
4168      int n_blocks, n_exprs;
4169 {
4170   transp = sbitmap_vector_alloc (n_blocks, n_exprs);
4171   comp = sbitmap_vector_alloc (n_blocks, n_exprs);
4172   antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
4173   temp_bitmap = sbitmap_vector_alloc (n_blocks, n_exprs);
4174
4175   pre_optimal = NULL;
4176   pre_redundant = NULL;
4177   pre_insert_map = NULL;
4178   pre_delete_map = NULL;
4179   ae_in = NULL;
4180   ae_out = NULL;
4181   u_bitmap = NULL;
4182   transpout = sbitmap_vector_alloc (n_blocks, n_exprs);
4183   ae_kill = sbitmap_vector_alloc (n_blocks, n_exprs);
4184   /* pre_insert and pre_delete are allocated later.  */
4185 }
4186
4187 /* Free vars used for PRE analysis.  */
4188
4189 static void
4190 free_pre_mem ()
4191 {
4192   free (transp);
4193   free (comp);
4194   free (antloc);
4195   free (temp_bitmap);
4196
4197   if (pre_optimal)
4198     free (pre_optimal);
4199   if (pre_redundant)
4200     free (pre_redundant);
4201   if (pre_insert_map)
4202     free (pre_insert_map);
4203   if (pre_delete_map)
4204     free (pre_delete_map);
4205   if (transpout)
4206     free (transpout);
4207
4208   if (ae_in)
4209     free (ae_in);
4210   if (ae_out)
4211     free (ae_out);
4212   if (ae_kill)
4213     free (ae_kill);
4214   if (u_bitmap)
4215     free (u_bitmap);
4216
4217   transp = comp = antloc = NULL;
4218   pre_optimal = pre_redundant = pre_insert_map = pre_delete_map = NULL;
4219   transpout = ae_in = ae_out = ae_kill = NULL;
4220   u_bitmap = NULL;
4221
4222 }
4223
4224 /* Top level routine to do the dataflow analysis needed by PRE.  */
4225
4226 static void
4227 compute_pre_data ()
4228 {
4229   compute_local_properties (transp, comp, antloc, 0);
4230   compute_transpout ();
4231   sbitmap_vector_zero (ae_kill, n_basic_blocks);
4232   compute_ae_kill (comp, ae_kill);
4233   edge_list = pre_edge_lcm (gcse_file, n_exprs, transp, comp, antloc,
4234                             ae_kill, &pre_insert_map, &pre_delete_map);
4235 }
4236
4237 \f
4238 /* PRE utilities */
4239
4240 /* Return non-zero if an occurrence of expression EXPR in OCCR_BB would reach
4241    block BB.
4242
4243    VISITED is a pointer to a working buffer for tracking which BB's have
4244    been visited.  It is NULL for the top-level call.
4245
4246    CHECK_PRE_COMP controls whether or not we check for a computation of
4247    EXPR in OCCR_BB.
4248
4249    We treat reaching expressions that go through blocks containing the same
4250    reaching expression as "not reaching".  E.g. if EXPR is generated in blocks
4251    2 and 3, INSN is in block 4, and 2->3->4, we treat the expression in block
4252    2 as not reaching.  The intent is to improve the probability of finding
4253    only one reaching expression and to reduce register lifetimes by picking
4254    the closest such expression.  */
4255
4256 static int
4257 pre_expr_reaches_here_p_work (occr_bb, expr, bb, check_pre_comp, visited)
4258      int occr_bb;
4259      struct expr *expr;
4260      int bb;
4261      int check_pre_comp;
4262      char *visited;
4263 {
4264   edge pred;
4265
4266   for (pred = BASIC_BLOCK (bb)->pred; pred != NULL; pred = pred->pred_next)
4267     {
4268       int pred_bb = pred->src->index;
4269
4270       if (pred->src == ENTRY_BLOCK_PTR
4271           /* Has predecessor has already been visited?  */
4272           || visited[pred_bb])
4273         {
4274           /* Nothing to do.  */
4275         }
4276       /* Does this predecessor generate this expression?  */
4277       else if ((!check_pre_comp && occr_bb == pred_bb)
4278                || TEST_BIT (comp[pred_bb], expr->bitmap_index))
4279         {
4280           /* Is this the occurrence we're looking for?
4281              Note that there's only one generating occurrence per block
4282              so we just need to check the block number.  */
4283           if (occr_bb == pred_bb)
4284             return 1;
4285           visited[pred_bb] = 1;
4286         }
4287       /* Ignore this predecessor if it kills the expression.  */
4288       else if (! TEST_BIT (transp[pred_bb], expr->bitmap_index))
4289         visited[pred_bb] = 1;
4290       /* Neither gen nor kill.  */
4291       else
4292         {
4293           visited[pred_bb] = 1;
4294           if (pre_expr_reaches_here_p_work (occr_bb, expr, pred_bb,
4295                                             check_pre_comp, visited))
4296             return 1;
4297         }
4298     }
4299
4300   /* All paths have been checked.  */
4301   return 0;
4302 }
4303
4304 /* The wrapper for pre_expr_reaches_here_work that ensures that any
4305    memory allocated for that function is returned. */
4306
4307 static int
4308 pre_expr_reaches_here_p (occr_bb, expr, bb, check_pre_comp)
4309      int occr_bb;
4310      struct expr *expr;
4311      int bb;
4312      int check_pre_comp;
4313 {
4314   int rval;
4315   char * visited = (char *) xcalloc (n_basic_blocks, 1);
4316
4317   rval = pre_expr_reaches_here_p_work(occr_bb, expr, bb, check_pre_comp,
4318                                       visited);
4319
4320   free (visited);
4321
4322   return (rval);
4323 }
4324 \f
4325
4326 /* Given an expr, generate RTL which we can insert at the end of a BB,
4327    or on an edge.  Set the block number of any insns generated to
4328    the value of BB.  */
4329
4330 static rtx
4331 process_insert_insn (expr)
4332      struct expr *expr;
4333 {
4334   rtx reg = expr->reaching_reg;
4335   rtx pat, copied_expr;
4336   rtx first_new_insn;
4337
4338   start_sequence ();
4339   copied_expr = copy_rtx (expr->expr);
4340   emit_move_insn (reg, copied_expr);
4341   first_new_insn = get_insns ();
4342   pat = gen_sequence ();
4343   end_sequence ();
4344
4345   return pat;
4346 }
4347
4348 /* Add EXPR to the end of basic block BB.
4349
4350    This is used by both the PRE and code hoisting.
4351
4352    For PRE, we want to verify that the expr is either transparent
4353    or locally anticipatable in the target block.  This check makes
4354    no sense for code hoisting.  */
4355
4356 static void
4357 insert_insn_end_bb (expr, bb, pre)
4358      struct expr *expr;
4359      int bb;
4360      int pre;
4361 {
4362   rtx insn = BLOCK_END (bb);
4363   rtx new_insn;
4364   rtx reg = expr->reaching_reg;
4365   int regno = REGNO (reg);
4366   rtx pat;
4367
4368   pat = process_insert_insn (expr);
4369
4370   /* If the last insn is a jump, insert EXPR in front [taking care to
4371      handle cc0, etc. properly].  */
4372
4373   if (GET_CODE (insn) == JUMP_INSN)
4374     {
4375 #ifdef HAVE_cc0
4376       rtx note;
4377 #endif
4378
4379       /* If this is a jump table, then we can't insert stuff here.  Since
4380          we know the previous real insn must be the tablejump, we insert
4381          the new instruction just before the tablejump.  */
4382       if (GET_CODE (PATTERN (insn)) == ADDR_VEC
4383           || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
4384         insn = prev_real_insn (insn);
4385
4386 #ifdef HAVE_cc0
4387       /* FIXME: 'twould be nice to call prev_cc0_setter here but it aborts
4388          if cc0 isn't set.  */
4389       note = find_reg_note (insn, REG_CC_SETTER, NULL_RTX);
4390       if (note)
4391         insn = XEXP (note, 0);
4392       else
4393         {
4394           rtx maybe_cc0_setter = prev_nonnote_insn (insn);
4395           if (maybe_cc0_setter
4396               && GET_RTX_CLASS (GET_CODE (maybe_cc0_setter)) == 'i'
4397               && sets_cc0_p (PATTERN (maybe_cc0_setter)))
4398             insn = maybe_cc0_setter;
4399         }
4400 #endif
4401       /* FIXME: What if something in cc0/jump uses value set in new insn?  */
4402       new_insn = emit_insn_before (pat, insn);
4403       if (BLOCK_HEAD (bb) == insn)
4404         BLOCK_HEAD (bb) = new_insn;
4405     }
4406   /* Likewise if the last insn is a call, as will happen in the presence
4407      of exception handling.  */
4408   else if (GET_CODE (insn) == CALL_INSN)
4409     {
4410       HARD_REG_SET parm_regs;
4411       int nparm_regs;
4412       rtx p;
4413
4414       /* Keeping in mind SMALL_REGISTER_CLASSES and parameters in registers,
4415          we search backward and place the instructions before the first
4416          parameter is loaded.  Do this for everyone for consistency and a
4417          presumtion that we'll get better code elsewhere as well.  */
4418
4419       /* It should always be the case that we can put these instructions
4420          anywhere in the basic block with performing PRE optimizations.
4421          Check this.  */
4422       if (pre
4423           && !TEST_BIT (antloc[bb], expr->bitmap_index)
4424           && !TEST_BIT (transp[bb], expr->bitmap_index))
4425         abort ();
4426
4427       /* Since different machines initialize their parameter registers
4428          in different orders, assume nothing.  Collect the set of all
4429          parameter registers.  */
4430       CLEAR_HARD_REG_SET (parm_regs);
4431       nparm_regs = 0;
4432       for (p = CALL_INSN_FUNCTION_USAGE (insn); p ; p = XEXP (p, 1))
4433         if (GET_CODE (XEXP (p, 0)) == USE
4434             && GET_CODE (XEXP (XEXP (p, 0), 0)) == REG)
4435           {
4436             int regno = REGNO (XEXP (XEXP (p, 0), 0));
4437             if (regno >= FIRST_PSEUDO_REGISTER)
4438               abort ();
4439             SET_HARD_REG_BIT (parm_regs, regno);
4440             nparm_regs++;
4441           }
4442
4443       /* Search backward for the first set of a register in this set.  */
4444       while (nparm_regs && BLOCK_HEAD (bb) != insn)
4445         {
4446           insn = PREV_INSN (insn);
4447           p = single_set (insn);
4448           if (p && GET_CODE (SET_DEST (p)) == REG
4449               && REGNO (SET_DEST (p)) < FIRST_PSEUDO_REGISTER
4450               && TEST_HARD_REG_BIT (parm_regs, REGNO (SET_DEST (p))))
4451             {
4452               CLEAR_HARD_REG_BIT (parm_regs, REGNO (SET_DEST (p)));
4453               nparm_regs--;
4454             }
4455         }
4456
4457       /* If we found all the parameter loads, then we want to insert
4458          before the first parameter load.
4459
4460          If we did not find all the parameter loads, then we might have
4461          stopped on the head of the block, which could be a CODE_LABEL.
4462          If we inserted before the CODE_LABEL, then we would be putting
4463          the insn in the wrong basic block.  In that case, put the insn
4464          after the CODE_LABEL.
4465
4466          ?!? Do we need to account for NOTE_INSN_BASIC_BLOCK here?  */
4467       if (GET_CODE (insn) != CODE_LABEL)
4468         {
4469           new_insn = emit_insn_before (pat, insn);
4470           if (BLOCK_HEAD (bb) == insn)
4471             BLOCK_HEAD (bb) = new_insn;
4472         }
4473       else
4474         {
4475           new_insn = emit_insn_after (pat, insn);
4476         }
4477     }
4478   else
4479     {
4480       new_insn = emit_insn_after (pat, insn);
4481       BLOCK_END (bb) = new_insn;
4482     }
4483
4484   /* Keep block number table up to date.
4485      Note, PAT could be a multiple insn sequence, we have to make
4486      sure that each insn in the sequence is handled.  */
4487   if (GET_CODE (pat) == SEQUENCE)
4488     {
4489       int i;
4490
4491       for (i = 0; i < XVECLEN (pat, 0); i++)
4492         {
4493           rtx insn = XVECEXP (pat, 0, i);
4494           set_block_num (insn, bb);
4495           if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
4496             add_label_notes (PATTERN (insn), new_insn);
4497           note_stores (PATTERN (insn), record_set_info, insn);
4498         }
4499     }
4500   else
4501     {
4502       add_label_notes (SET_SRC (pat), new_insn);
4503       set_block_num (new_insn, bb);
4504       /* Keep register set table up to date.  */
4505       record_one_set (regno, new_insn);
4506     }
4507
4508   gcse_create_count++;
4509
4510   if (gcse_file)
4511     {
4512       fprintf (gcse_file, "PRE/HOIST: end of bb %d, insn %d, copying expression %d to reg %d\n",
4513                bb, INSN_UID (new_insn), expr->bitmap_index, regno);
4514     }
4515 }
4516
4517 /* Insert partially redundant expressions on edges in the CFG to make
4518    the expressions fully redundant.  */
4519
4520 static int
4521 pre_edge_insert (edge_list, index_map)
4522      struct edge_list *edge_list;
4523      struct expr **index_map;
4524 {
4525   int e, i, num_edges, set_size, did_insert = 0;
4526   sbitmap *inserted;
4527
4528   /* Where PRE_INSERT_MAP is nonzero, we add the expression on that edge
4529      if it reaches any of the deleted expressions.  */
4530
4531   set_size = pre_insert_map[0]->size;
4532   num_edges = NUM_EDGES (edge_list);
4533   inserted = sbitmap_vector_alloc (num_edges, n_exprs);
4534   sbitmap_vector_zero (inserted, num_edges);
4535
4536   for (e = 0; e < num_edges; e++)
4537     {
4538       int indx;
4539       basic_block pred = INDEX_EDGE_PRED_BB (edge_list, e);
4540       int bb = pred->index;
4541
4542       for (i = indx = 0; i < set_size; i++, indx += SBITMAP_ELT_BITS)
4543         {
4544           SBITMAP_ELT_TYPE insert = pre_insert_map[e]->elms[i];
4545           int j;
4546
4547           for (j = indx; insert && j < n_exprs; j++, insert >>= 1)
4548             {
4549               if ((insert & 1) != 0 && index_map[j]->reaching_reg != NULL_RTX)
4550                 {
4551                   struct expr *expr = index_map[j];
4552                   struct occr *occr;
4553
4554                   /* Now look at each deleted occurence of this expression.  */
4555                   for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
4556                     {
4557                       if (! occr->deleted_p)
4558                         continue;
4559
4560                       /* Insert this expression on this edge if if it would
4561                          reach the deleted occurence in BB.  */
4562                       if (!TEST_BIT (inserted[e], j)
4563                           && (bb == ENTRY_BLOCK
4564                               || pre_expr_reaches_here_p (bb, expr,
4565                                                    BLOCK_NUM (occr->insn), 0)))
4566                         {
4567                           rtx insn;
4568                           edge eg = INDEX_EDGE (edge_list, e);
4569                           /* We can't insert anything on an abnormal
4570                              and critical edge, so we insert the
4571                              insn at the end of the previous block. There
4572                              are several alternatives detailed in
4573                              Morgans book P277 (sec 10.5) for handling
4574                              this situation.  This one is easiest for now.  */
4575
4576                           if ((eg->flags & EDGE_ABNORMAL) == EDGE_ABNORMAL)
4577                             {
4578                               insert_insn_end_bb (index_map[j], bb, 0);
4579                             }
4580                           else
4581                             {
4582                               insn = process_insert_insn (index_map[j]);
4583                               insert_insn_on_edge (insn, eg);
4584                             }
4585                           if (gcse_file)
4586                             {
4587                               fprintf (gcse_file,
4588                                        "PRE/HOIST: edge (%d,%d), copy expression %d\n",
4589                                         bb,
4590                                         INDEX_EDGE_SUCC_BB (edge_list, e)->index, expr->bitmap_index);
4591                             }
4592                           SET_BIT (inserted[e], j);
4593                           did_insert = 1;
4594                           gcse_create_count++;
4595                         }
4596                     }
4597                 }
4598             }
4599         }
4600     }
4601
4602   /* Clean up.  */
4603   free (inserted);
4604
4605   return did_insert;
4606 }
4607
4608 /* Copy the result of INSN to REG.
4609    INDX is the expression number.  */
4610
4611 static void
4612 pre_insert_copy_insn (expr, insn)
4613      struct expr *expr;
4614      rtx insn;
4615 {
4616   rtx reg = expr->reaching_reg;
4617   int regno = REGNO (reg);
4618   int indx = expr->bitmap_index;
4619   rtx set = single_set (insn);
4620   rtx new_insn;
4621   int bb = BLOCK_NUM (insn);
4622
4623   if (!set)
4624     abort ();
4625   new_insn = emit_insn_after (gen_rtx_SET (VOIDmode, reg, SET_DEST (set)),
4626                               insn);
4627   /* Keep block number table up to date.  */
4628   set_block_num (new_insn, bb);
4629   /* Keep register set table up to date.  */
4630   record_one_set (regno, new_insn);
4631   if (insn == BLOCK_END (bb))
4632     BLOCK_END (bb) = new_insn;
4633
4634   gcse_create_count++;
4635
4636   if (gcse_file)
4637     fprintf (gcse_file,
4638              "PRE: bb %d, insn %d, copy expression %d in insn %d to reg %d\n",
4639               BLOCK_NUM (insn), INSN_UID (new_insn), indx,
4640               INSN_UID (insn), regno);
4641 }
4642
4643 /* Copy available expressions that reach the redundant expression
4644    to `reaching_reg'.  */
4645
4646 static void
4647 pre_insert_copies ()
4648 {
4649   int i;
4650
4651   /* For each available expression in the table, copy the result to
4652      `reaching_reg' if the expression reaches a deleted one.
4653
4654      ??? The current algorithm is rather brute force.
4655      Need to do some profiling.  */
4656
4657   for (i = 0; i < expr_hash_table_size; i++)
4658     {
4659       struct expr *expr;
4660
4661       for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
4662         {
4663           struct occr *occr;
4664
4665           /* If the basic block isn't reachable, PPOUT will be TRUE.
4666              However, we don't want to insert a copy here because the
4667              expression may not really be redundant.  So only insert
4668              an insn if the expression was deleted.
4669              This test also avoids further processing if the expression
4670              wasn't deleted anywhere.  */
4671           if (expr->reaching_reg == NULL)
4672             continue;
4673
4674           for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
4675             {
4676               struct occr *avail;
4677
4678               if (! occr->deleted_p)
4679                 continue;
4680
4681               for (avail = expr->avail_occr; avail != NULL; avail = avail->next)
4682                 {
4683                   rtx insn = avail->insn;
4684
4685                   /* No need to handle this one if handled already.  */
4686                   if (avail->copied_p)
4687                     continue;
4688                   /* Don't handle this one if it's a redundant one.  */
4689                   if (TEST_BIT (pre_redundant_insns, INSN_CUID (insn)))
4690                     continue;
4691                   /* Or if the expression doesn't reach the deleted one.  */
4692                   if (! pre_expr_reaches_here_p (BLOCK_NUM (avail->insn), expr,
4693                                                  BLOCK_NUM (occr->insn),1))
4694                     continue;
4695
4696                   /* Copy the result of avail to reaching_reg.  */
4697                   pre_insert_copy_insn (expr, insn);
4698                   avail->copied_p = 1;
4699                 }
4700             }
4701         }
4702     }
4703 }
4704
4705 /* Delete redundant computations.
4706    Deletion is done by changing the insn to copy the `reaching_reg' of
4707    the expression into the result of the SET.  It is left to later passes
4708    (cprop, cse2, flow, combine, regmove) to propagate the copy or eliminate it.
4709
4710    Returns non-zero if a change is made.  */
4711
4712 static int
4713 pre_delete ()
4714 {
4715   int i, bb, changed;
4716
4717   /* Compute the expressions which are redundant and need to be replaced by
4718      copies from the reaching reg to the target reg.  */
4719   for (bb = 0; bb < n_basic_blocks; bb++)
4720     sbitmap_copy (temp_bitmap[bb], pre_delete_map[bb]);
4721
4722   changed = 0;
4723   for (i = 0; i < expr_hash_table_size; i++)
4724     {
4725       struct expr *expr;
4726
4727       for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
4728         {
4729           struct occr *occr;
4730           int indx = expr->bitmap_index;
4731
4732           /* We only need to search antic_occr since we require
4733              ANTLOC != 0.  */
4734
4735           for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
4736             {
4737               rtx insn = occr->insn;
4738               rtx set;
4739               int bb = BLOCK_NUM (insn);
4740
4741               if (TEST_BIT (temp_bitmap[bb], indx))
4742                 {
4743                   set = single_set (insn);
4744                   if (! set)
4745                     abort ();
4746
4747                   /* Create a pseudo-reg to store the result of reaching
4748                      expressions into.  Get the mode for the new pseudo
4749                      from the mode of the original destination pseudo.  */
4750                   if (expr->reaching_reg == NULL)
4751                     expr->reaching_reg
4752                       = gen_reg_rtx (GET_MODE (SET_DEST (set)));
4753
4754                   /* In theory this should never fail since we're creating
4755                      a reg->reg copy.
4756
4757                      However, on the x86 some of the movXX patterns actually
4758                      contain clobbers of scratch regs.  This may cause the
4759                      insn created by validate_change to not match any pattern
4760                      and thus cause validate_change to fail.   */
4761                   if (validate_change (insn, &SET_SRC (set),
4762                                        expr->reaching_reg, 0))
4763                     {
4764                       occr->deleted_p = 1;
4765                       SET_BIT (pre_redundant_insns, INSN_CUID (insn));
4766                       changed = 1;
4767                       gcse_subst_count++;
4768                     }
4769
4770                   if (gcse_file)
4771                     {
4772                       fprintf (gcse_file,
4773                                "PRE: redundant insn %d (expression %d) in bb %d, reaching reg is %d\n",
4774                                INSN_UID (insn), indx, bb, REGNO (expr->reaching_reg));
4775                     }
4776                 }
4777             }
4778         }
4779     }
4780
4781   return changed;
4782 }
4783
4784 /* Perform GCSE optimizations using PRE.
4785    This is called by one_pre_gcse_pass after all the dataflow analysis
4786    has been done.
4787
4788    This is based on the original Morel-Renvoise paper Fred Chow's thesis,
4789    and lazy code motion from Knoop, Ruthing and Steffen as described in
4790    Advanced Compiler Design and Implementation.
4791
4792    ??? A new pseudo reg is created to hold the reaching expression.
4793    The nice thing about the classical approach is that it would try to
4794    use an existing reg.  If the register can't be adequately optimized
4795    [i.e. we introduce reload problems], one could add a pass here to
4796    propagate the new register through the block.
4797
4798    ??? We don't handle single sets in PARALLELs because we're [currently]
4799    not able to copy the rest of the parallel when we insert copies to create
4800    full redundancies from partial redundancies.  However, there's no reason
4801    why we can't handle PARALLELs in the cases where there are no partial
4802    redundancies.  */
4803
4804 static int
4805 pre_gcse ()
4806 {
4807   int i, did_insert;
4808   int changed;
4809   struct expr **index_map;
4810
4811   /* Compute a mapping from expression number (`bitmap_index') to
4812      hash table entry.  */
4813
4814   index_map = xcalloc (n_exprs, sizeof (struct expr *));
4815   for (i = 0; i < expr_hash_table_size; i++)
4816     {
4817       struct expr *expr;
4818
4819       for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
4820         index_map[expr->bitmap_index] = expr;
4821     }
4822
4823   /* Reset bitmap used to track which insns are redundant.  */
4824   pre_redundant_insns = sbitmap_alloc (max_cuid);
4825   sbitmap_zero (pre_redundant_insns);
4826
4827   /* Delete the redundant insns first so that
4828      - we know what register to use for the new insns and for the other
4829        ones with reaching expressions
4830      - we know which insns are redundant when we go to create copies  */
4831   changed = pre_delete ();
4832
4833   did_insert = pre_edge_insert (edge_list, index_map);
4834   /* In other places with reaching expressions, copy the expression to the
4835      specially allocated pseudo-reg that reaches the redundant expr.  */
4836   pre_insert_copies ();
4837   if (did_insert)
4838     {
4839       commit_edge_insertions ();
4840       changed = 1;
4841     }
4842
4843   free (index_map);
4844   free (pre_redundant_insns);
4845
4846   return changed;
4847 }
4848
4849 /* Top level routine to perform one PRE GCSE pass.
4850
4851    Return non-zero if a change was made.  */
4852
4853 static int
4854 one_pre_gcse_pass (pass)
4855      int pass;
4856 {
4857   int changed = 0;
4858
4859   gcse_subst_count = 0;
4860   gcse_create_count = 0;
4861
4862   alloc_expr_hash_table (max_cuid);
4863   add_noreturn_fake_exit_edges ();
4864   compute_expr_hash_table ();
4865   if (gcse_file)
4866     dump_hash_table (gcse_file, "Expression", expr_hash_table,
4867                      expr_hash_table_size, n_exprs);
4868   if (n_exprs > 0)
4869     {
4870       alloc_pre_mem (n_basic_blocks, n_exprs);
4871       compute_pre_data ();
4872       changed |= pre_gcse ();
4873       free_edge_list (edge_list);
4874       free_pre_mem ();
4875     }
4876   remove_fake_edges ();
4877   free_expr_hash_table ();
4878
4879   if (gcse_file)
4880     {
4881       fprintf (gcse_file, "\n");
4882       fprintf (gcse_file, "PRE GCSE of %s, pass %d: %d bytes needed, %d substs, %d insns created\n",
4883                current_function_name, pass,
4884                bytes_used, gcse_subst_count, gcse_create_count);
4885     }
4886
4887   return changed;
4888 }
4889 \f
4890 /* If X contains any LABEL_REF's, add REG_LABEL notes for them to INSN.
4891    We have to add REG_LABEL notes, because the following loop optimization
4892    pass requires them.  */
4893
4894 /* ??? This is very similar to the loop.c add_label_notes function.  We
4895    could probably share code here.  */
4896
4897 /* ??? If there was a jump optimization pass after gcse and before loop,
4898    then we would not need to do this here, because jump would add the
4899    necessary REG_LABEL notes.  */
4900
4901 static void
4902 add_label_notes (x, insn)
4903      rtx x;
4904      rtx insn;
4905 {
4906   enum rtx_code code = GET_CODE (x);
4907   int i, j;
4908   const char *fmt;
4909
4910   if (code == LABEL_REF && !LABEL_REF_NONLOCAL_P (x))
4911     {
4912       /* This code used to ignore labels that referred to dispatch tables to
4913          avoid flow generating (slighly) worse code.
4914
4915          We no longer ignore such label references (see LABEL_REF handling in
4916          mark_jump_label for additional information).  */
4917       REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_LABEL, XEXP (x, 0),
4918                                             REG_NOTES (insn));
4919       return;
4920     }
4921
4922   fmt = GET_RTX_FORMAT (code);
4923   for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
4924     {
4925       if (fmt[i] == 'e')
4926         add_label_notes (XEXP (x, i), insn);
4927       else if (fmt[i] == 'E')
4928         for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4929           add_label_notes (XVECEXP (x, i, j), insn);
4930     }
4931 }
4932
4933 /* Compute transparent outgoing information for each block.
4934
4935    An expression is transparent to an edge unless it is killed by
4936    the edge itself.  This can only happen with abnormal control flow,
4937    when the edge is traversed through a call.  This happens with
4938    non-local labels and exceptions.
4939
4940    This would not be necessary if we split the edge.  While this is
4941    normally impossible for abnormal critical edges, with some effort
4942    it should be possible with exception handling, since we still have
4943    control over which handler should be invoked.  But due to increased
4944    EH table sizes, this may not be worthwhile.  */
4945
4946 static void
4947 compute_transpout ()
4948 {
4949   int bb;
4950
4951   sbitmap_vector_ones (transpout, n_basic_blocks);
4952
4953   for (bb = 0; bb < n_basic_blocks; ++bb)
4954     {
4955       int i;
4956
4957       /* Note that flow inserted a nop a the end of basic blocks that
4958          end in call instructions for reasons other than abnormal
4959          control flow.  */
4960       if (GET_CODE (BLOCK_END (bb)) != CALL_INSN)
4961         continue;
4962
4963       for (i = 0; i < expr_hash_table_size; i++)
4964         {
4965           struct expr *expr;
4966           for (expr = expr_hash_table[i]; expr ; expr = expr->next_same_hash)
4967             if (GET_CODE (expr->expr) == MEM)
4968               {
4969                 rtx addr = XEXP (expr->expr, 0);
4970
4971                 if (GET_CODE (addr) == SYMBOL_REF
4972                     && CONSTANT_POOL_ADDRESS_P (addr))
4973                   continue;
4974
4975                 /* ??? Optimally, we would use interprocedural alias
4976                    analysis to determine if this mem is actually killed
4977                    by this call.  */
4978                 RESET_BIT (transpout[bb], expr->bitmap_index);
4979               }
4980         }
4981     }
4982 }
4983
4984 /* Removal of useless null pointer checks */
4985
4986 /* Called via note_stores.  X is set by SETTER.  If X is a register we must
4987    invalidate nonnull_local and set nonnull_killed.  DATA is really a
4988    `null_pointer_info *'.
4989
4990    We ignore hard registers.  */
4991 static void
4992 invalidate_nonnull_info (x, setter, data)
4993      rtx x;
4994      rtx setter ATTRIBUTE_UNUSED;
4995      void *data;
4996 {
4997   int offset, regno;
4998   struct null_pointer_info* npi = (struct null_pointer_info *) data;
4999
5000   offset = 0;
5001   while (GET_CODE (x) == SUBREG)
5002     x = SUBREG_REG (x);
5003
5004   /* Ignore anything that is not a register or is a hard register.  */
5005   if (GET_CODE (x) != REG
5006       || REGNO (x) < npi->min_reg
5007       || REGNO (x) >= npi->max_reg)
5008     return;
5009
5010   regno = REGNO (x) - npi->min_reg;
5011
5012   RESET_BIT (npi->nonnull_local[npi->current_block], regno);
5013   SET_BIT (npi->nonnull_killed[npi->current_block], regno);
5014 }
5015
5016 /* Do null-pointer check elimination for the registers indicated in
5017    NPI.  NONNULL_AVIN and NONNULL_AVOUT are pre-allocated sbitmaps;
5018    they are not our responsibility to free.  */
5019
5020 static void
5021 delete_null_pointer_checks_1 (s_preds, block_reg, nonnull_avin,
5022                               nonnull_avout, npi)
5023      int_list_ptr *s_preds;
5024      int *block_reg;
5025      sbitmap *nonnull_avin;
5026      sbitmap *nonnull_avout;
5027      struct null_pointer_info *npi;
5028 {
5029   int changed, bb;
5030   int current_block;
5031   sbitmap *nonnull_local = npi->nonnull_local;
5032   sbitmap *nonnull_killed = npi->nonnull_killed;
5033
5034   /* Compute local properties, nonnull and killed.  A register will have
5035      the nonnull property if at the end of the current block its value is
5036      known to be nonnull.  The killed property indicates that somewhere in
5037      the block any information we had about the register is killed.
5038
5039      Note that a register can have both properties in a single block.  That
5040      indicates that it's killed, then later in the block a new value is
5041      computed.  */
5042   sbitmap_vector_zero (nonnull_local, n_basic_blocks);
5043   sbitmap_vector_zero (nonnull_killed, n_basic_blocks);
5044   for (current_block = 0; current_block < n_basic_blocks; current_block++)
5045     {
5046       rtx insn, stop_insn;
5047
5048       /* Set the current block for invalidate_nonnull_info.  */
5049       npi->current_block = current_block;
5050
5051       /* Scan each insn in the basic block looking for memory references and
5052          register sets.  */
5053       stop_insn = NEXT_INSN (BLOCK_END (current_block));
5054       for (insn = BLOCK_HEAD (current_block);
5055            insn != stop_insn;
5056            insn = NEXT_INSN (insn))
5057         {
5058           rtx set;
5059           rtx reg;
5060
5061           /* Ignore anything that is not a normal insn.  */
5062           if (GET_RTX_CLASS (GET_CODE (insn)) != 'i')
5063             continue;
5064
5065           /* Basically ignore anything that is not a simple SET.  We do have
5066              to make sure to invalidate nonnull_local and set nonnull_killed
5067              for such insns though.  */
5068           set = single_set (insn);
5069           if (!set)
5070             {
5071               note_stores (PATTERN (insn), invalidate_nonnull_info, npi);
5072               continue;
5073             }
5074
5075           /* See if we've got a useable memory load.  We handle it first
5076              in case it uses its address register as a dest (which kills
5077              the nonnull property).  */
5078           if (GET_CODE (SET_SRC (set)) == MEM
5079               && GET_CODE ((reg = XEXP (SET_SRC (set), 0))) == REG
5080               && REGNO (reg) >= npi->min_reg
5081               && REGNO (reg) < npi->max_reg)
5082             SET_BIT (nonnull_local[current_block],
5083                      REGNO (reg) - npi->min_reg);
5084
5085           /* Now invalidate stuff clobbered by this insn.  */
5086           note_stores (PATTERN (insn), invalidate_nonnull_info, npi);
5087
5088           /* And handle stores, we do these last since any sets in INSN can
5089              not kill the nonnull property if it is derived from a MEM
5090              appearing in a SET_DEST.  */
5091           if (GET_CODE (SET_DEST (set)) == MEM
5092               && GET_CODE ((reg = XEXP (SET_DEST (set), 0))) == REG
5093               && REGNO (reg) >= npi->min_reg
5094               && REGNO (reg) < npi->max_reg)
5095             SET_BIT (nonnull_local[current_block],
5096                      REGNO (reg) - npi->min_reg);
5097         }
5098     }
5099
5100   /* Now compute global properties based on the local properties.   This
5101      is a classic global availablity algorithm.  */
5102   sbitmap_zero (nonnull_avin[0]);
5103   sbitmap_vector_ones (nonnull_avout, n_basic_blocks);
5104   changed = 1;
5105   while (changed)
5106     {
5107       changed = 0;
5108
5109       for (bb = 0; bb < n_basic_blocks; bb++)
5110         {
5111           if (bb != 0)
5112             sbitmap_intersect_of_predecessors (nonnull_avin[bb],
5113                                                nonnull_avout, bb, s_preds);
5114
5115           changed |= sbitmap_union_of_diff (nonnull_avout[bb],
5116                                             nonnull_local[bb],
5117                                             nonnull_avin[bb],
5118                                             nonnull_killed[bb]);
5119         }
5120     }
5121
5122   /* Now look at each bb and see if it ends with a compare of a value
5123      against zero.  */
5124   for (bb = 0; bb < n_basic_blocks; bb++)
5125     {
5126       rtx last_insn = BLOCK_END (bb);
5127       rtx condition, earliest;
5128       int compare_and_branch;
5129
5130       /* Since MIN_REG is always at least FIRST_PSEUDO_REGISTER, and
5131          since BLOCK_REG[BB] is zero if this block did not end with a
5132          comparison against zero, this condition works.  */
5133       if (block_reg[bb] < npi->min_reg
5134           || block_reg[bb] >= npi->max_reg)
5135         continue;
5136
5137       /* LAST_INSN is a conditional jump.  Get its condition.  */
5138       condition = get_condition (last_insn, &earliest);
5139
5140       /* Is the register known to have a nonzero value?  */
5141       if (!TEST_BIT (nonnull_avout[bb], block_reg[bb] - npi->min_reg))
5142         continue;
5143
5144       /* Try to compute whether the compare/branch at the loop end is one or
5145          two instructions.  */
5146       if (earliest == last_insn)
5147         compare_and_branch = 1;
5148       else if (earliest == prev_nonnote_insn (last_insn))
5149         compare_and_branch = 2;
5150       else
5151         continue;
5152
5153       /* We know the register in this comparison is nonnull at exit from
5154          this block.  We can optimize this comparison.  */
5155       if (GET_CODE (condition) == NE)
5156         {
5157           rtx new_jump;
5158
5159           new_jump = emit_jump_insn_before (gen_jump (JUMP_LABEL (last_insn)),
5160                                             last_insn);
5161           JUMP_LABEL (new_jump) = JUMP_LABEL (last_insn);
5162           LABEL_NUSES (JUMP_LABEL (new_jump))++;
5163           emit_barrier_after (new_jump);
5164         }
5165       delete_insn (last_insn);
5166       if (compare_and_branch == 2)
5167         delete_insn (earliest);
5168
5169       /* Don't check this block again.  (Note that BLOCK_END is
5170          invalid here; we deleted the last instruction in the
5171          block.)  */
5172       block_reg[bb] = 0;
5173     }
5174 }
5175
5176 /* Find EQ/NE comparisons against zero which can be (indirectly) evaluated
5177    at compile time.
5178
5179    This is conceptually similar to global constant/copy propagation and
5180    classic global CSE (it even uses the same dataflow equations as cprop).
5181
5182    If a register is used as memory address with the form (mem (reg)), then we
5183    know that REG can not be zero at that point in the program.  Any instruction
5184    which sets REG "kills" this property.
5185
5186    So, if every path leading to a conditional branch has an available memory
5187    reference of that form, then we know the register can not have the value
5188    zero at the conditional branch.
5189
5190    So we merely need to compute the local properies and propagate that data
5191    around the cfg, then optimize where possible.
5192
5193    We run this pass two times.  Once before CSE, then again after CSE.  This
5194    has proven to be the most profitable approach.  It is rare for new
5195    optimization opportunities of this nature to appear after the first CSE
5196    pass.
5197
5198    This could probably be integrated with global cprop with a little work.  */
5199
5200 void
5201 delete_null_pointer_checks (f)
5202      rtx f;
5203 {
5204   int_list_ptr *s_preds, *s_succs;
5205   int *num_preds, *num_succs;
5206   sbitmap *nonnull_avin, *nonnull_avout;
5207   int *block_reg;
5208   int bb;
5209   int reg;
5210   int regs_per_pass;
5211   int max_reg;
5212   struct null_pointer_info npi;
5213
5214   /* First break the program into basic blocks.  */
5215   find_basic_blocks (f, max_reg_num (), NULL, 1);
5216
5217   /* If we have only a single block, then there's nothing to do.  */
5218   if (n_basic_blocks <= 1)
5219     {
5220       /* Free storage allocated by find_basic_blocks.  */
5221       free_basic_block_vars (0);
5222       return;
5223     }
5224
5225   /* Trying to perform global optimizations on flow graphs which have
5226      a high connectivity will take a long time and is unlikely to be
5227      particularly useful.
5228
5229      In normal circumstances a cfg should have about twice has many edges
5230      as blocks.  But we do not want to punish small functions which have
5231      a couple switch statements.  So we require a relatively large number
5232      of basic blocks and the ratio of edges to blocks to be high.  */
5233   if (n_basic_blocks > 1000 && n_edges / n_basic_blocks >= 20)
5234     {
5235       /* Free storage allocated by find_basic_blocks.  */
5236       free_basic_block_vars (0);
5237       return;
5238     }
5239
5240   /* We need predecessor/successor lists as well as pred/succ counts for
5241      each basic block.  */
5242   s_preds = (int_list_ptr *) gmalloc (n_basic_blocks * sizeof (int_list_ptr));
5243   s_succs = (int_list_ptr *) gmalloc (n_basic_blocks * sizeof (int_list_ptr));
5244   num_preds = (int *) gmalloc (n_basic_blocks * sizeof (int));
5245   num_succs = (int *) gmalloc (n_basic_blocks * sizeof (int));
5246   compute_preds_succs (s_preds, s_succs, num_preds, num_succs);
5247
5248   /* We need four bitmaps, each with a bit for each register in each
5249      basic block.  */
5250   max_reg = max_reg_num ();
5251   regs_per_pass = get_bitmap_width (4, n_basic_blocks, max_reg);
5252
5253   /* Allocate bitmaps to hold local and global properties.  */
5254   npi.nonnull_local = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);
5255   npi.nonnull_killed = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);
5256   nonnull_avin = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);
5257   nonnull_avout = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);
5258
5259   /* Go through the basic blocks, seeing whether or not each block
5260      ends with a conditional branch whose condition is a comparison
5261      against zero.  Record the register compared in BLOCK_REG.  */
5262   block_reg = (int *) xcalloc (n_basic_blocks, sizeof (int));
5263   for (bb = 0; bb < n_basic_blocks; bb++)
5264     {
5265       rtx last_insn = BLOCK_END (bb);
5266       rtx condition, earliest, reg;
5267
5268       /* We only want conditional branches.  */
5269       if (GET_CODE (last_insn) != JUMP_INSN
5270           || !condjump_p (last_insn)
5271           || simplejump_p (last_insn))
5272         continue;
5273
5274       /* LAST_INSN is a conditional jump.  Get its condition.  */
5275       condition = get_condition (last_insn, &earliest);
5276
5277       /* If we were unable to get the condition, or it is not a equality
5278          comparison against zero then there's nothing we can do.  */
5279       if (!condition
5280           || (GET_CODE (condition) != NE && GET_CODE (condition) != EQ)
5281           || GET_CODE (XEXP (condition, 1)) != CONST_INT
5282           || (XEXP (condition, 1)
5283               != CONST0_RTX (GET_MODE (XEXP (condition, 0)))))
5284         continue;
5285
5286       /* We must be checking a register against zero.  */
5287       reg = XEXP (condition, 0);
5288       if (GET_CODE (reg) != REG)
5289         continue;
5290
5291       block_reg[bb] = REGNO (reg);
5292     }
5293
5294   /* Go through the algorithm for each block of registers.  */
5295   for (reg = FIRST_PSEUDO_REGISTER; reg < max_reg; reg += regs_per_pass)
5296     {
5297       npi.min_reg = reg;
5298       npi.max_reg = MIN (reg + regs_per_pass, max_reg);
5299       delete_null_pointer_checks_1 (s_preds, block_reg, nonnull_avin,
5300                                     nonnull_avout, &npi);
5301     }
5302
5303   /* Free storage allocated by find_basic_blocks.  */
5304   free_basic_block_vars (0);
5305
5306   /* Free our local predecessor/successor lists. */
5307   free (s_preds);
5308   free (s_succs);
5309   free (num_preds);
5310   free (num_succs);
5311
5312   /* Free the table of registers compared at the end of every block.  */
5313   free (block_reg);
5314
5315   /* Free bitmaps.  */
5316   free (npi.nonnull_local);
5317   free (npi.nonnull_killed);
5318   free (nonnull_avin);
5319   free (nonnull_avout);
5320 }
5321
5322 /* Code Hoisting variables and subroutines.  */
5323
5324 /* Very busy expressions.  */
5325 static sbitmap *hoist_vbein;
5326 static sbitmap *hoist_vbeout;
5327
5328 /* Hoistable expressions.  */
5329 static sbitmap *hoist_exprs;
5330
5331 /* Dominator bitmaps.  */
5332 static sbitmap *dominators;
5333
5334 /* ??? We could compute post dominators and run this algorithm in
5335    reverse to to perform tail merging, doing so would probably be
5336    more effective than the tail merging code in jump.c.
5337
5338    It's unclear if tail merging could be run in parallel with
5339    code hoisting.  It would be nice.  */
5340
5341 /* Allocate vars used for code hoisting analysis.  */
5342
5343 static void
5344 alloc_code_hoist_mem (n_blocks, n_exprs)
5345      int n_blocks, n_exprs;
5346 {
5347   antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
5348   transp = sbitmap_vector_alloc (n_blocks, n_exprs);
5349   comp = sbitmap_vector_alloc (n_blocks, n_exprs);
5350
5351   hoist_vbein = sbitmap_vector_alloc (n_blocks, n_exprs);
5352   hoist_vbeout = sbitmap_vector_alloc (n_blocks, n_exprs);
5353   hoist_exprs = sbitmap_vector_alloc (n_blocks, n_exprs);
5354   transpout = sbitmap_vector_alloc (n_blocks, n_exprs);
5355
5356   dominators = sbitmap_vector_alloc (n_blocks, n_blocks);
5357 }
5358
5359 /* Free vars used for code hoisting analysis.  */
5360
5361 static void
5362 free_code_hoist_mem ()
5363 {
5364   free (antloc);
5365   free (transp);
5366   free (comp);
5367
5368   free (hoist_vbein);
5369   free (hoist_vbeout);
5370   free (hoist_exprs);
5371   free (transpout);
5372
5373   free (dominators);
5374 }
5375
5376 /* Compute the very busy expressions at entry/exit from each block.
5377
5378    An expression is very busy if all paths from a given point
5379    compute the expression.  */
5380
5381 static void
5382 compute_code_hoist_vbeinout ()
5383 {
5384   int bb, changed, passes;
5385
5386   sbitmap_vector_zero (hoist_vbeout, n_basic_blocks);
5387   sbitmap_vector_zero (hoist_vbein, n_basic_blocks);
5388
5389   passes = 0;
5390   changed = 1;
5391   while (changed)
5392     {
5393       changed = 0;
5394       /* We scan the blocks in the reverse order to speed up
5395          the convergence.  */
5396       for (bb = n_basic_blocks - 1; bb >= 0; bb--)
5397         {
5398           changed |= sbitmap_a_or_b_and_c (hoist_vbein[bb], antloc[bb],
5399                                            hoist_vbeout[bb], transp[bb]);
5400           if (bb != n_basic_blocks - 1)
5401             sbitmap_intersection_of_succs (hoist_vbeout[bb], hoist_vbein, bb);
5402         }
5403       passes++;
5404     }
5405
5406   if (gcse_file)
5407     fprintf (gcse_file, "hoisting vbeinout computation: %d passes\n", passes);
5408 }
5409
5410 /* Top level routine to do the dataflow analysis needed by code hoisting.  */
5411
5412 static void
5413 compute_code_hoist_data ()
5414 {
5415   compute_local_properties (transp, comp, antloc, 0);
5416   compute_transpout ();
5417   compute_code_hoist_vbeinout ();
5418   compute_flow_dominators (dominators, NULL);
5419   if (gcse_file)
5420     fprintf (gcse_file, "\n");
5421 }
5422
5423 /* Determine if the expression identified by EXPR_INDEX would
5424    reach BB unimpared if it was placed at the end of EXPR_BB.
5425
5426    It's unclear exactly what Muchnick meant by "unimpared".  It seems
5427    to me that the expression must either be computed or transparent in
5428    *every* block in the path(s) from EXPR_BB to BB.  Any other definition
5429    would allow the expression to be hoisted out of loops, even if
5430    the expression wasn't a loop invariant.
5431
5432    Contrast this to reachability for PRE where an expression is
5433    considered reachable if *any* path reaches instead of *all*
5434    paths.  */
5435
5436 static int
5437 hoist_expr_reaches_here_p (expr_bb, expr_index, bb, visited)
5438      int expr_bb;
5439      int expr_index;
5440      int bb;
5441      char *visited;
5442 {
5443   edge pred;
5444   int visited_allocated_locally = 0;
5445
5446
5447   if (visited == NULL)
5448     {
5449        visited_allocated_locally = 1;
5450        visited = xcalloc (n_basic_blocks, 1);
5451     }
5452
5453   visited[expr_bb] = 1;
5454   for (pred = BASIC_BLOCK (bb)->pred; pred != NULL; pred = pred->pred_next)
5455     {
5456       int pred_bb = pred->src->index;
5457
5458       if (pred->src == ENTRY_BLOCK_PTR)
5459         break;
5460       else if (visited[pred_bb])
5461         continue;
5462       /* Does this predecessor generate this expression?  */
5463       else if (TEST_BIT (comp[pred_bb], expr_index))
5464         break;
5465       else if (! TEST_BIT (transp[pred_bb], expr_index))
5466         break;
5467       /* Not killed.  */
5468       else
5469         {
5470           visited[pred_bb] = 1;
5471           if (! hoist_expr_reaches_here_p (expr_bb, expr_index,
5472                                            pred_bb, visited))
5473             break;
5474         }
5475     }
5476   if (visited_allocated_locally)
5477     free (visited);
5478   return (pred == NULL);
5479 }
5480 \f
5481 /* Actually perform code hoisting.  */
5482 static void
5483 hoist_code ()
5484 {
5485   int bb, dominated, i;
5486   struct expr **index_map;
5487
5488   sbitmap_vector_zero (hoist_exprs, n_basic_blocks);
5489
5490   /* Compute a mapping from expression number (`bitmap_index') to
5491      hash table entry.  */
5492
5493   index_map = xcalloc (n_exprs, sizeof (struct expr *));
5494   for (i = 0; i < expr_hash_table_size; i++)
5495     {
5496       struct expr *expr;
5497
5498       for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
5499         index_map[expr->bitmap_index] = expr;
5500     }
5501
5502   /* Walk over each basic block looking for potentially hoistable
5503      expressions, nothing gets hoisted from the entry block.  */
5504   for (bb = 0; bb < n_basic_blocks; bb++)
5505     {
5506       int found = 0;
5507       int insn_inserted_p;
5508
5509       /* Examine each expression that is very busy at the exit of this
5510          block.  These are the potentially hoistable expressions.  */
5511       for (i = 0; i < hoist_vbeout[bb]->n_bits; i++)
5512         {
5513           int hoistable = 0;
5514           if (TEST_BIT (hoist_vbeout[bb], i)
5515               && TEST_BIT (transpout[bb], i))
5516             {
5517               /* We've found a potentially hoistable expression, now
5518                  we look at every block BB dominates to see if it
5519                  computes the expression.  */
5520               for (dominated = 0; dominated < n_basic_blocks; dominated++)
5521                 {
5522                   /* Ignore self dominance.  */
5523                   if (bb == dominated
5524                       || ! TEST_BIT (dominators[dominated], bb))
5525                     continue;
5526
5527                   /* We've found a dominated block, now see if it computes
5528                      the busy expression and whether or not moving that
5529                      expression to the "beginning" of that block is safe.  */
5530                   if (!TEST_BIT (antloc[dominated], i))
5531                     continue;
5532
5533                   /* Note if the expression would reach the dominated block
5534                      unimpared if it was placed at the end of BB.
5535
5536                      Keep track of how many times this expression is hoistable
5537                      from a dominated block into BB.  */
5538                   if (hoist_expr_reaches_here_p (bb, i, dominated, NULL))
5539                     hoistable++;
5540                 }
5541
5542               /* If we found more than one hoistable occurence of this
5543                  expression, then note it in the bitmap of expressions to
5544                  hoist.  It makes no sense to hoist things which are computed
5545                  in only one BB, and doing so tends to pessimize register
5546                  allocation.  One could increase this value to try harder
5547                  to avoid any possible code expansion due to register
5548                  allocation issues; however experiments have shown that
5549                  the vast majority of hoistable expressions are only movable
5550                  from two successors, so raising this threshhold is likely
5551                  to nullify any benefit we get from code hoisting.  */
5552               if (hoistable > 1)
5553                 {
5554                   SET_BIT (hoist_exprs[bb], i);
5555                   found = 1;
5556                 }
5557             }
5558         }
5559
5560       /* If we found nothing to hoist, then quit now.  */
5561       if (! found)
5562         continue;
5563
5564       /* Loop over all the hoistable expressions.  */
5565       for (i = 0; i < hoist_exprs[bb]->n_bits; i++)
5566         {
5567           /* We want to insert the expression into BB only once, so
5568              note when we've inserted it.  */
5569           insn_inserted_p = 0;
5570
5571           /* These tests should be the same as the tests above.  */
5572           if (TEST_BIT (hoist_vbeout[bb], i))
5573             {
5574               /* We've found a potentially hoistable expression, now
5575                  we look at every block BB dominates to see if it
5576                  computes the expression.  */
5577               for (dominated = 0; dominated < n_basic_blocks; dominated++)
5578                 {
5579                   /* Ignore self dominance.  */
5580                   if (bb == dominated
5581                       || ! TEST_BIT (dominators[dominated], bb))
5582                     continue;
5583
5584                   /* We've found a dominated block, now see if it computes
5585                      the busy expression and whether or not moving that
5586                      expression to the "beginning" of that block is safe.  */
5587                   if (!TEST_BIT (antloc[dominated], i))
5588                     continue;
5589
5590                   /* The expression is computed in the dominated block and
5591                      it would be safe to compute it at the start of the
5592                      dominated block.  Now we have to determine if the
5593                      expresion would reach the dominated block if it was
5594                      placed at the end of BB.  */
5595                   if (hoist_expr_reaches_here_p (bb, i, dominated, NULL))
5596                     {
5597                       struct expr *expr = index_map[i];
5598                       struct occr *occr = expr->antic_occr;
5599                       rtx insn;
5600                       rtx set;
5601
5602
5603                       /* Find the right occurence of this expression.  */
5604                       while (BLOCK_NUM (occr->insn) != dominated && occr)
5605                         occr = occr->next;
5606
5607                       /* Should never happen.  */
5608                       if (!occr)
5609                         abort ();
5610
5611                       insn = occr->insn;
5612
5613                       set = single_set (insn);
5614                       if (! set)
5615                         abort ();
5616
5617                       /* Create a pseudo-reg to store the result of reaching
5618                          expressions into.  Get the mode for the new pseudo
5619                          from the mode of the original destination pseudo.  */
5620                       if (expr->reaching_reg == NULL)
5621                         expr->reaching_reg
5622                           = gen_reg_rtx (GET_MODE (SET_DEST (set)));
5623
5624                       /* In theory this should never fail since we're creating
5625                          a reg->reg copy.
5626
5627                          However, on the x86 some of the movXX patterns actually
5628                          contain clobbers of scratch regs.  This may cause the
5629                          insn created by validate_change to not match any
5630                          pattern and thus cause validate_change to fail.   */
5631                       if (validate_change (insn, &SET_SRC (set),
5632                                            expr->reaching_reg, 0))
5633                         {
5634                           occr->deleted_p = 1;
5635                           if (!insn_inserted_p)
5636                             {
5637                               insert_insn_end_bb (index_map[i], bb, 0);
5638                               insn_inserted_p = 1;
5639                             }
5640                         }
5641                     }
5642                 }
5643             }
5644         }
5645     }
5646     free (index_map);
5647 }
5648
5649 /* Top level routine to perform one code hoisting (aka unification) pass
5650
5651    Return non-zero if a change was made.  */
5652
5653 static int
5654 one_code_hoisting_pass ()
5655 {
5656   int changed = 0;
5657
5658   alloc_expr_hash_table (max_cuid);
5659   compute_expr_hash_table ();
5660   if (gcse_file)
5661     dump_hash_table (gcse_file, "Code Hosting Expressions", expr_hash_table,
5662                      expr_hash_table_size, n_exprs);
5663   if (n_exprs > 0)
5664     {
5665       alloc_code_hoist_mem (n_basic_blocks, n_exprs);
5666       compute_code_hoist_data ();
5667       hoist_code ();
5668       free_code_hoist_mem ();
5669     }
5670   free_expr_hash_table ();
5671
5672   return changed;
5673 }