gcc/gcse.c

   1 /* Global common subexpression elimination/Partial redundancy elimination
   2    and global constant/copy propagation for GNU compiler.
   3    Copyright (C) 1997, 1998, 1999, 2000 Free Software Foundation, Inc.
   4
   5 This file is part of GNU CC.
   6
   7 GNU CC is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 2, or (at your option)
  10 any later version.
  11
  12 GNU CC is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GNU CC; see the file COPYING.  If not, write to
  19 the Free Software Foundation, 59 Temple Place - Suite 330,
  20 Boston, MA 02111-1307, USA.  */
  21
  22 /* TODO
  23    - reordering of memory allocation and freeing to be more space efficient
  24    - do rough calc of how many regs are needed in each block, and a rough
  25      calc of how many regs are available in each class and use that to
  26      throttle back the code in cases where RTX_COST is minimal.
  27    - dead store elimination
  28    - a store to the same address as a load does not kill the load if the
  29      source of the store is also the destination of the load.  Handling this
  30      allows more load motion, particularly out of loops.
  31    - ability to realloc sbitmap vectors would allow one initial computation
  32      of reg_set_in_block with only subsequent additions, rather than
  33      recomputing it for each pass
  34
  35 */
  36
  37 /* References searched while implementing this.
  38
  39    Compilers Principles, Techniques and Tools
  40    Aho, Sethi, Ullman
  41    Addison-Wesley, 1988
  42
  43    Global Optimization by Suppression of Partial Redundancies
  44    E. Morel, C. Renvoise
  45    communications of the acm, Vol. 22, Num. 2, Feb. 1979
  46
  47    A Portable Machine-Independent Global Optimizer - Design and Measurements
  48    Frederick Chow
  49    Stanford Ph.D. thesis, Dec. 1983
  50
  51    A Fast Algorithm for Code Movement Optimization
  52    D.M. Dhamdhere
  53    SIGPLAN Notices, Vol. 23, Num. 10, Oct. 1988
  54
  55    A Solution to a Problem with Morel and Renvoise's
  56    Global Optimization by Suppression of Partial Redundancies
  57    K-H Drechsler, M.P. Stadel
  58    ACM TOPLAS, Vol. 10, Num. 4, Oct. 1988
  59
  60    Practical Adaptation of the Global Optimization
  61    Algorithm of Morel and Renvoise
  62    D.M. Dhamdhere
  63    ACM TOPLAS, Vol. 13, Num. 2. Apr. 1991
  64
  65    Efficiently Computing Static Single Assignment Form and the Control
  66    Dependence Graph
  67    R. Cytron, J. Ferrante, B.K. Rosen, M.N. Wegman, and F.K. Zadeck
  68    ACM TOPLAS, Vol. 13, Num. 4, Oct. 1991
  69
  70    Lazy Code Motion
  71    J. Knoop, O. Ruthing, B. Steffen
  72    ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI
  73
  74    What's In a Region?  Or Computing Control Dependence Regions in Near-Linear
  75    Time for Reducible Flow Control
  76    Thomas Ball
  77    ACM Letters on Programming Languages and Systems,
  78    Vol. 2, Num. 1-4, Mar-Dec 1993
  79
  80    An Efficient Representation for Sparse Sets
  81    Preston Briggs, Linda Torczon
  82    ACM Letters on Programming Languages and Systems,
  83    Vol. 2, Num. 1-4, Mar-Dec 1993
  84
  85    A Variation of Knoop, Ruthing, and Steffen's Lazy Code Motion
  86    K-H Drechsler, M.P. Stadel
  87    ACM SIGPLAN Notices, Vol. 28, Num. 5, May 1993
  88
  89    Partial Dead Code Elimination
  90    J. Knoop, O. Ruthing, B. Steffen
  91    ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
  92
  93    Effective Partial Redundancy Elimination
  94    P. Briggs, K.D. Cooper
  95    ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
  96
  97    The Program Structure Tree: Computing Control Regions in Linear Time
  98    R. Johnson, D. Pearson, K. Pingali
  99    ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
 100
 101    Optimal Code Motion: Theory and Practice
 102    J. Knoop, O. Ruthing, B. Steffen
 103    ACM TOPLAS, Vol. 16, Num. 4, Jul. 1994
 104
 105    The power of assignment motion
 106    J. Knoop, O. Ruthing, B. Steffen
 107    ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI
 108
 109    Global code motion / global value numbering
 110    C. Click
 111    ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI
 112
 113    Value Driven Redundancy Elimination
 114    L.T. Simpson
 115    Rice University Ph.D. thesis, Apr. 1996
 116
 117    Value Numbering
 118    L.T. Simpson
 119    Massively Scalar Compiler Project, Rice University, Sep. 1996
 120
 121    High Performance Compilers for Parallel Computing
 122    Michael Wolfe
 123    Addison-Wesley, 1996
 124
 125    Advanced Compiler Design and Implementation
 126    Steven Muchnick
 127    Morgan Kaufmann, 1997
 128
 129    Building an Optimizing Compiler
 130    Robert Morgan
 131    Digital Press, 1998
 132
 133    People wishing to speed up the code here should read:
 134      Elimination Algorithms for Data Flow Analysis
 135      B.G. Ryder, M.C. Paull
 136      ACM Computing Surveys, Vol. 18, Num. 3, Sep. 1986
 137
 138      How to Analyze Large Programs Efficiently and Informatively
 139      D.M. Dhamdhere, B.K. Rosen, F.K. Zadeck
 140      ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI
 141
 142    People wishing to do something different can find various possibilities
 143    in the above papers and elsewhere.
 144 */
 145
 146 #include "config.h"
 147 #include "system.h"
 148 #include "toplev.h"
 149
 150 #include "rtl.h"
 151 #include "tm_p.h"
 152 #include "regs.h"
 153 #include "hard-reg-set.h"
 154 #include "flags.h"
 155 #include "real.h"
 156 #include "insn-config.h"
 157 #include "recog.h"
 158 #include "basic-block.h"
 159 #include "output.h"
 160 #include "function.h"
 161 #include "expr.h"
 162
 163 #include "obstack.h"
 164 #define obstack_chunk_alloc gmalloc
 165 #define obstack_chunk_free free
 166
 167 /* Maximum number of passes to perform.  */
 168 #define MAX_PASSES 1
 169
 170 /* Propagate flow information through back edges and thus enable PRE's
 171    moving loop invariant calculations out of loops.
 172
 173    Originally this tended to create worse overall code, but several
 174    improvements during the development of PRE seem to have made following
 175    back edges generally a win.
 176
 177    Note much of the loop invariant code motion done here would normally
 178    be done by loop.c, which has more heuristics for when to move invariants
 179    out of loops.  At some point we might need to move some of those
 180    heuristics into gcse.c.  */
 181 #define FOLLOW_BACK_EDGES 1
 182
 183 /* We support GCSE via Partial Redundancy Elimination.  PRE optimizations
 184    are a superset of those done by GCSE.
 185
 186    We perform the following steps:
 187
 188    1) Compute basic block information.
 189
 190    2) Compute table of places where registers are set.
 191
 192    3) Perform copy/constant propagation.
 193
 194    4) Perform global cse.
 195
 196    5) Perform another pass of copy/constant propagation.
 197
 198    Two passes of copy/constant propagation are done because the first one
 199    enables more GCSE and the second one helps to clean up the copies that
 200    GCSE creates.  This is needed more for PRE than for Classic because Classic
 201    GCSE will try to use an existing register containing the common
 202    subexpression rather than create a new one.  This is harder to do for PRE
 203    because of the code motion (which Classic GCSE doesn't do).
 204
 205    Expressions we are interested in GCSE-ing are of the form
 206    (set (pseudo-reg) (expression)).
 207    Function want_to_gcse_p says what these are.
 208
 209    PRE handles moving invariant expressions out of loops (by treating them as
 210    partially redundant).
 211
 212    Eventually it would be nice to replace cse.c/gcse.c with SSA (static single
 213    assignment) based GVN (global value numbering).  L. T. Simpson's paper
 214    (Rice University) on value numbering is a useful reference for this.
 215
 216    **********************
 217
 218    We used to support multiple passes but there are diminishing returns in
 219    doing so.  The first pass usually makes 90% of the changes that are doable.
 220    A second pass can make a few more changes made possible by the first pass.
 221    Experiments show any further passes don't make enough changes to justify
 222    the expense.
 223
 224    A study of spec92 using an unlimited number of passes:
 225    [1 pass] = 1208 substitutions, [2] = 577, [3] = 202, [4] = 192, [5] = 83,
 226    [6] = 34, [7] = 17, [8] = 9, [9] = 4, [10] = 4, [11] = 2,
 227    [12] = 2, [13] = 1, [15] = 1, [16] = 2, [41] = 1
 228
 229    It was found doing copy propagation between each pass enables further
 230    substitutions.
 231
 232    PRE is quite expensive in complicated functions because the DFA can take
 233    awhile to converge.  Hence we only perform one pass.  Macro MAX_PASSES can
 234    be modified if one wants to experiment.
 235
 236    **********************
 237
 238    The steps for PRE are:
 239
 240    1) Build the hash table of expressions we wish to GCSE (expr_hash_table).
 241
 242    2) Perform the data flow analysis for PRE.
 243
 244    3) Delete the redundant instructions
 245
 246    4) Insert the required copies [if any] that make the partially
 247       redundant instructions fully redundant.
 248
 249    5) For other reaching expressions, insert an instruction to copy the value
 250       to a newly created pseudo that will reach the redundant instruction.
 251
 252    The deletion is done first so that when we do insertions we
 253    know which pseudo reg to use.
 254
 255    Various papers have argued that PRE DFA is expensive (O(n^2)) and others
 256    argue it is not.  The number of iterations for the algorithm to converge
 257    is typically 2-4 so I don't view it as that expensive (relatively speaking).
 258
 259    PRE GCSE depends heavily on the second CSE pass to clean up the copies
 260    we create.  To make an expression reach the place where it's redundant,
 261    the result of the expression is copied to a new register, and the redundant
 262    expression is deleted by replacing it with this new register.  Classic GCSE
 263    doesn't have this problem as much as it computes the reaching defs of
 264    each register in each block and thus can try to use an existing register.
 265
 266    **********************
 267
 268    A fair bit of simplicity is created by creating small functions for simple
 269    tasks, even when the function is only called in one place.  This may
 270    measurably slow things down [or may not] by creating more function call
 271    overhead than is necessary.  The source is laid out so that it's trivial
 272    to make the affected functions inline so that one can measure what speed
 273    up, if any, can be achieved, and maybe later when things settle things can
 274    be rearranged.
 275
 276    Help stamp out big monolithic functions!  */
 277 \f
 278 /* GCSE global vars.  */
 279
 280 /* -dG dump file.  */
 281 static FILE *gcse_file;
 282
 283 /* Note whether or not we should run jump optimization after gcse.  We
 284    want to do this for two cases.
 285
 286     * If we changed any jumps via cprop.
 287
 288     * If we added any labels via edge splitting.  */
 289
 290 static int run_jump_opt_after_gcse;
 291
 292 /* Bitmaps are normally not included in debugging dumps.
 293    However it's useful to be able to print them from GDB.
 294    We could create special functions for this, but it's simpler to
 295    just allow passing stderr to the dump_foo fns.  Since stderr can
 296    be a macro, we store a copy here.  */
 297 static FILE *debug_stderr;
 298
 299 /* An obstack for our working variables.  */
 300 static struct obstack gcse_obstack;
 301
 302 /* Non-zero for each mode that supports (set (reg) (reg)).
 303    This is trivially true for integer and floating point values.
 304    It may or may not be true for condition codes.  */
 305 static char can_copy_p[(int) NUM_MACHINE_MODES];
 306
 307 /* Non-zero if can_copy_p has been initialized.  */
 308 static int can_copy_init_p;
 309
 310 struct reg_use {rtx reg_rtx; };
 311
 312 /* Hash table of expressions.  */
 313
 314 struct expr
 315 {
 316   /* The expression (SET_SRC for expressions, PATTERN for assignments).  */
 317   rtx expr;
 318   /* Index in the available expression bitmaps.  */
 319   int bitmap_index;
 320   /* Next entry with the same hash.  */
 321   struct expr *next_same_hash;
 322   /* List of anticipatable occurrences in basic blocks in the function.
 323      An "anticipatable occurrence" is one that is the first occurrence in the
 324      basic block, the operands are not modified in the basic block prior
 325      to the occurrence and the output is not used between the start of
 326      the block and the occurrence.  */
 327   struct occr *antic_occr;
 328   /* List of available occurrence in basic blocks in the function.
 329      An "available occurrence" is one that is the last occurrence in the
 330      basic block and the operands are not modified by following statements in
 331      the basic block [including this insn].  */
 332   struct occr *avail_occr;
 333   /* Non-null if the computation is PRE redundant.
 334      The value is the newly created pseudo-reg to record a copy of the
 335      expression in all the places that reach the redundant copy.  */
 336   rtx reaching_reg;
 337 };
 338
 339 /* Occurrence of an expression.
 340    There is one per basic block.  If a pattern appears more than once the
 341    last appearance is used [or first for anticipatable expressions].  */
 342
 343 struct occr
 344 {
 345   /* Next occurrence of this expression.  */
 346   struct occr *next;
 347   /* The insn that computes the expression.  */
 348   rtx insn;
 349   /* Non-zero if this [anticipatable] occurrence has been deleted.  */
 350   char deleted_p;
 351   /* Non-zero if this [available] occurrence has been copied to
 352      reaching_reg.  */
 353   /* ??? This is mutually exclusive with deleted_p, so they could share
 354      the same byte.  */
 355   char copied_p;
 356 };
 357
 358 /* Expression and copy propagation hash tables.
 359    Each hash table is an array of buckets.
 360    ??? It is known that if it were an array of entries, structure elements
 361    `next_same_hash' and `bitmap_index' wouldn't be necessary.  However, it is
 362    not clear whether in the final analysis a sufficient amount of memory would
 363    be saved as the size of the available expression bitmaps would be larger
 364    [one could build a mapping table without holes afterwards though].
 365    Someday I'll perform the computation and figure it out.  */
 366
 367 /* Total size of the expression hash table, in elements.  */
 368 static unsigned int expr_hash_table_size;
 369
 370 /* The table itself.
 371    This is an array of `expr_hash_table_size' elements.  */
 372 static struct expr **expr_hash_table;
 373
 374 /* Total size of the copy propagation hash table, in elements.  */
 375 static int set_hash_table_size;
 376
 377 /* The table itself.
 378    This is an array of `set_hash_table_size' elements.  */
 379 static struct expr **set_hash_table;
 380
 381 /* Mapping of uids to cuids.
 382    Only real insns get cuids.  */
 383 static int *uid_cuid;
 384
 385 /* Highest UID in UID_CUID.  */
 386 static int max_uid;
 387
 388 /* Get the cuid of an insn.  */
 389 #define INSN_CUID(INSN) (uid_cuid[INSN_UID (INSN)])
 390
 391 /* Number of cuids.  */
 392 static int max_cuid;
 393
 394 /* Mapping of cuids to insns.  */
 395 static rtx *cuid_insn;
 396
 397 /* Get insn from cuid.  */
 398 #define CUID_INSN(CUID) (cuid_insn[CUID])
 399
 400 /* Maximum register number in function prior to doing gcse + 1.
 401    Registers created during this pass have regno >= max_gcse_regno.
 402    This is named with "gcse" to not collide with global of same name.  */
 403 static unsigned int max_gcse_regno;
 404
 405 /* Maximum number of cse-able expressions found.  */
 406 static int n_exprs;
 407
 408 /* Maximum number of assignments for copy propagation found.  */
 409 static int n_sets;
 410
 411 /* Table of registers that are modified.
 412
 413    For each register, each element is a list of places where the pseudo-reg
 414    is set.
 415
 416    For simplicity, GCSE is done on sets of pseudo-regs only.  PRE GCSE only
 417    requires knowledge of which blocks kill which regs [and thus could use
 418    a bitmap instead of the lists `reg_set_table' uses].
 419
 420    `reg_set_table' and could be turned into an array of bitmaps (num-bbs x
 421    num-regs) [however perhaps it may be useful to keep the data as is].  One
 422    advantage of recording things this way is that `reg_set_table' is fairly
 423    sparse with respect to pseudo regs but for hard regs could be fairly dense
 424    [relatively speaking].  And recording sets of pseudo-regs in lists speeds
 425    up functions like compute_transp since in the case of pseudo-regs we only
 426    need to iterate over the number of times a pseudo-reg is set, not over the
 427    number of basic blocks [clearly there is a bit of a slow down in the cases
 428    where a pseudo is set more than once in a block, however it is believed
 429    that the net effect is to speed things up].  This isn't done for hard-regs
 430    because recording call-clobbered hard-regs in `reg_set_table' at each
 431    function call can consume a fair bit of memory, and iterating over
 432    hard-regs stored this way in compute_transp will be more expensive.  */
 433
 434 typedef struct reg_set
 435 {
 436   /* The next setting of this register.  */
 437   struct reg_set *next;
 438   /* The insn where it was set.  */
 439   rtx insn;
 440 } reg_set;
 441
 442 static reg_set **reg_set_table;
 443
 444 /* Size of `reg_set_table'.
 445    The table starts out at max_gcse_regno + slop, and is enlarged as
 446    necessary.  */
 447 static int reg_set_table_size;
 448
 449 /* Amount to grow `reg_set_table' by when it's full.  */
 450 #define REG_SET_TABLE_SLOP 100
 451
 452 /* Bitmap containing one bit for each register in the program.
 453    Used when performing GCSE to track which registers have been set since
 454    the start of the basic block.  */
 455 static sbitmap reg_set_bitmap;
 456
 457 /* For each block, a bitmap of registers set in the block.
 458    This is used by expr_killed_p and compute_transp.
 459    It is computed during hash table computation and not by compute_sets
 460    as it includes registers added since the last pass (or between cprop and
 461    gcse) and it's currently not easy to realloc sbitmap vectors.  */
 462 static sbitmap *reg_set_in_block;
 463
 464 /* For each block, non-zero if memory is set in that block.
 465    This is computed during hash table computation and is used by
 466    expr_killed_p and compute_transp.
 467    ??? Handling of memory is very simple, we don't make any attempt
 468    to optimize things (later).
 469    ??? This can be computed by compute_sets since the information
 470    doesn't change.  */
 471 static char *mem_set_in_block;
 472
 473 /* Various variables for statistics gathering.  */
 474
 475 /* Memory used in a pass.
 476    This isn't intended to be absolutely precise.  Its intent is only
 477    to keep an eye on memory usage.  */
 478 static int bytes_used;
 479
 480 /* GCSE substitutions made.  */
 481 static int gcse_subst_count;
 482 /* Number of copy instructions created.  */
 483 static int gcse_create_count;
 484 /* Number of constants propagated.  */
 485 static int const_prop_count;
 486 /* Number of copys propagated.  */
 487 static int copy_prop_count;
 488 \f
 489 /* These variables are used by classic GCSE.
 490    Normally they'd be defined a bit later, but `rd_gen' needs to
 491    be declared sooner.  */
 492
 493 /* A bitmap of all ones for implementing the algorithm for available
 494    expressions and reaching definitions.  */
 495 /* ??? Available expression bitmaps have a different size than reaching
 496    definition bitmaps.  This should be the larger of the two, however, it
 497    is not currently used for reaching definitions.  */
 498 static sbitmap u_bitmap;
 499
 500 /* Each block has a bitmap of each type.
 501    The length of each blocks bitmap is:
 502
 503        max_cuid  - for reaching definitions
 504        n_exprs - for available expressions
 505
 506    Thus we view the bitmaps as 2 dimensional arrays.  i.e.
 507    rd_kill[block_num][cuid_num]
 508    ae_kill[block_num][expr_num]                  */
 509
 510 /* For reaching defs */
 511 static sbitmap *rd_kill, *rd_gen, *reaching_defs, *rd_out;
 512
 513 /* for available exprs */
 514 static sbitmap *ae_kill, *ae_gen, *ae_in, *ae_out;
 515
 516 /* Objects of this type are passed around by the null-pointer check
 517    removal routines.  */
 518 struct null_pointer_info
 519 {
 520   /* The basic block being processed.  */
 521   int current_block;
 522   /* The first register to be handled in this pass.  */
 523   unsigned int min_reg;
 524   /* One greater than the last register to be handled in this pass.  */
 525   unsigned int max_reg;
 526   sbitmap *nonnull_local;
 527   sbitmap *nonnull_killed;
 528 };
 529 \f
 530 static void compute_can_copy    PARAMS ((void));
 531 static char *gmalloc            PARAMS ((unsigned int));
 532 static char *grealloc           PARAMS ((char *, unsigned int));
 533 static char *gcse_alloc         PARAMS ((unsigned long));
 534 static void alloc_gcse_mem      PARAMS ((rtx));
 535 static void free_gcse_mem       PARAMS ((void));
 536 static void alloc_reg_set_mem   PARAMS ((int));
 537 static void free_reg_set_mem    PARAMS ((void));
 538 static int get_bitmap_width     PARAMS ((int, int, int));
 539 static void record_one_set      PARAMS ((int, rtx));
 540 static void record_set_info     PARAMS ((rtx, rtx, void *));
 541 static void compute_sets        PARAMS ((rtx));
 542 static void hash_scan_insn      PARAMS ((rtx, int, int));
 543 static void hash_scan_set       PARAMS ((rtx, rtx, int));
 544 static void hash_scan_clobber   PARAMS ((rtx, rtx));
 545 static void hash_scan_call      PARAMS ((rtx, rtx));
 546 static int want_to_gcse_p       PARAMS ((rtx));
 547 static int oprs_unchanged_p     PARAMS ((rtx, rtx, int));
 548 static int oprs_anticipatable_p PARAMS ((rtx, rtx));
 549 static int oprs_available_p     PARAMS ((rtx, rtx));
 550 static void insert_expr_in_table PARAMS ((rtx, enum machine_mode, rtx,
 551                                           int, int));
 552 static void insert_set_in_table PARAMS ((rtx, rtx));
 553 static unsigned int hash_expr   PARAMS ((rtx, enum machine_mode, int *, int));
 554 static unsigned int hash_expr_1 PARAMS ((rtx, enum machine_mode, int *));
 555 static unsigned int hash_set    PARAMS ((int, int));
 556 static int expr_equiv_p         PARAMS ((rtx, rtx));
 557 static void record_last_reg_set_info PARAMS ((rtx, int));
 558 static void record_last_mem_set_info PARAMS ((rtx));
 559 static void record_last_set_info PARAMS ((rtx, rtx, void *));
 560 static void compute_hash_table  PARAMS ((int));
 561 static void alloc_set_hash_table PARAMS ((int));
 562 static void free_set_hash_table PARAMS ((void));
 563 static void compute_set_hash_table PARAMS ((void));
 564 static void alloc_expr_hash_table PARAMS ((unsigned int));
 565 static void free_expr_hash_table PARAMS ((void));
 566 static void compute_expr_hash_table PARAMS ((void));
 567 static void dump_hash_table     PARAMS ((FILE *, const char *, struct expr **,
 568                                          int, int));
 569 static struct expr *lookup_expr PARAMS ((rtx));
 570 static struct expr *lookup_set  PARAMS ((unsigned int, rtx));
 571 static struct expr *next_set    PARAMS ((unsigned int, struct expr *));
 572 static void reset_opr_set_tables PARAMS ((void));
 573 static int oprs_not_set_p       PARAMS ((rtx, rtx));
 574 static void mark_call           PARAMS ((rtx));
 575 static void mark_set            PARAMS ((rtx, rtx));
 576 static void mark_clobber        PARAMS ((rtx, rtx));
 577 static void mark_oprs_set       PARAMS ((rtx));
 578 static void alloc_cprop_mem     PARAMS ((int, int));
 579 static void free_cprop_mem      PARAMS ((void));
 580 static void compute_transp      PARAMS ((rtx, int, sbitmap *, int));
 581 static void compute_transpout   PARAMS ((void));
 582 static void compute_local_properties PARAMS ((sbitmap *, sbitmap *, sbitmap *,
 583                                               int));
 584 static void compute_cprop_data  PARAMS ((void));
 585 static void find_used_regs      PARAMS ((rtx));
 586 static int try_replace_reg      PARAMS ((rtx, rtx, rtx));
 587 static struct expr *find_avail_set PARAMS ((int, rtx));
 588 static int cprop_jump           PARAMS ((rtx, rtx, struct reg_use *, rtx));
 589 #ifdef HAVE_cc0
 590 static int cprop_cc0_jump       PARAMS ((rtx, struct reg_use *, rtx));
 591 #endif
 592 static int cprop_insn           PARAMS ((rtx, int));
 593 static int cprop                PARAMS ((int));
 594 static int one_cprop_pass       PARAMS ((int, int));
 595 static void alloc_pre_mem       PARAMS ((int, int));
 596 static void free_pre_mem        PARAMS ((void));
 597 static void compute_pre_data    PARAMS ((void));
 598 static int pre_expr_reaches_here_p PARAMS ((int, struct expr *, int));
 599 static void insert_insn_end_bb  PARAMS ((struct expr *, int, int));
 600 static void pre_insert_copy_insn PARAMS ((struct expr *, rtx));
 601 static void pre_insert_copies   PARAMS ((void));
 602 static int pre_delete           PARAMS ((void));
 603 static int pre_gcse             PARAMS ((void));
 604 static int one_pre_gcse_pass    PARAMS ((int));
 605 static void add_label_notes     PARAMS ((rtx, rtx));
 606 static void alloc_code_hoist_mem PARAMS ((int, int));
 607 static void free_code_hoist_mem PARAMS ((void));
 608 static void compute_code_hoist_vbeinout PARAMS ((void));
 609 static void compute_code_hoist_data PARAMS ((void));
 610 static int hoist_expr_reaches_here_p PARAMS ((int, int, int, char *));
 611 static void hoist_code          PARAMS ((void));
 612 static int one_code_hoisting_pass PARAMS ((void));
 613 static void alloc_rd_mem        PARAMS ((int, int));
 614 static void free_rd_mem         PARAMS ((void));
 615 static void handle_rd_kill_set  PARAMS ((rtx, int, int));
 616 static void compute_kill_rd     PARAMS ((void));
 617 static void compute_rd          PARAMS ((void));
 618 static void alloc_avail_expr_mem PARAMS ((int, int));
 619 static void free_avail_expr_mem PARAMS ((void));
 620 static void compute_ae_gen      PARAMS ((void));
 621 static int expr_killed_p        PARAMS ((rtx, int));
 622 static void compute_ae_kill     PARAMS ((sbitmap *, sbitmap *));
 623 static int expr_reaches_here_p  PARAMS ((struct occr *, struct expr *,
 624                                          int, int));
 625 static rtx computing_insn       PARAMS ((struct expr *, rtx));
 626 static int def_reaches_here_p   PARAMS ((rtx, rtx));
 627 static int can_disregard_other_sets PARAMS ((struct reg_set **, rtx, int));
 628 static int handle_avail_expr    PARAMS ((rtx, struct expr *));
 629 static int classic_gcse         PARAMS ((void));
 630 static int one_classic_gcse_pass PARAMS ((int));
 631 static void invalidate_nonnull_info PARAMS ((rtx, rtx, void *));
 632 static void delete_null_pointer_checks_1 PARAMS ((unsigned int *, sbitmap *,
 633                                                   sbitmap *,
 634                                                   struct null_pointer_info *));
 635 static rtx process_insert_insn  PARAMS ((struct expr *));
 636 static int pre_edge_insert      PARAMS ((struct edge_list *, struct expr **));
 637 static int expr_reaches_here_p_work PARAMS ((struct occr *, struct expr *,
 638                                              int, int, char *));
 639 static int pre_expr_reaches_here_p_work PARAMS ((int, struct expr *,
 640                                                  int, char *));
 641 \f
 642 /* Entry point for global common subexpression elimination.
 643    F is the first instruction in the function.  */
 644
 645 int
 646 gcse_main (f, file)
 647      rtx f;
 648      FILE *file;
 649 {
 650   int changed, pass;
 651   /* Bytes used at start of pass.  */
 652   int initial_bytes_used;
 653   /* Maximum number of bytes used by a pass.  */
 654   int max_pass_bytes;
 655   /* Point to release obstack data from for each pass.  */
 656   char *gcse_obstack_bottom;
 657
 658   /* We do not construct an accurate cfg in functions which call
 659      setjmp, so just punt to be safe.  */
 660   if (current_function_calls_setjmp)
 661     return 0;
 662
 663   /* Assume that we do not need to run jump optimizations after gcse.  */
 664   run_jump_opt_after_gcse = 0;
 665
 666   /* For calling dump_foo fns from gdb.  */
 667   debug_stderr = stderr;
 668   gcse_file = file;
 669
 670   /* Identify the basic block information for this function, including
 671      successors and predecessors.  */
 672   max_gcse_regno = max_reg_num ();
 673
 674   if (file)
 675     dump_flow_info (file);
 676
 677   /* Return if there's nothing to do.  */
 678   if (n_basic_blocks <= 1)
 679     return 0;
 680
 681   /* Trying to perform global optimizations on flow graphs which have
 682      a high connectivity will take a long time and is unlikely to be
 683      particularly useful.
 684
 685      In normal circumstances a cfg should have about twice has many edges
 686      as blocks.  But we do not want to punish small functions which have
 687      a couple switch statements.  So we require a relatively large number
 688      of basic blocks and the ratio of edges to blocks to be high.  */
 689   if (n_basic_blocks > 1000 && n_edges / n_basic_blocks >= 20)
 690     return 0;
 691
 692   /* See what modes support reg/reg copy operations.  */
 693   if (! can_copy_init_p)
 694     {
 695       compute_can_copy ();
 696       can_copy_init_p = 1;
 697     }
 698
 699   gcc_obstack_init (&gcse_obstack);
 700   bytes_used = 0;
 701
 702   /* Record where pseudo-registers are set.  This data is kept accurate
 703      during each pass.  ??? We could also record hard-reg information here
 704      [since it's unchanging], however it is currently done during hash table
 705      computation.
 706
 707      It may be tempting to compute MEM set information here too, but MEM sets
 708      will be subject to code motion one day and thus we need to compute
 709      information about memory sets when we build the hash tables.  */
 710
 711   alloc_reg_set_mem (max_gcse_regno);
 712   compute_sets (f);
 713
 714   pass = 0;
 715   initial_bytes_used = bytes_used;
 716   max_pass_bytes = 0;
 717   gcse_obstack_bottom = gcse_alloc (1);
 718   changed = 1;
 719   while (changed && pass < MAX_PASSES)
 720     {
 721       changed = 0;
 722       if (file)
 723         fprintf (file, "GCSE pass %d\n\n", pass + 1);
 724
 725       /* Initialize bytes_used to the space for the pred/succ lists,
 726          and the reg_set_table data.  */
 727       bytes_used = initial_bytes_used;
 728
 729       /* Each pass may create new registers, so recalculate each time.  */
 730       max_gcse_regno = max_reg_num ();
 731
 732       alloc_gcse_mem (f);
 733
 734       /* Don't allow constant propagation to modify jumps
 735          during this pass.  */
 736       changed = one_cprop_pass (pass + 1, 0);
 737
 738       if (optimize_size)
 739         changed |= one_classic_gcse_pass (pass + 1);
 740       else
 741         {
 742           changed |= one_pre_gcse_pass (pass + 1);
 743           free_reg_set_mem ();
 744           alloc_reg_set_mem (max_reg_num ());
 745           compute_sets (f);
 746           run_jump_opt_after_gcse = 1;
 747         }
 748
 749       if (max_pass_bytes < bytes_used)
 750         max_pass_bytes = bytes_used;
 751
 752       /* Free up memory, then reallocate for code hoisting.  We can
 753          not re-use the existing allocated memory because the tables
 754          will not have info for the insns or registers created by
 755          partial redundancy elimination.  */
 756       free_gcse_mem ();
 757
 758       /* It does not make sense to run code hoisting unless we optimizing
 759          for code size -- it rarely makes programs faster, and can make
 760          them bigger if we did partial redundancy elimination (when optimizing
 761          for space, we use a classic gcse algorithm instead of partial
 762          redundancy algorithms).  */
 763       if (optimize_size)
 764         {
 765           max_gcse_regno = max_reg_num ();
 766           alloc_gcse_mem (f);
 767           changed |= one_code_hoisting_pass ();
 768           free_gcse_mem ();
 769
 770           if (max_pass_bytes < bytes_used)
 771             max_pass_bytes = bytes_used;
 772         }
 773
 774       if (file)
 775         {
 776           fprintf (file, "\n");
 777           fflush (file);
 778         }
 779
 780       obstack_free (&gcse_obstack, gcse_obstack_bottom);
 781       pass++;
 782     }
 783
 784   /* Do one last pass of copy propagation, including cprop into
 785      conditional jumps.  */
 786
 787   max_gcse_regno = max_reg_num ();
 788   alloc_gcse_mem (f);
 789   /* This time, go ahead and allow cprop to alter jumps.  */
 790   one_cprop_pass (pass + 1, 1);
 791   free_gcse_mem ();
 792
 793   if (file)
 794     {
 795       fprintf (file, "GCSE of %s: %d basic blocks, ",
 796                current_function_name, n_basic_blocks);
 797       fprintf (file, "%d pass%s, %d bytes\n\n",
 798                pass, pass > 1 ? "es" : "", max_pass_bytes);
 799     }
 800
 801   obstack_free (&gcse_obstack, NULL_PTR);
 802   free_reg_set_mem ();
 803   return run_jump_opt_after_gcse;
 804 }
 805 \f
 806 /* Misc. utilities.  */
 807
 808 /* Compute which modes support reg/reg copy operations.  */
 809
 810 static void
 811 compute_can_copy ()
 812 {
 813   int i;
 814 #ifndef AVOID_CCMODE_COPIES
 815   rtx reg,insn;
 816 #endif
 817   char *free_point = (char *) oballoc (1);
 818
 819   bzero (can_copy_p, NUM_MACHINE_MODES);
 820
 821   start_sequence ();
 822   for (i = 0; i < NUM_MACHINE_MODES; i++)
 823     if (GET_MODE_CLASS (i) == MODE_CC)
 824       {
 825 #ifdef AVOID_CCMODE_COPIES
 826         can_copy_p[i] = 0;
 827 #else
 828         reg = gen_rtx_REG ((enum machine_mode) i, LAST_VIRTUAL_REGISTER + 1);
 829         insn = emit_insn (gen_rtx_SET (VOIDmode, reg, reg));
 830         if (recog (PATTERN (insn), insn, NULL_PTR) >= 0)
 831           can_copy_p[i] = 1;
 832 #endif
 833       }
 834     else
 835       can_copy_p[i] = 1;
 836
 837   end_sequence ();
 838
 839   /* Free the objects we just allocated.  */
 840   obfree (free_point);
 841 }
 842 \f
 843 /* Cover function to xmalloc to record bytes allocated.  */
 844
 845 static char *
 846 gmalloc (size)
 847      unsigned int size;
 848 {
 849   bytes_used += size;
 850   return xmalloc (size);
 851 }
 852
 853 /* Cover function to xrealloc.
 854    We don't record the additional size since we don't know it.
 855    It won't affect memory usage stats much anyway.  */
 856
 857 static char *
 858 grealloc (ptr, size)
 859      char *ptr;
 860      unsigned int size;
 861 {
 862   return xrealloc (ptr, size);
 863 }
 864
 865 /* Cover function to obstack_alloc.
 866    We don't need to record the bytes allocated here since
 867    obstack_chunk_alloc is set to gmalloc.  */
 868
 869 static char *
 870 gcse_alloc (size)
 871      unsigned long size;
 872 {
 873   return (char *) obstack_alloc (&gcse_obstack, size);
 874 }
 875
 876 /* Allocate memory for the cuid mapping array,
 877    and reg/memory set tracking tables.
 878
 879    This is called at the start of each pass.  */
 880
 881 static void
 882 alloc_gcse_mem (f)
 883      rtx f;
 884 {
 885   int i,n;
 886   rtx insn;
 887
 888   /* Find the largest UID and create a mapping from UIDs to CUIDs.
 889      CUIDs are like UIDs except they increase monotonically, have no gaps,
 890      and only apply to real insns.  */
 891
 892   max_uid = get_max_uid ();
 893   n = (max_uid + 1) * sizeof (int);
 894   uid_cuid = (int *) gmalloc (n);
 895   bzero ((char *) uid_cuid, n);
 896   for (insn = f, i = 0; insn; insn = NEXT_INSN (insn))
 897     {
 898       if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
 899         INSN_CUID (insn) = i++;
 900       else
 901         INSN_CUID (insn) = i;
 902     }
 903
 904   /* Create a table mapping cuids to insns.  */
 905
 906   max_cuid = i;
 907   n = (max_cuid + 1) * sizeof (rtx);
 908   cuid_insn = (rtx *) gmalloc (n);
 909   bzero ((char *) cuid_insn, n);
 910   for (insn = f, i = 0; insn; insn = NEXT_INSN (insn))
 911     if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
 912       CUID_INSN (i++) = insn;
 913
 914   /* Allocate vars to track sets of regs.  */
 915   reg_set_bitmap = (sbitmap) sbitmap_alloc (max_gcse_regno);
 916
 917   /* Allocate vars to track sets of regs, memory per block.  */
 918   reg_set_in_block = (sbitmap *) sbitmap_vector_alloc (n_basic_blocks,
 919                                                        max_gcse_regno);
 920   mem_set_in_block = (char *) gmalloc (n_basic_blocks);
 921 }
 922
 923 /* Free memory allocated by alloc_gcse_mem.  */
 924
 925 static void
 926 free_gcse_mem ()
 927 {
 928   free (uid_cuid);
 929   free (cuid_insn);
 930
 931   free (reg_set_bitmap);
 932
 933   free (reg_set_in_block);
 934   free (mem_set_in_block);
 935 }
 936
 937 /* Many of the global optimization algorithms work by solving dataflow
 938    equations for various expressions.  Initially, some local value is
 939    computed for each expression in each block.  Then, the values across the
 940    various blocks are combined (by following flow graph edges) to arrive at
 941    global values.  Conceptually, each set of equations is independent.  We
 942    may therefore solve all the equations in parallel, solve them one at a
 943    time, or pick any intermediate approach.
 944
 945    When you're going to need N two-dimensional bitmaps, each X (say, the
 946    number of blocks) by Y (say, the number of expressions), call this
 947    function.  It's not important what X and Y represent; only that Y
 948    correspond to the things that can be done in parallel.  This function will
 949    return an appropriate chunking factor C; you should solve C sets of
 950    equations in parallel.  By going through this function, we can easily
 951    trade space against time; by solving fewer equations in parallel we use
 952    less space.  */
 953
 954 static int
 955 get_bitmap_width (n, x, y)
 956      int n;
 957      int x;
 958      int y;
 959 {
 960   /* It's not really worth figuring out *exactly* how much memory will
 961      be used by a particular choice.  The important thing is to get
 962      something approximately right.  */
 963   size_t max_bitmap_memory = 10 * 1024 * 1024;
 964
 965   /* The number of bytes we'd use for a single column of minimum
 966      width.  */
 967   size_t column_size = n * x * sizeof (SBITMAP_ELT_TYPE);
 968
 969   /* Often, it's reasonable just to solve all the equations in
 970      parallel.  */
 971   if (column_size * SBITMAP_SET_SIZE (y) <= max_bitmap_memory)
 972     return y;
 973
 974   /* Otherwise, pick the largest width we can, without going over the
 975      limit.  */
 976   return SBITMAP_ELT_BITS * ((max_bitmap_memory + column_size - 1)
 977                              / column_size);
 978 }
 979 \f
 980 /* Compute the local properties of each recorded expression.
 981
 982    Local properties are those that are defined by the block, irrespective of
 983    other blocks.
 984
 985    An expression is transparent in a block if its operands are not modified
 986    in the block.
 987
 988    An expression is computed (locally available) in a block if it is computed
 989    at least once and expression would contain the same value if the
 990    computation was moved to the end of the block.
 991
 992    An expression is locally anticipatable in a block if it is computed at
 993    least once and expression would contain the same value if the computation
 994    was moved to the beginning of the block.
 995
 996    We call this routine for cprop, pre and code hoisting.  They all compute
 997    basically the same information and thus can easily share this code.
 998
 999    TRANSP, COMP, and ANTLOC are destination sbitmaps for recording local
1000    properties.  If NULL, then it is not necessary to compute or record that
1001    particular property.
1002
1003    SETP controls which hash table to look at.  If zero, this routine looks at
1004    the expr hash table; if nonzero this routine looks at the set hash table.
1005    Additionally, TRANSP is computed as ~TRANSP, since this is really cprop's
1006    ABSALTERED.  */
1007
1008 static void
1009 compute_local_properties (transp, comp, antloc, setp)
1010      sbitmap *transp;
1011      sbitmap *comp;
1012      sbitmap *antloc;
1013      int setp;
1014 {
1015   unsigned int i, hash_table_size;
1016   struct expr **hash_table;
1017
1018   /* Initialize any bitmaps that were passed in.  */
1019   if (transp)
1020     {
1021       if (setp)
1022         sbitmap_vector_zero (transp, n_basic_blocks);
1023       else
1024         sbitmap_vector_ones (transp, n_basic_blocks);
1025     }
1026
1027   if (comp)
1028     sbitmap_vector_zero (comp, n_basic_blocks);
1029   if (antloc)
1030     sbitmap_vector_zero (antloc, n_basic_blocks);
1031
1032   /* We use the same code for cprop, pre and hoisting.  For cprop
1033      we care about the set hash table, for pre and hoisting we
1034      care about the expr hash table.  */
1035   hash_table_size = setp ? set_hash_table_size : expr_hash_table_size;
1036   hash_table = setp ? set_hash_table : expr_hash_table;
1037
1038   for (i = 0; i < hash_table_size; i++)
1039     {
1040       struct expr *expr;
1041
1042       for (expr = hash_table[i]; expr != NULL; expr = expr->next_same_hash)
1043         {
1044           int indx = expr->bitmap_index;
1045           struct occr *occr;
1046
1047           /* The expression is transparent in this block if it is not killed.
1048              We start by assuming all are transparent [none are killed], and
1049              then reset the bits for those that are.  */
1050           if (transp)
1051             compute_transp (expr->expr, indx, transp, setp);
1052
1053           /* The occurrences recorded in antic_occr are exactly those that
1054              we want to set to non-zero in ANTLOC.  */
1055           if (antloc)
1056             for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
1057               {
1058                 SET_BIT (antloc[BLOCK_NUM (occr->insn)], indx);
1059
1060                 /* While we're scanning the table, this is a good place to
1061                    initialize this.  */
1062                 occr->deleted_p = 0;
1063               }
1064
1065           /* The occurrences recorded in avail_occr are exactly those that
1066              we want to set to non-zero in COMP.  */
1067           if (comp)
1068             for (occr = expr->avail_occr; occr != NULL; occr = occr->next)
1069               {
1070                 SET_BIT (comp[BLOCK_NUM (occr->insn)], indx);
1071
1072                 /* While we're scanning the table, this is a good place to
1073                    initialize this.  */
1074                 occr->copied_p = 0;
1075               }
1076
1077           /* While we're scanning the table, this is a good place to
1078              initialize this.  */
1079           expr->reaching_reg = 0;
1080         }
1081     }
1082 }
1083 \f
1084 /* Register set information.
1085
1086    `reg_set_table' records where each register is set or otherwise
1087    modified.  */
1088
1089 static struct obstack reg_set_obstack;
1090
1091 static void
1092 alloc_reg_set_mem (n_regs)
1093      int n_regs;
1094 {
1095   unsigned int n;
1096
1097   reg_set_table_size = n_regs + REG_SET_TABLE_SLOP;
1098   n = reg_set_table_size * sizeof (struct reg_set *);
1099   reg_set_table = (struct reg_set **) gmalloc (n);
1100   bzero ((char *) reg_set_table, n);
1101
1102   gcc_obstack_init (&reg_set_obstack);
1103 }
1104
1105 static void
1106 free_reg_set_mem ()
1107 {
1108   free (reg_set_table);
1109   obstack_free (&reg_set_obstack, NULL_PTR);
1110 }
1111
1112 /* Record REGNO in the reg_set table.  */
1113
1114 static void
1115 record_one_set (regno, insn)
1116      int regno;
1117      rtx insn;
1118 {
1119   /* allocate a new reg_set element and link it onto the list */
1120   struct reg_set *new_reg_info, *reg_info_ptr1, *reg_info_ptr2;
1121
1122   /* If the table isn't big enough, enlarge it.  */
1123   if (regno >= reg_set_table_size)
1124     {
1125       int new_size = regno + REG_SET_TABLE_SLOP;
1126
1127       reg_set_table
1128         = (struct reg_set **) grealloc ((char *) reg_set_table,
1129                                         new_size * sizeof (struct reg_set *));
1130       bzero ((char *) (reg_set_table + reg_set_table_size),
1131              (new_size - reg_set_table_size) * sizeof (struct reg_set *));
1132       reg_set_table_size = new_size;
1133     }
1134
1135   new_reg_info = (struct reg_set *) obstack_alloc (&reg_set_obstack,
1136                                                    sizeof (struct reg_set));
1137   bytes_used += sizeof (struct reg_set);
1138   new_reg_info->insn = insn;
1139   new_reg_info->next = NULL;
1140   if (reg_set_table[regno] == NULL)
1141     reg_set_table[regno] = new_reg_info;
1142   else
1143     {
1144       reg_info_ptr1 = reg_info_ptr2 = reg_set_table[regno];
1145       /* ??? One could keep a "last" pointer to speed this up.  */
1146       while (reg_info_ptr1 != NULL)
1147         {
1148           reg_info_ptr2 = reg_info_ptr1;
1149           reg_info_ptr1 = reg_info_ptr1->next;
1150         }
1151
1152       reg_info_ptr2->next = new_reg_info;
1153     }
1154 }
1155
1156 /* Called from compute_sets via note_stores to handle one SET or CLOBBER in
1157    an insn.  The DATA is really the instruction in which the SET is
1158    occurring.  */
1159
1160 static void
1161 record_set_info (dest, setter, data)
1162      rtx dest, setter ATTRIBUTE_UNUSED;
1163      void *data;
1164 {
1165   rtx record_set_insn = (rtx) data;
1166
1167   if (GET_CODE (dest) == REG && REGNO (dest) >= FIRST_PSEUDO_REGISTER)
1168     record_one_set (REGNO (dest), record_set_insn);
1169 }
1170
1171 /* Scan the function and record each set of each pseudo-register.
1172
1173    This is called once, at the start of the gcse pass.  See the comments for
1174    `reg_set_table' for further documenation.  */
1175
1176 static void
1177 compute_sets (f)
1178      rtx f;
1179 {
1180   rtx insn;
1181
1182   for (insn = f; insn != 0; insn = NEXT_INSN (insn))
1183     if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
1184       note_stores (PATTERN (insn), record_set_info, insn);
1185 }
1186 \f
1187 /* Hash table support.  */
1188
1189 /* For each register, the cuid of the first/last insn in the block to set it,
1190    or -1 if not set.  */
1191 #define NEVER_SET -1
1192 static int *reg_first_set;
1193 static int *reg_last_set;
1194
1195 /* While computing "first/last set" info, this is the CUID of first/last insn
1196    to set memory or -1 if not set.  `mem_last_set' is also used when
1197    performing GCSE to record whether memory has been set since the beginning
1198    of the block.
1199
1200    Note that handling of memory is very simple, we don't make any attempt
1201    to optimize things (later).  */
1202 static int mem_first_set;
1203 static int mem_last_set;
1204
1205 /* Perform a quick check whether X, the source of a set, is something
1206    we want to consider for GCSE.  */
1207
1208 static int
1209 want_to_gcse_p (x)
1210      rtx x;
1211 {
1212   switch (GET_CODE (x))
1213     {
1214     case REG:
1215     case SUBREG:
1216     case CONST_INT:
1217     case CONST_DOUBLE:
1218     case CALL:
1219       return 0;
1220
1221     default:
1222       break;
1223     }
1224
1225   return 1;
1226 }
1227
1228 /* Return non-zero if the operands of expression X are unchanged from the
1229    start of INSN's basic block up to but not including INSN (if AVAIL_P == 0),
1230    or from INSN to the end of INSN's basic block (if AVAIL_P != 0).  */
1231
1232 static int
1233 oprs_unchanged_p (x, insn, avail_p)
1234      rtx x, insn;
1235      int avail_p;
1236 {
1237   int i, j;
1238   enum rtx_code code;
1239   const char *fmt;
1240
1241   if (x == 0)
1242     return 1;
1243
1244   code = GET_CODE (x);
1245   switch (code)
1246     {
1247     case REG:
1248       if (avail_p)
1249         return (reg_last_set[REGNO (x)] == NEVER_SET
1250                 || reg_last_set[REGNO (x)] < INSN_CUID (insn));
1251       else
1252         return (reg_first_set[REGNO (x)] == NEVER_SET
1253                 || reg_first_set[REGNO (x)] >= INSN_CUID (insn));
1254
1255     case MEM:
1256       if (avail_p && mem_last_set != NEVER_SET
1257           && mem_last_set >= INSN_CUID (insn))
1258         return 0;
1259       else if (! avail_p && mem_first_set != NEVER_SET
1260                && mem_first_set < INSN_CUID (insn))
1261         return 0;
1262       else
1263         return oprs_unchanged_p (XEXP (x, 0), insn, avail_p);
1264
1265     case PRE_DEC:
1266     case PRE_INC:
1267     case POST_DEC:
1268     case POST_INC:
1269       return 0;
1270
1271     case PC:
1272     case CC0: /*FIXME*/
1273     case CONST:
1274     case CONST_INT:
1275     case CONST_DOUBLE:
1276     case SYMBOL_REF:
1277     case LABEL_REF:
1278     case ADDR_VEC:
1279     case ADDR_DIFF_VEC:
1280       return 1;
1281
1282     default:
1283       break;
1284     }
1285
1286   for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
1287     {
1288       if (fmt[i] == 'e')
1289         {
1290           /* If we are about to do the last recursive call needed at this
1291              level, change it into iteration.  This function is called enough
1292              to be worth it.  */
1293           if (i == 0)
1294             return oprs_unchanged_p (XEXP (x, i), insn, avail_p);
1295
1296           else if (! oprs_unchanged_p (XEXP (x, i), insn, avail_p))
1297             return 0;
1298         }
1299       else if (fmt[i] == 'E')
1300         for (j = 0; j < XVECLEN (x, i); j++)
1301           if (! oprs_unchanged_p (XVECEXP (x, i, j), insn, avail_p))
1302             return 0;
1303     }
1304
1305   return 1;
1306 }
1307
1308 /* Return non-zero if the operands of expression X are unchanged from
1309    the start of INSN's basic block up to but not including INSN.  */
1310
1311 static int
1312 oprs_anticipatable_p (x, insn)
1313      rtx x, insn;
1314 {
1315   return oprs_unchanged_p (x, insn, 0);
1316 }
1317
1318 /* Return non-zero if the operands of expression X are unchanged from
1319    INSN to the end of INSN's basic block.  */
1320
1321 static int
1322 oprs_available_p (x, insn)
1323      rtx x, insn;
1324 {
1325   return oprs_unchanged_p (x, insn, 1);
1326 }
1327
1328 /* Hash expression X.
1329
1330    MODE is only used if X is a CONST_INT.  DO_NOT_RECORD_P is a boolean
1331    indicating if a volatile operand is found or if the expression contains
1332    something we don't want to insert in the table.
1333
1334    ??? One might want to merge this with canon_hash.  Later.  */
1335
1336 static unsigned int
1337 hash_expr (x, mode, do_not_record_p, hash_table_size)
1338      rtx x;
1339      enum machine_mode mode;
1340      int *do_not_record_p;
1341      int hash_table_size;
1342 {
1343   unsigned int hash;
1344
1345   *do_not_record_p = 0;
1346
1347   hash = hash_expr_1 (x, mode, do_not_record_p);
1348   return hash % hash_table_size;
1349 }
1350
1351 /* Subroutine of hash_expr to do the actual work.  */
1352
1353 static unsigned int
1354 hash_expr_1 (x, mode, do_not_record_p)
1355      rtx x;
1356      enum machine_mode mode;
1357      int *do_not_record_p;
1358 {
1359   int i, j;
1360   unsigned hash = 0;
1361   enum rtx_code code;
1362   const char *fmt;
1363
1364   /* Used to turn recursion into iteration.  We can't rely on GCC's
1365      tail-recursion eliminatio since we need to keep accumulating values
1366      in HASH.  */
1367
1368   if (x == 0)
1369     return hash;
1370
1371  repeat:
1372   code = GET_CODE (x);
1373   switch (code)
1374     {
1375     case REG:
1376       hash += ((unsigned int) REG << 7) + REGNO (x);
1377       return hash;
1378
1379     case CONST_INT:
1380       hash += (((unsigned int) CONST_INT << 7) + (unsigned int) mode
1381                + (unsigned int) INTVAL (x));
1382       return hash;
1383
1384     case CONST_DOUBLE:
1385       /* This is like the general case, except that it only counts
1386          the integers representing the constant.  */
1387       hash += (unsigned int) code + (unsigned int) GET_MODE (x);
1388       if (GET_MODE (x) != VOIDmode)
1389         for (i = 2; i < GET_RTX_LENGTH (CONST_DOUBLE); i++)
1390           hash += (unsigned int) XWINT (x, i);
1391       else
1392         hash += ((unsigned int) CONST_DOUBLE_LOW (x)
1393                  + (unsigned int) CONST_DOUBLE_HIGH (x));
1394       return hash;
1395
1396       /* Assume there is only one rtx object for any given label.  */
1397     case LABEL_REF:
1398       /* We don't hash on the address of the CODE_LABEL to avoid bootstrap
1399          differences and differences between each stage's debugging dumps.  */
1400       hash += (((unsigned int) LABEL_REF << 7)
1401                + CODE_LABEL_NUMBER (XEXP (x, 0)));
1402       return hash;
1403
1404     case SYMBOL_REF:
1405       {
1406         /* Don't hash on the symbol's address to avoid bootstrap differences.
1407            Different hash values may cause expressions to be recorded in
1408            different orders and thus different registers to be used in the
1409            final assembler.  This also avoids differences in the dump files
1410            between various stages.  */
1411         unsigned int h = 0;
1412         const unsigned char *p = (const unsigned char *) XSTR (x, 0);
1413
1414         while (*p)
1415           h += (h << 7) + *p++; /* ??? revisit */
1416
1417         hash += ((unsigned int) SYMBOL_REF << 7) + h;
1418         return hash;
1419       }
1420
1421     case MEM:
1422       if (MEM_VOLATILE_P (x))
1423         {
1424           *do_not_record_p = 1;
1425           return 0;
1426         }
1427
1428       hash += (unsigned int) MEM;
1429       hash += MEM_ALIAS_SET (x);
1430       x = XEXP (x, 0);
1431       goto repeat;
1432
1433     case PRE_DEC:
1434     case PRE_INC:
1435     case POST_DEC:
1436     case POST_INC:
1437     case PC:
1438     case CC0:
1439     case CALL:
1440     case UNSPEC_VOLATILE:
1441       *do_not_record_p = 1;
1442       return 0;
1443
1444     case ASM_OPERANDS:
1445       if (MEM_VOLATILE_P (x))
1446         {
1447           *do_not_record_p = 1;
1448           return 0;
1449         }
1450
1451     default:
1452       break;
1453     }
1454
1455   hash += (unsigned) code + (unsigned) GET_MODE (x);
1456   for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
1457     {
1458       if (fmt[i] == 'e')
1459         {
1460           /* If we are about to do the last recursive call
1461              needed at this level, change it into iteration.
1462              This function is called enough to be worth it.  */
1463           if (i == 0)
1464             {
1465               x = XEXP (x, i);
1466               goto repeat;
1467             }
1468
1469           hash += hash_expr_1 (XEXP (x, i), 0, do_not_record_p);
1470           if (*do_not_record_p)
1471             return 0;
1472         }
1473
1474       else if (fmt[i] == 'E')
1475         for (j = 0; j < XVECLEN (x, i); j++)
1476           {
1477             hash += hash_expr_1 (XVECEXP (x, i, j), 0, do_not_record_p);
1478             if (*do_not_record_p)
1479               return 0;
1480           }
1481
1482       else if (fmt[i] == 's')
1483         {
1484           register const unsigned char *p =
1485             (const unsigned char *) XSTR (x, i);
1486
1487           if (p)
1488             while (*p)
1489               hash += *p++;
1490         }
1491       else if (fmt[i] == 'i')
1492         hash += (unsigned int) XINT (x, i);
1493       else
1494         abort ();
1495     }
1496
1497   return hash;
1498 }
1499
1500 /* Hash a set of register REGNO.
1501
1502    Sets are hashed on the register that is set.  This simplifies the PRE copy
1503    propagation code.
1504
1505    ??? May need to make things more elaborate.  Later, as necessary.  */
1506
1507 static unsigned int
1508 hash_set (regno, hash_table_size)
1509      int regno;
1510      int hash_table_size;
1511 {
1512   unsigned int hash;
1513
1514   hash = regno;
1515   return hash % hash_table_size;
1516 }
1517
1518 /* Return non-zero if exp1 is equivalent to exp2.
1519    ??? Borrowed from cse.c.  Might want to remerge with cse.c.  Later.  */
1520
1521 static int
1522 expr_equiv_p (x, y)
1523      rtx x, y;
1524 {
1525   register int i, j;
1526   register enum rtx_code code;
1527   register const char *fmt;
1528
1529   if (x == y)
1530     return 1;
1531
1532   if (x == 0 || y == 0)
1533     return x == y;
1534
1535   code = GET_CODE (x);
1536   if (code != GET_CODE (y))
1537     return 0;
1538
1539   /* (MULT:SI x y) and (MULT:HI x y) are NOT equivalent.  */
1540   if (GET_MODE (x) != GET_MODE (y))
1541     return 0;
1542
1543   switch (code)
1544     {
1545     case PC:
1546     case CC0:
1547       return x == y;
1548
1549     case CONST_INT:
1550       return INTVAL (x) == INTVAL (y);
1551
1552     case LABEL_REF:
1553       return XEXP (x, 0) == XEXP (y, 0);
1554
1555     case SYMBOL_REF:
1556       return XSTR (x, 0) == XSTR (y, 0);
1557
1558     case REG:
1559       return REGNO (x) == REGNO (y);
1560
1561     case MEM:
1562       /* Can't merge two expressions in different alias sets, since we can
1563          decide that the expression is transparent in a block when it isn't,
1564          due to it being set with the different alias set.  */
1565       if (MEM_ALIAS_SET (x) != MEM_ALIAS_SET (y))
1566         return 0;
1567       break;
1568
1569     /*  For commutative operations, check both orders.  */
1570     case PLUS:
1571     case MULT:
1572     case AND:
1573     case IOR:
1574     case XOR:
1575     case NE:
1576     case EQ:
1577       return ((expr_equiv_p (XEXP (x, 0), XEXP (y, 0))
1578                && expr_equiv_p (XEXP (x, 1), XEXP (y, 1)))
1579               || (expr_equiv_p (XEXP (x, 0), XEXP (y, 1))
1580                   && expr_equiv_p (XEXP (x, 1), XEXP (y, 0))));
1581
1582     default:
1583       break;
1584     }
1585
1586   /* Compare the elements.  If any pair of corresponding elements
1587      fail to match, return 0 for the whole thing.  */
1588
1589   fmt = GET_RTX_FORMAT (code);
1590   for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
1591     {
1592       switch (fmt[i])
1593         {
1594         case 'e':
1595           if (! expr_equiv_p (XEXP (x, i), XEXP (y, i)))
1596             return 0;
1597           break;
1598
1599         case 'E':
1600           if (XVECLEN (x, i) != XVECLEN (y, i))
1601             return 0;
1602           for (j = 0; j < XVECLEN (x, i); j++)
1603             if (! expr_equiv_p (XVECEXP (x, i, j), XVECEXP (y, i, j)))
1604               return 0;
1605           break;
1606
1607         case 's':
1608           if (strcmp (XSTR (x, i), XSTR (y, i)))
1609             return 0;
1610           break;
1611
1612         case 'i':
1613           if (XINT (x, i) != XINT (y, i))
1614             return 0;
1615           break;
1616
1617         case 'w':
1618           if (XWINT (x, i) != XWINT (y, i))
1619             return 0;
1620         break;
1621
1622         case '0':
1623           break;
1624
1625         default:
1626           abort ();
1627         }
1628       }
1629
1630   return 1;
1631 }
1632
1633 /* Insert expression X in INSN in the hash table.
1634    If it is already present, record it as the last occurrence in INSN's
1635    basic block.
1636
1637    MODE is the mode of the value X is being stored into.
1638    It is only used if X is a CONST_INT.
1639
1640    ANTIC_P is non-zero if X is an anticipatable expression.
1641    AVAIL_P is non-zero if X is an available expression.  */
1642
1643 static void
1644 insert_expr_in_table (x, mode, insn, antic_p, avail_p)
1645      rtx x;
1646      enum machine_mode mode;
1647      rtx insn;
1648      int antic_p, avail_p;
1649 {
1650   int found, do_not_record_p;
1651   unsigned int hash;
1652   struct expr *cur_expr, *last_expr = NULL;
1653   struct occr *antic_occr, *avail_occr;
1654   struct occr *last_occr = NULL;
1655
1656   hash = hash_expr (x, mode, &do_not_record_p, expr_hash_table_size);
1657
1658   /* Do not insert expression in table if it contains volatile operands,
1659      or if hash_expr determines the expression is something we don't want
1660      to or can't handle.  */
1661   if (do_not_record_p)
1662     return;
1663
1664   cur_expr = expr_hash_table[hash];
1665   found = 0;
1666
1667   while (cur_expr && 0 == (found = expr_equiv_p (cur_expr->expr, x)))
1668     {
1669       /* If the expression isn't found, save a pointer to the end of
1670          the list.  */
1671       last_expr = cur_expr;
1672       cur_expr = cur_expr->next_same_hash;
1673     }
1674
1675   if (! found)
1676     {
1677       cur_expr = (struct expr *) gcse_alloc (sizeof (struct expr));
1678       bytes_used += sizeof (struct expr);
1679       if (expr_hash_table[hash] == NULL)
1680         /* This is the first pattern that hashed to this index.  */
1681         expr_hash_table[hash] = cur_expr;
1682       else
1683         /* Add EXPR to end of this hash chain.  */
1684         last_expr->next_same_hash = cur_expr;
1685
1686       /* Set the fields of the expr element.  */
1687       cur_expr->expr = x;
1688       cur_expr->bitmap_index = n_exprs++;
1689       cur_expr->next_same_hash = NULL;
1690       cur_expr->antic_occr = NULL;
1691       cur_expr->avail_occr = NULL;
1692     }
1693
1694   /* Now record the occurrence(s).  */
1695   if (antic_p)
1696     {
1697       antic_occr = cur_expr->antic_occr;
1698
1699       /* Search for another occurrence in the same basic block.  */
1700       while (antic_occr && BLOCK_NUM (antic_occr->insn) != BLOCK_NUM (insn))
1701         {
1702           /* If an occurrence isn't found, save a pointer to the end of
1703              the list.  */
1704           last_occr = antic_occr;
1705           antic_occr = antic_occr->next;
1706         }
1707
1708       if (antic_occr)
1709         /* Found another instance of the expression in the same basic block.
1710            Prefer the currently recorded one.  We want the first one in the
1711            block and the block is scanned from start to end.  */
1712         ; /* nothing to do */
1713       else
1714         {
1715           /* First occurrence of this expression in this basic block.  */
1716           antic_occr = (struct occr *) gcse_alloc (sizeof (struct occr));
1717           bytes_used += sizeof (struct occr);
1718           /* First occurrence of this expression in any block?  */
1719           if (cur_expr->antic_occr == NULL)
1720             cur_expr->antic_occr = antic_occr;
1721           else
1722             last_occr->next = antic_occr;
1723
1724           antic_occr->insn = insn;
1725           antic_occr->next = NULL;
1726         }
1727     }
1728
1729   if (avail_p)
1730     {
1731       avail_occr = cur_expr->avail_occr;
1732
1733       /* Search for another occurrence in the same basic block.  */
1734       while (avail_occr && BLOCK_NUM (avail_occr->insn) != BLOCK_NUM (insn))
1735         {
1736           /* If an occurrence isn't found, save a pointer to the end of
1737              the list.  */
1738           last_occr = avail_occr;
1739           avail_occr = avail_occr->next;
1740         }
1741
1742       if (avail_occr)
1743         /* Found another instance of the expression in the same basic block.
1744            Prefer this occurrence to the currently recorded one.  We want
1745            the last one in the block and the block is scanned from start
1746            to end.  */
1747         avail_occr->insn = insn;
1748       else
1749         {
1750           /* First occurrence of this expression in this basic block.  */
1751           avail_occr = (struct occr *) gcse_alloc (sizeof (struct occr));
1752           bytes_used += sizeof (struct occr);
1753
1754           /* First occurrence of this expression in any block?  */
1755           if (cur_expr->avail_occr == NULL)
1756             cur_expr->avail_occr = avail_occr;
1757           else
1758             last_occr->next = avail_occr;
1759
1760           avail_occr->insn = insn;
1761           avail_occr->next = NULL;
1762         }
1763     }
1764 }
1765
1766 /* Insert pattern X in INSN in the hash table.
1767    X is a SET of a reg to either another reg or a constant.
1768    If it is already present, record it as the last occurrence in INSN's
1769    basic block.  */
1770
1771 static void
1772 insert_set_in_table (x, insn)
1773      rtx x;
1774      rtx insn;
1775 {
1776   int found;
1777   unsigned int hash;
1778   struct expr *cur_expr, *last_expr = NULL;
1779   struct occr *cur_occr, *last_occr = NULL;
1780
1781   if (GET_CODE (x) != SET
1782       || GET_CODE (SET_DEST (x)) != REG)
1783     abort ();
1784
1785   hash = hash_set (REGNO (SET_DEST (x)), set_hash_table_size);
1786
1787   cur_expr = set_hash_table[hash];
1788   found = 0;
1789
1790   while (cur_expr && 0 == (found = expr_equiv_p (cur_expr->expr, x)))
1791     {
1792       /* If the expression isn't found, save a pointer to the end of
1793          the list.  */
1794       last_expr = cur_expr;
1795       cur_expr = cur_expr->next_same_hash;
1796     }
1797
1798   if (! found)
1799     {
1800       cur_expr = (struct expr *) gcse_alloc (sizeof (struct expr));
1801       bytes_used += sizeof (struct expr);
1802       if (set_hash_table[hash] == NULL)
1803         /* This is the first pattern that hashed to this index.  */
1804         set_hash_table[hash] = cur_expr;
1805       else
1806         /* Add EXPR to end of this hash chain.  */
1807         last_expr->next_same_hash = cur_expr;
1808
1809       /* Set the fields of the expr element.
1810          We must copy X because it can be modified when copy propagation is
1811          performed on its operands.  */
1812       /* ??? Should this go in a different obstack?  */
1813       cur_expr->expr = copy_rtx (x);
1814       cur_expr->bitmap_index = n_sets++;
1815       cur_expr->next_same_hash = NULL;
1816       cur_expr->antic_occr = NULL;
1817       cur_expr->avail_occr = NULL;
1818     }
1819
1820   /* Now record the occurrence.  */
1821   cur_occr = cur_expr->avail_occr;
1822
1823   /* Search for another occurrence in the same basic block.  */
1824   while (cur_occr && BLOCK_NUM (cur_occr->insn) != BLOCK_NUM (insn))
1825     {
1826       /* If an occurrence isn't found, save a pointer to the end of
1827          the list.  */
1828       last_occr = cur_occr;
1829       cur_occr = cur_occr->next;
1830     }
1831
1832   if (cur_occr)
1833     /* Found another instance of the expression in the same basic block.
1834        Prefer this occurrence to the currently recorded one.  We want the
1835        last one in the block and the block is scanned from start to end.  */
1836     cur_occr->insn = insn;
1837   else
1838     {
1839       /* First occurrence of this expression in this basic block.  */
1840       cur_occr = (struct occr *) gcse_alloc (sizeof (struct occr));
1841       bytes_used += sizeof (struct occr);
1842
1843       /* First occurrence of this expression in any block?  */
1844       if (cur_expr->avail_occr == NULL)
1845         cur_expr->avail_occr = cur_occr;
1846       else
1847         last_occr->next = cur_occr;
1848
1849       cur_occr->insn = insn;
1850       cur_occr->next = NULL;
1851     }
1852 }
1853
1854 /* Scan pattern PAT of INSN and add an entry to the hash table.  If SET_P is
1855    non-zero, this is for the assignment hash table, otherwise it is for the
1856    expression hash table.  */
1857
1858 static void
1859 hash_scan_set (pat, insn, set_p)
1860      rtx pat, insn;
1861      int set_p;
1862 {
1863   rtx src = SET_SRC (pat);
1864   rtx dest = SET_DEST (pat);
1865
1866   if (GET_CODE (src) == CALL)
1867     hash_scan_call (src, insn);
1868
1869   if (GET_CODE (dest) == REG)
1870     {
1871       int regno = REGNO (dest);
1872       rtx tmp;
1873
1874       /* Only record sets of pseudo-regs in the hash table.  */
1875       if (! set_p
1876           && regno >= FIRST_PSEUDO_REGISTER
1877           /* Don't GCSE something if we can't do a reg/reg copy.  */
1878           && can_copy_p [GET_MODE (dest)]
1879           /* Is SET_SRC something we want to gcse?  */
1880           && want_to_gcse_p (src))
1881         {
1882           /* An expression is not anticipatable if its operands are
1883              modified before this insn.  */
1884           int antic_p = oprs_anticipatable_p (src, insn);
1885           /* An expression is not available if its operands are
1886              subsequently modified, including this insn.  */
1887           int avail_p = oprs_available_p (src, insn);
1888
1889           insert_expr_in_table (src, GET_MODE (dest), insn, antic_p, avail_p);
1890         }
1891
1892       /* Record sets for constant/copy propagation.  */
1893       else if (set_p
1894                && regno >= FIRST_PSEUDO_REGISTER
1895                && ((GET_CODE (src) == REG
1896                     && REGNO (src) >= FIRST_PSEUDO_REGISTER
1897                     && can_copy_p [GET_MODE (dest)])
1898                    || GET_CODE (src) == CONST_INT
1899                    || GET_CODE (src) == SYMBOL_REF
1900                    || GET_CODE (src) == CONST_DOUBLE)
1901                /* A copy is not available if its src or dest is subsequently
1902                   modified.  Here we want to search from INSN+1 on, but
1903                   oprs_available_p searches from INSN on.  */
1904                && (insn == BLOCK_END (BLOCK_NUM (insn))
1905                    || ((tmp = next_nonnote_insn (insn)) != NULL_RTX
1906                        && oprs_available_p (pat, tmp))))
1907         insert_set_in_table (pat, insn);
1908     }
1909 }
1910
1911 static void
1912 hash_scan_clobber (x, insn)
1913      rtx x ATTRIBUTE_UNUSED, insn ATTRIBUTE_UNUSED;
1914 {
1915   /* Currently nothing to do.  */
1916 }
1917
1918 static void
1919 hash_scan_call (x, insn)
1920      rtx x ATTRIBUTE_UNUSED, insn ATTRIBUTE_UNUSED;
1921 {
1922   /* Currently nothing to do.  */
1923 }
1924
1925 /* Process INSN and add hash table entries as appropriate.
1926
1927    Only available expressions that set a single pseudo-reg are recorded.
1928
1929    Single sets in a PARALLEL could be handled, but it's an extra complication
1930    that isn't dealt with right now.  The trick is handling the CLOBBERs that
1931    are also in the PARALLEL.  Later.
1932
1933    If SET_P is non-zero, this is for the assignment hash table,
1934    otherwise it is for the expression hash table.
1935    If IN_LIBCALL_BLOCK nonzero, we are in a libcall block, and should
1936    not record any expressions.  */
1937
1938 static void
1939 hash_scan_insn (insn, set_p, in_libcall_block)
1940      rtx insn;
1941      int set_p;
1942      int in_libcall_block;
1943 {
1944   rtx pat = PATTERN (insn);
1945   int i;
1946
1947   /* Pick out the sets of INSN and for other forms of instructions record
1948      what's been modified.  */
1949
1950   if (GET_CODE (pat) == SET && ! in_libcall_block)
1951     {
1952       /* Ignore obvious no-ops.  */
1953       if (SET_SRC (pat) != SET_DEST (pat))
1954         hash_scan_set (pat, insn, set_p);
1955     }
1956   else if (GET_CODE (pat) == PARALLEL)
1957     for (i = 0; i < XVECLEN (pat, 0); i++)
1958       {
1959         rtx x = XVECEXP (pat, 0, i);
1960
1961         if (GET_CODE (x) == SET)
1962           {
1963             if (GET_CODE (SET_SRC (x)) == CALL)
1964               hash_scan_call (SET_SRC (x), insn);
1965           }
1966         else if (GET_CODE (x) == CLOBBER)
1967           hash_scan_clobber (x, insn);
1968         else if (GET_CODE (x) == CALL)
1969           hash_scan_call (x, insn);
1970       }
1971
1972   else if (GET_CODE (pat) == CLOBBER)
1973     hash_scan_clobber (pat, insn);
1974   else if (GET_CODE (pat) == CALL)
1975     hash_scan_call (pat, insn);
1976 }
1977
1978 static void
1979 dump_hash_table (file, name, table, table_size, total_size)
1980      FILE *file;
1981      const char *name;
1982      struct expr **table;
1983      int table_size, total_size;
1984 {
1985   int i;
1986   /* Flattened out table, so it's printed in proper order.  */
1987   struct expr **flat_table;
1988   unsigned int *hash_val;
1989   struct expr *expr;
1990
1991   flat_table
1992     = (struct expr **) xcalloc (total_size, sizeof (struct expr *));
1993   hash_val = (unsigned int *) xmalloc (total_size * sizeof (unsigned int));
1994
1995   for (i = 0; i < table_size; i++)
1996     for (expr = table[i]; expr != NULL; expr = expr->next_same_hash)
1997       {
1998         flat_table[expr->bitmap_index] = expr;
1999         hash_val[expr->bitmap_index] = i;
2000       }
2001
2002   fprintf (file, "%s hash table (%d buckets, %d entries)\n",
2003            name, table_size, total_size);
2004
2005   for (i = 0; i < total_size; i++)
2006     if (flat_table[i] != 0)
2007       {
2008         expr = flat_table[i];
2009         fprintf (file, "Index %d (hash value %d)\n  ",
2010                  expr->bitmap_index, hash_val[i]);
2011         print_rtl (file, expr->expr);
2012         fprintf (file, "\n");
2013       }
2014
2015   fprintf (file, "\n");
2016
2017   free (flat_table);
2018   free (hash_val);
2019 }
2020
2021 /* Record register first/last/block set information for REGNO in INSN.
2022
2023    reg_first_set records the first place in the block where the register
2024    is set and is used to compute "anticipatability".
2025
2026    reg_last_set records the last place in the block where the register
2027    is set and is used to compute "availability".
2028
2029    reg_set_in_block records whether the register is set in the block
2030    and is used to compute "transparency".  */
2031
2032 static void
2033 record_last_reg_set_info (insn, regno)
2034      rtx insn;
2035      int regno;
2036 {
2037   if (reg_first_set[regno] == NEVER_SET)
2038     reg_first_set[regno] = INSN_CUID (insn);
2039
2040   reg_last_set[regno] = INSN_CUID (insn);
2041   SET_BIT (reg_set_in_block[BLOCK_NUM (insn)], regno);
2042 }
2043
2044 /* Record memory first/last/block set information for INSN.  */
2045
2046 static void
2047 record_last_mem_set_info (insn)
2048      rtx insn;
2049 {
2050   if (mem_first_set == NEVER_SET)
2051     mem_first_set = INSN_CUID (insn);
2052
2053   mem_last_set = INSN_CUID (insn);
2054   mem_set_in_block[BLOCK_NUM (insn)] = 1;
2055 }
2056
2057 /* Called from compute_hash_table via note_stores to handle one
2058    SET or CLOBBER in an insn.  DATA is really the instruction in which
2059    the SET is taking place.  */
2060
2061 static void
2062 record_last_set_info (dest, setter, data)
2063      rtx dest, setter ATTRIBUTE_UNUSED;
2064      void *data;
2065 {
2066   rtx last_set_insn = (rtx) data;
2067
2068   if (GET_CODE (dest) == SUBREG)
2069     dest = SUBREG_REG (dest);
2070
2071   if (GET_CODE (dest) == REG)
2072     record_last_reg_set_info (last_set_insn, REGNO (dest));
2073   else if (GET_CODE (dest) == MEM
2074            /* Ignore pushes, they clobber nothing.  */
2075            && ! push_operand (dest, GET_MODE (dest)))
2076     record_last_mem_set_info (last_set_insn);
2077 }
2078
2079 /* Top level function to create an expression or assignment hash table.
2080
2081    Expression entries are placed in the hash table if
2082    - they are of the form (set (pseudo-reg) src),
2083    - src is something we want to perform GCSE on,
2084    - none of the operands are subsequently modified in the block
2085
2086    Assignment entries are placed in the hash table if
2087    - they are of the form (set (pseudo-reg) src),
2088    - src is something we want to perform const/copy propagation on,
2089    - none of the operands or target are subsequently modified in the block
2090
2091    Currently src must be a pseudo-reg or a const_int.
2092
2093    F is the first insn.
2094    SET_P is non-zero for computing the assignment hash table.  */
2095
2096 static void
2097 compute_hash_table (set_p)
2098      int set_p;
2099 {
2100   int bb;
2101
2102   /* While we compute the hash table we also compute a bit array of which
2103      registers are set in which blocks.
2104      We also compute which blocks set memory, in the absence of aliasing
2105      support [which is TODO].
2106      ??? This isn't needed during const/copy propagation, but it's cheap to
2107      compute.  Later.  */
2108   sbitmap_vector_zero (reg_set_in_block, n_basic_blocks);
2109   bzero ((char *) mem_set_in_block, n_basic_blocks);
2110
2111   /* Some working arrays used to track first and last set in each block.  */
2112   /* ??? One could use alloca here, but at some size a threshold is crossed
2113      beyond which one should use malloc.  Are we at that threshold here?  */
2114   reg_first_set = (int *) gmalloc (max_gcse_regno * sizeof (int));
2115   reg_last_set = (int *) gmalloc (max_gcse_regno * sizeof (int));
2116
2117   for (bb = 0; bb < n_basic_blocks; bb++)
2118     {
2119       rtx insn;
2120       unsigned int regno;
2121       int in_libcall_block;
2122       unsigned int i;
2123
2124       /* First pass over the instructions records information used to
2125          determine when registers and memory are first and last set.
2126          ??? The mem_set_in_block and hard-reg reg_set_in_block computation
2127          could be moved to compute_sets since they currently don't change.  */
2128
2129       for (i = 0; i < max_gcse_regno; i++)
2130         reg_first_set[i] = reg_last_set[i] = NEVER_SET;
2131
2132       mem_first_set = NEVER_SET;
2133       mem_last_set = NEVER_SET;
2134
2135       for (insn = BLOCK_HEAD (bb);
2136            insn && insn != NEXT_INSN (BLOCK_END (bb));
2137            insn = NEXT_INSN (insn))
2138         {
2139 #ifdef NON_SAVING_SETJMP
2140           if (NON_SAVING_SETJMP && GET_CODE (insn) == NOTE
2141               && NOTE_LINE_NUMBER (insn) == NOTE_INSN_SETJMP)
2142             {
2143               for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2144                 record_last_reg_set_info (insn, regno);
2145               continue;
2146             }
2147 #endif
2148
2149           if (GET_RTX_CLASS (GET_CODE (insn)) != 'i')
2150             continue;
2151
2152           if (GET_CODE (insn) == CALL_INSN)
2153             {
2154               for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2155                 if ((call_used_regs[regno]
2156                      && regno != STACK_POINTER_REGNUM
2157 #if HARD_FRAME_POINTER_REGNUM != FRAME_POINTER_REGNUM
2158                      && regno != HARD_FRAME_POINTER_REGNUM
2159 #endif
2160 #if ARG_POINTER_REGNUM != FRAME_POINTER_REGNUM
2161                      && ! (regno == ARG_POINTER_REGNUM && fixed_regs[regno])
2162 #endif
2163 #if defined (PIC_OFFSET_TABLE_REGNUM) && !defined (PIC_OFFSET_TABLE_REG_CALL_CLOBBERED)
2164                      && ! (regno == PIC_OFFSET_TABLE_REGNUM && flag_pic)
2165 #endif
2166
2167                      && regno != FRAME_POINTER_REGNUM)
2168                     || global_regs[regno])
2169                   record_last_reg_set_info (insn, regno);
2170
2171               if (! CONST_CALL_P (insn))
2172                 record_last_mem_set_info (insn);
2173             }
2174
2175           note_stores (PATTERN (insn), record_last_set_info, insn);
2176         }
2177
2178       /* The next pass builds the hash table.  */
2179
2180       for (insn = BLOCK_HEAD (bb), in_libcall_block = 0;
2181            insn && insn != NEXT_INSN (BLOCK_END (bb));
2182            insn = NEXT_INSN (insn))
2183         if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
2184           {
2185             if (find_reg_note (insn, REG_LIBCALL, NULL_RTX))
2186               in_libcall_block = 1;
2187             else if (find_reg_note (insn, REG_RETVAL, NULL_RTX))
2188               in_libcall_block = 0;
2189             hash_scan_insn (insn, set_p, in_libcall_block);
2190         }
2191     }
2192
2193   free (reg_first_set);
2194   free (reg_last_set);
2195
2196   /* Catch bugs early.  */
2197   reg_first_set = reg_last_set = 0;
2198 }
2199
2200 /* Allocate space for the set hash table.
2201    N_INSNS is the number of instructions in the function.
2202    It is used to determine the number of buckets to use.  */
2203
2204 static void
2205 alloc_set_hash_table (n_insns)
2206      int n_insns;
2207 {
2208   int n;
2209
2210   set_hash_table_size = n_insns / 4;
2211   if (set_hash_table_size < 11)
2212     set_hash_table_size = 11;
2213
2214   /* Attempt to maintain efficient use of hash table.
2215      Making it an odd number is simplest for now.
2216      ??? Later take some measurements.  */
2217   set_hash_table_size |= 1;
2218   n = set_hash_table_size * sizeof (struct expr *);
2219   set_hash_table = (struct expr **) gmalloc (n);
2220 }
2221
2222 /* Free things allocated by alloc_set_hash_table.  */
2223
2224 static void
2225 free_set_hash_table ()
2226 {
2227   free (set_hash_table);
2228 }
2229
2230 /* Compute the hash table for doing copy/const propagation.  */
2231
2232 static void
2233 compute_set_hash_table ()
2234 {
2235   /* Initialize count of number of entries in hash table.  */
2236   n_sets = 0;
2237   bzero ((char *) set_hash_table,
2238          set_hash_table_size * sizeof (struct expr *));
2239
2240   compute_hash_table (1);
2241 }
2242
2243 /* Allocate space for the expression hash table.
2244    N_INSNS is the number of instructions in the function.
2245    It is used to determine the number of buckets to use.  */
2246
2247 static void
2248 alloc_expr_hash_table (n_insns)
2249      unsigned int n_insns;
2250 {
2251   int n;
2252
2253   expr_hash_table_size = n_insns / 2;
2254   /* Make sure the amount is usable.  */
2255   if (expr_hash_table_size < 11)
2256     expr_hash_table_size = 11;
2257
2258   /* Attempt to maintain efficient use of hash table.
2259      Making it an odd number is simplest for now.
2260      ??? Later take some measurements.  */
2261   expr_hash_table_size |= 1;
2262   n = expr_hash_table_size * sizeof (struct expr *);
2263   expr_hash_table = (struct expr **) gmalloc (n);
2264 }
2265
2266 /* Free things allocated by alloc_expr_hash_table.  */
2267
2268 static void
2269 free_expr_hash_table ()
2270 {
2271   free (expr_hash_table);
2272 }
2273
2274 /* Compute the hash table for doing GCSE.  */
2275
2276 static void
2277 compute_expr_hash_table ()
2278 {
2279   /* Initialize count of number of entries in hash table.  */
2280   n_exprs = 0;
2281   bzero ((char *) expr_hash_table,
2282          expr_hash_table_size * sizeof (struct expr *));
2283
2284   compute_hash_table (0);
2285 }
2286 \f
2287 /* Expression tracking support.  */
2288
2289 /* Lookup pattern PAT in the expression table.
2290    The result is a pointer to the table entry, or NULL if not found.  */
2291
2292 static struct expr *
2293 lookup_expr (pat)
2294      rtx pat;
2295 {
2296   int do_not_record_p;
2297   unsigned int hash = hash_expr (pat, GET_MODE (pat), &do_not_record_p,
2298                                  expr_hash_table_size);
2299   struct expr *expr;
2300
2301   if (do_not_record_p)
2302     return NULL;
2303
2304   expr = expr_hash_table[hash];
2305
2306   while (expr && ! expr_equiv_p (expr->expr, pat))
2307     expr = expr->next_same_hash;
2308
2309   return expr;
2310 }
2311
2312 /* Lookup REGNO in the set table.  If PAT is non-NULL look for the entry that
2313    matches it, otherwise return the first entry for REGNO.  The result is a
2314    pointer to the table entry, or NULL if not found.  */
2315
2316 static struct expr *
2317 lookup_set (regno, pat)
2318      unsigned int regno;
2319      rtx pat;
2320 {
2321   unsigned int hash = hash_set (regno, set_hash_table_size);
2322   struct expr *expr;
2323
2324   expr = set_hash_table[hash];
2325
2326   if (pat)
2327     {
2328       while (expr && ! expr_equiv_p (expr->expr, pat))
2329         expr = expr->next_same_hash;
2330     }
2331   else
2332     {
2333       while (expr && REGNO (SET_DEST (expr->expr)) != regno)
2334         expr = expr->next_same_hash;
2335     }
2336
2337   return expr;
2338 }
2339
2340 /* Return the next entry for REGNO in list EXPR.  */
2341
2342 static struct expr *
2343 next_set (regno, expr)
2344      unsigned int regno;
2345      struct expr *expr;
2346 {
2347   do
2348     expr = expr->next_same_hash;
2349   while (expr && REGNO (SET_DEST (expr->expr)) != regno);
2350
2351   return expr;
2352 }
2353
2354 /* Reset tables used to keep track of what's still available [since the
2355    start of the block].  */
2356
2357 static void
2358 reset_opr_set_tables ()
2359 {
2360   /* Maintain a bitmap of which regs have been set since beginning of
2361      the block.  */
2362   sbitmap_zero (reg_set_bitmap);
2363
2364   /* Also keep a record of the last instruction to modify memory.
2365      For now this is very trivial, we only record whether any memory
2366      location has been modified.  */
2367   mem_last_set = 0;
2368 }
2369
2370 /* Return non-zero if the operands of X are not set before INSN in
2371    INSN's basic block.  */
2372
2373 static int
2374 oprs_not_set_p (x, insn)
2375      rtx x, insn;
2376 {
2377   int i, j;
2378   enum rtx_code code;
2379   const char *fmt;
2380
2381   if (x == 0)
2382     return 1;
2383
2384   code = GET_CODE (x);
2385   switch (code)
2386     {
2387     case PC:
2388     case CC0:
2389     case CONST:
2390     case CONST_INT:
2391     case CONST_DOUBLE:
2392     case SYMBOL_REF:
2393     case LABEL_REF:
2394     case ADDR_VEC:
2395     case ADDR_DIFF_VEC:
2396       return 1;
2397
2398     case MEM:
2399       if (mem_last_set != 0)
2400         return 0;
2401       else
2402         return oprs_not_set_p (XEXP (x, 0), insn);
2403
2404     case REG:
2405       return ! TEST_BIT (reg_set_bitmap, REGNO (x));
2406
2407     default:
2408       break;
2409     }
2410
2411   for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
2412     {
2413       if (fmt[i] == 'e')
2414         {
2415           /* If we are about to do the last recursive call
2416              needed at this level, change it into iteration.
2417              This function is called enough to be worth it.  */
2418           if (i == 0)
2419             return oprs_not_set_p (XEXP (x, i), insn);
2420
2421           if (! oprs_not_set_p (XEXP (x, i), insn))
2422             return 0;
2423         }
2424       else if (fmt[i] == 'E')
2425         for (j = 0; j < XVECLEN (x, i); j++)
2426           if (! oprs_not_set_p (XVECEXP (x, i, j), insn))
2427             return 0;
2428     }
2429
2430   return 1;
2431 }
2432
2433 /* Mark things set by a CALL.  */
2434
2435 static void
2436 mark_call (insn)
2437      rtx insn;
2438 {
2439   mem_last_set = INSN_CUID (insn);
2440 }
2441
2442 /* Mark things set by a SET.  */
2443
2444 static void
2445 mark_set (pat, insn)
2446      rtx pat, insn;
2447 {
2448   rtx dest = SET_DEST (pat);
2449
2450   while (GET_CODE (dest) == SUBREG
2451          || GET_CODE (dest) == ZERO_EXTRACT
2452          || GET_CODE (dest) == SIGN_EXTRACT
2453          || GET_CODE (dest) == STRICT_LOW_PART)
2454     dest = XEXP (dest, 0);
2455
2456   if (GET_CODE (dest) == REG)
2457     SET_BIT (reg_set_bitmap, REGNO (dest));
2458   else if (GET_CODE (dest) == MEM)
2459     mem_last_set = INSN_CUID (insn);
2460
2461   if (GET_CODE (SET_SRC (pat)) == CALL)
2462     mark_call (insn);
2463 }
2464
2465 /* Record things set by a CLOBBER.  */
2466
2467 static void
2468 mark_clobber (pat, insn)
2469      rtx pat, insn;
2470 {
2471   rtx clob = XEXP (pat, 0);
2472
2473   while (GET_CODE (clob) == SUBREG || GET_CODE (clob) == STRICT_LOW_PART)
2474     clob = XEXP (clob, 0);
2475
2476   if (GET_CODE (clob) == REG)
2477     SET_BIT (reg_set_bitmap, REGNO (clob));
2478   else
2479     mem_last_set = INSN_CUID (insn);
2480 }
2481
2482 /* Record things set by INSN.
2483    This data is used by oprs_not_set_p.  */
2484
2485 static void
2486 mark_oprs_set (insn)
2487      rtx insn;
2488 {
2489   rtx pat = PATTERN (insn);
2490   int i;
2491
2492   if (GET_CODE (pat) == SET)
2493     mark_set (pat, insn);
2494   else if (GET_CODE (pat) == PARALLEL)
2495     for (i = 0; i < XVECLEN (pat, 0); i++)
2496       {
2497         rtx x = XVECEXP (pat, 0, i);
2498
2499         if (GET_CODE (x) == SET)
2500           mark_set (x, insn);
2501         else if (GET_CODE (x) == CLOBBER)
2502           mark_clobber (x, insn);
2503         else if (GET_CODE (x) == CALL)
2504           mark_call (insn);
2505       }
2506
2507   else if (GET_CODE (pat) == CLOBBER)
2508     mark_clobber (pat, insn);
2509   else if (GET_CODE (pat) == CALL)
2510     mark_call (insn);
2511 }
2512
2513 \f
2514 /* Classic GCSE reaching definition support.  */
2515
2516 /* Allocate reaching def variables.  */
2517
2518 static void
2519 alloc_rd_mem (n_blocks, n_insns)
2520      int n_blocks, n_insns;
2521 {
2522   rd_kill = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2523   sbitmap_vector_zero (rd_kill, n_basic_blocks);
2524
2525   rd_gen = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2526   sbitmap_vector_zero (rd_gen, n_basic_blocks);
2527
2528   reaching_defs = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2529   sbitmap_vector_zero (reaching_defs, n_basic_blocks);
2530
2531   rd_out = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2532   sbitmap_vector_zero (rd_out, n_basic_blocks);
2533 }
2534
2535 /* Free reaching def variables.  */
2536
2537 static void
2538 free_rd_mem ()
2539 {
2540   free (rd_kill);
2541   free (rd_gen);
2542   free (reaching_defs);
2543   free (rd_out);
2544 }
2545
2546 /* Add INSN to the kills of BB.  REGNO, set in BB, is killed by INSN.  */
2547
2548 static void
2549 handle_rd_kill_set (insn, regno, bb)
2550      rtx insn;
2551      int regno, bb;
2552 {
2553   struct reg_set *this_reg;
2554
2555   for (this_reg = reg_set_table[regno]; this_reg; this_reg = this_reg ->next)
2556     if (BLOCK_NUM (this_reg->insn) != BLOCK_NUM (insn))
2557       SET_BIT (rd_kill[bb], INSN_CUID (this_reg->insn));
2558 }
2559
2560 /* Compute the set of kill's for reaching definitions.  */
2561
2562 static void
2563 compute_kill_rd ()
2564 {
2565   int bb, cuid;
2566   int regno, i;
2567
2568   /* For each block
2569        For each set bit in `gen' of the block (i.e each insn which
2570            generates a definition in the block)
2571          Call the reg set by the insn corresponding to that bit regx
2572          Look at the linked list starting at reg_set_table[regx]
2573          For each setting of regx in the linked list, which is not in
2574              this block
2575            Set the bit in `kill' corresponding to that insn.   */
2576   for (bb = 0; bb < n_basic_blocks; bb++)
2577     for (cuid = 0; cuid < max_cuid; cuid++)
2578       if (TEST_BIT (rd_gen[bb], cuid))
2579         {
2580           rtx insn = CUID_INSN (cuid);
2581           rtx pat = PATTERN (insn);
2582
2583           if (GET_CODE (insn) == CALL_INSN)
2584             {
2585               for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2586                 {
2587                   if ((call_used_regs[regno]
2588                        && regno != STACK_POINTER_REGNUM
2589 #if HARD_FRAME_POINTER_REGNUM != FRAME_POINTER_REGNUM
2590                        && regno != HARD_FRAME_POINTER_REGNUM
2591 #endif
2592 #if ARG_POINTER_REGNUM != FRAME_POINTER_REGNUM
2593                        && ! (regno == ARG_POINTER_REGNUM
2594                              && fixed_regs[regno])
2595 #endif
2596 #if defined (PIC_OFFSET_TABLE_REGNUM) && !defined (PIC_OFFSET_TABLE_REG_CALL_CLOBBERED)
2597                        && ! (regno == PIC_OFFSET_TABLE_REGNUM && flag_pic)
2598 #endif
2599                        && regno != FRAME_POINTER_REGNUM)
2600                       || global_regs[regno])
2601                     handle_rd_kill_set (insn, regno, bb);
2602                 }
2603             }
2604
2605           if (GET_CODE (pat) == PARALLEL)
2606             {
2607               for (i = XVECLEN (pat, 0) - 1; i >= 0; i--)
2608                 {
2609                   enum rtx_code code = GET_CODE (XVECEXP (pat, 0, i));
2610
2611                   if ((code == SET || code == CLOBBER)
2612                       && GET_CODE (XEXP (XVECEXP (pat, 0, i), 0)) == REG)
2613                     handle_rd_kill_set (insn,
2614                                         REGNO (XEXP (XVECEXP (pat, 0, i), 0)),
2615                                         bb);
2616                 }
2617             }
2618           else if (GET_CODE (pat) == SET && GET_CODE (SET_DEST (pat)) == REG)
2619             /* Each setting of this register outside of this block
2620                must be marked in the set of kills in this block.  */
2621             handle_rd_kill_set (insn, REGNO (SET_DEST (pat)), bb);
2622         }
2623 }
2624
2625 /* Compute the reaching definitions as in
2626    Compilers Principles, Techniques, and Tools. Aho, Sethi, Ullman,
2627    Chapter 10.  It is the same algorithm as used for computing available
2628    expressions but applied to the gens and kills of reaching definitions.  */
2629
2630 static void
2631 compute_rd ()
2632 {
2633   int bb, changed, passes;
2634
2635   for (bb = 0; bb < n_basic_blocks; bb++)
2636     sbitmap_copy (rd_out[bb] /*dst*/, rd_gen[bb] /*src*/);
2637
2638   passes = 0;
2639   changed = 1;
2640   while (changed)
2641     {
2642       changed = 0;
2643       for (bb = 0; bb < n_basic_blocks; bb++)
2644         {
2645           sbitmap_union_of_preds (reaching_defs[bb], rd_out, bb);
2646           changed |= sbitmap_union_of_diff (rd_out[bb], rd_gen[bb],
2647                                             reaching_defs[bb], rd_kill[bb]);
2648         }
2649       passes++;
2650     }
2651
2652   if (gcse_file)
2653     fprintf (gcse_file, "reaching def computation: %d passes\n", passes);
2654 }
2655 \f
2656 /* Classic GCSE available expression support.  */
2657
2658 /* Allocate memory for available expression computation.  */
2659
2660 static void
2661 alloc_avail_expr_mem (n_blocks, n_exprs)
2662      int n_blocks, n_exprs;
2663 {
2664   ae_kill = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2665   sbitmap_vector_zero (ae_kill, n_basic_blocks);
2666
2667   ae_gen = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2668   sbitmap_vector_zero (ae_gen, n_basic_blocks);
2669
2670   ae_in = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2671   sbitmap_vector_zero (ae_in, n_basic_blocks);
2672
2673   ae_out = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2674   sbitmap_vector_zero (ae_out, n_basic_blocks);
2675
2676   u_bitmap = (sbitmap) sbitmap_alloc (n_exprs);
2677   sbitmap_ones (u_bitmap);
2678 }
2679
2680 static void
2681 free_avail_expr_mem ()
2682 {
2683   free (ae_kill);
2684   free (ae_gen);
2685   free (ae_in);
2686   free (ae_out);
2687   free (u_bitmap);
2688 }
2689
2690 /* Compute the set of available expressions generated in each basic block.  */
2691
2692 static void
2693 compute_ae_gen ()
2694 {
2695   unsigned int i;
2696   struct expr *expr;
2697   struct occr *occr;
2698
2699   /* For each recorded occurrence of each expression, set ae_gen[bb][expr].
2700      This is all we have to do because an expression is not recorded if it
2701      is not available, and the only expressions we want to work with are the
2702      ones that are recorded.  */
2703   for (i = 0; i < expr_hash_table_size; i++)
2704     for (expr = expr_hash_table[i]; expr != 0; expr = expr->next_same_hash)
2705       for (occr = expr->avail_occr; occr != 0; occr = occr->next)
2706         SET_BIT (ae_gen[BLOCK_NUM (occr->insn)], expr->bitmap_index);
2707 }
2708
2709 /* Return non-zero if expression X is killed in BB.  */
2710
2711 static int
2712 expr_killed_p (x, bb)
2713      rtx x;
2714      int bb;
2715 {
2716   int i, j;
2717   enum rtx_code code;
2718   const char *fmt;
2719
2720   if (x == 0)
2721     return 1;
2722
2723   code = GET_CODE (x);
2724   switch (code)
2725     {
2726     case REG:
2727       return TEST_BIT (reg_set_in_block[bb], REGNO (x));
2728
2729     case MEM:
2730       if (mem_set_in_block[bb])
2731         return 1;
2732       else
2733         return expr_killed_p (XEXP (x, 0), bb);
2734
2735     case PC:
2736     case CC0: /*FIXME*/
2737     case CONST:
2738     case CONST_INT:
2739     case CONST_DOUBLE:
2740     case SYMBOL_REF:
2741     case LABEL_REF:
2742     case ADDR_VEC:
2743     case ADDR_DIFF_VEC:
2744       return 0;
2745
2746     default:
2747       break;
2748     }
2749
2750   for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
2751     {
2752       if (fmt[i] == 'e')
2753         {
2754           /* If we are about to do the last recursive call
2755              needed at this level, change it into iteration.
2756              This function is called enough to be worth it.  */
2757           if (i == 0)
2758             return expr_killed_p (XEXP (x, i), bb);
2759           else if (expr_killed_p (XEXP (x, i), bb))
2760             return 1;
2761         }
2762       else if (fmt[i] == 'E')
2763         for (j = 0; j < XVECLEN (x, i); j++)
2764           if (expr_killed_p (XVECEXP (x, i, j), bb))
2765             return 1;
2766     }
2767
2768   return 0;
2769 }
2770
2771 /* Compute the set of available expressions killed in each basic block.  */
2772
2773 static void
2774 compute_ae_kill (ae_gen, ae_kill)
2775      sbitmap *ae_gen, *ae_kill;
2776 {
2777   int bb;
2778   unsigned int i;
2779   struct expr *expr;
2780
2781   for (bb = 0; bb < n_basic_blocks; bb++)
2782     for (i = 0; i < expr_hash_table_size; i++)
2783       for (expr = expr_hash_table[i]; expr; expr = expr->next_same_hash)
2784         {
2785           /* Skip EXPR if generated in this block.  */
2786           if (TEST_BIT (ae_gen[bb], expr->bitmap_index))
2787             continue;
2788
2789           if (expr_killed_p (expr->expr, bb))
2790             SET_BIT (ae_kill[bb], expr->bitmap_index);
2791         }
2792 }
2793 \f
2794 /* Actually perform the Classic GCSE optimizations.  */
2795
2796 /* Return non-zero if occurrence OCCR of expression EXPR reaches block BB.
2797
2798    CHECK_SELF_LOOP is non-zero if we should consider a block reaching itself
2799    as a positive reach.  We want to do this when there are two computations
2800    of the expression in the block.
2801
2802    VISITED is a pointer to a working buffer for tracking which BB's have
2803    been visited.  It is NULL for the top-level call.
2804
2805    We treat reaching expressions that go through blocks containing the same
2806    reaching expression as "not reaching".  E.g. if EXPR is generated in blocks
2807    2 and 3, INSN is in block 4, and 2->3->4, we treat the expression in block
2808    2 as not reaching.  The intent is to improve the probability of finding
2809    only one reaching expression and to reduce register lifetimes by picking
2810    the closest such expression.  */
2811
2812 static int
2813 expr_reaches_here_p_work (occr, expr, bb, check_self_loop, visited)
2814      struct occr *occr;
2815      struct expr *expr;
2816      int bb;
2817      int check_self_loop;
2818      char *visited;
2819 {
2820   edge pred;
2821
2822   for (pred = BASIC_BLOCK(bb)->pred; pred != NULL; pred = pred->pred_next)
2823     {
2824       int pred_bb = pred->src->index;
2825
2826       if (visited[pred_bb])
2827         /* This predecessor has already been visited. Nothing to do.  */
2828           ;
2829       else if (pred_bb == bb)
2830         {
2831           /* BB loops on itself.  */
2832           if (check_self_loop
2833               && TEST_BIT (ae_gen[pred_bb], expr->bitmap_index)
2834               && BLOCK_NUM (occr->insn) == pred_bb)
2835             return 1;
2836
2837           visited[pred_bb] = 1;
2838         }
2839
2840       /* Ignore this predecessor if it kills the expression.  */
2841       else if (TEST_BIT (ae_kill[pred_bb], expr->bitmap_index))
2842         visited[pred_bb] = 1;
2843
2844       /* Does this predecessor generate this expression?  */
2845       else if (TEST_BIT (ae_gen[pred_bb], expr->bitmap_index))
2846         {
2847           /* Is this the occurrence we're looking for?
2848              Note that there's only one generating occurrence per block
2849              so we just need to check the block number.  */
2850           if (BLOCK_NUM (occr->insn) == pred_bb)
2851             return 1;
2852
2853           visited[pred_bb] = 1;
2854         }
2855
2856       /* Neither gen nor kill.  */
2857       else
2858         {
2859           visited[pred_bb] = 1;
2860           if (expr_reaches_here_p_work (occr, expr, pred_bb, check_self_loop,
2861               visited))
2862
2863             return 1;
2864         }
2865     }
2866
2867   /* All paths have been checked.  */
2868   return 0;
2869 }
2870
2871 /* This wrapper for expr_reaches_here_p_work() is to ensure that any
2872    memory allocated for that function is returned. */
2873
2874 static int
2875 expr_reaches_here_p (occr, expr, bb, check_self_loop)
2876      struct occr *occr;
2877      struct expr *expr;
2878      int bb;
2879      int check_self_loop;
2880 {
2881   int rval;
2882   char *visited = (char *) xcalloc (n_basic_blocks, 1);
2883
2884   rval = expr_reaches_here_p_work (occr, expr, bb, check_self_loop, visited);
2885
2886   free (visited);
2887   return rval;
2888 }
2889
2890 /* Return the instruction that computes EXPR that reaches INSN's basic block.
2891    If there is more than one such instruction, return NULL.
2892
2893    Called only by handle_avail_expr.  */
2894
2895 static rtx
2896 computing_insn (expr, insn)
2897      struct expr *expr;
2898      rtx insn;
2899 {
2900   int bb = BLOCK_NUM (insn);
2901
2902   if (expr->avail_occr->next == NULL)
2903     {
2904       if (BLOCK_NUM (expr->avail_occr->insn) == bb)
2905         /* The available expression is actually itself
2906            (i.e. a loop in the flow graph) so do nothing.  */
2907         return NULL;
2908
2909       /* (FIXME) Case that we found a pattern that was created by
2910          a substitution that took place.  */
2911       return expr->avail_occr->insn;
2912     }
2913   else
2914     {
2915       /* Pattern is computed more than once.
2916          Search backwards from this insn to see how many of these
2917          computations actually reach this insn.  */
2918       struct occr *occr;
2919       rtx insn_computes_expr = NULL;
2920       int can_reach = 0;
2921
2922       for (occr = expr->avail_occr; occr != NULL; occr = occr->next)
2923         {
2924           if (BLOCK_NUM (occr->insn) == bb)
2925             {
2926               /* The expression is generated in this block.
2927                  The only time we care about this is when the expression
2928                  is generated later in the block [and thus there's a loop].
2929                  We let the normal cse pass handle the other cases.  */
2930               if (INSN_CUID (insn) < INSN_CUID (occr->insn)
2931                   && expr_reaches_here_p (occr, expr, bb, 1))
2932                 {
2933                   can_reach++;
2934                   if (can_reach > 1)
2935                     return NULL;
2936
2937                   insn_computes_expr = occr->insn;
2938                 }
2939             }
2940           else if (expr_reaches_here_p (occr, expr, bb, 0))
2941             {
2942               can_reach++;
2943               if (can_reach > 1)
2944                 return NULL;
2945
2946               insn_computes_expr = occr->insn;
2947             }
2948         }
2949
2950       if (insn_computes_expr == NULL)
2951         abort ();
2952
2953       return insn_computes_expr;
2954     }
2955 }
2956
2957 /* Return non-zero if the definition in DEF_INSN can reach INSN.
2958    Only called by can_disregard_other_sets.  */
2959
2960 static int
2961 def_reaches_here_p (insn, def_insn)
2962      rtx insn, def_insn;
2963 {
2964   rtx reg;
2965
2966   if (TEST_BIT (reaching_defs[BLOCK_NUM (insn)], INSN_CUID (def_insn)))
2967     return 1;
2968
2969   if (BLOCK_NUM (insn) == BLOCK_NUM (def_insn))
2970     {
2971       if (INSN_CUID (def_insn) < INSN_CUID (insn))
2972         {
2973           if (GET_CODE (PATTERN (def_insn)) == PARALLEL)
2974             return 1;
2975           else if (GET_CODE (PATTERN (def_insn)) == CLOBBER)
2976             reg = XEXP (PATTERN (def_insn), 0);
2977           else if (GET_CODE (PATTERN (def_insn)) == SET)
2978             reg = SET_DEST (PATTERN (def_insn));
2979           else
2980             abort ();
2981
2982           return ! reg_set_between_p (reg, NEXT_INSN (def_insn), insn);
2983         }
2984       else
2985         return 0;
2986     }
2987
2988   return 0;
2989 }
2990
2991 /* Return non-zero if *ADDR_THIS_REG can only have one value at INSN.  The
2992    value returned is the number of definitions that reach INSN.  Returning a
2993    value of zero means that [maybe] more than one definition reaches INSN and
2994    the caller can't perform whatever optimization it is trying.  i.e. it is
2995    always safe to return zero.  */
2996
2997 static int
2998 can_disregard_other_sets (addr_this_reg, insn, for_combine)
2999      struct reg_set **addr_this_reg;
3000      rtx insn;
3001      int for_combine;
3002 {
3003   int number_of_reaching_defs = 0;
3004   struct reg_set *this_reg;
3005
3006   for (this_reg = *addr_this_reg; this_reg != 0; this_reg = this_reg->next)
3007     if (def_reaches_here_p (insn, this_reg->insn))
3008       {
3009         number_of_reaching_defs++;
3010         /* Ignore parallels for now.  */
3011         if (GET_CODE (PATTERN (this_reg->insn)) == PARALLEL)
3012           return 0;
3013
3014         if (!for_combine
3015             && (GET_CODE (PATTERN (this_reg->insn)) == CLOBBER
3016                 || ! rtx_equal_p (SET_SRC (PATTERN (this_reg->insn)),
3017                                   SET_SRC (PATTERN (insn)))))
3018           /* A setting of the reg to a different value reaches INSN.  */
3019           return 0;
3020
3021         if (number_of_reaching_defs > 1)
3022           {
3023             /* If in this setting the value the register is being set to is
3024                equal to the previous value the register was set to and this
3025                setting reaches the insn we are trying to do the substitution
3026                on then we are ok.  */
3027             if (GET_CODE (PATTERN (this_reg->insn)) == CLOBBER)
3028               return 0;
3029             else if (! rtx_equal_p (SET_SRC (PATTERN (this_reg->insn)),
3030                                     SET_SRC (PATTERN (insn))))
3031               return 0;
3032           }
3033
3034         *addr_this_reg = this_reg;
3035       }
3036
3037   return number_of_reaching_defs;
3038 }
3039
3040 /* Expression computed by insn is available and the substitution is legal,
3041    so try to perform the substitution.
3042
3043    The result is non-zero if any changes were made.  */
3044
3045 static int
3046 handle_avail_expr (insn, expr)
3047      rtx insn;
3048      struct expr *expr;
3049 {
3050   rtx pat, insn_computes_expr;
3051   rtx to;
3052   struct reg_set *this_reg;
3053   int found_setting, use_src;
3054   int changed = 0;
3055
3056   /* We only handle the case where one computation of the expression
3057      reaches this instruction.  */
3058   insn_computes_expr = computing_insn (expr, insn);
3059   if (insn_computes_expr == NULL)
3060     return 0;
3061
3062   found_setting = 0;
3063   use_src = 0;
3064
3065   /* At this point we know only one computation of EXPR outside of this
3066      block reaches this insn.  Now try to find a register that the
3067      expression is computed into.  */
3068   if (GET_CODE (SET_SRC (PATTERN (insn_computes_expr))) == REG)
3069     {
3070       /* This is the case when the available expression that reaches
3071          here has already been handled as an available expression.  */
3072       unsigned int regnum_for_replacing
3073         = REGNO (SET_SRC (PATTERN (insn_computes_expr)));
3074
3075       /* If the register was created by GCSE we can't use `reg_set_table',
3076          however we know it's set only once.  */
3077       if (regnum_for_replacing >= max_gcse_regno
3078           /* If the register the expression is computed into is set only once,
3079              or only one set reaches this insn, we can use it.  */
3080           || (((this_reg = reg_set_table[regnum_for_replacing]),
3081                this_reg->next == NULL)
3082               || can_disregard_other_sets (&this_reg, insn, 0)))
3083        {
3084          use_src = 1;
3085          found_setting = 1;
3086        }
3087     }
3088
3089   if (!found_setting)
3090     {
3091       unsigned int regnum_for_replacing
3092         = REGNO (SET_DEST (PATTERN (insn_computes_expr)));
3093
3094       /* This shouldn't happen.  */
3095       if (regnum_for_replacing >= max_gcse_regno)
3096         abort ();
3097
3098       this_reg = reg_set_table[regnum_for_replacing];
3099
3100       /* If the register the expression is computed into is set only once,
3101          or only one set reaches this insn, use it.  */
3102       if (this_reg->next == NULL
3103           || can_disregard_other_sets (&this_reg, insn, 0))
3104         found_setting = 1;
3105     }
3106
3107   if (found_setting)
3108     {
3109       pat = PATTERN (insn);
3110       if (use_src)
3111         to = SET_SRC (PATTERN (insn_computes_expr));
3112       else
3113         to = SET_DEST (PATTERN (insn_computes_expr));
3114       changed = validate_change (insn, &SET_SRC (pat), to, 0);
3115
3116       /* We should be able to ignore the return code from validate_change but
3117          to play it safe we check.  */
3118       if (changed)
3119         {
3120           gcse_subst_count++;
3121           if (gcse_file != NULL)
3122             {
3123               fprintf (gcse_file, "GCSE: Replacing the source in insn %d with",
3124                        INSN_UID (insn));
3125               fprintf (gcse_file, " reg %d %s insn %d\n",
3126                        REGNO (to), use_src ? "from" : "set in",
3127                        INSN_UID (insn_computes_expr));
3128             }
3129         }
3130     }
3131
3132   /* The register that the expr is computed into is set more than once.  */
3133   else if (1 /*expensive_op(this_pattrn->op) && do_expensive_gcse)*/)
3134     {
3135       /* Insert an insn after insnx that copies the reg set in insnx
3136          into a new pseudo register call this new register REGN.
3137          From insnb until end of basic block or until REGB is set
3138          replace all uses of REGB with REGN.  */
3139       rtx new_insn;
3140
3141       to = gen_reg_rtx (GET_MODE (SET_DEST (PATTERN (insn_computes_expr))));
3142
3143       /* Generate the new insn.  */
3144       /* ??? If the change fails, we return 0, even though we created
3145          an insn.  I think this is ok.  */
3146       new_insn
3147         = emit_insn_after (gen_rtx_SET (VOIDmode, to,
3148                                         SET_DEST (PATTERN
3149                                                   (insn_computes_expr))),
3150                            insn_computes_expr);
3151
3152       /* Keep block number table up to date.  */
3153       set_block_num (new_insn, BLOCK_NUM (insn_computes_expr));
3154
3155       /* Keep register set table up to date.  */
3156       record_one_set (REGNO (to), new_insn);
3157
3158       gcse_create_count++;
3159       if (gcse_file != NULL)
3160         {
3161           fprintf (gcse_file, "GCSE: Creating insn %d to copy value of reg %d",
3162                    INSN_UID (NEXT_INSN (insn_computes_expr)),
3163                    REGNO (SET_SRC (PATTERN (NEXT_INSN (insn_computes_expr)))));
3164           fprintf (gcse_file, ", computed in insn %d,\n",
3165                    INSN_UID (insn_computes_expr));
3166           fprintf (gcse_file, "      into newly allocated reg %d\n",
3167                    REGNO (to));
3168         }
3169
3170       pat = PATTERN (insn);
3171
3172       /* Do register replacement for INSN.  */
3173       changed = validate_change (insn, &SET_SRC (pat),
3174                                  SET_DEST (PATTERN
3175                                            (NEXT_INSN (insn_computes_expr))),
3176                                  0);
3177
3178       /* We should be able to ignore the return code from validate_change but
3179          to play it safe we check.  */
3180       if (changed)
3181         {
3182           gcse_subst_count++;
3183           if (gcse_file != NULL)
3184             {
3185               fprintf (gcse_file,
3186                        "GCSE: Replacing the source in insn %d with reg %d ",
3187                        INSN_UID (insn),
3188                        REGNO (SET_DEST (PATTERN (NEXT_INSN
3189                                                  (insn_computes_expr)))));
3190               fprintf (gcse_file, "set in insn %d\n",
3191                        INSN_UID (insn_computes_expr));
3192             }
3193         }
3194     }
3195
3196   return changed;
3197 }
3198
3199 /* Perform classic GCSE.  This is called by one_classic_gcse_pass after all
3200    the dataflow analysis has been done.
3201
3202    The result is non-zero if a change was made.  */
3203
3204 static int
3205 classic_gcse ()
3206 {
3207   int bb, changed;
3208   rtx insn;
3209
3210   /* Note we start at block 1.  */
3211
3212   changed = 0;
3213   for (bb = 1; bb < n_basic_blocks; bb++)
3214     {
3215       /* Reset tables used to keep track of what's still valid [since the
3216          start of the block].  */
3217       reset_opr_set_tables ();
3218
3219       for (insn = BLOCK_HEAD (bb);
3220            insn != NULL && insn != NEXT_INSN (BLOCK_END (bb));
3221            insn = NEXT_INSN (insn))
3222         {
3223           /* Is insn of form (set (pseudo-reg) ...)?  */
3224           if (GET_CODE (insn) == INSN
3225               && GET_CODE (PATTERN (insn)) == SET
3226               && GET_CODE (SET_DEST (PATTERN (insn))) == REG
3227               && REGNO (SET_DEST (PATTERN (insn))) >= FIRST_PSEUDO_REGISTER)
3228             {
3229               rtx pat = PATTERN (insn);
3230               rtx src = SET_SRC (pat);
3231               struct expr *expr;
3232
3233               if (want_to_gcse_p (src)
3234                   /* Is the expression recorded?  */
3235                   && ((expr = lookup_expr (src)) != NULL)
3236                   /* Is the expression available [at the start of the
3237                      block]?  */
3238                   && TEST_BIT (ae_in[bb], expr->bitmap_index)
3239                   /* Are the operands unchanged since the start of the
3240                      block?  */
3241                   && oprs_not_set_p (src, insn))
3242                 changed |= handle_avail_expr (insn, expr);
3243             }
3244
3245           /* Keep track of everything modified by this insn.  */
3246           /* ??? Need to be careful w.r.t. mods done to INSN.  */
3247           if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
3248             mark_oprs_set (insn);
3249         }
3250     }
3251
3252   return changed;
3253 }
3254
3255 /* Top level routine to perform one classic GCSE pass.
3256
3257    Return non-zero if a change was made.  */
3258
3259 static int
3260 one_classic_gcse_pass (pass)
3261      int pass;
3262 {
3263   int changed = 0;
3264
3265   gcse_subst_count = 0;
3266   gcse_create_count = 0;
3267
3268   alloc_expr_hash_table (max_cuid);
3269   alloc_rd_mem (n_basic_blocks, max_cuid);
3270   compute_expr_hash_table ();
3271   if (gcse_file)
3272     dump_hash_table (gcse_file, "Expression", expr_hash_table,
3273                      expr_hash_table_size, n_exprs);
3274
3275   if (n_exprs > 0)
3276     {
3277       compute_kill_rd ();
3278       compute_rd ();
3279       alloc_avail_expr_mem (n_basic_blocks, n_exprs);
3280       compute_ae_gen ();
3281       compute_ae_kill (ae_gen, ae_kill);
3282       compute_available (ae_gen, ae_kill, ae_out, ae_in);
3283       changed = classic_gcse ();
3284       free_avail_expr_mem ();
3285     }
3286
3287   free_rd_mem ();
3288   free_expr_hash_table ();
3289
3290   if (gcse_file)
3291     {
3292       fprintf (gcse_file, "\n");
3293       fprintf (gcse_file, "GCSE of %s, pass %d: %d bytes needed, %d substs,",
3294                current_function_name, pass, bytes_used, gcse_subst_count);
3295       fprintf (gcse_file, "%d insns created\n", gcse_create_count);
3296     }
3297
3298   return changed;
3299 }
3300 \f
3301 /* Compute copy/constant propagation working variables.  */
3302
3303 /* Local properties of assignments.  */
3304 static sbitmap *cprop_pavloc;
3305 static sbitmap *cprop_absaltered;
3306
3307 /* Global properties of assignments (computed from the local properties).  */
3308 static sbitmap *cprop_avin;
3309 static sbitmap *cprop_avout;
3310
3311 /* Allocate vars used for copy/const propagation.  N_BLOCKS is the number of
3312    basic blocks.  N_SETS is the number of sets.  */
3313
3314 static void
3315 alloc_cprop_mem (n_blocks, n_sets)
3316      int n_blocks, n_sets;
3317 {
3318   cprop_pavloc = sbitmap_vector_alloc (n_blocks, n_sets);
3319   cprop_absaltered = sbitmap_vector_alloc (n_blocks, n_sets);
3320
3321   cprop_avin = sbitmap_vector_alloc (n_blocks, n_sets);
3322   cprop_avout = sbitmap_vector_alloc (n_blocks, n_sets);
3323 }
3324
3325 /* Free vars used by copy/const propagation.  */
3326
3327 static void
3328 free_cprop_mem ()
3329 {
3330   free (cprop_pavloc);
3331   free (cprop_absaltered);
3332   free (cprop_avin);
3333   free (cprop_avout);
3334 }
3335
3336 /* For each block, compute whether X is transparent.  X is either an
3337    expression or an assignment [though we don't care which, for this context
3338    an assignment is treated as an expression].  For each block where an
3339    element of X is modified, set (SET_P == 1) or reset (SET_P == 0) the INDX
3340    bit in BMAP.  */
3341
3342 static void
3343 compute_transp (x, indx, bmap, set_p)
3344      rtx x;
3345      int indx;
3346      sbitmap *bmap;
3347      int set_p;
3348 {
3349   int bb, i, j;
3350   enum rtx_code code;
3351   reg_set *r;
3352   const char *fmt;
3353
3354   /* repeat is used to turn tail-recursion into iteration since GCC
3355      can't do it when there's no return value.  */
3356  repeat:
3357
3358   if (x == 0)
3359     return;
3360
3361   code = GET_CODE (x);
3362   switch (code)
3363     {
3364     case REG:
3365       if (set_p)
3366         {
3367           if (REGNO (x) < FIRST_PSEUDO_REGISTER)
3368             {
3369               for (bb = 0; bb < n_basic_blocks; bb++)
3370                 if (TEST_BIT (reg_set_in_block[bb], REGNO (x)))
3371                   SET_BIT (bmap[bb], indx);
3372             }
3373           else
3374             {
3375               for (r = reg_set_table[REGNO (x)]; r != NULL; r = r->next)
3376                 SET_BIT (bmap[BLOCK_NUM (r->insn)], indx);
3377             }
3378         }
3379       else
3380         {
3381           if (REGNO (x) < FIRST_PSEUDO_REGISTER)
3382             {
3383               for (bb = 0; bb < n_basic_blocks; bb++)
3384                 if (TEST_BIT (reg_set_in_block[bb], REGNO (x)))
3385                   RESET_BIT (bmap[bb], indx);
3386             }
3387           else
3388             {
3389               for (r = reg_set_table[REGNO (x)]; r != NULL; r = r->next)
3390                 RESET_BIT (bmap[BLOCK_NUM (r->insn)], indx);
3391             }
3392         }
3393
3394       return;
3395
3396     case MEM:
3397       if (set_p)
3398         {
3399           for (bb = 0; bb < n_basic_blocks; bb++)
3400             if (mem_set_in_block[bb])
3401               SET_BIT (bmap[bb], indx);
3402         }
3403       else
3404         {
3405           for (bb = 0; bb < n_basic_blocks; bb++)
3406             if (mem_set_in_block[bb])
3407               RESET_BIT (bmap[bb], indx);
3408         }
3409
3410       x = XEXP (x, 0);
3411       goto repeat;
3412
3413     case PC:
3414     case CC0: /*FIXME*/
3415     case CONST:
3416     case CONST_INT:
3417     case CONST_DOUBLE:
3418     case SYMBOL_REF:
3419     case LABEL_REF:
3420     case ADDR_VEC:
3421     case ADDR_DIFF_VEC:
3422       return;
3423
3424     default:
3425       break;
3426     }
3427
3428   for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
3429     {
3430       if (fmt[i] == 'e')
3431         {
3432           /* If we are about to do the last recursive call
3433              needed at this level, change it into iteration.
3434              This function is called enough to be worth it.  */
3435           if (i == 0)
3436             {
3437               x = XEXP (x, i);
3438               goto repeat;
3439             }
3440
3441           compute_transp (XEXP (x, i), indx, bmap, set_p);
3442         }
3443       else if (fmt[i] == 'E')
3444         for (j = 0; j < XVECLEN (x, i); j++)
3445           compute_transp (XVECEXP (x, i, j), indx, bmap, set_p);
3446     }
3447 }
3448
3449 /* Top level routine to do the dataflow analysis needed by copy/const
3450    propagation.  */
3451
3452 static void
3453 compute_cprop_data ()
3454 {
3455   compute_local_properties (cprop_absaltered, cprop_pavloc, NULL, 1);
3456   compute_available (cprop_pavloc, cprop_absaltered,
3457                      cprop_avout, cprop_avin);
3458 }
3459 \f
3460 /* Copy/constant propagation.  */
3461
3462 /* Maximum number of register uses in an insn that we handle.  */
3463 #define MAX_USES 8
3464
3465 /* Table of uses found in an insn.
3466    Allocated statically to avoid alloc/free complexity and overhead.  */
3467 static struct reg_use reg_use_table[MAX_USES];
3468
3469 /* Index into `reg_use_table' while building it.  */
3470 static int reg_use_count;
3471
3472 /* Set up a list of register numbers used in INSN.  The found uses are stored
3473    in `reg_use_table'.  `reg_use_count' is initialized to zero before entry,
3474    and contains the number of uses in the table upon exit.
3475
3476    ??? If a register appears multiple times we will record it multiple times.
3477    This doesn't hurt anything but it will slow things down.  */
3478
3479 static void
3480 find_used_regs (x)
3481      rtx x;
3482 {
3483   int i, j;
3484   enum rtx_code code;
3485   const char *fmt;
3486
3487   /* repeat is used to turn tail-recursion into iteration since GCC
3488      can't do it when there's no return value.  */
3489  repeat:
3490
3491   if (x == 0)
3492     return;
3493
3494   code = GET_CODE (x);
3495   switch (code)
3496     {
3497     case REG:
3498       if (reg_use_count == MAX_USES)
3499         return;
3500
3501       reg_use_table[reg_use_count].reg_rtx = x;
3502       reg_use_count++;
3503       return;
3504
3505     case MEM:
3506       x = XEXP (x, 0);
3507       goto repeat;
3508
3509     case PC:
3510     case CC0:
3511     case CONST:
3512     case CONST_INT:
3513     case CONST_DOUBLE:
3514     case SYMBOL_REF:
3515     case LABEL_REF:
3516     case CLOBBER:
3517     case ADDR_VEC:
3518     case ADDR_DIFF_VEC:
3519     case ASM_INPUT: /*FIXME*/
3520       return;
3521
3522     case SET:
3523       if (GET_CODE (SET_DEST (x)) == MEM)
3524         find_used_regs (SET_DEST (x));
3525       x = SET_SRC (x);
3526       goto repeat;
3527
3528     default:
3529       break;
3530     }
3531
3532   /* Recursively scan the operands of this expression.  */
3533
3534   for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
3535     {
3536       if (fmt[i] == 'e')
3537         {
3538           /* If we are about to do the last recursive call
3539              needed at this level, change it into iteration.
3540              This function is called enough to be worth it.  */
3541           if (i == 0)
3542             {
3543               x = XEXP (x, 0);
3544               goto repeat;
3545             }
3546
3547           find_used_regs (XEXP (x, i));
3548         }
3549       else if (fmt[i] == 'E')
3550         for (j = 0; j < XVECLEN (x, i); j++)
3551           find_used_regs (XVECEXP (x, i, j));
3552     }
3553 }
3554
3555 /* Try to replace all non-SET_DEST occurrences of FROM in INSN with TO.
3556    Returns non-zero is successful.  */
3557
3558 static int
3559 try_replace_reg (from, to, insn)
3560      rtx from, to, insn;
3561 {
3562   rtx note;
3563   rtx src;
3564   int success;
3565   rtx set;
3566
3567   note = find_reg_note (insn, REG_EQUAL, NULL_RTX);
3568
3569   if (!note)
3570     note = find_reg_note (insn, REG_EQUIV, NULL_RTX);
3571
3572   /* If this fails we could try to simplify the result of the
3573      replacement and attempt to recognize the simplified insn.
3574
3575      But we need a general simplify_rtx that doesn't have pass
3576      specific state variables.  I'm not aware of one at the moment.  */
3577
3578   success = validate_replace_src (from, to, insn);
3579   set = single_set (insn);
3580
3581   /* We've failed to do replacement. Try to add REG_EQUAL note to not loose
3582      information.  */
3583   if (!success && !note)
3584     {
3585       if (!set)
3586         return 0;
3587
3588       note = REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_EQUAL,
3589                                                    copy_rtx (SET_SRC (set)),
3590                                                    REG_NOTES (insn));
3591     }
3592
3593   /* Always do the replacement in REQ_EQUAL and REG_EQUIV notes.  Also
3594      try to simplify them.  */
3595   if (note)
3596     {
3597       rtx simplified;
3598
3599       src = XEXP (note, 0);
3600       replace_rtx (src, from, to);
3601
3602       /* Try to simplify resulting note. */
3603       simplified = simplify_rtx (src);
3604       if (simplified)
3605         {
3606           src = simplified;
3607           XEXP (note, 0) = src;
3608         }
3609
3610       /* REG_EQUAL may get simplified into register.
3611          We don't allow that. Remove that note. This code ought
3612          not to hapen, because previous code ought to syntetize
3613          reg-reg move, but be on the safe side.  */
3614       else if (REG_P (src))
3615         remove_note (insn, note);
3616     }
3617   return success;
3618 }
3619
3620 /* Find a set of REGNOs that are available on entry to INSN's block.  Returns
3621    NULL no such set is found.  */
3622
3623 static struct expr *
3624 find_avail_set (regno, insn)
3625      int regno;
3626      rtx insn;
3627 {
3628   /* SET1 contains the last set found that can be returned to the caller for
3629      use in a substitution.  */
3630   struct expr *set1 = 0;
3631
3632   /* Loops are not possible here.  To get a loop we would need two sets
3633      available at the start of the block containing INSN.  ie we would
3634      need two sets like this available at the start of the block:
3635
3636        (set (reg X) (reg Y))
3637        (set (reg Y) (reg X))
3638
3639      This can not happen since the set of (reg Y) would have killed the
3640      set of (reg X) making it unavailable at the start of this block.  */
3641   while (1)
3642      {
3643       rtx src;
3644       struct expr *set = lookup_set (regno, NULL_RTX);
3645
3646       /* Find a set that is available at the start of the block
3647          which contains INSN.  */
3648       while (set)
3649         {
3650           if (TEST_BIT (cprop_avin[BLOCK_NUM (insn)], set->bitmap_index))
3651             break;
3652           set = next_set (regno, set);
3653         }
3654
3655       /* If no available set was found we've reached the end of the
3656          (possibly empty) copy chain.  */
3657       if (set == 0)
3658         break;
3659
3660       if (GET_CODE (set->expr) != SET)
3661         abort ();
3662
3663       src = SET_SRC (set->expr);
3664
3665       /* We know the set is available.
3666          Now check that SRC is ANTLOC (i.e. none of the source operands
3667          have changed since the start of the block).
3668
3669          If the source operand changed, we may still use it for the next
3670          iteration of this loop, but we may not use it for substitutions.  */
3671
3672       if (CONSTANT_P (src) || oprs_not_set_p (src, insn))
3673         set1 = set;
3674
3675       /* If the source of the set is anything except a register, then
3676          we have reached the end of the copy chain.  */
3677       if (GET_CODE (src) != REG)
3678         break;
3679
3680       /* Follow the copy chain, ie start another iteration of the loop
3681          and see if we have an available copy into SRC.  */
3682       regno = REGNO (src);
3683      }
3684
3685   /* SET1 holds the last set that was available and anticipatable at
3686      INSN.  */
3687   return set1;
3688 }
3689
3690 /* Subroutine of cprop_insn that tries to propagate constants into
3691    JUMP_INSNS.  INSN must be a conditional jump; COPY is a copy of it
3692    that we can use for substitutions.
3693    REG_USED is the use we will try to replace, SRC is the constant we
3694    will try to substitute for it.
3695    Returns nonzero if a change was made.  */
3696
3697 static int
3698 cprop_jump (insn, copy, reg_used, src)
3699      rtx insn, copy;
3700      struct reg_use *reg_used;
3701      rtx src;
3702 {
3703   rtx set = PATTERN (copy);
3704   rtx temp;
3705
3706   /* Replace the register with the appropriate constant.  */
3707   replace_rtx (SET_SRC (set), reg_used->reg_rtx, src);
3708
3709   temp = simplify_ternary_operation (GET_CODE (SET_SRC (set)),
3710                                      GET_MODE (SET_SRC (set)),
3711                                      GET_MODE (XEXP (SET_SRC (set), 0)),
3712                                      XEXP (SET_SRC (set), 0),
3713                                      XEXP (SET_SRC (set), 1),
3714                                      XEXP (SET_SRC (set), 2));
3715
3716   /* If no simplification can be made, then try the next
3717      register.  */
3718   if (temp == 0)
3719     return 0;
3720
3721   SET_SRC (set) = temp;
3722
3723   /* That may have changed the structure of TEMP, so
3724      force it to be rerecognized if it has not turned
3725      into a nop or unconditional jump.  */
3726
3727   INSN_CODE (copy) = -1;
3728   if ((SET_DEST (set) == pc_rtx
3729        && (SET_SRC (set) == pc_rtx
3730            || GET_CODE (SET_SRC (set)) == LABEL_REF))
3731       || recog (PATTERN (copy), copy, NULL) >= 0)
3732     {
3733       /* This has either become an unconditional jump
3734          or a nop-jump.  We'd like to delete nop jumps
3735          here, but doing so confuses gcse.  So we just
3736          make the replacement and let later passes
3737          sort things out.  */
3738       PATTERN (insn) = set;
3739       INSN_CODE (insn) = -1;
3740
3741       /* One less use of the label this insn used to jump to
3742          if we turned this into a NOP jump.  */
3743       if (SET_SRC (set) == pc_rtx && JUMP_LABEL (insn) != 0)
3744         --LABEL_NUSES (JUMP_LABEL (insn));
3745
3746       /* If this has turned into an unconditional jump,
3747          then put a barrier after it so that the unreachable
3748          code will be deleted.  */
3749       if (GET_CODE (SET_SRC (set)) == LABEL_REF)
3750         emit_barrier_after (insn);
3751
3752       run_jump_opt_after_gcse = 1;
3753
3754       const_prop_count++;
3755       if (gcse_file != NULL)
3756         {
3757           fprintf (gcse_file,
3758                    "CONST-PROP: Replacing reg %d in insn %d with constant ",
3759                    REGNO (reg_used->reg_rtx), INSN_UID (insn));
3760           print_rtl (gcse_file, src);
3761           fprintf (gcse_file, "\n");
3762         }
3763
3764       return 1;
3765     }
3766   return 0;
3767 }
3768
3769 #ifdef HAVE_cc0
3770
3771 /* Subroutine of cprop_insn that tries to propagate constants into JUMP_INSNS
3772    for machines that have CC0.  INSN is a single set that stores into CC0;
3773    the insn following it is a conditional jump.  REG_USED is the use we will
3774    try to replace, SRC is the constant we will try to substitute for it.
3775    Returns nonzero if a change was made.  */
3776
3777 static int
3778 cprop_cc0_jump (insn, reg_used, src)
3779      rtx insn;
3780      struct reg_use *reg_used;
3781      rtx src;
3782 {
3783   rtx jump = NEXT_INSN (insn);
3784   rtx copy = copy_rtx (jump);
3785   rtx set = PATTERN (copy);
3786
3787   /* We need to copy the source of the cc0 setter, as cprop_jump is going to
3788      substitute into it.  */
3789   replace_rtx (SET_SRC (set), cc0_rtx, copy_rtx (SET_SRC (PATTERN (insn))));
3790   if (! cprop_jump (jump, copy, reg_used, src))
3791     return 0;
3792
3793   /* If we succeeded, delete the cc0 setter.  */
3794   PUT_CODE (insn, NOTE);
3795   NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
3796   NOTE_SOURCE_FILE (insn) = 0;
3797   return 1;
3798  }
3799 #endif
3800
3801 /* Perform constant and copy propagation on INSN.
3802    The result is non-zero if a change was made.  */
3803
3804 static int
3805 cprop_insn (insn, alter_jumps)
3806      rtx insn;
3807      int alter_jumps;
3808 {
3809   struct reg_use *reg_used;
3810   int changed = 0;
3811   rtx note;
3812
3813   /* Only propagate into SETs.  Note that a conditional jump is a
3814      SET with pc_rtx as the destination.  */
3815   if ((GET_CODE (insn) != INSN
3816        && GET_CODE (insn) != JUMP_INSN)
3817       || GET_CODE (PATTERN (insn)) != SET)
3818     return 0;
3819
3820   reg_use_count = 0;
3821   find_used_regs (PATTERN (insn));
3822
3823   note = find_reg_note (insn, REG_EQUIV, NULL_RTX);
3824   if (!note)
3825     note = find_reg_note (insn, REG_EQUAL, NULL_RTX);
3826
3827   /* We may win even when propagating constants into notes. */
3828   if (note)
3829     find_used_regs (XEXP (note, 0));
3830
3831   for (reg_used = &reg_use_table[0]; reg_use_count > 0;
3832        reg_used++, reg_use_count--)
3833     {
3834       unsigned int regno = REGNO (reg_used->reg_rtx);
3835       rtx pat, src;
3836       struct expr *set;
3837
3838       /* Ignore registers created by GCSE.
3839          We do this because ... */
3840       if (regno >= max_gcse_regno)
3841         continue;
3842
3843       /* If the register has already been set in this block, there's
3844          nothing we can do.  */
3845       if (! oprs_not_set_p (reg_used->reg_rtx, insn))
3846         continue;
3847
3848       /* Find an assignment that sets reg_used and is available
3849          at the start of the block.  */
3850       set = find_avail_set (regno, insn);
3851       if (! set)
3852         continue;
3853
3854       pat = set->expr;
3855       /* ??? We might be able to handle PARALLELs.  Later.  */
3856       if (GET_CODE (pat) != SET)
3857         abort ();
3858
3859       src = SET_SRC (pat);
3860
3861       /* Constant propagation.  */
3862       if (GET_CODE (src) == CONST_INT || GET_CODE (src) == CONST_DOUBLE
3863           || GET_CODE (src) == SYMBOL_REF)
3864         {
3865           /* Handle normal insns first.  */
3866           if (GET_CODE (insn) == INSN
3867               && try_replace_reg (reg_used->reg_rtx, src, insn))
3868             {
3869               changed = 1;
3870               const_prop_count++;
3871               if (gcse_file != NULL)
3872                 {
3873                   fprintf (gcse_file, "CONST-PROP: Replacing reg %d in ",
3874                            regno);
3875                   fprintf (gcse_file, "insn %d with constant ",
3876                            INSN_UID (insn));
3877                   print_rtl (gcse_file, src);
3878                   fprintf (gcse_file, "\n");
3879                 }
3880
3881               /* The original insn setting reg_used may or may not now be
3882                  deletable.  We leave the deletion to flow.  */
3883             }
3884
3885           /* Try to propagate a CONST_INT into a conditional jump.
3886              We're pretty specific about what we will handle in this
3887              code, we can extend this as necessary over time.
3888
3889              Right now the insn in question must look like
3890              (set (pc) (if_then_else ...))  */
3891           else if (alter_jumps
3892                    && GET_CODE (insn) == JUMP_INSN
3893                    && condjump_p (insn)
3894                    && ! simplejump_p (insn))
3895             changed |= cprop_jump (insn, copy_rtx (insn), reg_used, src);
3896 #ifdef HAVE_cc0
3897           /* Similar code for machines that use a pair of CC0 setter and
3898              conditional jump insn.  */
3899           else if (alter_jumps
3900                    && GET_CODE (PATTERN (insn)) == SET
3901                    && SET_DEST (PATTERN (insn)) == cc0_rtx
3902                    && GET_CODE (NEXT_INSN (insn)) == JUMP_INSN
3903                    && condjump_p (NEXT_INSN (insn))
3904                    && ! simplejump_p (NEXT_INSN (insn)))
3905             changed |= cprop_cc0_jump (insn, reg_used, src);
3906 #endif
3907         }
3908       else if (GET_CODE (src) == REG
3909                && REGNO (src) >= FIRST_PSEUDO_REGISTER
3910                && REGNO (src) != regno)
3911         {
3912           if (try_replace_reg (reg_used->reg_rtx, src, insn))
3913             {
3914               changed = 1;
3915               copy_prop_count++;
3916               if (gcse_file != NULL)
3917                 {
3918                   fprintf (gcse_file, "COPY-PROP: Replacing reg %d in insn %d",
3919                            regno, INSN_UID (insn));
3920                   fprintf (gcse_file, " with reg %d\n", REGNO (src));
3921                 }
3922
3923               /* The original insn setting reg_used may or may not now be
3924                  deletable.  We leave the deletion to flow.  */
3925               /* FIXME: If it turns out that the insn isn't deletable,
3926                  then we may have unnecessarily extended register lifetimes
3927                  and made things worse.  */
3928             }
3929         }
3930     }
3931
3932   return changed;
3933 }
3934
3935 /* Forward propagate copies.  This includes copies and constants.  Return
3936    non-zero if a change was made.  */
3937
3938 static int
3939 cprop (alter_jumps)
3940      int alter_jumps;
3941 {
3942   int bb, changed;
3943   rtx insn;
3944
3945   /* Note we start at block 1.  */
3946
3947   changed = 0;
3948   for (bb = 1; bb < n_basic_blocks; bb++)
3949     {
3950       /* Reset tables used to keep track of what's still valid [since the
3951          start of the block].  */
3952       reset_opr_set_tables ();
3953
3954       for (insn = BLOCK_HEAD (bb);
3955            insn != NULL && insn != NEXT_INSN (BLOCK_END (bb));
3956            insn = NEXT_INSN (insn))
3957         {
3958           if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
3959             {
3960               changed |= cprop_insn (insn, alter_jumps);
3961
3962               /* Keep track of everything modified by this insn.  */
3963               /* ??? Need to be careful w.r.t. mods done to INSN.  Don't
3964                  call mark_oprs_set if we turned the insn into a NOTE.  */
3965               if (GET_CODE (insn) != NOTE)
3966                 mark_oprs_set (insn);
3967             }
3968         }
3969     }
3970
3971   if (gcse_file != NULL)
3972     fprintf (gcse_file, "\n");
3973
3974   return changed;
3975 }
3976
3977 /* Perform one copy/constant propagation pass.
3978    F is the first insn in the function.
3979    PASS is the pass count.  */
3980
3981 static int
3982 one_cprop_pass (pass, alter_jumps)
3983      int pass;
3984      int alter_jumps;
3985 {
3986   int changed = 0;
3987
3988   const_prop_count = 0;
3989   copy_prop_count = 0;
3990
3991   alloc_set_hash_table (max_cuid);
3992   compute_set_hash_table ();
3993   if (gcse_file)
3994     dump_hash_table (gcse_file, "SET", set_hash_table, set_hash_table_size,
3995                      n_sets);
3996   if (n_sets > 0)
3997     {
3998       alloc_cprop_mem (n_basic_blocks, n_sets);
3999       compute_cprop_data ();
4000       changed = cprop (alter_jumps);
4001       free_cprop_mem ();
4002     }
4003
4004   free_set_hash_table ();
4005
4006   if (gcse_file)
4007     {
4008       fprintf (gcse_file, "CPROP of %s, pass %d: %d bytes needed, ",
4009                current_function_name, pass, bytes_used);
4010       fprintf (gcse_file, "%d const props, %d copy props\n\n",
4011                const_prop_count, copy_prop_count);
4012     }
4013
4014   return changed;
4015 }
4016 \f
4017 /* Compute PRE+LCM working variables.  */
4018
4019 /* Local properties of expressions.  */
4020 /* Nonzero for expressions that are transparent in the block.  */
4021 static sbitmap *transp;
4022
4023 /* Nonzero for expressions that are transparent at the end of the block.
4024    This is only zero for expressions killed by abnormal critical edge
4025    created by a calls.  */
4026 static sbitmap *transpout;
4027
4028 /* Nonzero for expressions that are computed (available) in the block.  */
4029 static sbitmap *comp;
4030
4031 /* Nonzero for expressions that are locally anticipatable in the block.  */
4032 static sbitmap *antloc;
4033
4034 /* Nonzero for expressions where this block is an optimal computation
4035    point.  */
4036 static sbitmap *pre_optimal;
4037
4038 /* Nonzero for expressions which are redundant in a particular block.  */
4039 static sbitmap *pre_redundant;
4040
4041 /* Nonzero for expressions which should be inserted on a specific edge.  */
4042 static sbitmap *pre_insert_map;
4043
4044 /* Nonzero for expressions which should be deleted in a specific block.  */
4045 static sbitmap *pre_delete_map;
4046
4047 /* Contains the edge_list returned by pre_edge_lcm.  */
4048 static struct edge_list *edge_list;
4049
4050 static sbitmap *temp_bitmap;
4051
4052 /* Redundant insns.  */
4053 static sbitmap pre_redundant_insns;
4054
4055 /* Allocate vars used for PRE analysis.  */
4056
4057 static void
4058 alloc_pre_mem (n_blocks, n_exprs)
4059      int n_blocks, n_exprs;
4060 {
4061   transp = sbitmap_vector_alloc (n_blocks, n_exprs);
4062   comp = sbitmap_vector_alloc (n_blocks, n_exprs);
4063   antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
4064   temp_bitmap = sbitmap_vector_alloc (n_blocks, n_exprs);
4065
4066   pre_optimal = NULL;
4067   pre_redundant = NULL;
4068   pre_insert_map = NULL;
4069   pre_delete_map = NULL;
4070   ae_in = NULL;
4071   ae_out = NULL;
4072   u_bitmap = NULL;
4073   transpout = sbitmap_vector_alloc (n_blocks, n_exprs);
4074   ae_kill = sbitmap_vector_alloc (n_blocks, n_exprs);
4075
4076   /* pre_insert and pre_delete are allocated later.  */
4077 }
4078
4079 /* Free vars used for PRE analysis.  */
4080
4081 static void
4082 free_pre_mem ()
4083 {
4084   free (transp);
4085   free (comp);
4086   free (antloc);
4087   free (temp_bitmap);
4088
4089   if (pre_optimal)
4090     free (pre_optimal);
4091   if (pre_redundant)
4092     free (pre_redundant);
4093   if (pre_insert_map)
4094     free (pre_insert_map);
4095   if (pre_delete_map)
4096     free (pre_delete_map);
4097   if (transpout)
4098     free (transpout);
4099
4100   if (ae_in)
4101     free (ae_in);
4102   if (ae_out)
4103     free (ae_out);
4104   if (ae_kill)
4105     free (ae_kill);
4106   if (u_bitmap)
4107     free (u_bitmap);
4108
4109   transp = comp = antloc = NULL;
4110   pre_optimal = pre_redundant = pre_insert_map = pre_delete_map = NULL;
4111   transpout = ae_in = ae_out = ae_kill = NULL;
4112   u_bitmap = NULL;
4113
4114 }
4115
4116 /* Top level routine to do the dataflow analysis needed by PRE.  */
4117
4118 static void
4119 compute_pre_data ()
4120 {
4121   int i;
4122
4123   compute_local_properties (transp, comp, antloc, 0);
4124   compute_transpout ();
4125   sbitmap_vector_zero (ae_kill, n_basic_blocks);
4126
4127   /* Compute ae_kill for each basic block using:
4128
4129      ~(TRANSP | COMP)
4130
4131      This is significantly after than compute_ae_kill.  */
4132
4133   for (i = 0; i < n_basic_blocks; i++)
4134     {
4135       sbitmap_a_or_b (ae_kill[i], transp[i], comp[i]);
4136       sbitmap_not (ae_kill[i], ae_kill[i]);
4137     }
4138
4139   edge_list = pre_edge_lcm (gcse_file, n_exprs, transp, comp, antloc,
4140                             ae_kill, &pre_insert_map, &pre_delete_map);
4141 }
4142 \f
4143 /* PRE utilities */
4144
4145 /* Return non-zero if an occurrence of expression EXPR in OCCR_BB would reach
4146    block BB.
4147
4148    VISITED is a pointer to a working buffer for tracking which BB's have
4149    been visited.  It is NULL for the top-level call.
4150
4151    We treat reaching expressions that go through blocks containing the same
4152    reaching expression as "not reaching".  E.g. if EXPR is generated in blocks
4153    2 and 3, INSN is in block 4, and 2->3->4, we treat the expression in block
4154    2 as not reaching.  The intent is to improve the probability of finding
4155    only one reaching expression and to reduce register lifetimes by picking
4156    the closest such expression.  */
4157
4158 static int
4159 pre_expr_reaches_here_p_work (occr_bb, expr, bb, visited)
4160      int occr_bb;
4161      struct expr *expr;
4162      int bb;
4163      char *visited;
4164 {
4165   edge pred;
4166
4167   for (pred = BASIC_BLOCK (bb)->pred; pred != NULL; pred = pred->pred_next)
4168     {
4169       int pred_bb = pred->src->index;
4170
4171       if (pred->src == ENTRY_BLOCK_PTR
4172           /* Has predecessor has already been visited?  */
4173           || visited[pred_bb])
4174         ;/* Nothing to do.  */
4175
4176       /* Does this predecessor generate this expression?  */
4177       else if (TEST_BIT (comp[pred_bb], expr->bitmap_index))
4178         {
4179           /* Is this the occurrence we're looking for?
4180              Note that there's only one generating occurrence per block
4181              so we just need to check the block number.  */
4182           if (occr_bb == pred_bb)
4183             return 1;
4184
4185           visited[pred_bb] = 1;
4186         }
4187       /* Ignore this predecessor if it kills the expression.  */
4188       else if (! TEST_BIT (transp[pred_bb], expr->bitmap_index))
4189         visited[pred_bb] = 1;
4190
4191       /* Neither gen nor kill.  */
4192       else
4193         {
4194           visited[pred_bb] = 1;
4195           if (pre_expr_reaches_here_p_work (occr_bb, expr, pred_bb, visited))
4196             return 1;
4197         }
4198     }
4199
4200   /* All paths have been checked.  */
4201   return 0;
4202 }
4203
4204 /* The wrapper for pre_expr_reaches_here_work that ensures that any
4205    memory allocated for that function is returned. */
4206
4207 static int
4208 pre_expr_reaches_here_p (occr_bb, expr, bb)
4209      int occr_bb;
4210      struct expr *expr;
4211      int bb;
4212 {
4213   int rval;
4214   char *visited = (char *) xcalloc (n_basic_blocks, 1);
4215
4216   rval = pre_expr_reaches_here_p_work(occr_bb, expr, bb, visited);
4217
4218   free (visited);
4219   return rval;
4220 }
4221 \f
4222
4223 /* Given an expr, generate RTL which we can insert at the end of a BB,
4224    or on an edge.  Set the block number of any insns generated to
4225    the value of BB.  */
4226
4227 static rtx
4228 process_insert_insn (expr)
4229      struct expr *expr;
4230 {
4231   rtx reg = expr->reaching_reg;
4232   rtx pat, copied_expr;
4233   rtx first_new_insn;
4234
4235   start_sequence ();
4236   copied_expr = copy_rtx (expr->expr);
4237   emit_move_insn (reg, copied_expr);
4238   first_new_insn = get_insns ();
4239   pat = gen_sequence ();
4240   end_sequence ();
4241
4242   return pat;
4243 }
4244
4245 /* Add EXPR to the end of basic block BB.
4246
4247    This is used by both the PRE and code hoisting.
4248
4249    For PRE, we want to verify that the expr is either transparent
4250    or locally anticipatable in the target block.  This check makes
4251    no sense for code hoisting.  */
4252
4253 static void
4254 insert_insn_end_bb (expr, bb, pre)
4255      struct expr *expr;
4256      int bb;
4257      int pre;
4258 {
4259   rtx insn = BLOCK_END (bb);
4260   rtx new_insn;
4261   rtx reg = expr->reaching_reg;
4262   int regno = REGNO (reg);
4263   rtx pat;
4264   int i;
4265
4266   pat = process_insert_insn (expr);
4267
4268   /* If the last insn is a jump, insert EXPR in front [taking care to
4269      handle cc0, etc. properly].  */
4270
4271   if (GET_CODE (insn) == JUMP_INSN)
4272     {
4273 #ifdef HAVE_cc0
4274       rtx note;
4275 #endif
4276
4277       /* If this is a jump table, then we can't insert stuff here.  Since
4278          we know the previous real insn must be the tablejump, we insert
4279          the new instruction just before the tablejump.  */
4280       if (GET_CODE (PATTERN (insn)) == ADDR_VEC
4281           || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
4282         insn = prev_real_insn (insn);
4283
4284 #ifdef HAVE_cc0
4285       /* FIXME: 'twould be nice to call prev_cc0_setter here but it aborts
4286          if cc0 isn't set.  */
4287       note = find_reg_note (insn, REG_CC_SETTER, NULL_RTX);
4288       if (note)
4289         insn = XEXP (note, 0);
4290       else
4291         {
4292           rtx maybe_cc0_setter = prev_nonnote_insn (insn);
4293           if (maybe_cc0_setter
4294               && GET_RTX_CLASS (GET_CODE (maybe_cc0_setter)) == 'i'
4295               && sets_cc0_p (PATTERN (maybe_cc0_setter)))
4296             insn = maybe_cc0_setter;
4297         }
4298 #endif
4299       /* FIXME: What if something in cc0/jump uses value set in new insn?  */
4300       new_insn = emit_block_insn_before (pat, insn, BASIC_BLOCK (bb));
4301     }
4302
4303   /* Likewise if the last insn is a call, as will happen in the presence
4304      of exception handling.  */
4305   else if (GET_CODE (insn) == CALL_INSN)
4306     {
4307       HARD_REG_SET parm_regs;
4308       int nparm_regs;
4309       rtx p;
4310
4311       /* Keeping in mind SMALL_REGISTER_CLASSES and parameters in registers,
4312          we search backward and place the instructions before the first
4313          parameter is loaded.  Do this for everyone for consistency and a
4314          presumtion that we'll get better code elsewhere as well.
4315
4316          It should always be the case that we can put these instructions
4317          anywhere in the basic block with performing PRE optimizations.
4318          Check this.  */
4319
4320       if (pre
4321           && !TEST_BIT (antloc[bb], expr->bitmap_index)
4322           && !TEST_BIT (transp[bb], expr->bitmap_index))
4323         abort ();
4324
4325       /* Since different machines initialize their parameter registers
4326          in different orders, assume nothing.  Collect the set of all
4327          parameter registers.  */
4328       CLEAR_HARD_REG_SET (parm_regs);
4329       nparm_regs = 0;
4330       for (p = CALL_INSN_FUNCTION_USAGE (insn); p ; p = XEXP (p, 1))
4331         if (GET_CODE (XEXP (p, 0)) == USE
4332             && GET_CODE (XEXP (XEXP (p, 0), 0)) == REG)
4333           {
4334             if (REGNO (XEXP (XEXP (p, 0), 0)) >= FIRST_PSEUDO_REGISTER)
4335               abort ();
4336
4337             SET_HARD_REG_BIT (parm_regs, REGNO (XEXP (XEXP (p, 0), 0)));
4338             nparm_regs++;
4339           }
4340
4341       /* Search backward for the first set of a register in this set.  */
4342       while (nparm_regs && BLOCK_HEAD (bb) != insn)
4343         {
4344           insn = PREV_INSN (insn);
4345           p = single_set (insn);
4346           if (p && GET_CODE (SET_DEST (p)) == REG
4347               && REGNO (SET_DEST (p)) < FIRST_PSEUDO_REGISTER
4348               && TEST_HARD_REG_BIT (parm_regs, REGNO (SET_DEST (p))))
4349             {
4350               CLEAR_HARD_REG_BIT (parm_regs, REGNO (SET_DEST (p)));
4351               nparm_regs--;
4352             }
4353         }
4354
4355       /* If we found all the parameter loads, then we want to insert
4356          before the first parameter load.
4357
4358          If we did not find all the parameter loads, then we might have
4359          stopped on the head of the block, which could be a CODE_LABEL.
4360          If we inserted before the CODE_LABEL, then we would be putting
4361          the insn in the wrong basic block.  In that case, put the insn
4362          after the CODE_LABEL.  Also, respect NOTE_INSN_BASIC_BLOCK.  */
4363       while (GET_CODE (insn) == CODE_LABEL
4364              || (GET_CODE (insn) == NOTE
4365                  && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK))
4366         insn = NEXT_INSN (insn);
4367
4368       new_insn = emit_block_insn_before (pat, insn, BASIC_BLOCK (bb));
4369     }
4370   else
4371     {
4372       new_insn = emit_insn_after (pat, insn);
4373       BLOCK_END (bb) = new_insn;
4374     }
4375
4376   /* Keep block number table up to date.
4377      Note, PAT could be a multiple insn sequence, we have to make
4378      sure that each insn in the sequence is handled.  */
4379   if (GET_CODE (pat) == SEQUENCE)
4380     {
4381       for (i = 0; i < XVECLEN (pat, 0); i++)
4382         {
4383           rtx insn = XVECEXP (pat, 0, i);
4384
4385           set_block_num (insn, bb);
4386           if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
4387             add_label_notes (PATTERN (insn), new_insn);
4388
4389           note_stores (PATTERN (insn), record_set_info, insn);
4390         }
4391     }
4392   else
4393     {
4394       add_label_notes (SET_SRC (pat), new_insn);
4395       set_block_num (new_insn, bb);
4396
4397       /* Keep register set table up to date.  */
4398       record_one_set (regno, new_insn);
4399     }
4400
4401   gcse_create_count++;
4402
4403   if (gcse_file)
4404     {
4405       fprintf (gcse_file, "PRE/HOIST: end of bb %d, insn %d, ",
4406                bb, INSN_UID (new_insn));
4407       fprintf (gcse_file, "copying expression %d to reg %d\n",
4408                expr->bitmap_index, regno);
4409     }
4410 }
4411
4412 /* Insert partially redundant expressions on edges in the CFG to make
4413    the expressions fully redundant.  */
4414
4415 static int
4416 pre_edge_insert (edge_list, index_map)
4417      struct edge_list *edge_list;
4418      struct expr **index_map;
4419 {
4420   int e, i, j, num_edges, set_size, did_insert = 0;
4421   sbitmap *inserted;
4422
4423   /* Where PRE_INSERT_MAP is nonzero, we add the expression on that edge
4424      if it reaches any of the deleted expressions.  */
4425
4426   set_size = pre_insert_map[0]->size;
4427   num_edges = NUM_EDGES (edge_list);
4428   inserted = sbitmap_vector_alloc (num_edges, n_exprs);
4429   sbitmap_vector_zero (inserted, num_edges);
4430
4431   for (e = 0; e < num_edges; e++)
4432     {
4433       int indx;
4434       basic_block pred = INDEX_EDGE_PRED_BB (edge_list, e);
4435       int bb = pred->index;
4436
4437       for (i = indx = 0; i < set_size; i++, indx += SBITMAP_ELT_BITS)
4438         {
4439           SBITMAP_ELT_TYPE insert = pre_insert_map[e]->elms[i];
4440
4441           for (j = indx; insert && j < n_exprs; j++, insert >>= 1)
4442             if ((insert & 1) != 0 && index_map[j]->reaching_reg != NULL_RTX)
4443               {
4444                 struct expr *expr = index_map[j];
4445                 struct occr *occr;
4446
4447                 /* Now look at each deleted occurence of this expression.  */
4448                 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
4449                   {
4450                     if (! occr->deleted_p)
4451                       continue;
4452
4453                     /* Insert this expression on this edge if if it would
4454                        reach the deleted occurence in BB.  */
4455                     if (!TEST_BIT (inserted[e], j))
4456                       {
4457                         rtx insn;
4458                         edge eg = INDEX_EDGE (edge_list, e);
4459
4460                         /* We can't insert anything on an abnormal and
4461                            critical edge, so we insert the insn at the end of
4462                            the previous block. There are several alternatives
4463                            detailed in Morgans book P277 (sec 10.5) for
4464                            handling this situation.  This one is easiest for
4465                            now.  */
4466
4467                         if ((eg->flags & EDGE_ABNORMAL) == EDGE_ABNORMAL)
4468                           insert_insn_end_bb (index_map[j], bb, 0);
4469                         else
4470                           {
4471                             insn = process_insert_insn (index_map[j]);
4472                             insert_insn_on_edge (insn, eg);
4473                           }
4474
4475                         if (gcse_file)
4476                           {
4477                             fprintf (gcse_file, "PRE/HOIST: edge (%d,%d), ",
4478                                      bb,
4479                                      INDEX_EDGE_SUCC_BB (edge_list, e)->index);
4480                             fprintf (gcse_file, "copy expression %d\n",
4481                                      expr->bitmap_index);
4482                           }
4483
4484                         SET_BIT (inserted[e], j);
4485                         did_insert = 1;
4486                         gcse_create_count++;
4487                       }
4488                   }
4489               }
4490         }
4491     }
4492
4493   free (inserted);
4494   return did_insert;
4495 }
4496
4497 /* Copy the result of INSN to REG.  INDX is the expression number.  */
4498
4499 static void
4500 pre_insert_copy_insn (expr, insn)
4501      struct expr *expr;
4502      rtx insn;
4503 {
4504   rtx reg = expr->reaching_reg;
4505   int regno = REGNO (reg);
4506   int indx = expr->bitmap_index;
4507   rtx set = single_set (insn);
4508   rtx new_insn;
4509   int bb = BLOCK_NUM (insn);
4510
4511   if (!set)
4512     abort ();
4513
4514   new_insn = emit_insn_after (gen_rtx_SET (VOIDmode, reg, SET_DEST (set)),
4515                               insn);
4516
4517   /* Keep block number table up to date.  */
4518   set_block_num (new_insn, bb);
4519
4520   /* Keep register set table up to date.  */
4521   record_one_set (regno, new_insn);
4522   if (insn == BLOCK_END (bb))
4523     BLOCK_END (bb) = new_insn;
4524
4525   gcse_create_count++;
4526
4527   if (gcse_file)
4528     fprintf (gcse_file,
4529              "PRE: bb %d, insn %d, copy expression %d in insn %d to reg %d\n",
4530               BLOCK_NUM (insn), INSN_UID (new_insn), indx,
4531               INSN_UID (insn), regno);
4532 }
4533
4534 /* Copy available expressions that reach the redundant expression
4535    to `reaching_reg'.  */
4536
4537 static void
4538 pre_insert_copies ()
4539 {
4540   unsigned int i;
4541   struct expr *expr;
4542   struct occr *occr;
4543   struct occr *avail;
4544
4545   /* For each available expression in the table, copy the result to
4546      `reaching_reg' if the expression reaches a deleted one.
4547
4548      ??? The current algorithm is rather brute force.
4549      Need to do some profiling.  */
4550
4551   for (i = 0; i < expr_hash_table_size; i++)
4552     for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
4553       {
4554         /* If the basic block isn't reachable, PPOUT will be TRUE.  However,
4555            we don't want to insert a copy here because the expression may not
4556            really be redundant.  So only insert an insn if the expression was
4557            deleted.  This test also avoids further processing if the
4558            expression wasn't deleted anywhere.  */
4559         if (expr->reaching_reg == NULL)
4560           continue;
4561
4562         for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
4563           {
4564             if (! occr->deleted_p)
4565               continue;
4566
4567             for (avail = expr->avail_occr; avail != NULL; avail = avail->next)
4568               {
4569                 rtx insn = avail->insn;
4570
4571                 /* No need to handle this one if handled already.  */
4572                 if (avail->copied_p)
4573                   continue;
4574
4575                 /* Don't handle this one if it's a redundant one.  */
4576                 if (TEST_BIT (pre_redundant_insns, INSN_CUID (insn)))
4577                   continue;
4578
4579                 /* Or if the expression doesn't reach the deleted one.  */
4580                 if (! pre_expr_reaches_here_p (BLOCK_NUM (avail->insn), expr,
4581                                                BLOCK_NUM (occr->insn)))
4582                   continue;
4583
4584                 /* Copy the result of avail to reaching_reg.  */
4585                 pre_insert_copy_insn (expr, insn);
4586                 avail->copied_p = 1;
4587               }
4588           }
4589       }
4590 }
4591
4592 /* Delete redundant computations.
4593    Deletion is done by changing the insn to copy the `reaching_reg' of
4594    the expression into the result of the SET.  It is left to later passes
4595    (cprop, cse2, flow, combine, regmove) to propagate the copy or eliminate it.
4596
4597    Returns non-zero if a change is made.  */
4598
4599 static int
4600 pre_delete ()
4601 {
4602   unsigned int i;
4603   int bb, changed;
4604   struct expr *expr;
4605   struct occr *occr;
4606
4607   /* Compute the expressions which are redundant and need to be replaced by
4608      copies from the reaching reg to the target reg.  */
4609   for (bb = 0; bb < n_basic_blocks; bb++)
4610     sbitmap_copy (temp_bitmap[bb], pre_delete_map[bb]);
4611
4612   changed = 0;
4613   for (i = 0; i < expr_hash_table_size; i++)
4614     for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
4615       {
4616         int indx = expr->bitmap_index;
4617
4618         /* We only need to search antic_occr since we require
4619            ANTLOC != 0.  */
4620
4621         for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
4622           {
4623             rtx insn = occr->insn;
4624             rtx set;
4625             int bb = BLOCK_NUM (insn);
4626
4627             if (TEST_BIT (temp_bitmap[bb], indx))
4628               {
4629                 set = single_set (insn);
4630                 if (! set)
4631                   abort ();
4632
4633                 /* Create a pseudo-reg to store the result of reaching
4634                    expressions into.  Get the mode for the new pseudo from
4635                    the mode of the original destination pseudo.  */
4636                 if (expr->reaching_reg == NULL)
4637                   expr->reaching_reg
4638                     = gen_reg_rtx (GET_MODE (SET_DEST (set)));
4639
4640                 /* In theory this should never fail since we're creating
4641                    a reg->reg copy.
4642
4643                    However, on the x86 some of the movXX patterns actually
4644                    contain clobbers of scratch regs.  This may cause the
4645                    insn created by validate_change to not match any pattern
4646                    and thus cause validate_change to fail.   */
4647                 if (validate_change (insn, &SET_SRC (set),
4648                                      expr->reaching_reg, 0))
4649                   {
4650                     occr->deleted_p = 1;
4651                     SET_BIT (pre_redundant_insns, INSN_CUID (insn));
4652                     changed = 1;
4653                     gcse_subst_count++;
4654                   }
4655
4656                 if (gcse_file)
4657                   {
4658                     fprintf (gcse_file,
4659                              "PRE: redundant insn %d (expression %d) in ",
4660                                INSN_UID (insn), indx);
4661                     fprintf (gcse_file, "bb %d, reaching reg is %d\n",
4662                              bb, REGNO (expr->reaching_reg));
4663                   }
4664               }
4665           }
4666       }
4667
4668   return changed;
4669 }
4670
4671 /* Perform GCSE optimizations using PRE.
4672    This is called by one_pre_gcse_pass after all the dataflow analysis
4673    has been done.
4674
4675    This is based on the original Morel-Renvoise paper Fred Chow's thesis, and
4676    lazy code motion from Knoop, Ruthing and Steffen as described in Advanced
4677    Compiler Design and Implementation.
4678
4679    ??? A new pseudo reg is created to hold the reaching expression.  The nice
4680    thing about the classical approach is that it would try to use an existing
4681    reg.  If the register can't be adequately optimized [i.e. we introduce
4682    reload problems], one could add a pass here to propagate the new register
4683    through the block.
4684
4685    ??? We don't handle single sets in PARALLELs because we're [currently] not
4686    able to copy the rest of the parallel when we insert copies to create full
4687    redundancies from partial redundancies.  However, there's no reason why we
4688    can't handle PARALLELs in the cases where there are no partial
4689    redundancies.  */
4690
4691 static int
4692 pre_gcse ()
4693 {
4694   unsigned int i;
4695   int did_insert, changed;
4696   struct expr **index_map;
4697   struct expr *expr;
4698
4699   /* Compute a mapping from expression number (`bitmap_index') to
4700      hash table entry.  */
4701
4702   index_map = (struct expr **) xcalloc (n_exprs, sizeof (struct expr *));
4703   for (i = 0; i < expr_hash_table_size; i++)
4704     for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
4705       index_map[expr->bitmap_index] = expr;
4706
4707   /* Reset bitmap used to track which insns are redundant.  */
4708   pre_redundant_insns = sbitmap_alloc (max_cuid);
4709   sbitmap_zero (pre_redundant_insns);
4710
4711   /* Delete the redundant insns first so that
4712      - we know what register to use for the new insns and for the other
4713        ones with reaching expressions
4714      - we know which insns are redundant when we go to create copies  */
4715
4716   changed = pre_delete ();
4717
4718   did_insert = pre_edge_insert (edge_list, index_map);
4719
4720   /* In other places with reaching expressions, copy the expression to the
4721      specially allocated pseudo-reg that reaches the redundant expr.  */
4722   pre_insert_copies ();
4723   if (did_insert)
4724     {
4725       commit_edge_insertions ();
4726       changed = 1;
4727     }
4728
4729   free (index_map);
4730   free (pre_redundant_insns);
4731   return changed;
4732 }
4733
4734 /* Top level routine to perform one PRE GCSE pass.
4735
4736    Return non-zero if a change was made.  */
4737
4738 static int
4739 one_pre_gcse_pass (pass)
4740      int pass;
4741 {
4742   int changed = 0;
4743
4744   gcse_subst_count = 0;
4745   gcse_create_count = 0;
4746
4747   alloc_expr_hash_table (max_cuid);
4748   add_noreturn_fake_exit_edges ();
4749   compute_expr_hash_table ();
4750   if (gcse_file)
4751     dump_hash_table (gcse_file, "Expression", expr_hash_table,
4752                      expr_hash_table_size, n_exprs);
4753
4754   if (n_exprs > 0)
4755     {
4756       alloc_pre_mem (n_basic_blocks, n_exprs);
4757       compute_pre_data ();
4758       changed |= pre_gcse ();
4759       free_edge_list (edge_list);
4760       free_pre_mem ();
4761     }
4762
4763   remove_fake_edges ();
4764   free_expr_hash_table ();
4765
4766   if (gcse_file)
4767     {
4768       fprintf (gcse_file, "\nPRE GCSE of %s, pass %d: %d bytes needed, ",
4769                current_function_name, pass, bytes_used);
4770       fprintf (gcse_file, "%d substs, %d insns created\n",
4771                gcse_subst_count, gcse_create_count);
4772     }
4773
4774   return changed;
4775 }
4776 \f
4777 /* If X contains any LABEL_REF's, add REG_LABEL notes for them to INSN.
4778    We have to add REG_LABEL notes, because the following loop optimization
4779    pass requires them.  */
4780
4781 /* ??? This is very similar to the loop.c add_label_notes function.  We
4782    could probably share code here.  */
4783
4784 /* ??? If there was a jump optimization pass after gcse and before loop,
4785    then we would not need to do this here, because jump would add the
4786    necessary REG_LABEL notes.  */
4787
4788 static void
4789 add_label_notes (x, insn)
4790      rtx x;
4791      rtx insn;
4792 {
4793   enum rtx_code code = GET_CODE (x);
4794   int i, j;
4795   const char *fmt;
4796
4797   if (code == LABEL_REF && !LABEL_REF_NONLOCAL_P (x))
4798     {
4799       /* This code used to ignore labels that referred to dispatch tables to
4800          avoid flow generating (slighly) worse code.
4801
4802          We no longer ignore such label references (see LABEL_REF handling in
4803          mark_jump_label for additional information).  */
4804
4805       REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_LABEL, XEXP (x, 0),
4806                                             REG_NOTES (insn));
4807       return;
4808     }
4809
4810   for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
4811     {
4812       if (fmt[i] == 'e')
4813         add_label_notes (XEXP (x, i), insn);
4814       else if (fmt[i] == 'E')
4815         for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4816           add_label_notes (XVECEXP (x, i, j), insn);
4817     }
4818 }
4819
4820 /* Compute transparent outgoing information for each block.
4821
4822    An expression is transparent to an edge unless it is killed by
4823    the edge itself.  This can only happen with abnormal control flow,
4824    when the edge is traversed through a call.  This happens with
4825    non-local labels and exceptions.
4826
4827    This would not be necessary if we split the edge.  While this is
4828    normally impossible for abnormal critical edges, with some effort
4829    it should be possible with exception handling, since we still have
4830    control over which handler should be invoked.  But due to increased
4831    EH table sizes, this may not be worthwhile.  */
4832
4833 static void
4834 compute_transpout ()
4835 {
4836   int bb;
4837   unsigned int i;
4838   struct expr *expr;
4839
4840   sbitmap_vector_ones (transpout, n_basic_blocks);
4841
4842   for (bb = 0; bb < n_basic_blocks; ++bb)
4843     {
4844       /* Note that flow inserted a nop a the end of basic blocks that
4845          end in call instructions for reasons other than abnormal
4846          control flow.  */
4847       if (GET_CODE (BLOCK_END (bb)) != CALL_INSN)
4848         continue;
4849
4850       for (i = 0; i < expr_hash_table_size; i++)
4851         for (expr = expr_hash_table[i]; expr ; expr = expr->next_same_hash)
4852           if (GET_CODE (expr->expr) == MEM)
4853             {
4854               if (GET_CODE (XEXP (expr->expr, 0)) == SYMBOL_REF
4855                   && CONSTANT_POOL_ADDRESS_P (XEXP (expr->expr, 0)))
4856                 continue;
4857
4858               /* ??? Optimally, we would use interprocedural alias
4859                  analysis to determine if this mem is actually killed
4860                  by this call.  */
4861               RESET_BIT (transpout[bb], expr->bitmap_index);
4862             }
4863     }
4864 }
4865
4866 /* Removal of useless null pointer checks */
4867
4868 /* Called via note_stores.  X is set by SETTER.  If X is a register we must
4869    invalidate nonnull_local and set nonnull_killed.  DATA is really a
4870    `null_pointer_info *'.
4871
4872    We ignore hard registers.  */
4873
4874 static void
4875 invalidate_nonnull_info (x, setter, data)
4876      rtx x;
4877      rtx setter ATTRIBUTE_UNUSED;
4878      void *data;
4879 {
4880   unsigned int regno;
4881   struct null_pointer_info *npi = (struct null_pointer_info *) data;
4882
4883   while (GET_CODE (x) == SUBREG)
4884     x = SUBREG_REG (x);
4885
4886   /* Ignore anything that is not a register or is a hard register.  */
4887   if (GET_CODE (x) != REG
4888       || REGNO (x) < npi->min_reg
4889       || REGNO (x) >= npi->max_reg)
4890     return;
4891
4892   regno = REGNO (x) - npi->min_reg;
4893
4894   RESET_BIT (npi->nonnull_local[npi->current_block], regno);
4895   SET_BIT (npi->nonnull_killed[npi->current_block], regno);
4896 }
4897
4898 /* Do null-pointer check elimination for the registers indicated in
4899    NPI.  NONNULL_AVIN and NONNULL_AVOUT are pre-allocated sbitmaps;
4900    they are not our responsibility to free.  */
4901
4902 static void
4903 delete_null_pointer_checks_1 (block_reg, nonnull_avin, nonnull_avout, npi)
4904      unsigned int *block_reg;
4905      sbitmap *nonnull_avin;
4906      sbitmap *nonnull_avout;
4907      struct null_pointer_info *npi;
4908 {
4909   int bb;
4910   int current_block;
4911   sbitmap *nonnull_local = npi->nonnull_local;
4912   sbitmap *nonnull_killed = npi->nonnull_killed;
4913
4914   /* Compute local properties, nonnull and killed.  A register will have
4915      the nonnull property if at the end of the current block its value is
4916      known to be nonnull.  The killed property indicates that somewhere in
4917      the block any information we had about the register is killed.
4918
4919      Note that a register can have both properties in a single block.  That
4920      indicates that it's killed, then later in the block a new value is
4921      computed.  */
4922   sbitmap_vector_zero (nonnull_local, n_basic_blocks);
4923   sbitmap_vector_zero (nonnull_killed, n_basic_blocks);
4924
4925   for (current_block = 0; current_block < n_basic_blocks; current_block++)
4926     {
4927       rtx insn, stop_insn;
4928
4929       /* Set the current block for invalidate_nonnull_info.  */
4930       npi->current_block = current_block;
4931
4932       /* Scan each insn in the basic block looking for memory references and
4933          register sets.  */
4934       stop_insn = NEXT_INSN (BLOCK_END (current_block));
4935       for (insn = BLOCK_HEAD (current_block);
4936            insn != stop_insn;
4937            insn = NEXT_INSN (insn))
4938         {
4939           rtx set;
4940           rtx reg;
4941
4942           /* Ignore anything that is not a normal insn.  */
4943           if (GET_RTX_CLASS (GET_CODE (insn)) != 'i')
4944             continue;
4945
4946           /* Basically ignore anything that is not a simple SET.  We do have
4947              to make sure to invalidate nonnull_local and set nonnull_killed
4948              for such insns though.  */
4949           set = single_set (insn);
4950           if (!set)
4951             {
4952               note_stores (PATTERN (insn), invalidate_nonnull_info, npi);
4953               continue;
4954             }
4955
4956           /* See if we've got a useable memory load.  We handle it first
4957              in case it uses its address register as a dest (which kills
4958              the nonnull property).  */
4959           if (GET_CODE (SET_SRC (set)) == MEM
4960               && GET_CODE ((reg = XEXP (SET_SRC (set), 0))) == REG
4961               && REGNO (reg) >= npi->min_reg
4962               && REGNO (reg) < npi->max_reg)
4963             SET_BIT (nonnull_local[current_block],
4964                      REGNO (reg) - npi->min_reg);
4965
4966           /* Now invalidate stuff clobbered by this insn.  */
4967           note_stores (PATTERN (insn), invalidate_nonnull_info, npi);
4968
4969           /* And handle stores, we do these last since any sets in INSN can
4970              not kill the nonnull property if it is derived from a MEM
4971              appearing in a SET_DEST.  */
4972           if (GET_CODE (SET_DEST (set)) == MEM
4973               && GET_CODE ((reg = XEXP (SET_DEST (set), 0))) == REG
4974               && REGNO (reg) >= npi->min_reg
4975               && REGNO (reg) < npi->max_reg)
4976             SET_BIT (nonnull_local[current_block],
4977                      REGNO (reg) - npi->min_reg);
4978         }
4979     }
4980
4981   /* Now compute global properties based on the local properties.   This
4982      is a classic global availablity algorithm.  */
4983   compute_available (nonnull_local, nonnull_killed,
4984                      nonnull_avout, nonnull_avin);
4985
4986   /* Now look at each bb and see if it ends with a compare of a value
4987      against zero.  */
4988   for (bb = 0; bb < n_basic_blocks; bb++)
4989     {
4990       rtx last_insn = BLOCK_END (bb);
4991       rtx condition, earliest;
4992       int compare_and_branch;
4993
4994       /* Since MIN_REG is always at least FIRST_PSEUDO_REGISTER, and
4995          since BLOCK_REG[BB] is zero if this block did not end with a
4996          comparison against zero, this condition works.  */
4997       if (block_reg[bb] < npi->min_reg
4998           || block_reg[bb] >= npi->max_reg)
4999         continue;
5000
5001       /* LAST_INSN is a conditional jump.  Get its condition.  */
5002       condition = get_condition (last_insn, &earliest);
5003
5004       /* If we can't determine the condition then skip.  */
5005       if (! condition)
5006         continue;
5007
5008       /* Is the register known to have a nonzero value?  */
5009       if (!TEST_BIT (nonnull_avout[bb], block_reg[bb] - npi->min_reg))
5010         continue;
5011
5012       /* Try to compute whether the compare/branch at the loop end is one or
5013          two instructions.  */
5014       if (earliest == last_insn)
5015         compare_and_branch = 1;
5016       else if (earliest == prev_nonnote_insn (last_insn))
5017         compare_and_branch = 2;
5018       else
5019         continue;
5020
5021       /* We know the register in this comparison is nonnull at exit from
5022          this block.  We can optimize this comparison.  */
5023       if (GET_CODE (condition) == NE)
5024         {
5025           rtx new_jump;
5026
5027           new_jump = emit_jump_insn_before (gen_jump (JUMP_LABEL (last_insn)),
5028                                             last_insn);
5029           JUMP_LABEL (new_jump) = JUMP_LABEL (last_insn);
5030           LABEL_NUSES (JUMP_LABEL (new_jump))++;
5031           emit_barrier_after (new_jump);
5032         }
5033       delete_insn (last_insn);
5034       if (compare_and_branch == 2)
5035         delete_insn (earliest);
5036
5037       /* Don't check this block again.  (Note that BLOCK_END is
5038          invalid here; we deleted the last instruction in the
5039          block.)  */
5040       block_reg[bb] = 0;
5041     }
5042 }
5043
5044 /* Find EQ/NE comparisons against zero which can be (indirectly) evaluated
5045    at compile time.
5046
5047    This is conceptually similar to global constant/copy propagation and
5048    classic global CSE (it even uses the same dataflow equations as cprop).
5049
5050    If a register is used as memory address with the form (mem (reg)), then we
5051    know that REG can not be zero at that point in the program.  Any instruction
5052    which sets REG "kills" this property.
5053
5054    So, if every path leading to a conditional branch has an available memory
5055    reference of that form, then we know the register can not have the value
5056    zero at the conditional branch.
5057
5058    So we merely need to compute the local properies and propagate that data
5059    around the cfg, then optimize where possible.
5060
5061    We run this pass two times.  Once before CSE, then again after CSE.  This
5062    has proven to be the most profitable approach.  It is rare for new
5063    optimization opportunities of this nature to appear after the first CSE
5064    pass.
5065
5066    This could probably be integrated with global cprop with a little work.  */
5067
5068 void
5069 delete_null_pointer_checks (f)
5070      rtx f ATTRIBUTE_UNUSED;
5071 {
5072   sbitmap *nonnull_avin, *nonnull_avout;
5073   unsigned int *block_reg;
5074   int bb;
5075   int reg;
5076   int regs_per_pass;
5077   int max_reg;
5078   struct null_pointer_info npi;
5079
5080   /* If we have only a single block, then there's nothing to do.  */
5081   if (n_basic_blocks <= 1)
5082     return;
5083
5084   /* Trying to perform global optimizations on flow graphs which have
5085      a high connectivity will take a long time and is unlikely to be
5086      particularly useful.
5087
5088      In normal circumstances a cfg should have about twice has many edges
5089      as blocks.  But we do not want to punish small functions which have
5090      a couple switch statements.  So we require a relatively large number
5091      of basic blocks and the ratio of edges to blocks to be high.  */
5092   if (n_basic_blocks > 1000 && n_edges / n_basic_blocks >= 20)
5093     return;
5094
5095   /* We need four bitmaps, each with a bit for each register in each
5096      basic block.  */
5097   max_reg = max_reg_num ();
5098   regs_per_pass = get_bitmap_width (4, n_basic_blocks, max_reg);
5099
5100   /* Allocate bitmaps to hold local and global properties.  */
5101   npi.nonnull_local = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);
5102   npi.nonnull_killed = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);
5103   nonnull_avin = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);
5104   nonnull_avout = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);
5105
5106   /* Go through the basic blocks, seeing whether or not each block
5107      ends with a conditional branch whose condition is a comparison
5108      against zero.  Record the register compared in BLOCK_REG.  */
5109   block_reg = (unsigned int *) xcalloc (n_basic_blocks, sizeof (int));
5110   for (bb = 0; bb < n_basic_blocks; bb++)
5111     {
5112       rtx last_insn = BLOCK_END (bb);
5113       rtx condition, earliest, reg;
5114
5115       /* We only want conditional branches.  */
5116       if (GET_CODE (last_insn) != JUMP_INSN
5117           || !any_condjump_p (last_insn)
5118           || !onlyjump_p (last_insn))
5119         continue;
5120
5121       /* LAST_INSN is a conditional jump.  Get its condition.  */
5122       condition = get_condition (last_insn, &earliest);
5123
5124       /* If we were unable to get the condition, or it is not a equality
5125          comparison against zero then there's nothing we can do.  */
5126       if (!condition
5127           || (GET_CODE (condition) != NE && GET_CODE (condition) != EQ)
5128           || GET_CODE (XEXP (condition, 1)) != CONST_INT
5129           || (XEXP (condition, 1)
5130               != CONST0_RTX (GET_MODE (XEXP (condition, 0)))))
5131         continue;
5132
5133       /* We must be checking a register against zero.  */
5134       reg = XEXP (condition, 0);
5135       if (GET_CODE (reg) != REG)
5136         continue;
5137
5138       block_reg[bb] = REGNO (reg);
5139     }
5140
5141   /* Go through the algorithm for each block of registers.  */
5142   for (reg = FIRST_PSEUDO_REGISTER; reg < max_reg; reg += regs_per_pass)
5143     {
5144       npi.min_reg = reg;
5145       npi.max_reg = MIN (reg + regs_per_pass, max_reg);
5146       delete_null_pointer_checks_1 (block_reg, nonnull_avin,
5147                                     nonnull_avout, &npi);
5148     }
5149
5150   /* Free the table of registers compared at the end of every block.  */
5151   free (block_reg);
5152
5153   /* Free bitmaps.  */
5154   free (npi.nonnull_local);
5155   free (npi.nonnull_killed);
5156   free (nonnull_avin);
5157   free (nonnull_avout);
5158 }
5159
5160 /* Code Hoisting variables and subroutines.  */
5161
5162 /* Very busy expressions.  */
5163 static sbitmap *hoist_vbein;
5164 static sbitmap *hoist_vbeout;
5165
5166 /* Hoistable expressions.  */
5167 static sbitmap *hoist_exprs;
5168
5169 /* Dominator bitmaps.  */
5170 static sbitmap *dominators;
5171
5172 /* ??? We could compute post dominators and run this algorithm in
5173    reverse to to perform tail merging, doing so would probably be
5174    more effective than the tail merging code in jump.c.
5175
5176    It's unclear if tail merging could be run in parallel with
5177    code hoisting.  It would be nice.  */
5178
5179 /* Allocate vars used for code hoisting analysis.  */
5180
5181 static void
5182 alloc_code_hoist_mem (n_blocks, n_exprs)
5183      int n_blocks, n_exprs;
5184 {
5185   antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
5186   transp = sbitmap_vector_alloc (n_blocks, n_exprs);
5187   comp = sbitmap_vector_alloc (n_blocks, n_exprs);
5188
5189   hoist_vbein = sbitmap_vector_alloc (n_blocks, n_exprs);
5190   hoist_vbeout = sbitmap_vector_alloc (n_blocks, n_exprs);
5191   hoist_exprs = sbitmap_vector_alloc (n_blocks, n_exprs);
5192   transpout = sbitmap_vector_alloc (n_blocks, n_exprs);
5193
5194   dominators = sbitmap_vector_alloc (n_blocks, n_blocks);
5195 }
5196
5197 /* Free vars used for code hoisting analysis.  */
5198
5199 static void
5200 free_code_hoist_mem ()
5201 {
5202   free (antloc);
5203   free (transp);
5204   free (comp);
5205
5206   free (hoist_vbein);
5207   free (hoist_vbeout);
5208   free (hoist_exprs);
5209   free (transpout);
5210
5211   free (dominators);
5212 }
5213
5214 /* Compute the very busy expressions at entry/exit from each block.
5215
5216    An expression is very busy if all paths from a given point
5217    compute the expression.  */
5218
5219 static void
5220 compute_code_hoist_vbeinout ()
5221 {
5222   int bb, changed, passes;
5223
5224   sbitmap_vector_zero (hoist_vbeout, n_basic_blocks);
5225   sbitmap_vector_zero (hoist_vbein, n_basic_blocks);
5226
5227   passes = 0;
5228   changed = 1;
5229
5230   while (changed)
5231     {
5232       changed = 0;
5233
5234       /* We scan the blocks in the reverse order to speed up
5235          the convergence.  */
5236       for (bb = n_basic_blocks - 1; bb >= 0; bb--)
5237         {
5238           changed |= sbitmap_a_or_b_and_c (hoist_vbein[bb], antloc[bb],
5239                                            hoist_vbeout[bb], transp[bb]);
5240           if (bb != n_basic_blocks - 1)
5241             sbitmap_intersection_of_succs (hoist_vbeout[bb], hoist_vbein, bb);
5242         }
5243
5244       passes++;
5245     }
5246
5247   if (gcse_file)
5248     fprintf (gcse_file, "hoisting vbeinout computation: %d passes\n", passes);
5249 }
5250
5251 /* Top level routine to do the dataflow analysis needed by code hoisting.  */
5252
5253 static void
5254 compute_code_hoist_data ()
5255 {
5256   compute_local_properties (transp, comp, antloc, 0);
5257   compute_transpout ();
5258   compute_code_hoist_vbeinout ();
5259   compute_flow_dominators (dominators, NULL);
5260   if (gcse_file)
5261     fprintf (gcse_file, "\n");
5262 }
5263
5264 /* Determine if the expression identified by EXPR_INDEX would
5265    reach BB unimpared if it was placed at the end of EXPR_BB.
5266
5267    It's unclear exactly what Muchnick meant by "unimpared".  It seems
5268    to me that the expression must either be computed or transparent in
5269    *every* block in the path(s) from EXPR_BB to BB.  Any other definition
5270    would allow the expression to be hoisted out of loops, even if
5271    the expression wasn't a loop invariant.
5272
5273    Contrast this to reachability for PRE where an expression is
5274    considered reachable if *any* path reaches instead of *all*
5275    paths.  */
5276
5277 static int
5278 hoist_expr_reaches_here_p (expr_bb, expr_index, bb, visited)
5279      int expr_bb;
5280      int expr_index;
5281      int bb;
5282      char *visited;
5283 {
5284   edge pred;
5285   int visited_allocated_locally = 0;
5286
5287
5288   if (visited == NULL)
5289     {
5290        visited_allocated_locally = 1;
5291        visited = xcalloc (n_basic_blocks, 1);
5292     }
5293
5294   visited[expr_bb] = 1;
5295   for (pred = BASIC_BLOCK (bb)->pred; pred != NULL; pred = pred->pred_next)
5296     {
5297       int pred_bb = pred->src->index;
5298
5299       if (pred->src == ENTRY_BLOCK_PTR)
5300         break;
5301       else if (visited[pred_bb])
5302         continue;
5303
5304       /* Does this predecessor generate this expression?  */
5305       else if (TEST_BIT (comp[pred_bb], expr_index))
5306         break;
5307       else if (! TEST_BIT (transp[pred_bb], expr_index))
5308         break;
5309
5310       /* Not killed.  */
5311       else
5312         {
5313           visited[pred_bb] = 1;
5314           if (! hoist_expr_reaches_here_p (expr_bb, expr_index,
5315                                            pred_bb, visited))
5316             break;
5317         }
5318     }
5319   if (visited_allocated_locally)
5320     free (visited);
5321
5322   return (pred == NULL);
5323 }
5324 \f
5325 /* Actually perform code hoisting.  */
5326
5327 static void
5328 hoist_code ()
5329 {
5330   int bb, dominated;
5331   unsigned int i;
5332   struct expr **index_map;
5333   struct expr *expr;
5334
5335   sbitmap_vector_zero (hoist_exprs, n_basic_blocks);
5336
5337   /* Compute a mapping from expression number (`bitmap_index') to
5338      hash table entry.  */
5339
5340   index_map = (struct expr **) xcalloc (n_exprs, sizeof (struct expr *));
5341   for (i = 0; i < expr_hash_table_size; i++)
5342     for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
5343       index_map[expr->bitmap_index] = expr;
5344
5345   /* Walk over each basic block looking for potentially hoistable
5346      expressions, nothing gets hoisted from the entry block.  */
5347   for (bb = 0; bb < n_basic_blocks; bb++)
5348     {
5349       int found = 0;
5350       int insn_inserted_p;
5351
5352       /* Examine each expression that is very busy at the exit of this
5353          block.  These are the potentially hoistable expressions.  */
5354       for (i = 0; i < hoist_vbeout[bb]->n_bits; i++)
5355         {
5356           int hoistable = 0;
5357
5358           if (TEST_BIT (hoist_vbeout[bb], i) && TEST_BIT (transpout[bb], i))
5359             {
5360               /* We've found a potentially hoistable expression, now
5361                  we look at every block BB dominates to see if it
5362                  computes the expression.  */
5363               for (dominated = 0; dominated < n_basic_blocks; dominated++)
5364                 {
5365                   /* Ignore self dominance.  */
5366                   if (bb == dominated
5367                       || ! TEST_BIT (dominators[dominated], bb))
5368                     continue;
5369
5370                   /* We've found a dominated block, now see if it computes
5371                      the busy expression and whether or not moving that
5372                      expression to the "beginning" of that block is safe.  */
5373                   if (!TEST_BIT (antloc[dominated], i))
5374                     continue;
5375
5376                   /* Note if the expression would reach the dominated block
5377                      unimpared if it was placed at the end of BB.
5378
5379                      Keep track of how many times this expression is hoistable
5380                      from a dominated block into BB.  */
5381                   if (hoist_expr_reaches_here_p (bb, i, dominated, NULL))
5382                     hoistable++;
5383                 }
5384
5385               /* If we found more than one hoistable occurence of this
5386                  expression, then note it in the bitmap of expressions to
5387                  hoist.  It makes no sense to hoist things which are computed
5388                  in only one BB, and doing so tends to pessimize register
5389                  allocation.  One could increase this value to try harder
5390                  to avoid any possible code expansion due to register
5391                  allocation issues; however experiments have shown that
5392                  the vast majority of hoistable expressions are only movable
5393                  from two successors, so raising this threshhold is likely
5394                  to nullify any benefit we get from code hoisting.  */
5395               if (hoistable > 1)
5396                 {
5397                   SET_BIT (hoist_exprs[bb], i);
5398                   found = 1;
5399                 }
5400             }
5401         }
5402
5403       /* If we found nothing to hoist, then quit now.  */
5404       if (! found)
5405         continue;
5406
5407       /* Loop over all the hoistable expressions.  */
5408       for (i = 0; i < hoist_exprs[bb]->n_bits; i++)
5409         {
5410           /* We want to insert the expression into BB only once, so
5411              note when we've inserted it.  */
5412           insn_inserted_p = 0;
5413
5414           /* These tests should be the same as the tests above.  */
5415           if (TEST_BIT (hoist_vbeout[bb], i))
5416             {
5417               /* We've found a potentially hoistable expression, now
5418                  we look at every block BB dominates to see if it
5419                  computes the expression.  */
5420               for (dominated = 0; dominated < n_basic_blocks; dominated++)
5421                 {
5422                   /* Ignore self dominance.  */
5423                   if (bb == dominated
5424                       || ! TEST_BIT (dominators[dominated], bb))
5425                     continue;
5426
5427                   /* We've found a dominated block, now see if it computes
5428                      the busy expression and whether or not moving that
5429                      expression to the "beginning" of that block is safe.  */
5430                   if (!TEST_BIT (antloc[dominated], i))
5431                     continue;
5432
5433                   /* The expression is computed in the dominated block and
5434                      it would be safe to compute it at the start of the
5435                      dominated block.  Now we have to determine if the
5436                      expresion would reach the dominated block if it was
5437                      placed at the end of BB.  */
5438                   if (hoist_expr_reaches_here_p (bb, i, dominated, NULL))
5439                     {
5440                       struct expr *expr = index_map[i];
5441                       struct occr *occr = expr->antic_occr;
5442                       rtx insn;
5443                       rtx set;
5444
5445                       /* Find the right occurence of this expression.  */
5446                       while (BLOCK_NUM (occr->insn) != dominated && occr)
5447                         occr = occr->next;
5448
5449                       /* Should never happen.  */
5450                       if (!occr)
5451                         abort ();
5452
5453                       insn = occr->insn;
5454
5455                       set = single_set (insn);
5456                       if (! set)
5457                         abort ();
5458
5459                       /* Create a pseudo-reg to store the result of reaching
5460                          expressions into.  Get the mode for the new pseudo
5461                          from the mode of the original destination pseudo.  */
5462                       if (expr->reaching_reg == NULL)
5463                         expr->reaching_reg
5464                           = gen_reg_rtx (GET_MODE (SET_DEST (set)));
5465
5466                       /* In theory this should never fail since we're creating
5467                          a reg->reg copy.
5468
5469                          However, on the x86 some of the movXX patterns
5470                          actually contain clobbers of scratch regs.  This may
5471                          cause the insn created by validate_change to not
5472                          match any pattern and thus cause validate_change to
5473                          fail.  */
5474                       if (validate_change (insn, &SET_SRC (set),
5475                                            expr->reaching_reg, 0))
5476                         {
5477                           occr->deleted_p = 1;
5478                           if (!insn_inserted_p)
5479                             {
5480                               insert_insn_end_bb (index_map[i], bb, 0);
5481                               insn_inserted_p = 1;
5482                             }
5483                         }
5484                     }
5485                 }
5486             }
5487         }
5488     }
5489
5490     free (index_map);
5491 }
5492
5493 /* Top level routine to perform one code hoisting (aka unification) pass
5494
5495    Return non-zero if a change was made.  */
5496
5497 static int
5498 one_code_hoisting_pass ()
5499 {
5500   int changed = 0;
5501
5502   alloc_expr_hash_table (max_cuid);
5503   compute_expr_hash_table ();
5504   if (gcse_file)
5505     dump_hash_table (gcse_file, "Code Hosting Expressions", expr_hash_table,
5506                      expr_hash_table_size, n_exprs);
5507
5508   if (n_exprs > 0)
5509     {
5510       alloc_code_hoist_mem (n_basic_blocks, n_exprs);
5511       compute_code_hoist_data ();
5512       hoist_code ();
5513       free_code_hoist_mem ();
5514     }
5515
5516   free_expr_hash_table ();
5517
5518   return changed;
5519 }