gcc/gcse.c

   1 /* Global common subexpression elimination/Partial redundancy elimination
   2    and global constant/copy propagation for GNU compiler.
   3    Copyright (C) 1997, 1998, 1999, 2000 Free Software Foundation, Inc.
   4
   5 This file is part of GNU CC.
   6
   7 GNU CC is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 2, or (at your option)
  10 any later version.
  11
  12 GNU CC is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GNU CC; see the file COPYING.  If not, write to
  19 the Free Software Foundation, 59 Temple Place - Suite 330,
  20 Boston, MA 02111-1307, USA.  */
  21
  22 /* TODO
  23    - reordering of memory allocation and freeing to be more space efficient
  24    - do rough calc of how many regs are needed in each block, and a rough
  25      calc of how many regs are available in each class and use that to
  26      throttle back the code in cases where RTX_COST is minimal.
  27    - dead store elimination
  28    - a store to the same address as a load does not kill the load if the
  29      source of the store is also the destination of the load.  Handling this
  30      allows more load motion, particularly out of loops.
  31    - ability to realloc sbitmap vectors would allow one initial computation
  32      of reg_set_in_block with only subsequent additions, rather than
  33      recomputing it for each pass
  34
  35 */
  36
  37 /* References searched while implementing this.
  38
  39    Compilers Principles, Techniques and Tools
  40    Aho, Sethi, Ullman
  41    Addison-Wesley, 1988
  42
  43    Global Optimization by Suppression of Partial Redundancies
  44    E. Morel, C. Renvoise
  45    communications of the acm, Vol. 22, Num. 2, Feb. 1979
  46
  47    A Portable Machine-Independent Global Optimizer - Design and Measurements
  48    Frederick Chow
  49    Stanford Ph.D. thesis, Dec. 1983
  50
  51    A Fast Algorithm for Code Movement Optimization
  52    D.M. Dhamdhere
  53    SIGPLAN Notices, Vol. 23, Num. 10, Oct. 1988
  54
  55    A Solution to a Problem with Morel and Renvoise's
  56    Global Optimization by Suppression of Partial Redundancies
  57    K-H Drechsler, M.P. Stadel
  58    ACM TOPLAS, Vol. 10, Num. 4, Oct. 1988
  59
  60    Practical Adaptation of the Global Optimization
  61    Algorithm of Morel and Renvoise
  62    D.M. Dhamdhere
  63    ACM TOPLAS, Vol. 13, Num. 2. Apr. 1991
  64
  65    Efficiently Computing Static Single Assignment Form and the Control
  66    Dependence Graph
  67    R. Cytron, J. Ferrante, B.K. Rosen, M.N. Wegman, and F.K. Zadeck
  68    ACM TOPLAS, Vol. 13, Num. 4, Oct. 1991
  69
  70    Lazy Code Motion
  71    J. Knoop, O. Ruthing, B. Steffen
  72    ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI
  73
  74    What's In a Region?  Or Computing Control Dependence Regions in Near-Linear
  75    Time for Reducible Flow Control
  76    Thomas Ball
  77    ACM Letters on Programming Languages and Systems,
  78    Vol. 2, Num. 1-4, Mar-Dec 1993
  79
  80    An Efficient Representation for Sparse Sets
  81    Preston Briggs, Linda Torczon
  82    ACM Letters on Programming Languages and Systems,
  83    Vol. 2, Num. 1-4, Mar-Dec 1993
  84
  85    A Variation of Knoop, Ruthing, and Steffen's Lazy Code Motion
  86    K-H Drechsler, M.P. Stadel
  87    ACM SIGPLAN Notices, Vol. 28, Num. 5, May 1993
  88
  89    Partial Dead Code Elimination
  90    J. Knoop, O. Ruthing, B. Steffen
  91    ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
  92
  93    Effective Partial Redundancy Elimination
  94    P. Briggs, K.D. Cooper
  95    ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
  96
  97    The Program Structure Tree: Computing Control Regions in Linear Time
  98    R. Johnson, D. Pearson, K. Pingali
  99    ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
 100
 101    Optimal Code Motion: Theory and Practice
 102    J. Knoop, O. Ruthing, B. Steffen
 103    ACM TOPLAS, Vol. 16, Num. 4, Jul. 1994
 104
 105    The power of assignment motion
 106    J. Knoop, O. Ruthing, B. Steffen
 107    ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI
 108
 109    Global code motion / global value numbering
 110    C. Click
 111    ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI
 112
 113    Value Driven Redundancy Elimination
 114    L.T. Simpson
 115    Rice University Ph.D. thesis, Apr. 1996
 116
 117    Value Numbering
 118    L.T. Simpson
 119    Massively Scalar Compiler Project, Rice University, Sep. 1996
 120
 121    High Performance Compilers for Parallel Computing
 122    Michael Wolfe
 123    Addison-Wesley, 1996
 124
 125    Advanced Compiler Design and Implementation
 126    Steven Muchnick
 127    Morgan Kaufmann, 1997
 128
 129    Building an Optimizing Compiler
 130    Robert Morgan
 131    Digital Press, 1998
 132
 133    People wishing to speed up the code here should read:
 134      Elimination Algorithms for Data Flow Analysis
 135      B.G. Ryder, M.C. Paull
 136      ACM Computing Surveys, Vol. 18, Num. 3, Sep. 1986
 137
 138      How to Analyze Large Programs Efficiently and Informatively
 139      D.M. Dhamdhere, B.K. Rosen, F.K. Zadeck
 140      ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI
 141
 142    People wishing to do something different can find various possibilities
 143    in the above papers and elsewhere.
 144 */
 145
 146 #include "config.h"
 147 #include "system.h"
 148 #include "toplev.h"
 149
 150 #include "rtl.h"
 151 #include "tm_p.h"
 152 #include "regs.h"
 153 #include "hard-reg-set.h"
 154 #include "flags.h"
 155 #include "real.h"
 156 #include "insn-config.h"
 157 #include "recog.h"
 158 #include "basic-block.h"
 159 #include "output.h"
 160 #include "function.h"
 161 #include "expr.h"
 162
 163 #include "obstack.h"
 164 #define obstack_chunk_alloc gmalloc
 165 #define obstack_chunk_free free
 166
 167 /* Maximum number of passes to perform.  */
 168 #define MAX_PASSES 1
 169
 170 /* Propagate flow information through back edges and thus enable PRE's
 171    moving loop invariant calculations out of loops.
 172
 173    Originally this tended to create worse overall code, but several
 174    improvements during the development of PRE seem to have made following
 175    back edges generally a win.
 176
 177    Note much of the loop invariant code motion done here would normally
 178    be done by loop.c, which has more heuristics for when to move invariants
 179    out of loops.  At some point we might need to move some of those
 180    heuristics into gcse.c.  */
 181 #define FOLLOW_BACK_EDGES 1
 182
 183 /* We support GCSE via Partial Redundancy Elimination.  PRE optimizations
 184    are a superset of those done by GCSE.
 185
 186    We perform the following steps:
 187
 188    1) Compute basic block information.
 189
 190    2) Compute table of places where registers are set.
 191
 192    3) Perform copy/constant propagation.
 193
 194    4) Perform global cse.
 195
 196    5) Perform another pass of copy/constant propagation.
 197
 198    Two passes of copy/constant propagation are done because the first one
 199    enables more GCSE and the second one helps to clean up the copies that
 200    GCSE creates.  This is needed more for PRE than for Classic because Classic
 201    GCSE will try to use an existing register containing the common
 202    subexpression rather than create a new one.  This is harder to do for PRE
 203    because of the code motion (which Classic GCSE doesn't do).
 204
 205    Expressions we are interested in GCSE-ing are of the form
 206    (set (pseudo-reg) (expression)).
 207    Function want_to_gcse_p says what these are.
 208
 209    PRE handles moving invariant expressions out of loops (by treating them as
 210    partially redundant).
 211
 212    Eventually it would be nice to replace cse.c/gcse.c with SSA (static single
 213    assignment) based GVN (global value numbering).  L. T. Simpson's paper
 214    (Rice University) on value numbering is a useful reference for this.
 215
 216    **********************
 217
 218    We used to support multiple passes but there are diminishing returns in
 219    doing so.  The first pass usually makes 90% of the changes that are doable.
 220    A second pass can make a few more changes made possible by the first pass.
 221    Experiments show any further passes don't make enough changes to justify
 222    the expense.
 223
 224    A study of spec92 using an unlimited number of passes:
 225    [1 pass] = 1208 substitutions, [2] = 577, [3] = 202, [4] = 192, [5] = 83,
 226    [6] = 34, [7] = 17, [8] = 9, [9] = 4, [10] = 4, [11] = 2,
 227    [12] = 2, [13] = 1, [15] = 1, [16] = 2, [41] = 1
 228
 229    It was found doing copy propagation between each pass enables further
 230    substitutions.
 231
 232    PRE is quite expensive in complicated functions because the DFA can take
 233    awhile to converge.  Hence we only perform one pass.  Macro MAX_PASSES can
 234    be modified if one wants to experiment.
 235
 236    **********************
 237
 238    The steps for PRE are:
 239
 240    1) Build the hash table of expressions we wish to GCSE (expr_hash_table).
 241
 242    2) Perform the data flow analysis for PRE.
 243
 244    3) Delete the redundant instructions
 245
 246    4) Insert the required copies [if any] that make the partially
 247       redundant instructions fully redundant.
 248
 249    5) For other reaching expressions, insert an instruction to copy the value
 250       to a newly created pseudo that will reach the redundant instruction.
 251
 252    The deletion is done first so that when we do insertions we
 253    know which pseudo reg to use.
 254
 255    Various papers have argued that PRE DFA is expensive (O(n^2)) and others
 256    argue it is not.  The number of iterations for the algorithm to converge
 257    is typically 2-4 so I don't view it as that expensive (relatively speaking).
 258
 259    PRE GCSE depends heavily on the second CSE pass to clean up the copies
 260    we create.  To make an expression reach the place where it's redundant,
 261    the result of the expression is copied to a new register, and the redundant
 262    expression is deleted by replacing it with this new register.  Classic GCSE
 263    doesn't have this problem as much as it computes the reaching defs of
 264    each register in each block and thus can try to use an existing register.
 265
 266    **********************
 267
 268    A fair bit of simplicity is created by creating small functions for simple
 269    tasks, even when the function is only called in one place.  This may
 270    measurably slow things down [or may not] by creating more function call
 271    overhead than is necessary.  The source is laid out so that it's trivial
 272    to make the affected functions inline so that one can measure what speed
 273    up, if any, can be achieved, and maybe later when things settle things can
 274    be rearranged.
 275
 276    Help stamp out big monolithic functions!  */
 277 \f
 278 /* GCSE global vars.  */
 279
 280 /* -dG dump file.  */
 281 static FILE *gcse_file;
 282
 283 /* Note whether or not we should run jump optimization after gcse.  We
 284    want to do this for two cases.
 285
 286     * If we changed any jumps via cprop.
 287
 288     * If we added any labels via edge splitting.  */
 289
 290 static int run_jump_opt_after_gcse;
 291
 292 /* Bitmaps are normally not included in debugging dumps.
 293    However it's useful to be able to print them from GDB.
 294    We could create special functions for this, but it's simpler to
 295    just allow passing stderr to the dump_foo fns.  Since stderr can
 296    be a macro, we store a copy here.  */
 297 static FILE *debug_stderr;
 298
 299 /* An obstack for our working variables.  */
 300 static struct obstack gcse_obstack;
 301
 302 /* Non-zero for each mode that supports (set (reg) (reg)).
 303    This is trivially true for integer and floating point values.
 304    It may or may not be true for condition codes.  */
 305 static char can_copy_p[(int) NUM_MACHINE_MODES];
 306
 307 /* Non-zero if can_copy_p has been initialized.  */
 308 static int can_copy_init_p;
 309
 310 struct reg_use {rtx reg_rtx; };
 311
 312 /* Hash table of expressions.  */
 313
 314 struct expr
 315 {
 316   /* The expression (SET_SRC for expressions, PATTERN for assignments).  */
 317   rtx expr;
 318   /* Index in the available expression bitmaps.  */
 319   int bitmap_index;
 320   /* Next entry with the same hash.  */
 321   struct expr *next_same_hash;
 322   /* List of anticipatable occurrences in basic blocks in the function.
 323      An "anticipatable occurrence" is one that is the first occurrence in the
 324      basic block, the operands are not modified in the basic block prior
 325      to the occurrence and the output is not used between the start of
 326      the block and the occurrence.  */
 327   struct occr *antic_occr;
 328   /* List of available occurrence in basic blocks in the function.
 329      An "available occurrence" is one that is the last occurrence in the
 330      basic block and the operands are not modified by following statements in
 331      the basic block [including this insn].  */
 332   struct occr *avail_occr;
 333   /* Non-null if the computation is PRE redundant.
 334      The value is the newly created pseudo-reg to record a copy of the
 335      expression in all the places that reach the redundant copy.  */
 336   rtx reaching_reg;
 337 };
 338
 339 /* Occurrence of an expression.
 340    There is one per basic block.  If a pattern appears more than once the
 341    last appearance is used [or first for anticipatable expressions].  */
 342
 343 struct occr
 344 {
 345   /* Next occurrence of this expression.  */
 346   struct occr *next;
 347   /* The insn that computes the expression.  */
 348   rtx insn;
 349   /* Non-zero if this [anticipatable] occurrence has been deleted.  */
 350   char deleted_p;
 351   /* Non-zero if this [available] occurrence has been copied to
 352      reaching_reg.  */
 353   /* ??? This is mutually exclusive with deleted_p, so they could share
 354      the same byte.  */
 355   char copied_p;
 356 };
 357
 358 /* Expression and copy propagation hash tables.
 359    Each hash table is an array of buckets.
 360    ??? It is known that if it were an array of entries, structure elements
 361    `next_same_hash' and `bitmap_index' wouldn't be necessary.  However, it is
 362    not clear whether in the final analysis a sufficient amount of memory would
 363    be saved as the size of the available expression bitmaps would be larger
 364    [one could build a mapping table without holes afterwards though].
 365    Someday I'll perform the computation and figure it out.  */
 366
 367 /* Total size of the expression hash table, in elements.  */
 368 static unsigned int expr_hash_table_size;
 369
 370 /* The table itself.
 371    This is an array of `expr_hash_table_size' elements.  */
 372 static struct expr **expr_hash_table;
 373
 374 /* Total size of the copy propagation hash table, in elements.  */
 375 static int set_hash_table_size;
 376
 377 /* The table itself.
 378    This is an array of `set_hash_table_size' elements.  */
 379 static struct expr **set_hash_table;
 380
 381 /* Mapping of uids to cuids.
 382    Only real insns get cuids.  */
 383 static int *uid_cuid;
 384
 385 /* Highest UID in UID_CUID.  */
 386 static int max_uid;
 387
 388 /* Get the cuid of an insn.  */
 389 #define INSN_CUID(INSN) (uid_cuid[INSN_UID (INSN)])
 390
 391 /* Number of cuids.  */
 392 static int max_cuid;
 393
 394 /* Mapping of cuids to insns.  */
 395 static rtx *cuid_insn;
 396
 397 /* Get insn from cuid.  */
 398 #define CUID_INSN(CUID) (cuid_insn[CUID])
 399
 400 /* Maximum register number in function prior to doing gcse + 1.
 401    Registers created during this pass have regno >= max_gcse_regno.
 402    This is named with "gcse" to not collide with global of same name.  */
 403 static unsigned int max_gcse_regno;
 404
 405 /* Maximum number of cse-able expressions found.  */
 406 static int n_exprs;
 407
 408 /* Maximum number of assignments for copy propagation found.  */
 409 static int n_sets;
 410
 411 /* Table of registers that are modified.
 412
 413    For each register, each element is a list of places where the pseudo-reg
 414    is set.
 415
 416    For simplicity, GCSE is done on sets of pseudo-regs only.  PRE GCSE only
 417    requires knowledge of which blocks kill which regs [and thus could use
 418    a bitmap instead of the lists `reg_set_table' uses].
 419
 420    `reg_set_table' and could be turned into an array of bitmaps (num-bbs x
 421    num-regs) [however perhaps it may be useful to keep the data as is].  One
 422    advantage of recording things this way is that `reg_set_table' is fairly
 423    sparse with respect to pseudo regs but for hard regs could be fairly dense
 424    [relatively speaking].  And recording sets of pseudo-regs in lists speeds
 425    up functions like compute_transp since in the case of pseudo-regs we only
 426    need to iterate over the number of times a pseudo-reg is set, not over the
 427    number of basic blocks [clearly there is a bit of a slow down in the cases
 428    where a pseudo is set more than once in a block, however it is believed
 429    that the net effect is to speed things up].  This isn't done for hard-regs
 430    because recording call-clobbered hard-regs in `reg_set_table' at each
 431    function call can consume a fair bit of memory, and iterating over
 432    hard-regs stored this way in compute_transp will be more expensive.  */
 433
 434 typedef struct reg_set
 435 {
 436   /* The next setting of this register.  */
 437   struct reg_set *next;
 438   /* The insn where it was set.  */
 439   rtx insn;
 440 } reg_set;
 441
 442 static reg_set **reg_set_table;
 443
 444 /* Size of `reg_set_table'.
 445    The table starts out at max_gcse_regno + slop, and is enlarged as
 446    necessary.  */
 447 static int reg_set_table_size;
 448
 449 /* Amount to grow `reg_set_table' by when it's full.  */
 450 #define REG_SET_TABLE_SLOP 100
 451
 452 /* Bitmap containing one bit for each register in the program.
 453    Used when performing GCSE to track which registers have been set since
 454    the start of the basic block.  */
 455 static sbitmap reg_set_bitmap;
 456
 457 /* For each block, a bitmap of registers set in the block.
 458    This is used by expr_killed_p and compute_transp.
 459    It is computed during hash table computation and not by compute_sets
 460    as it includes registers added since the last pass (or between cprop and
 461    gcse) and it's currently not easy to realloc sbitmap vectors.  */
 462 static sbitmap *reg_set_in_block;
 463
 464 /* For each block, non-zero if memory is set in that block.
 465    This is computed during hash table computation and is used by
 466    expr_killed_p and compute_transp.
 467    ??? Handling of memory is very simple, we don't make any attempt
 468    to optimize things (later).
 469    ??? This can be computed by compute_sets since the information
 470    doesn't change.  */
 471 static char *mem_set_in_block;
 472
 473 /* Various variables for statistics gathering.  */
 474
 475 /* Memory used in a pass.
 476    This isn't intended to be absolutely precise.  Its intent is only
 477    to keep an eye on memory usage.  */
 478 static int bytes_used;
 479
 480 /* GCSE substitutions made.  */
 481 static int gcse_subst_count;
 482 /* Number of copy instructions created.  */
 483 static int gcse_create_count;
 484 /* Number of constants propagated.  */
 485 static int const_prop_count;
 486 /* Number of copys propagated.  */
 487 static int copy_prop_count;
 488 \f
 489 /* These variables are used by classic GCSE.
 490    Normally they'd be defined a bit later, but `rd_gen' needs to
 491    be declared sooner.  */
 492
 493 /* A bitmap of all ones for implementing the algorithm for available
 494    expressions and reaching definitions.  */
 495 /* ??? Available expression bitmaps have a different size than reaching
 496    definition bitmaps.  This should be the larger of the two, however, it
 497    is not currently used for reaching definitions.  */
 498 static sbitmap u_bitmap;
 499
 500 /* Each block has a bitmap of each type.
 501    The length of each blocks bitmap is:
 502
 503        max_cuid  - for reaching definitions
 504        n_exprs - for available expressions
 505
 506    Thus we view the bitmaps as 2 dimensional arrays.  i.e.
 507    rd_kill[block_num][cuid_num]
 508    ae_kill[block_num][expr_num]                  */
 509
 510 /* For reaching defs */
 511 static sbitmap *rd_kill, *rd_gen, *reaching_defs, *rd_out;
 512
 513 /* for available exprs */
 514 static sbitmap *ae_kill, *ae_gen, *ae_in, *ae_out;
 515
 516 /* Objects of this type are passed around by the null-pointer check
 517    removal routines.  */
 518 struct null_pointer_info
 519 {
 520   /* The basic block being processed.  */
 521   int current_block;
 522   /* The first register to be handled in this pass.  */
 523   unsigned int min_reg;
 524   /* One greater than the last register to be handled in this pass.  */
 525   unsigned int max_reg;
 526   sbitmap *nonnull_local;
 527   sbitmap *nonnull_killed;
 528 };
 529 \f
 530 static void compute_can_copy    PARAMS ((void));
 531 static char *gmalloc            PARAMS ((unsigned int));
 532 static char *grealloc           PARAMS ((char *, unsigned int));
 533 static char *gcse_alloc         PARAMS ((unsigned long));
 534 static void alloc_gcse_mem      PARAMS ((rtx));
 535 static void free_gcse_mem       PARAMS ((void));
 536 static void alloc_reg_set_mem   PARAMS ((int));
 537 static void free_reg_set_mem    PARAMS ((void));
 538 static int get_bitmap_width     PARAMS ((int, int, int));
 539 static void record_one_set      PARAMS ((int, rtx));
 540 static void record_set_info     PARAMS ((rtx, rtx, void *));
 541 static void compute_sets        PARAMS ((rtx));
 542 static void hash_scan_insn      PARAMS ((rtx, int, int));
 543 static void hash_scan_set       PARAMS ((rtx, rtx, int));
 544 static void hash_scan_clobber   PARAMS ((rtx, rtx));
 545 static void hash_scan_call      PARAMS ((rtx, rtx));
 546 static int want_to_gcse_p       PARAMS ((rtx));
 547 static int oprs_unchanged_p     PARAMS ((rtx, rtx, int));
 548 static int oprs_anticipatable_p PARAMS ((rtx, rtx));
 549 static int oprs_available_p     PARAMS ((rtx, rtx));
 550 static void insert_expr_in_table PARAMS ((rtx, enum machine_mode, rtx,
 551                                           int, int));
 552 static void insert_set_in_table PARAMS ((rtx, rtx));
 553 static unsigned int hash_expr   PARAMS ((rtx, enum machine_mode, int *, int));
 554 static unsigned int hash_expr_1 PARAMS ((rtx, enum machine_mode, int *));
 555 static unsigned int hash_set    PARAMS ((int, int));
 556 static int expr_equiv_p         PARAMS ((rtx, rtx));
 557 static void record_last_reg_set_info PARAMS ((rtx, int));
 558 static void record_last_mem_set_info PARAMS ((rtx));
 559 static void record_last_set_info PARAMS ((rtx, rtx, void *));
 560 static void compute_hash_table  PARAMS ((int));
 561 static void alloc_set_hash_table PARAMS ((int));
 562 static void free_set_hash_table PARAMS ((void));
 563 static void compute_set_hash_table PARAMS ((void));
 564 static void alloc_expr_hash_table PARAMS ((unsigned int));
 565 static void free_expr_hash_table PARAMS ((void));
 566 static void compute_expr_hash_table PARAMS ((void));
 567 static void dump_hash_table     PARAMS ((FILE *, const char *, struct expr **,
 568                                          int, int));
 569 static struct expr *lookup_expr PARAMS ((rtx));
 570 static struct expr *lookup_set  PARAMS ((unsigned int, rtx));
 571 static struct expr *next_set    PARAMS ((unsigned int, struct expr *));
 572 static void reset_opr_set_tables PARAMS ((void));
 573 static int oprs_not_set_p       PARAMS ((rtx, rtx));
 574 static void mark_call           PARAMS ((rtx));
 575 static void mark_set            PARAMS ((rtx, rtx));
 576 static void mark_clobber        PARAMS ((rtx, rtx));
 577 static void mark_oprs_set       PARAMS ((rtx));
 578 static void alloc_cprop_mem     PARAMS ((int, int));
 579 static void free_cprop_mem      PARAMS ((void));
 580 static void compute_transp      PARAMS ((rtx, int, sbitmap *, int));
 581 static void compute_transpout   PARAMS ((void));
 582 static void compute_local_properties PARAMS ((sbitmap *, sbitmap *, sbitmap *,
 583                                               int));
 584 static void compute_cprop_data  PARAMS ((void));
 585 static void find_used_regs      PARAMS ((rtx));
 586 static int try_replace_reg      PARAMS ((rtx, rtx, rtx));
 587 static struct expr *find_avail_set PARAMS ((int, rtx));
 588 static int cprop_jump           PARAMS ((rtx, rtx, struct reg_use *, rtx));
 589 #ifdef HAVE_cc0
 590 static int cprop_cc0_jump       PARAMS ((rtx, struct reg_use *, rtx));
 591 #endif
 592 static int cprop_insn           PARAMS ((rtx, int));
 593 static int cprop                PARAMS ((int));
 594 static int one_cprop_pass       PARAMS ((int, int));
 595 static void alloc_pre_mem       PARAMS ((int, int));
 596 static void free_pre_mem        PARAMS ((void));
 597 static void compute_pre_data    PARAMS ((void));
 598 static int pre_expr_reaches_here_p PARAMS ((int, struct expr *, int));
 599 static void insert_insn_end_bb  PARAMS ((struct expr *, int, int));
 600 static void pre_insert_copy_insn PARAMS ((struct expr *, rtx));
 601 static void pre_insert_copies   PARAMS ((void));
 602 static int pre_delete           PARAMS ((void));
 603 static int pre_gcse             PARAMS ((void));
 604 static int one_pre_gcse_pass    PARAMS ((int));
 605 static void add_label_notes     PARAMS ((rtx, rtx));
 606 static void alloc_code_hoist_mem PARAMS ((int, int));
 607 static void free_code_hoist_mem PARAMS ((void));
 608 static void compute_code_hoist_vbeinout PARAMS ((void));
 609 static void compute_code_hoist_data PARAMS ((void));
 610 static int hoist_expr_reaches_here_p PARAMS ((int, int, int, char *));
 611 static void hoist_code          PARAMS ((void));
 612 static int one_code_hoisting_pass PARAMS ((void));
 613 static void alloc_rd_mem        PARAMS ((int, int));
 614 static void free_rd_mem         PARAMS ((void));
 615 static void handle_rd_kill_set  PARAMS ((rtx, int, int));
 616 static void compute_kill_rd     PARAMS ((void));
 617 static void compute_rd          PARAMS ((void));
 618 static void alloc_avail_expr_mem PARAMS ((int, int));
 619 static void free_avail_expr_mem PARAMS ((void));
 620 static void compute_ae_gen      PARAMS ((void));
 621 static int expr_killed_p        PARAMS ((rtx, int));
 622 static void compute_ae_kill     PARAMS ((sbitmap *, sbitmap *));
 623 static int expr_reaches_here_p  PARAMS ((struct occr *, struct expr *,
 624                                          int, int));
 625 static rtx computing_insn       PARAMS ((struct expr *, rtx));
 626 static int def_reaches_here_p   PARAMS ((rtx, rtx));
 627 static int can_disregard_other_sets PARAMS ((struct reg_set **, rtx, int));
 628 static int handle_avail_expr    PARAMS ((rtx, struct expr *));
 629 static int classic_gcse         PARAMS ((void));
 630 static int one_classic_gcse_pass PARAMS ((int));
 631 static void invalidate_nonnull_info PARAMS ((rtx, rtx, void *));
 632 static void delete_null_pointer_checks_1 PARAMS ((unsigned int *, sbitmap *,
 633                                                   sbitmap *,
 634                                                   struct null_pointer_info *));
 635 static rtx process_insert_insn  PARAMS ((struct expr *));
 636 static int pre_edge_insert      PARAMS ((struct edge_list *, struct expr **));
 637 static int expr_reaches_here_p_work PARAMS ((struct occr *, struct expr *,
 638                                              int, int, char *));
 639 static int pre_expr_reaches_here_p_work PARAMS ((int, struct expr *,
 640                                                  int, char *));
 641 \f
 642 /* Entry point for global common subexpression elimination.
 643    F is the first instruction in the function.  */
 644
 645 int
 646 gcse_main (f, file)
 647      rtx f;
 648      FILE *file;
 649 {
 650   int changed, pass;
 651   /* Bytes used at start of pass.  */
 652   int initial_bytes_used;
 653   /* Maximum number of bytes used by a pass.  */
 654   int max_pass_bytes;
 655   /* Point to release obstack data from for each pass.  */
 656   char *gcse_obstack_bottom;
 657
 658   /* We do not construct an accurate cfg in functions which call
 659      setjmp, so just punt to be safe.  */
 660   if (current_function_calls_setjmp)
 661     return 0;
 662
 663   /* Assume that we do not need to run jump optimizations after gcse.  */
 664   run_jump_opt_after_gcse = 0;
 665
 666   /* For calling dump_foo fns from gdb.  */
 667   debug_stderr = stderr;
 668   gcse_file = file;
 669
 670   /* Identify the basic block information for this function, including
 671      successors and predecessors.  */
 672   max_gcse_regno = max_reg_num ();
 673
 674   if (file)
 675     dump_flow_info (file);
 676
 677   /* Return if there's nothing to do.  */
 678   if (n_basic_blocks <= 1)
 679     return 0;
 680
 681   /* Trying to perform global optimizations on flow graphs which have
 682      a high connectivity will take a long time and is unlikely to be
 683      particularly useful.
 684
 685      In normal circumstances a cfg should have about twice has many edges
 686      as blocks.  But we do not want to punish small functions which have
 687      a couple switch statements.  So we require a relatively large number
 688      of basic blocks and the ratio of edges to blocks to be high.  */
 689   if (n_basic_blocks > 1000 && n_edges / n_basic_blocks >= 20)
 690     return 0;
 691
 692   /* See what modes support reg/reg copy operations.  */
 693   if (! can_copy_init_p)
 694     {
 695       compute_can_copy ();
 696       can_copy_init_p = 1;
 697     }
 698
 699   gcc_obstack_init (&gcse_obstack);
 700   bytes_used = 0;
 701
 702   /* Record where pseudo-registers are set.  This data is kept accurate
 703      during each pass.  ??? We could also record hard-reg information here
 704      [since it's unchanging], however it is currently done during hash table
 705      computation.
 706
 707      It may be tempting to compute MEM set information here too, but MEM sets
 708      will be subject to code motion one day and thus we need to compute
 709      information about memory sets when we build the hash tables.  */
 710
 711   alloc_reg_set_mem (max_gcse_regno);
 712   compute_sets (f);
 713
 714   pass = 0;
 715   initial_bytes_used = bytes_used;
 716   max_pass_bytes = 0;
 717   gcse_obstack_bottom = gcse_alloc (1);
 718   changed = 1;
 719   while (changed && pass < MAX_PASSES)
 720     {
 721       changed = 0;
 722       if (file)
 723         fprintf (file, "GCSE pass %d\n\n", pass + 1);
 724
 725       /* Initialize bytes_used to the space for the pred/succ lists,
 726          and the reg_set_table data.  */
 727       bytes_used = initial_bytes_used;
 728
 729       /* Each pass may create new registers, so recalculate each time.  */
 730       max_gcse_regno = max_reg_num ();
 731
 732       alloc_gcse_mem (f);
 733
 734       /* Don't allow constant propagation to modify jumps
 735          during this pass.  */
 736       changed = one_cprop_pass (pass + 1, 0);
 737
 738       if (optimize_size)
 739         changed |= one_classic_gcse_pass (pass + 1);
 740       else
 741         {
 742           changed |= one_pre_gcse_pass (pass + 1);
 743           free_reg_set_mem ();
 744           alloc_reg_set_mem (max_reg_num ());
 745           compute_sets (f);
 746           run_jump_opt_after_gcse = 1;
 747         }
 748
 749       if (max_pass_bytes < bytes_used)
 750         max_pass_bytes = bytes_used;
 751
 752       /* Free up memory, then reallocate for code hoisting.  We can
 753          not re-use the existing allocated memory because the tables
 754          will not have info for the insns or registers created by
 755          partial redundancy elimination.  */
 756       free_gcse_mem ();
 757
 758       /* It does not make sense to run code hoisting unless we optimizing
 759          for code size -- it rarely makes programs faster, and can make
 760          them bigger if we did partial redundancy elimination (when optimizing
 761          for space, we use a classic gcse algorithm instead of partial
 762          redundancy algorithms).  */
 763       if (optimize_size)
 764         {
 765           max_gcse_regno = max_reg_num ();
 766           alloc_gcse_mem (f);
 767           changed |= one_code_hoisting_pass ();
 768           free_gcse_mem ();
 769
 770           if (max_pass_bytes < bytes_used)
 771             max_pass_bytes = bytes_used;
 772         }
 773
 774       if (file)
 775         {
 776           fprintf (file, "\n");
 777           fflush (file);
 778         }
 779
 780       obstack_free (&gcse_obstack, gcse_obstack_bottom);
 781       pass++;
 782     }
 783
 784   /* Do one last pass of copy propagation, including cprop into
 785      conditional jumps.  */
 786
 787   max_gcse_regno = max_reg_num ();
 788   alloc_gcse_mem (f);
 789   /* This time, go ahead and allow cprop to alter jumps.  */
 790   one_cprop_pass (pass + 1, 1);
 791   free_gcse_mem ();
 792
 793   if (file)
 794     {
 795       fprintf (file, "GCSE of %s: %d basic blocks, ",
 796                current_function_name, n_basic_blocks);
 797       fprintf (file, "%d pass%s, %d bytes\n\n",
 798                pass, pass > 1 ? "es" : "", max_pass_bytes);
 799     }
 800
 801   obstack_free (&gcse_obstack, NULL_PTR);
 802   free_reg_set_mem ();
 803   return run_jump_opt_after_gcse;
 804 }
 805 \f
 806 /* Misc. utilities.  */
 807
 808 /* Compute which modes support reg/reg copy operations.  */
 809
 810 static void
 811 compute_can_copy ()
 812 {
 813   int i;
 814 #ifndef AVOID_CCMODE_COPIES
 815   rtx reg,insn;
 816 #endif
 817   char *free_point = (char *) oballoc (1);
 818
 819   bzero (can_copy_p, NUM_MACHINE_MODES);
 820
 821   start_sequence ();
 822   for (i = 0; i < NUM_MACHINE_MODES; i++)
 823     if (GET_MODE_CLASS (i) == MODE_CC)
 824       {
 825 #ifdef AVOID_CCMODE_COPIES
 826         can_copy_p[i] = 0;
 827 #else
 828         reg = gen_rtx_REG ((enum machine_mode) i, LAST_VIRTUAL_REGISTER + 1);
 829         insn = emit_insn (gen_rtx_SET (VOIDmode, reg, reg));
 830         if (recog (PATTERN (insn), insn, NULL_PTR) >= 0)
 831           can_copy_p[i] = 1;
 832 #endif
 833       }
 834     else
 835       can_copy_p[i] = 1;
 836
 837   end_sequence ();
 838
 839   /* Free the objects we just allocated.  */
 840   obfree (free_point);
 841 }
 842 \f
 843 /* Cover function to xmalloc to record bytes allocated.  */
 844
 845 static char *
 846 gmalloc (size)
 847      unsigned int size;
 848 {
 849   bytes_used += size;
 850   return xmalloc (size);
 851 }
 852
 853 /* Cover function to xrealloc.
 854    We don't record the additional size since we don't know it.
 855    It won't affect memory usage stats much anyway.  */
 856
 857 static char *
 858 grealloc (ptr, size)
 859      char *ptr;
 860      unsigned int size;
 861 {
 862   return xrealloc (ptr, size);
 863 }
 864
 865 /* Cover function to obstack_alloc.
 866    We don't need to record the bytes allocated here since
 867    obstack_chunk_alloc is set to gmalloc.  */
 868
 869 static char *
 870 gcse_alloc (size)
 871      unsigned long size;
 872 {
 873   return (char *) obstack_alloc (&gcse_obstack, size);
 874 }
 875
 876 /* Allocate memory for the cuid mapping array,
 877    and reg/memory set tracking tables.
 878
 879    This is called at the start of each pass.  */
 880
 881 static void
 882 alloc_gcse_mem (f)
 883      rtx f;
 884 {
 885   int i,n;
 886   rtx insn;
 887
 888   /* Find the largest UID and create a mapping from UIDs to CUIDs.
 889      CUIDs are like UIDs except they increase monotonically, have no gaps,
 890      and only apply to real insns.  */
 891
 892   max_uid = get_max_uid ();
 893   n = (max_uid + 1) * sizeof (int);
 894   uid_cuid = (int *) gmalloc (n);
 895   bzero ((char *) uid_cuid, n);
 896   for (insn = f, i = 0; insn; insn = NEXT_INSN (insn))
 897     {
 898       if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
 899         INSN_CUID (insn) = i++;
 900       else
 901         INSN_CUID (insn) = i;
 902     }
 903
 904   /* Create a table mapping cuids to insns.  */
 905
 906   max_cuid = i;
 907   n = (max_cuid + 1) * sizeof (rtx);
 908   cuid_insn = (rtx *) gmalloc (n);
 909   bzero ((char *) cuid_insn, n);
 910   for (insn = f, i = 0; insn; insn = NEXT_INSN (insn))
 911     if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
 912       CUID_INSN (i++) = insn;
 913
 914   /* Allocate vars to track sets of regs.  */
 915   reg_set_bitmap = (sbitmap) sbitmap_alloc (max_gcse_regno);
 916
 917   /* Allocate vars to track sets of regs, memory per block.  */
 918   reg_set_in_block = (sbitmap *) sbitmap_vector_alloc (n_basic_blocks,
 919                                                        max_gcse_regno);
 920   mem_set_in_block = (char *) gmalloc (n_basic_blocks);
 921 }
 922
 923 /* Free memory allocated by alloc_gcse_mem.  */
 924
 925 static void
 926 free_gcse_mem ()
 927 {
 928   free (uid_cuid);
 929   free (cuid_insn);
 930
 931   free (reg_set_bitmap);
 932
 933   free (reg_set_in_block);
 934   free (mem_set_in_block);
 935 }
 936
 937 /* Many of the global optimization algorithms work by solving dataflow
 938    equations for various expressions.  Initially, some local value is
 939    computed for each expression in each block.  Then, the values across the
 940    various blocks are combined (by following flow graph edges) to arrive at
 941    global values.  Conceptually, each set of equations is independent.  We
 942    may therefore solve all the equations in parallel, solve them one at a
 943    time, or pick any intermediate approach.
 944
 945    When you're going to need N two-dimensional bitmaps, each X (say, the
 946    number of blocks) by Y (say, the number of expressions), call this
 947    function.  It's not important what X and Y represent; only that Y
 948    correspond to the things that can be done in parallel.  This function will
 949    return an appropriate chunking factor C; you should solve C sets of
 950    equations in parallel.  By going through this function, we can easily
 951    trade space against time; by solving fewer equations in parallel we use
 952    less space.  */
 953
 954 static int
 955 get_bitmap_width (n, x, y)
 956      int n;
 957      int x;
 958      int y;
 959 {
 960   /* It's not really worth figuring out *exactly* how much memory will
 961      be used by a particular choice.  The important thing is to get
 962      something approximately right.  */
 963   size_t max_bitmap_memory = 10 * 1024 * 1024;
 964
 965   /* The number of bytes we'd use for a single column of minimum
 966      width.  */
 967   size_t column_size = n * x * sizeof (SBITMAP_ELT_TYPE);
 968
 969   /* Often, it's reasonable just to solve all the equations in
 970      parallel.  */
 971   if (column_size * SBITMAP_SET_SIZE (y) <= max_bitmap_memory)
 972     return y;
 973
 974   /* Otherwise, pick the largest width we can, without going over the
 975      limit.  */
 976   return SBITMAP_ELT_BITS * ((max_bitmap_memory + column_size - 1)
 977                              / column_size);
 978 }
 979 \f
 980 /* Compute the local properties of each recorded expression.
 981
 982    Local properties are those that are defined by the block, irrespective of
 983    other blocks.
 984
 985    An expression is transparent in a block if its operands are not modified
 986    in the block.
 987
 988    An expression is computed (locally available) in a block if it is computed
 989    at least once and expression would contain the same value if the
 990    computation was moved to the end of the block.
 991
 992    An expression is locally anticipatable in a block if it is computed at
 993    least once and expression would contain the same value if the computation
 994    was moved to the beginning of the block.
 995
 996    We call this routine for cprop, pre and code hoisting.  They all compute
 997    basically the same information and thus can easily share this code.
 998
 999    TRANSP, COMP, and ANTLOC are destination sbitmaps for recording local
1000    properties.  If NULL, then it is not necessary to compute or record that
1001    particular property.
1002
1003    SETP controls which hash table to look at.  If zero, this routine looks at
1004    the expr hash table; if nonzero this routine looks at the set hash table.
1005    Additionally, TRANSP is computed as ~TRANSP, since this is really cprop's
1006    ABSALTERED.  */
1007
1008 static void
1009 compute_local_properties (transp, comp, antloc, setp)
1010      sbitmap *transp;
1011      sbitmap *comp;
1012      sbitmap *antloc;
1013      int setp;
1014 {
1015   unsigned int i, hash_table_size;
1016   struct expr **hash_table;
1017
1018   /* Initialize any bitmaps that were passed in.  */
1019   if (transp)
1020     {
1021       if (setp)
1022         sbitmap_vector_zero (transp, n_basic_blocks);
1023       else
1024         sbitmap_vector_ones (transp, n_basic_blocks);
1025     }
1026
1027   if (comp)
1028     sbitmap_vector_zero (comp, n_basic_blocks);
1029   if (antloc)
1030     sbitmap_vector_zero (antloc, n_basic_blocks);
1031
1032   /* We use the same code for cprop, pre and hoisting.  For cprop
1033      we care about the set hash table, for pre and hoisting we
1034      care about the expr hash table.  */
1035   hash_table_size = setp ? set_hash_table_size : expr_hash_table_size;
1036   hash_table = setp ? set_hash_table : expr_hash_table;
1037
1038   for (i = 0; i < hash_table_size; i++)
1039     {
1040       struct expr *expr;
1041
1042       for (expr = hash_table[i]; expr != NULL; expr = expr->next_same_hash)
1043         {
1044           int indx = expr->bitmap_index;
1045           struct occr *occr;
1046
1047           /* The expression is transparent in this block if it is not killed.
1048              We start by assuming all are transparent [none are killed], and
1049              then reset the bits for those that are.  */
1050           if (transp)
1051             compute_transp (expr->expr, indx, transp, setp);
1052
1053           /* The occurrences recorded in antic_occr are exactly those that
1054              we want to set to non-zero in ANTLOC.  */
1055           if (antloc)
1056             for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
1057               {
1058                 SET_BIT (antloc[BLOCK_NUM (occr->insn)], indx);
1059
1060                 /* While we're scanning the table, this is a good place to
1061                    initialize this.  */
1062                 occr->deleted_p = 0;
1063               }
1064
1065           /* The occurrences recorded in avail_occr are exactly those that
1066              we want to set to non-zero in COMP.  */
1067           if (comp)
1068             for (occr = expr->avail_occr; occr != NULL; occr = occr->next)
1069               {
1070                 SET_BIT (comp[BLOCK_NUM (occr->insn)], indx);
1071
1072                 /* While we're scanning the table, this is a good place to
1073                    initialize this.  */
1074                 occr->copied_p = 0;
1075               }
1076
1077           /* While we're scanning the table, this is a good place to
1078              initialize this.  */
1079           expr->reaching_reg = 0;
1080         }
1081     }
1082 }
1083 \f
1084 /* Register set information.
1085
1086    `reg_set_table' records where each register is set or otherwise
1087    modified.  */
1088
1089 static struct obstack reg_set_obstack;
1090
1091 static void
1092 alloc_reg_set_mem (n_regs)
1093      int n_regs;
1094 {
1095   unsigned int n;
1096
1097   reg_set_table_size = n_regs + REG_SET_TABLE_SLOP;
1098   n = reg_set_table_size * sizeof (struct reg_set *);
1099   reg_set_table = (struct reg_set **) gmalloc (n);
1100   bzero ((char *) reg_set_table, n);
1101
1102   gcc_obstack_init (&reg_set_obstack);
1103 }
1104
1105 static void
1106 free_reg_set_mem ()
1107 {
1108   free (reg_set_table);
1109   obstack_free (&reg_set_obstack, NULL_PTR);
1110 }
1111
1112 /* Record REGNO in the reg_set table.  */
1113
1114 static void
1115 record_one_set (regno, insn)
1116      int regno;
1117      rtx insn;
1118 {
1119   /* allocate a new reg_set element and link it onto the list */
1120   struct reg_set *new_reg_info, *reg_info_ptr1, *reg_info_ptr2;
1121
1122   /* If the table isn't big enough, enlarge it.  */
1123   if (regno >= reg_set_table_size)
1124     {
1125       int new_size = regno + REG_SET_TABLE_SLOP;
1126
1127       reg_set_table
1128         = (struct reg_set **) grealloc ((char *) reg_set_table,
1129                                         new_size * sizeof (struct reg_set *));
1130       bzero ((char *) (reg_set_table + reg_set_table_size),
1131              (new_size - reg_set_table_size) * sizeof (struct reg_set *));
1132       reg_set_table_size = new_size;
1133     }
1134
1135   new_reg_info = (struct reg_set *) obstack_alloc (&reg_set_obstack,
1136                                                    sizeof (struct reg_set));
1137   bytes_used += sizeof (struct reg_set);
1138   new_reg_info->insn = insn;
1139   new_reg_info->next = NULL;
1140   if (reg_set_table[regno] == NULL)
1141     reg_set_table[regno] = new_reg_info;
1142   else
1143     {
1144       reg_info_ptr1 = reg_info_ptr2 = reg_set_table[regno];
1145       /* ??? One could keep a "last" pointer to speed this up.  */
1146       while (reg_info_ptr1 != NULL)
1147         {
1148           reg_info_ptr2 = reg_info_ptr1;
1149           reg_info_ptr1 = reg_info_ptr1->next;
1150         }
1151
1152       reg_info_ptr2->next = new_reg_info;
1153     }
1154 }
1155
1156 /* Called from compute_sets via note_stores to handle one SET or CLOBBER in
1157    an insn.  The DATA is really the instruction in which the SET is
1158    occurring.  */
1159
1160 static void
1161 record_set_info (dest, setter, data)
1162      rtx dest, setter ATTRIBUTE_UNUSED;
1163      void *data;
1164 {
1165   rtx record_set_insn = (rtx) data;
1166
1167   if (GET_CODE (dest) == REG && REGNO (dest) >= FIRST_PSEUDO_REGISTER)
1168     record_one_set (REGNO (dest), record_set_insn);
1169 }
1170
1171 /* Scan the function and record each set of each pseudo-register.
1172
1173    This is called once, at the start of the gcse pass.  See the comments for
1174    `reg_set_table' for further documenation.  */
1175
1176 static void
1177 compute_sets (f)
1178      rtx f;
1179 {
1180   rtx insn;
1181
1182   for (insn = f; insn != 0; insn = NEXT_INSN (insn))
1183     if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
1184       note_stores (PATTERN (insn), record_set_info, insn);
1185 }
1186 \f
1187 /* Hash table support.  */
1188
1189 /* For each register, the cuid of the first/last insn in the block to set it,
1190    or -1 if not set.  */
1191 #define NEVER_SET -1
1192 static int *reg_first_set;
1193 static int *reg_last_set;
1194
1195 /* While computing "first/last set" info, this is the CUID of first/last insn
1196    to set memory or -1 if not set.  `mem_last_set' is also used when
1197    performing GCSE to record whether memory has been set since the beginning
1198    of the block.
1199
1200    Note that handling of memory is very simple, we don't make any attempt
1201    to optimize things (later).  */
1202 static int mem_first_set;
1203 static int mem_last_set;
1204
1205 /* Perform a quick check whether X, the source of a set, is something
1206    we want to consider for GCSE.  */
1207
1208 static int
1209 want_to_gcse_p (x)
1210      rtx x;
1211 {
1212   switch (GET_CODE (x))
1213     {
1214     case REG:
1215     case SUBREG:
1216     case CONST_INT:
1217     case CONST_DOUBLE:
1218     case CALL:
1219       return 0;
1220
1221     default:
1222       break;
1223     }
1224
1225   return 1;
1226 }
1227
1228 /* Return non-zero if the operands of expression X are unchanged from the
1229    start of INSN's basic block up to but not including INSN (if AVAIL_P == 0),
1230    or from INSN to the end of INSN's basic block (if AVAIL_P != 0).  */
1231
1232 static int
1233 oprs_unchanged_p (x, insn, avail_p)
1234      rtx x, insn;
1235      int avail_p;
1236 {
1237   int i, j;
1238   enum rtx_code code;
1239   const char *fmt;
1240
1241   if (x == 0)
1242     return 1;
1243
1244   code = GET_CODE (x);
1245   switch (code)
1246     {
1247     case REG:
1248       if (avail_p)
1249         return (reg_last_set[REGNO (x)] == NEVER_SET
1250                 || reg_last_set[REGNO (x)] < INSN_CUID (insn));
1251       else
1252         return (reg_first_set[REGNO (x)] == NEVER_SET
1253                 || reg_first_set[REGNO (x)] >= INSN_CUID (insn));
1254
1255     case MEM:
1256       if (avail_p && mem_last_set != NEVER_SET
1257           && mem_last_set >= INSN_CUID (insn))
1258         return 0;
1259       else if (! avail_p && mem_first_set != NEVER_SET
1260                && mem_first_set < INSN_CUID (insn))
1261         return 0;
1262       else
1263         return oprs_unchanged_p (XEXP (x, 0), insn, avail_p);
1264
1265     case PRE_DEC:
1266     case PRE_INC:
1267     case POST_DEC:
1268     case POST_INC:
1269       return 0;
1270
1271     case PC:
1272     case CC0: /*FIXME*/
1273     case CONST:
1274     case CONST_INT:
1275     case CONST_DOUBLE:
1276     case SYMBOL_REF:
1277     case LABEL_REF:
1278     case ADDR_VEC:
1279     case ADDR_DIFF_VEC:
1280       return 1;
1281
1282     default:
1283       break;
1284     }
1285
1286   for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
1287     {
1288       if (fmt[i] == 'e')
1289         {
1290           /* If we are about to do the last recursive call needed at this
1291              level, change it into iteration.  This function is called enough
1292              to be worth it.  */
1293           if (i == 0)
1294             return oprs_unchanged_p (XEXP (x, i), insn, avail_p);
1295
1296           else if (! oprs_unchanged_p (XEXP (x, i), insn, avail_p))
1297             return 0;
1298         }
1299       else if (fmt[i] == 'E')
1300         for (j = 0; j < XVECLEN (x, i); j++)
1301           if (! oprs_unchanged_p (XVECEXP (x, i, j), insn, avail_p))
1302             return 0;
1303     }
1304
1305   return 1;
1306 }
1307
1308 /* Return non-zero if the operands of expression X are unchanged from
1309    the start of INSN's basic block up to but not including INSN.  */
1310
1311 static int
1312 oprs_anticipatable_p (x, insn)
1313      rtx x, insn;
1314 {
1315   return oprs_unchanged_p (x, insn, 0);
1316 }
1317
1318 /* Return non-zero if the operands of expression X are unchanged from
1319    INSN to the end of INSN's basic block.  */
1320
1321 static int
1322 oprs_available_p (x, insn)
1323      rtx x, insn;
1324 {
1325   return oprs_unchanged_p (x, insn, 1);
1326 }
1327
1328 /* Hash expression X.
1329
1330    MODE is only used if X is a CONST_INT.  DO_NOT_RECORD_P is a boolean
1331    indicating if a volatile operand is found or if the expression contains
1332    something we don't want to insert in the table.
1333
1334    ??? One might want to merge this with canon_hash.  Later.  */
1335
1336 static unsigned int
1337 hash_expr (x, mode, do_not_record_p, hash_table_size)
1338      rtx x;
1339      enum machine_mode mode;
1340      int *do_not_record_p;
1341      int hash_table_size;
1342 {
1343   unsigned int hash;
1344
1345   *do_not_record_p = 0;
1346
1347   hash = hash_expr_1 (x, mode, do_not_record_p);
1348   return hash % hash_table_size;
1349 }
1350
1351 /* Subroutine of hash_expr to do the actual work.  */
1352
1353 static unsigned int
1354 hash_expr_1 (x, mode, do_not_record_p)
1355      rtx x;
1356      enum machine_mode mode;
1357      int *do_not_record_p;
1358 {
1359   int i, j;
1360   unsigned hash = 0;
1361   enum rtx_code code;
1362   const char *fmt;
1363
1364   /* Used to turn recursion into iteration.  We can't rely on GCC's
1365      tail-recursion eliminatio since we need to keep accumulating values
1366      in HASH.  */
1367
1368   if (x == 0)
1369     return hash;
1370
1371  repeat:
1372   code = GET_CODE (x);
1373   switch (code)
1374     {
1375     case REG:
1376       hash += ((unsigned int) REG << 7) + REGNO (x);
1377       return hash;
1378
1379     case CONST_INT:
1380       hash += (((unsigned int) CONST_INT << 7) + (unsigned int) mode
1381                + (unsigned int) INTVAL (x));
1382       return hash;
1383
1384     case CONST_DOUBLE:
1385       /* This is like the general case, except that it only counts
1386          the integers representing the constant.  */
1387       hash += (unsigned int) code + (unsigned int) GET_MODE (x);
1388       if (GET_MODE (x) != VOIDmode)
1389         for (i = 2; i < GET_RTX_LENGTH (CONST_DOUBLE); i++)
1390           hash += (unsigned int) XWINT (x, i);
1391       else
1392         hash += ((unsigned int) CONST_DOUBLE_LOW (x)
1393                  + (unsigned int) CONST_DOUBLE_HIGH (x));
1394       return hash;
1395
1396       /* Assume there is only one rtx object for any given label.  */
1397     case LABEL_REF:
1398       /* We don't hash on the address of the CODE_LABEL to avoid bootstrap
1399          differences and differences between each stage's debugging dumps.  */
1400       hash += (((unsigned int) LABEL_REF << 7)
1401                + CODE_LABEL_NUMBER (XEXP (x, 0)));
1402       return hash;
1403
1404     case SYMBOL_REF:
1405       {
1406         /* Don't hash on the symbol's address to avoid bootstrap differences.
1407            Different hash values may cause expressions to be recorded in
1408            different orders and thus different registers to be used in the
1409            final assembler.  This also avoids differences in the dump files
1410            between various stages.  */
1411         unsigned int h = 0;
1412         const unsigned char *p = (const unsigned char *) XSTR (x, 0);
1413
1414         while (*p)
1415           h += (h << 7) + *p++; /* ??? revisit */
1416
1417         hash += ((unsigned int) SYMBOL_REF << 7) + h;
1418         return hash;
1419       }
1420
1421     case MEM:
1422       if (MEM_VOLATILE_P (x))
1423         {
1424           *do_not_record_p = 1;
1425           return 0;
1426         }
1427
1428       hash += (unsigned int) MEM;
1429       hash += MEM_ALIAS_SET (x);
1430       x = XEXP (x, 0);
1431       goto repeat;
1432
1433     case PRE_DEC:
1434     case PRE_INC:
1435     case POST_DEC:
1436     case POST_INC:
1437     case PC:
1438     case CC0:
1439     case CALL:
1440     case UNSPEC_VOLATILE:
1441       *do_not_record_p = 1;
1442       return 0;
1443
1444     case ASM_OPERANDS:
1445       if (MEM_VOLATILE_P (x))
1446         {
1447           *do_not_record_p = 1;
1448           return 0;
1449         }
1450
1451     default:
1452       break;
1453     }
1454
1455   hash += (unsigned) code + (unsigned) GET_MODE (x);
1456   for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
1457     {
1458       if (fmt[i] == 'e')
1459         {
1460           /* If we are about to do the last recursive call
1461              needed at this level, change it into iteration.
1462              This function is called enough to be worth it.  */
1463           if (i == 0)
1464             {
1465               x = XEXP (x, i);
1466               goto repeat;
1467             }
1468
1469           hash += hash_expr_1 (XEXP (x, i), 0, do_not_record_p);
1470           if (*do_not_record_p)
1471             return 0;
1472         }
1473
1474       else if (fmt[i] == 'E')
1475         for (j = 0; j < XVECLEN (x, i); j++)
1476           {
1477             hash += hash_expr_1 (XVECEXP (x, i, j), 0, do_not_record_p);
1478             if (*do_not_record_p)
1479               return 0;
1480           }
1481
1482       else if (fmt[i] == 's')
1483         {
1484           register const unsigned char *p =
1485             (const unsigned char *) XSTR (x, i);
1486
1487           if (p)
1488             while (*p)
1489               hash += *p++;
1490         }
1491       else if (fmt[i] == 'i')
1492         hash += (unsigned int) XINT (x, i);
1493       else
1494         abort ();
1495     }
1496
1497   return hash;
1498 }
1499
1500 /* Hash a set of register REGNO.
1501
1502    Sets are hashed on the register that is set.  This simplifies the PRE copy
1503    propagation code.
1504
1505    ??? May need to make things more elaborate.  Later, as necessary.  */
1506
1507 static unsigned int
1508 hash_set (regno, hash_table_size)
1509      int regno;
1510      int hash_table_size;
1511 {
1512   unsigned int hash;
1513
1514   hash = regno;
1515   return hash % hash_table_size;
1516 }
1517
1518 /* Return non-zero if exp1 is equivalent to exp2.
1519    ??? Borrowed from cse.c.  Might want to remerge with cse.c.  Later.  */
1520
1521 static int
1522 expr_equiv_p (x, y)
1523      rtx x, y;
1524 {
1525   register int i, j;
1526   register enum rtx_code code;
1527   register const char *fmt;
1528
1529   if (x == y)
1530     return 1;
1531
1532   if (x == 0 || y == 0)
1533     return x == y;
1534
1535   code = GET_CODE (x);
1536   if (code != GET_CODE (y))
1537     return 0;
1538
1539   /* (MULT:SI x y) and (MULT:HI x y) are NOT equivalent.  */
1540   if (GET_MODE (x) != GET_MODE (y))
1541     return 0;
1542
1543   switch (code)
1544     {
1545     case PC:
1546     case CC0:
1547       return x == y;
1548
1549     case CONST_INT:
1550       return INTVAL (x) == INTVAL (y);
1551
1552     case LABEL_REF:
1553       return XEXP (x, 0) == XEXP (y, 0);
1554
1555     case SYMBOL_REF:
1556       return XSTR (x, 0) == XSTR (y, 0);
1557
1558     case REG:
1559       return REGNO (x) == REGNO (y);
1560
1561     case MEM:
1562       /* Can't merge two expressions in different alias sets, since we can
1563          decide that the expression is transparent in a block when it isn't,
1564          due to it being set with the different alias set.  */
1565       if (MEM_ALIAS_SET (x) != MEM_ALIAS_SET (y))
1566         return 0;
1567       break;
1568
1569     /*  For commutative operations, check both orders.  */
1570     case PLUS:
1571     case MULT:
1572     case AND:
1573     case IOR:
1574     case XOR:
1575     case NE:
1576     case EQ:
1577       return ((expr_equiv_p (XEXP (x, 0), XEXP (y, 0))
1578                && expr_equiv_p (XEXP (x, 1), XEXP (y, 1)))
1579               || (expr_equiv_p (XEXP (x, 0), XEXP (y, 1))
1580                   && expr_equiv_p (XEXP (x, 1), XEXP (y, 0))));
1581
1582     default:
1583       break;
1584     }
1585
1586   /* Compare the elements.  If any pair of corresponding elements
1587      fail to match, return 0 for the whole thing.  */
1588
1589   fmt = GET_RTX_FORMAT (code);
1590   for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
1591     {
1592       switch (fmt[i])
1593         {
1594         case 'e':
1595           if (! expr_equiv_p (XEXP (x, i), XEXP (y, i)))
1596             return 0;
1597           break;
1598
1599         case 'E':
1600           if (XVECLEN (x, i) != XVECLEN (y, i))
1601             return 0;
1602           for (j = 0; j < XVECLEN (x, i); j++)
1603             if (! expr_equiv_p (XVECEXP (x, i, j), XVECEXP (y, i, j)))
1604               return 0;
1605           break;
1606
1607         case 's':
1608           if (strcmp (XSTR (x, i), XSTR (y, i)))
1609             return 0;
1610           break;
1611
1612         case 'i':
1613           if (XINT (x, i) != XINT (y, i))
1614             return 0;
1615           break;
1616
1617         case 'w':
1618           if (XWINT (x, i) != XWINT (y, i))
1619             return 0;
1620         break;
1621
1622         case '0':
1623           break;
1624
1625         default:
1626           abort ();
1627         }
1628       }
1629
1630   return 1;
1631 }
1632
1633 /* Insert expression X in INSN in the hash table.
1634    If it is already present, record it as the last occurrence in INSN's
1635    basic block.
1636
1637    MODE is the mode of the value X is being stored into.
1638    It is only used if X is a CONST_INT.
1639
1640    ANTIC_P is non-zero if X is an anticipatable expression.
1641    AVAIL_P is non-zero if X is an available expression.  */
1642
1643 static void
1644 insert_expr_in_table (x, mode, insn, antic_p, avail_p)
1645      rtx x;
1646      enum machine_mode mode;
1647      rtx insn;
1648      int antic_p, avail_p;
1649 {
1650   int found, do_not_record_p;
1651   unsigned int hash;
1652   struct expr *cur_expr, *last_expr = NULL;
1653   struct occr *antic_occr, *avail_occr;
1654   struct occr *last_occr = NULL;
1655
1656   hash = hash_expr (x, mode, &do_not_record_p, expr_hash_table_size);
1657
1658   /* Do not insert expression in table if it contains volatile operands,
1659      or if hash_expr determines the expression is something we don't want
1660      to or can't handle.  */
1661   if (do_not_record_p)
1662     return;
1663
1664   cur_expr = expr_hash_table[hash];
1665   found = 0;
1666
1667   while (cur_expr && 0 == (found = expr_equiv_p (cur_expr->expr, x)))
1668     {
1669       /* If the expression isn't found, save a pointer to the end of
1670          the list.  */
1671       last_expr = cur_expr;
1672       cur_expr = cur_expr->next_same_hash;
1673     }
1674
1675   if (! found)
1676     {
1677       cur_expr = (struct expr *) gcse_alloc (sizeof (struct expr));
1678       bytes_used += sizeof (struct expr);
1679       if (expr_hash_table[hash] == NULL)
1680         /* This is the first pattern that hashed to this index.  */
1681         expr_hash_table[hash] = cur_expr;
1682       else
1683         /* Add EXPR to end of this hash chain.  */
1684         last_expr->next_same_hash = cur_expr;
1685
1686       /* Set the fields of the expr element.  */
1687       cur_expr->expr = x;
1688       cur_expr->bitmap_index = n_exprs++;
1689       cur_expr->next_same_hash = NULL;
1690       cur_expr->antic_occr = NULL;
1691       cur_expr->avail_occr = NULL;
1692     }
1693
1694   /* Now record the occurrence(s).  */
1695   if (antic_p)
1696     {
1697       antic_occr = cur_expr->antic_occr;
1698
1699       /* Search for another occurrence in the same basic block.  */
1700       while (antic_occr && BLOCK_NUM (antic_occr->insn) != BLOCK_NUM (insn))
1701         {
1702           /* If an occurrence isn't found, save a pointer to the end of
1703              the list.  */
1704           last_occr = antic_occr;
1705           antic_occr = antic_occr->next;
1706         }
1707
1708       if (antic_occr)
1709         /* Found another instance of the expression in the same basic block.
1710            Prefer the currently recorded one.  We want the first one in the
1711            block and the block is scanned from start to end.  */
1712         ; /* nothing to do */
1713       else
1714         {
1715           /* First occurrence of this expression in this basic block.  */
1716           antic_occr = (struct occr *) gcse_alloc (sizeof (struct occr));
1717           bytes_used += sizeof (struct occr);
1718           /* First occurrence of this expression in any block?  */
1719           if (cur_expr->antic_occr == NULL)
1720             cur_expr->antic_occr = antic_occr;
1721           else
1722             last_occr->next = antic_occr;
1723
1724           antic_occr->insn = insn;
1725           antic_occr->next = NULL;
1726         }
1727     }
1728
1729   if (avail_p)
1730     {
1731       avail_occr = cur_expr->avail_occr;
1732
1733       /* Search for another occurrence in the same basic block.  */
1734       while (avail_occr && BLOCK_NUM (avail_occr->insn) != BLOCK_NUM (insn))
1735         {
1736           /* If an occurrence isn't found, save a pointer to the end of
1737              the list.  */
1738           last_occr = avail_occr;
1739           avail_occr = avail_occr->next;
1740         }
1741
1742       if (avail_occr)
1743         /* Found another instance of the expression in the same basic block.
1744            Prefer this occurrence to the currently recorded one.  We want
1745            the last one in the block and the block is scanned from start
1746            to end.  */
1747         avail_occr->insn = insn;
1748       else
1749         {
1750           /* First occurrence of this expression in this basic block.  */
1751           avail_occr = (struct occr *) gcse_alloc (sizeof (struct occr));
1752           bytes_used += sizeof (struct occr);
1753
1754           /* First occurrence of this expression in any block?  */
1755           if (cur_expr->avail_occr == NULL)
1756             cur_expr->avail_occr = avail_occr;
1757           else
1758             last_occr->next = avail_occr;
1759
1760           avail_occr->insn = insn;
1761           avail_occr->next = NULL;
1762         }
1763     }
1764 }
1765
1766 /* Insert pattern X in INSN in the hash table.
1767    X is a SET of a reg to either another reg or a constant.
1768    If it is already present, record it as the last occurrence in INSN's
1769    basic block.  */
1770
1771 static void
1772 insert_set_in_table (x, insn)
1773      rtx x;
1774      rtx insn;
1775 {
1776   int found;
1777   unsigned int hash;
1778   struct expr *cur_expr, *last_expr = NULL;
1779   struct occr *cur_occr, *last_occr = NULL;
1780
1781   if (GET_CODE (x) != SET
1782       || GET_CODE (SET_DEST (x)) != REG)
1783     abort ();
1784
1785   hash = hash_set (REGNO (SET_DEST (x)), set_hash_table_size);
1786
1787   cur_expr = set_hash_table[hash];
1788   found = 0;
1789
1790   while (cur_expr && 0 == (found = expr_equiv_p (cur_expr->expr, x)))
1791     {
1792       /* If the expression isn't found, save a pointer to the end of
1793          the list.  */
1794       last_expr = cur_expr;
1795       cur_expr = cur_expr->next_same_hash;
1796     }
1797
1798   if (! found)
1799     {
1800       cur_expr = (struct expr *) gcse_alloc (sizeof (struct expr));
1801       bytes_used += sizeof (struct expr);
1802       if (set_hash_table[hash] == NULL)
1803         /* This is the first pattern that hashed to this index.  */
1804         set_hash_table[hash] = cur_expr;
1805       else
1806         /* Add EXPR to end of this hash chain.  */
1807         last_expr->next_same_hash = cur_expr;
1808
1809       /* Set the fields of the expr element.
1810          We must copy X because it can be modified when copy propagation is
1811          performed on its operands.  */
1812       /* ??? Should this go in a different obstack?  */
1813       cur_expr->expr = copy_rtx (x);
1814       cur_expr->bitmap_index = n_sets++;
1815       cur_expr->next_same_hash = NULL;
1816       cur_expr->antic_occr = NULL;
1817       cur_expr->avail_occr = NULL;
1818     }
1819
1820   /* Now record the occurrence.  */
1821   cur_occr = cur_expr->avail_occr;
1822
1823   /* Search for another occurrence in the same basic block.  */
1824   while (cur_occr && BLOCK_NUM (cur_occr->insn) != BLOCK_NUM (insn))
1825     {
1826       /* If an occurrence isn't found, save a pointer to the end of
1827          the list.  */
1828       last_occr = cur_occr;
1829       cur_occr = cur_occr->next;
1830     }
1831
1832   if (cur_occr)
1833     /* Found another instance of the expression in the same basic block.
1834        Prefer this occurrence to the currently recorded one.  We want the
1835        last one in the block and the block is scanned from start to end.  */
1836     cur_occr->insn = insn;
1837   else
1838     {
1839       /* First occurrence of this expression in this basic block.  */
1840       cur_occr = (struct occr *) gcse_alloc (sizeof (struct occr));
1841       bytes_used += sizeof (struct occr);
1842
1843       /* First occurrence of this expression in any block?  */
1844       if (cur_expr->avail_occr == NULL)
1845         cur_expr->avail_occr = cur_occr;
1846       else
1847         last_occr->next = cur_occr;
1848
1849       cur_occr->insn = insn;
1850       cur_occr->next = NULL;
1851     }
1852 }
1853
1854 /* Scan pattern PAT of INSN and add an entry to the hash table.  If SET_P is
1855    non-zero, this is for the assignment hash table, otherwise it is for the
1856    expression hash table.  */
1857
1858 static void
1859 hash_scan_set (pat, insn, set_p)
1860      rtx pat, insn;
1861      int set_p;
1862 {
1863   rtx src = SET_SRC (pat);
1864   rtx dest = SET_DEST (pat);
1865
1866   if (GET_CODE (src) == CALL)
1867     hash_scan_call (src, insn);
1868
1869   if (GET_CODE (dest) == REG)
1870     {
1871       int regno = REGNO (dest);
1872       rtx tmp;
1873
1874       /* Only record sets of pseudo-regs in the hash table.  */
1875       if (! set_p
1876           && regno >= FIRST_PSEUDO_REGISTER
1877           /* Don't GCSE something if we can't do a reg/reg copy.  */
1878           && can_copy_p [GET_MODE (dest)]
1879           /* Is SET_SRC something we want to gcse?  */
1880           && want_to_gcse_p (src))
1881         {
1882           /* An expression is not anticipatable if its operands are
1883              modified before this insn.  */
1884           int antic_p = oprs_anticipatable_p (src, insn);
1885           /* An expression is not available if its operands are
1886              subsequently modified, including this insn.  */
1887           int avail_p = oprs_available_p (src, insn);
1888
1889           insert_expr_in_table (src, GET_MODE (dest), insn, antic_p, avail_p);
1890         }
1891
1892       /* Record sets for constant/copy propagation.  */
1893       else if (set_p
1894                && regno >= FIRST_PSEUDO_REGISTER
1895                && ((GET_CODE (src) == REG
1896                     && REGNO (src) >= FIRST_PSEUDO_REGISTER
1897                     && can_copy_p [GET_MODE (dest)])
1898                    || GET_CODE (src) == CONST_INT
1899                    || GET_CODE (src) == SYMBOL_REF
1900                    || GET_CODE (src) == CONST_DOUBLE)
1901                /* A copy is not available if its src or dest is subsequently
1902                   modified.  Here we want to search from INSN+1 on, but
1903                   oprs_available_p searches from INSN on.  */
1904                && (insn == BLOCK_END (BLOCK_NUM (insn))
1905                    || ((tmp = next_nonnote_insn (insn)) != NULL_RTX
1906                        && oprs_available_p (pat, tmp))))
1907         insert_set_in_table (pat, insn);
1908     }
1909 }
1910
1911 static void
1912 hash_scan_clobber (x, insn)
1913      rtx x ATTRIBUTE_UNUSED, insn ATTRIBUTE_UNUSED;
1914 {
1915   /* Currently nothing to do.  */
1916 }
1917
1918 static void
1919 hash_scan_call (x, insn)
1920      rtx x ATTRIBUTE_UNUSED, insn ATTRIBUTE_UNUSED;
1921 {
1922   /* Currently nothing to do.  */
1923 }
1924
1925 /* Process INSN and add hash table entries as appropriate.
1926
1927    Only available expressions that set a single pseudo-reg are recorded.
1928
1929    Single sets in a PARALLEL could be handled, but it's an extra complication
1930    that isn't dealt with right now.  The trick is handling the CLOBBERs that
1931    are also in the PARALLEL.  Later.
1932
1933    If SET_P is non-zero, this is for the assignment hash table,
1934    otherwise it is for the expression hash table.
1935    If IN_LIBCALL_BLOCK nonzero, we are in a libcall block, and should
1936    not record any expressions.  */
1937
1938 static void
1939 hash_scan_insn (insn, set_p, in_libcall_block)
1940      rtx insn;
1941      int set_p;
1942      int in_libcall_block;
1943 {
1944   rtx pat = PATTERN (insn);
1945   int i;
1946
1947   /* Pick out the sets of INSN and for other forms of instructions record
1948      what's been modified.  */
1949
1950   if (GET_CODE (pat) == SET && ! in_libcall_block)
1951     {
1952       /* Ignore obvious no-ops.  */
1953       if (SET_SRC (pat) != SET_DEST (pat))
1954         hash_scan_set (pat, insn, set_p);
1955     }
1956   else if (GET_CODE (pat) == PARALLEL)
1957     for (i = 0; i < XVECLEN (pat, 0); i++)
1958       {
1959         rtx x = XVECEXP (pat, 0, i);
1960
1961         if (GET_CODE (x) == SET)
1962           {
1963             if (GET_CODE (SET_SRC (x)) == CALL)
1964               hash_scan_call (SET_SRC (x), insn);
1965           }
1966         else if (GET_CODE (x) == CLOBBER)
1967           hash_scan_clobber (x, insn);
1968         else if (GET_CODE (x) == CALL)
1969           hash_scan_call (x, insn);
1970       }
1971
1972   else if (GET_CODE (pat) == CLOBBER)
1973     hash_scan_clobber (pat, insn);
1974   else if (GET_CODE (pat) == CALL)
1975     hash_scan_call (pat, insn);
1976 }
1977
1978 static void
1979 dump_hash_table (file, name, table, table_size, total_size)
1980      FILE *file;
1981      const char *name;
1982      struct expr **table;
1983      int table_size, total_size;
1984 {
1985   int i;
1986   /* Flattened out table, so it's printed in proper order.  */
1987   struct expr **flat_table;
1988   unsigned int *hash_val;
1989   struct expr *expr;
1990
1991   flat_table
1992     = (struct expr **) xcalloc (total_size, sizeof (struct expr *));
1993   hash_val = (unsigned int *) xmalloc (total_size * sizeof (unsigned int));
1994
1995   for (i = 0; i < table_size; i++)
1996     for (expr = table[i]; expr != NULL; expr = expr->next_same_hash)
1997       {
1998         flat_table[expr->bitmap_index] = expr;
1999         hash_val[expr->bitmap_index] = i;
2000       }
2001
2002   fprintf (file, "%s hash table (%d buckets, %d entries)\n",
2003            name, table_size, total_size);
2004
2005   for (i = 0; i < total_size; i++)
2006     if (flat_table[i] != 0)
2007       {
2008         expr = flat_table[i];
2009         fprintf (file, "Index %d (hash value %d)\n  ",
2010                  expr->bitmap_index, hash_val[i]);
2011         print_rtl (file, expr->expr);
2012         fprintf (file, "\n");
2013       }
2014
2015   fprintf (file, "\n");
2016
2017   free (flat_table);
2018   free (hash_val);
2019 }
2020
2021 /* Record register first/last/block set information for REGNO in INSN.
2022
2023    reg_first_set records the first place in the block where the register
2024    is set and is used to compute "anticipatability".
2025
2026    reg_last_set records the last place in the block where the register
2027    is set and is used to compute "availability".
2028
2029    reg_set_in_block records whether the register is set in the block
2030    and is used to compute "transparency".  */
2031
2032 static void
2033 record_last_reg_set_info (insn, regno)
2034      rtx insn;
2035      int regno;
2036 {
2037   if (reg_first_set[regno] == NEVER_SET)
2038     reg_first_set[regno] = INSN_CUID (insn);
2039
2040   reg_last_set[regno] = INSN_CUID (insn);
2041   SET_BIT (reg_set_in_block[BLOCK_NUM (insn)], regno);
2042 }
2043
2044 /* Record memory first/last/block set information for INSN.  */
2045
2046 static void
2047 record_last_mem_set_info (insn)
2048      rtx insn;
2049 {
2050   if (mem_first_set == NEVER_SET)
2051     mem_first_set = INSN_CUID (insn);
2052
2053   mem_last_set = INSN_CUID (insn);
2054   mem_set_in_block[BLOCK_NUM (insn)] = 1;
2055 }
2056
2057 /* Called from compute_hash_table via note_stores to handle one
2058    SET or CLOBBER in an insn.  DATA is really the instruction in which
2059    the SET is taking place.  */
2060
2061 static void
2062 record_last_set_info (dest, setter, data)
2063      rtx dest, setter ATTRIBUTE_UNUSED;
2064      void *data;
2065 {
2066   rtx last_set_insn = (rtx) data;
2067
2068   if (GET_CODE (dest) == SUBREG)
2069     dest = SUBREG_REG (dest);
2070
2071   if (GET_CODE (dest) == REG)
2072     record_last_reg_set_info (last_set_insn, REGNO (dest));
2073   else if (GET_CODE (dest) == MEM
2074            /* Ignore pushes, they clobber nothing.  */
2075            && ! push_operand (dest, GET_MODE (dest)))
2076     record_last_mem_set_info (last_set_insn);
2077 }
2078
2079 /* Top level function to create an expression or assignment hash table.
2080
2081    Expression entries are placed in the hash table if
2082    - they are of the form (set (pseudo-reg) src),
2083    - src is something we want to perform GCSE on,
2084    - none of the operands are subsequently modified in the block
2085
2086    Assignment entries are placed in the hash table if
2087    - they are of the form (set (pseudo-reg) src),
2088    - src is something we want to perform const/copy propagation on,
2089    - none of the operands or target are subsequently modified in the block
2090
2091    Currently src must be a pseudo-reg or a const_int.
2092
2093    F is the first insn.
2094    SET_P is non-zero for computing the assignment hash table.  */
2095
2096 static void
2097 compute_hash_table (set_p)
2098      int set_p;
2099 {
2100   int bb;
2101
2102   /* While we compute the hash table we also compute a bit array of which
2103      registers are set in which blocks.
2104      We also compute which blocks set memory, in the absence of aliasing
2105      support [which is TODO].
2106      ??? This isn't needed during const/copy propagation, but it's cheap to
2107      compute.  Later.  */
2108   sbitmap_vector_zero (reg_set_in_block, n_basic_blocks);
2109   bzero ((char *) mem_set_in_block, n_basic_blocks);
2110
2111   /* Some working arrays used to track first and last set in each block.  */
2112   /* ??? One could use alloca here, but at some size a threshold is crossed
2113      beyond which one should use malloc.  Are we at that threshold here?  */
2114   reg_first_set = (int *) gmalloc (max_gcse_regno * sizeof (int));
2115   reg_last_set = (int *) gmalloc (max_gcse_regno * sizeof (int));
2116
2117   for (bb = 0; bb < n_basic_blocks; bb++)
2118     {
2119       rtx insn;
2120       unsigned int regno;
2121       int in_libcall_block;
2122       unsigned int i;
2123
2124       /* First pass over the instructions records information used to
2125          determine when registers and memory are first and last set.
2126          ??? The mem_set_in_block and hard-reg reg_set_in_block computation
2127          could be moved to compute_sets since they currently don't change.  */
2128
2129       for (i = 0; i < max_gcse_regno; i++)
2130         reg_first_set[i] = reg_last_set[i] = NEVER_SET;
2131
2132       mem_first_set = NEVER_SET;
2133       mem_last_set = NEVER_SET;
2134
2135       for (insn = BLOCK_HEAD (bb);
2136            insn && insn != NEXT_INSN (BLOCK_END (bb));
2137            insn = NEXT_INSN (insn))
2138         {
2139 #ifdef NON_SAVING_SETJMP
2140           if (NON_SAVING_SETJMP && GET_CODE (insn) == NOTE
2141               && NOTE_LINE_NUMBER (insn) == NOTE_INSN_SETJMP)
2142             {
2143               for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2144                 record_last_reg_set_info (insn, regno);
2145               continue;
2146             }
2147 #endif
2148
2149           if (GET_RTX_CLASS (GET_CODE (insn)) != 'i')
2150             continue;
2151
2152           if (GET_CODE (insn) == CALL_INSN)
2153             {
2154               for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2155                 if ((call_used_regs[regno]
2156                      && regno != STACK_POINTER_REGNUM
2157 #if HARD_FRAME_POINTER_REGNUM != FRAME_POINTER_REGNUM
2158                      && regno != HARD_FRAME_POINTER_REGNUM
2159 #endif
2160 #if ARG_POINTER_REGNUM != FRAME_POINTER_REGNUM
2161                      && ! (regno == ARG_POINTER_REGNUM && fixed_regs[regno])
2162 #endif
2163 #if defined (PIC_OFFSET_TABLE_REGNUM) && !defined (PIC_OFFSET_TABLE_REG_CALL_CLOBBERED)
2164                      && ! (regno == PIC_OFFSET_TABLE_REGNUM && flag_pic)
2165 #endif
2166
2167                      && regno != FRAME_POINTER_REGNUM)
2168                     || global_regs[regno])
2169                   record_last_reg_set_info (insn, regno);
2170
2171               if (! CONST_CALL_P (insn))
2172                 record_last_mem_set_info (insn);
2173             }
2174
2175           note_stores (PATTERN (insn), record_last_set_info, insn);
2176         }
2177
2178       /* The next pass builds the hash table.  */
2179
2180       for (insn = BLOCK_HEAD (bb), in_libcall_block = 0;
2181            insn && insn != NEXT_INSN (BLOCK_END (bb));
2182            insn = NEXT_INSN (insn))
2183         if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
2184           {
2185             if (find_reg_note (insn, REG_LIBCALL, NULL_RTX))
2186               in_libcall_block = 1;
2187             else if (find_reg_note (insn, REG_RETVAL, NULL_RTX))
2188               in_libcall_block = 0;
2189             hash_scan_insn (insn, set_p, in_libcall_block);
2190         }
2191     }
2192
2193   free (reg_first_set);
2194   free (reg_last_set);
2195
2196   /* Catch bugs early.  */
2197   reg_first_set = reg_last_set = 0;
2198 }
2199
2200 /* Allocate space for the set hash table.
2201    N_INSNS is the number of instructions in the function.
2202    It is used to determine the number of buckets to use.  */
2203
2204 static void
2205 alloc_set_hash_table (n_insns)
2206      int n_insns;
2207 {
2208   int n;
2209
2210   set_hash_table_size = n_insns / 4;
2211   if (set_hash_table_size < 11)
2212     set_hash_table_size = 11;
2213
2214   /* Attempt to maintain efficient use of hash table.
2215      Making it an odd number is simplest for now.
2216      ??? Later take some measurements.  */
2217   set_hash_table_size |= 1;
2218   n = set_hash_table_size * sizeof (struct expr *);
2219   set_hash_table = (struct expr **) gmalloc (n);
2220 }
2221
2222 /* Free things allocated by alloc_set_hash_table.  */
2223
2224 static void
2225 free_set_hash_table ()
2226 {
2227   free (set_hash_table);
2228 }
2229
2230 /* Compute the hash table for doing copy/const propagation.  */
2231
2232 static void
2233 compute_set_hash_table ()
2234 {
2235   /* Initialize count of number of entries in hash table.  */
2236   n_sets = 0;
2237   bzero ((char *) set_hash_table,
2238          set_hash_table_size * sizeof (struct expr *));
2239
2240   compute_hash_table (1);
2241 }
2242
2243 /* Allocate space for the expression hash table.
2244    N_INSNS is the number of instructions in the function.
2245    It is used to determine the number of buckets to use.  */
2246
2247 static void
2248 alloc_expr_hash_table (n_insns)
2249      unsigned int n_insns;
2250 {
2251   int n;
2252
2253   expr_hash_table_size = n_insns / 2;
2254   /* Make sure the amount is usable.  */
2255   if (expr_hash_table_size < 11)
2256     expr_hash_table_size = 11;
2257
2258   /* Attempt to maintain efficient use of hash table.
2259      Making it an odd number is simplest for now.
2260      ??? Later take some measurements.  */
2261   expr_hash_table_size |= 1;
2262   n = expr_hash_table_size * sizeof (struct expr *);
2263   expr_hash_table = (struct expr **) gmalloc (n);
2264 }
2265
2266 /* Free things allocated by alloc_expr_hash_table.  */
2267
2268 static void
2269 free_expr_hash_table ()
2270 {
2271   free (expr_hash_table);
2272 }
2273
2274 /* Compute the hash table for doing GCSE.  */
2275
2276 static void
2277 compute_expr_hash_table ()
2278 {
2279   /* Initialize count of number of entries in hash table.  */
2280   n_exprs = 0;
2281   bzero ((char *) expr_hash_table,
2282          expr_hash_table_size * sizeof (struct expr *));
2283
2284   compute_hash_table (0);
2285 }
2286 \f
2287 /* Expression tracking support.  */
2288
2289 /* Lookup pattern PAT in the expression table.
2290    The result is a pointer to the table entry, or NULL if not found.  */
2291
2292 static struct expr *
2293 lookup_expr (pat)
2294      rtx pat;
2295 {
2296   int do_not_record_p;
2297   unsigned int hash = hash_expr (pat, GET_MODE (pat), &do_not_record_p,
2298                                  expr_hash_table_size);
2299   struct expr *expr;
2300
2301   if (do_not_record_p)
2302     return NULL;
2303
2304   expr = expr_hash_table[hash];
2305
2306   while (expr && ! expr_equiv_p (expr->expr, pat))
2307     expr = expr->next_same_hash;
2308
2309   return expr;
2310 }
2311
2312 /* Lookup REGNO in the set table.  If PAT is non-NULL look for the entry that
2313    matches it, otherwise return the first entry for REGNO.  The result is a
2314    pointer to the table entry, or NULL if not found.  */
2315
2316 static struct expr *
2317 lookup_set (regno, pat)
2318      unsigned int regno;
2319      rtx pat;
2320 {
2321   unsigned int hash = hash_set (regno, set_hash_table_size);
2322   struct expr *expr;
2323
2324   expr = set_hash_table[hash];
2325
2326   if (pat)
2327     {
2328       while (expr && ! expr_equiv_p (expr->expr, pat))
2329         expr = expr->next_same_hash;
2330     }
2331   else
2332     {
2333       while (expr && REGNO (SET_DEST (expr->expr)) != regno)
2334         expr = expr->next_same_hash;
2335     }
2336
2337   return expr;
2338 }
2339
2340 /* Return the next entry for REGNO in list EXPR.  */
2341
2342 static struct expr *
2343 next_set (regno, expr)
2344      unsigned int regno;
2345      struct expr *expr;
2346 {
2347   do
2348     expr = expr->next_same_hash;
2349   while (expr && REGNO (SET_DEST (expr->expr)) != regno);
2350
2351   return expr;
2352 }
2353
2354 /* Reset tables used to keep track of what's still available [since the
2355    start of the block].  */
2356
2357 static void
2358 reset_opr_set_tables ()
2359 {
2360   /* Maintain a bitmap of which regs have been set since beginning of
2361      the block.  */
2362   sbitmap_zero (reg_set_bitmap);
2363
2364   /* Also keep a record of the last instruction to modify memory.
2365      For now this is very trivial, we only record whether any memory
2366      location has been modified.  */
2367   mem_last_set = 0;
2368 }
2369
2370 /* Return non-zero if the operands of X are not set before INSN in
2371    INSN's basic block.  */
2372
2373 static int
2374 oprs_not_set_p (x, insn)
2375      rtx x, insn;
2376 {
2377   int i, j;
2378   enum rtx_code code;
2379   const char *fmt;
2380
2381   if (x == 0)
2382     return 1;
2383
2384   code = GET_CODE (x);
2385   switch (code)
2386     {
2387     case PC:
2388     case CC0:
2389     case CONST:
2390     case CONST_INT:
2391     case CONST_DOUBLE:
2392     case SYMBOL_REF:
2393     case LABEL_REF:
2394     case ADDR_VEC:
2395     case ADDR_DIFF_VEC:
2396       return 1;
2397
2398     case MEM:
2399       if (mem_last_set != 0)
2400         return 0;
2401       else
2402         return oprs_not_set_p (XEXP (x, 0), insn);
2403
2404     case REG:
2405       return ! TEST_BIT (reg_set_bitmap, REGNO (x));
2406
2407     default:
2408       break;
2409     }
2410
2411   for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
2412     {
2413       if (fmt[i] == 'e')
2414         {
2415           /* If we are about to do the last recursive call
2416              needed at this level, change it into iteration.
2417              This function is called enough to be worth it.  */
2418           if (i == 0)
2419             return oprs_not_set_p (XEXP (x, i), insn);
2420
2421           if (! oprs_not_set_p (XEXP (x, i), insn))
2422             return 0;
2423         }
2424       else if (fmt[i] == 'E')
2425         for (j = 0; j < XVECLEN (x, i); j++)
2426           if (! oprs_not_set_p (XVECEXP (x, i, j), insn))
2427             return 0;
2428     }
2429
2430   return 1;
2431 }
2432
2433 /* Mark things set by a CALL.  */
2434
2435 static void
2436 mark_call (insn)
2437      rtx insn;
2438 {
2439   mem_last_set = INSN_CUID (insn);
2440 }
2441
2442 /* Mark things set by a SET.  */
2443
2444 static void
2445 mark_set (pat, insn)
2446      rtx pat, insn;
2447 {
2448   rtx dest = SET_DEST (pat);
2449
2450   while (GET_CODE (dest) == SUBREG
2451          || GET_CODE (dest) == ZERO_EXTRACT
2452          || GET_CODE (dest) == SIGN_EXTRACT
2453          || GET_CODE (dest) == STRICT_LOW_PART)
2454     dest = XEXP (dest, 0);
2455
2456   if (GET_CODE (dest) == REG)
2457     SET_BIT (reg_set_bitmap, REGNO (dest));
2458   else if (GET_CODE (dest) == MEM)
2459     mem_last_set = INSN_CUID (insn);
2460
2461   if (GET_CODE (SET_SRC (pat)) == CALL)
2462     mark_call (insn);
2463 }
2464
2465 /* Record things set by a CLOBBER.  */
2466
2467 static void
2468 mark_clobber (pat, insn)
2469      rtx pat, insn;
2470 {
2471   rtx clob = XEXP (pat, 0);
2472
2473   while (GET_CODE (clob) == SUBREG || GET_CODE (clob) == STRICT_LOW_PART)
2474     clob = XEXP (clob, 0);
2475
2476   if (GET_CODE (clob) == REG)
2477     SET_BIT (reg_set_bitmap, REGNO (clob));
2478   else
2479     mem_last_set = INSN_CUID (insn);
2480 }
2481
2482 /* Record things set by INSN.
2483    This data is used by oprs_not_set_p.  */
2484
2485 static void
2486 mark_oprs_set (insn)
2487      rtx insn;
2488 {
2489   rtx pat = PATTERN (insn);
2490   int i;
2491
2492   if (GET_CODE (pat) == SET)
2493     mark_set (pat, insn);
2494   else if (GET_CODE (pat) == PARALLEL)
2495     for (i = 0; i < XVECLEN (pat, 0); i++)
2496       {
2497         rtx x = XVECEXP (pat, 0, i);
2498
2499         if (GET_CODE (x) == SET)
2500           mark_set (x, insn);
2501         else if (GET_CODE (x) == CLOBBER)
2502           mark_clobber (x, insn);
2503         else if (GET_CODE (x) == CALL)
2504           mark_call (insn);
2505       }
2506
2507   else if (GET_CODE (pat) == CLOBBER)
2508     mark_clobber (pat, insn);
2509   else if (GET_CODE (pat) == CALL)
2510     mark_call (insn);
2511 }
2512
2513 \f
2514 /* Classic GCSE reaching definition support.  */
2515
2516 /* Allocate reaching def variables.  */
2517
2518 static void
2519 alloc_rd_mem (n_blocks, n_insns)
2520      int n_blocks, n_insns;
2521 {
2522   rd_kill = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2523   sbitmap_vector_zero (rd_kill, n_basic_blocks);
2524
2525   rd_gen = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2526   sbitmap_vector_zero (rd_gen, n_basic_blocks);
2527
2528   reaching_defs = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2529   sbitmap_vector_zero (reaching_defs, n_basic_blocks);
2530
2531   rd_out = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2532   sbitmap_vector_zero (rd_out, n_basic_blocks);
2533 }
2534
2535 /* Free reaching def variables.  */
2536
2537 static void
2538 free_rd_mem ()
2539 {
2540   free (rd_kill);
2541   free (rd_gen);
2542   free (reaching_defs);
2543   free (rd_out);
2544 }
2545
2546 /* Add INSN to the kills of BB.  REGNO, set in BB, is killed by INSN.  */
2547
2548 static void
2549 handle_rd_kill_set (insn, regno, bb)
2550      rtx insn;
2551      int regno, bb;
2552 {
2553   struct reg_set *this_reg;
2554
2555   for (this_reg = reg_set_table[regno]; this_reg; this_reg = this_reg ->next)
2556     if (BLOCK_NUM (this_reg->insn) != BLOCK_NUM (insn))
2557       SET_BIT (rd_kill[bb], INSN_CUID (this_reg->insn));
2558 }
2559
2560 /* Compute the set of kill's for reaching definitions.  */
2561
2562 static void
2563 compute_kill_rd ()
2564 {
2565   int bb, cuid;
2566   int regno, i;
2567
2568   /* For each block
2569        For each set bit in `gen' of the block (i.e each insn which
2570            generates a definition in the block)
2571          Call the reg set by the insn corresponding to that bit regx
2572          Look at the linked list starting at reg_set_table[regx]
2573          For each setting of regx in the linked list, which is not in
2574              this block
2575            Set the bit in `kill' corresponding to that insn.   */
2576   for (bb = 0; bb < n_basic_blocks; bb++)
2577     for (cuid = 0; cuid < max_cuid; cuid++)
2578       if (TEST_BIT (rd_gen[bb], cuid))
2579         {
2580           rtx insn = CUID_INSN (cuid);
2581           rtx pat = PATTERN (insn);
2582
2583           if (GET_CODE (insn) == CALL_INSN)
2584             {
2585               for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2586                 {
2587                   if ((call_used_regs[regno]
2588                        && regno != STACK_POINTER_REGNUM
2589 #if HARD_FRAME_POINTER_REGNUM != FRAME_POINTER_REGNUM
2590                        && regno != HARD_FRAME_POINTER_REGNUM
2591 #endif
2592 #if ARG_POINTER_REGNUM != FRAME_POINTER_REGNUM
2593                        && ! (regno == ARG_POINTER_REGNUM
2594                              && fixed_regs[regno])
2595 #endif
2596 #if defined (PIC_OFFSET_TABLE_REGNUM) && !defined (PIC_OFFSET_TABLE_REG_CALL_CLOBBERED)
2597                        && ! (regno == PIC_OFFSET_TABLE_REGNUM && flag_pic)
2598 #endif
2599                        && regno != FRAME_POINTER_REGNUM)
2600                       || global_regs[regno])
2601                     handle_rd_kill_set (insn, regno, bb);
2602                 }
2603             }
2604
2605           if (GET_CODE (pat) == PARALLEL)
2606             {
2607               for (i = XVECLEN (pat, 0) - 1; i >= 0; i--)
2608                 {
2609                   enum rtx_code code = GET_CODE (XVECEXP (pat, 0, i));
2610
2611                   if ((code == SET || code == CLOBBER)
2612                       && GET_CODE (XEXP (XVECEXP (pat, 0, i), 0)) == REG)
2613                     handle_rd_kill_set (insn,
2614                                         REGNO (XEXP (XVECEXP (pat, 0, i), 0)),
2615                                         bb);
2616                 }
2617             }
2618           else if (GET_CODE (pat) == SET && GET_CODE (SET_DEST (pat)) == REG)
2619             /* Each setting of this register outside of this block
2620                must be marked in the set of kills in this block.  */
2621             handle_rd_kill_set (insn, REGNO (SET_DEST (pat)), bb);
2622         }
2623 }
2624
2625 /* Compute the reaching definitions as in
2626    Compilers Principles, Techniques, and Tools. Aho, Sethi, Ullman,
2627    Chapter 10.  It is the same algorithm as used for computing available
2628    expressions but applied to the gens and kills of reaching definitions.  */
2629
2630 static void
2631 compute_rd ()
2632 {
2633   int bb, changed, passes;
2634
2635   for (bb = 0; bb < n_basic_blocks; bb++)
2636     sbitmap_copy (rd_out[bb] /*dst*/, rd_gen[bb] /*src*/);
2637
2638   passes = 0;
2639   changed = 1;
2640   while (changed)
2641     {
2642       changed = 0;
2643       for (bb = 0; bb < n_basic_blocks; bb++)
2644         {
2645           sbitmap_union_of_preds (reaching_defs[bb], rd_out, bb);
2646           changed |= sbitmap_union_of_diff (rd_out[bb], rd_gen[bb],
2647                                             reaching_defs[bb], rd_kill[bb]);
2648         }
2649       passes++;
2650     }
2651
2652   if (gcse_file)
2653     fprintf (gcse_file, "reaching def computation: %d passes\n", passes);
2654 }
2655 \f
2656 /* Classic GCSE available expression support.  */
2657
2658 /* Allocate memory for available expression computation.  */
2659
2660 static void
2661 alloc_avail_expr_mem (n_blocks, n_exprs)
2662      int n_blocks, n_exprs;
2663 {
2664   ae_kill = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2665   sbitmap_vector_zero (ae_kill, n_basic_blocks);
2666
2667   ae_gen = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2668   sbitmap_vector_zero (ae_gen, n_basic_blocks);
2669
2670   ae_in = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2671   sbitmap_vector_zero (ae_in, n_basic_blocks);
2672
2673   ae_out = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2674   sbitmap_vector_zero (ae_out, n_basic_blocks);
2675
2676   u_bitmap = (sbitmap) sbitmap_alloc (n_exprs);
2677   sbitmap_ones (u_bitmap);
2678 }
2679
2680 static void
2681 free_avail_expr_mem ()
2682 {
2683   free (ae_kill);
2684   free (ae_gen);
2685   free (ae_in);
2686   free (ae_out);
2687   free (u_bitmap);
2688 }
2689
2690 /* Compute the set of available expressions generated in each basic block.  */
2691
2692 static void
2693 compute_ae_gen ()
2694 {
2695   unsigned int i;
2696   struct expr *expr;
2697   struct occr *occr;
2698
2699   /* For each recorded occurrence of each expression, set ae_gen[bb][expr].
2700      This is all we have to do because an expression is not recorded if it
2701      is not available, and the only expressions we want to work with are the
2702      ones that are recorded.  */
2703   for (i = 0; i < expr_hash_table_size; i++)
2704     for (expr = expr_hash_table[i]; expr != 0; expr = expr->next_same_hash)
2705       for (occr = expr->avail_occr; occr != 0; occr = occr->next)
2706         SET_BIT (ae_gen[BLOCK_NUM (occr->insn)], expr->bitmap_index);
2707 }
2708
2709 /* Return non-zero if expression X is killed in BB.  */
2710
2711 static int
2712 expr_killed_p (x, bb)
2713      rtx x;
2714      int bb;
2715 {
2716   int i, j;
2717   enum rtx_code code;
2718   const char *fmt;
2719
2720   if (x == 0)
2721     return 1;
2722
2723   code = GET_CODE (x);
2724   switch (code)
2725     {
2726     case REG:
2727       return TEST_BIT (reg_set_in_block[bb], REGNO (x));
2728
2729     case MEM:
2730       if (mem_set_in_block[bb])
2731         return 1;
2732       else
2733         return expr_killed_p (XEXP (x, 0), bb);
2734
2735     case PC:
2736     case CC0: /*FIXME*/
2737     case CONST:
2738     case CONST_INT:
2739     case CONST_DOUBLE:
2740     case SYMBOL_REF:
2741     case LABEL_REF:
2742     case ADDR_VEC:
2743     case ADDR_DIFF_VEC:
2744       return 0;
2745
2746     default:
2747       break;
2748     }
2749
2750   for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
2751     {
2752       if (fmt[i] == 'e')
2753         {
2754           /* If we are about to do the last recursive call
2755              needed at this level, change it into iteration.
2756              This function is called enough to be worth it.  */
2757           if (i == 0)
2758             return expr_killed_p (XEXP (x, i), bb);
2759           else if (expr_killed_p (XEXP (x, i), bb))
2760             return 1;
2761         }
2762       else if (fmt[i] == 'E')
2763         for (j = 0; j < XVECLEN (x, i); j++)
2764           if (expr_killed_p (XVECEXP (x, i, j), bb))
2765             return 1;
2766     }
2767
2768   return 0;
2769 }
2770
2771 /* Compute the set of available expressions killed in each basic block.  */
2772
2773 static void
2774 compute_ae_kill (ae_gen, ae_kill)
2775      sbitmap *ae_gen, *ae_kill;
2776 {
2777   int bb;
2778   unsigned int i;
2779   struct expr *expr;
2780
2781   for (bb = 0; bb < n_basic_blocks; bb++)
2782     for (i = 0; i < expr_hash_table_size; i++)
2783       for (expr = expr_hash_table[i]; expr; expr = expr->next_same_hash)
2784         {
2785           /* Skip EXPR if generated in this block.  */
2786           if (TEST_BIT (ae_gen[bb], expr->bitmap_index))
2787             continue;
2788
2789           if (expr_killed_p (expr->expr, bb))
2790             SET_BIT (ae_kill[bb], expr->bitmap_index);
2791         }
2792 }
2793 \f
2794 /* Actually perform the Classic GCSE optimizations.  */
2795
2796 /* Return non-zero if occurrence OCCR of expression EXPR reaches block BB.
2797
2798    CHECK_SELF_LOOP is non-zero if we should consider a block reaching itself
2799    as a positive reach.  We want to do this when there are two computations
2800    of the expression in the block.
2801
2802    VISITED is a pointer to a working buffer for tracking which BB's have
2803    been visited.  It is NULL for the top-level call.
2804
2805    We treat reaching expressions that go through blocks containing the same
2806    reaching expression as "not reaching".  E.g. if EXPR is generated in blocks
2807    2 and 3, INSN is in block 4, and 2->3->4, we treat the expression in block
2808    2 as not reaching.  The intent is to improve the probability of finding
2809    only one reaching expression and to reduce register lifetimes by picking
2810    the closest such expression.  */
2811
2812 static int
2813 expr_reaches_here_p_work (occr, expr, bb, check_self_loop, visited)
2814      struct occr *occr;
2815      struct expr *expr;
2816      int bb;
2817      int check_self_loop;
2818      char *visited;
2819 {
2820   edge pred;
2821
2822   for (pred = BASIC_BLOCK(bb)->pred; pred != NULL; pred = pred->pred_next)
2823     {
2824       int pred_bb = pred->src->index;
2825
2826       if (visited[pred_bb])
2827         /* This predecessor has already been visited. Nothing to do.  */
2828           ;
2829       else if (pred_bb == bb)
2830         {
2831           /* BB loops on itself.  */
2832           if (check_self_loop
2833               && TEST_BIT (ae_gen[pred_bb], expr->bitmap_index)
2834               && BLOCK_NUM (occr->insn) == pred_bb)
2835             return 1;
2836
2837           visited[pred_bb] = 1;
2838         }
2839
2840       /* Ignore this predecessor if it kills the expression.  */
2841       else if (TEST_BIT (ae_kill[pred_bb], expr->bitmap_index))
2842         visited[pred_bb] = 1;
2843
2844       /* Does this predecessor generate this expression?  */
2845       else if (TEST_BIT (ae_gen[pred_bb], expr->bitmap_index))
2846         {
2847           /* Is this the occurrence we're looking for?
2848              Note that there's only one generating occurrence per block
2849              so we just need to check the block number.  */
2850           if (BLOCK_NUM (occr->insn) == pred_bb)
2851             return 1;
2852
2853           visited[pred_bb] = 1;
2854         }
2855
2856       /* Neither gen nor kill.  */
2857       else
2858         {
2859           visited[pred_bb] = 1;
2860           if (expr_reaches_here_p_work (occr, expr, pred_bb, check_self_loop,
2861               visited))
2862
2863             return 1;
2864         }
2865     }
2866
2867   /* All paths have been checked.  */
2868   return 0;
2869 }
2870
2871 /* This wrapper for expr_reaches_here_p_work() is to ensure that any
2872    memory allocated for that function is returned. */
2873
2874 static int
2875 expr_reaches_here_p (occr, expr, bb, check_self_loop)
2876      struct occr *occr;
2877      struct expr *expr;
2878      int bb;
2879      int check_self_loop;
2880 {
2881   int rval;
2882   char *visited = (char *) xcalloc (n_basic_blocks, 1);
2883
2884   rval = expr_reaches_here_p_work (occr, expr, bb, check_self_loop, visited);
2885
2886   free (visited);
2887   return rval;
2888 }
2889
2890 /* Return the instruction that computes EXPR that reaches INSN's basic block.
2891    If there is more than one such instruction, return NULL.
2892
2893    Called only by handle_avail_expr.  */
2894
2895 static rtx
2896 computing_insn (expr, insn)
2897      struct expr *expr;
2898      rtx insn;
2899 {
2900   int bb = BLOCK_NUM (insn);
2901
2902   if (expr->avail_occr->next == NULL)
2903     {
2904       if (BLOCK_NUM (expr->avail_occr->insn) == bb)
2905         /* The available expression is actually itself
2906            (i.e. a loop in the flow graph) so do nothing.  */
2907         return NULL;
2908
2909       /* (FIXME) Case that we found a pattern that was created by
2910          a substitution that took place.  */
2911       return expr->avail_occr->insn;
2912     }
2913   else
2914     {
2915       /* Pattern is computed more than once.
2916          Search backwards from this insn to see how many of these
2917          computations actually reach this insn.  */
2918       struct occr *occr;
2919       rtx insn_computes_expr = NULL;
2920       int can_reach = 0;
2921
2922       for (occr = expr->avail_occr; occr != NULL; occr = occr->next)
2923         {
2924           if (BLOCK_NUM (occr->insn) == bb)
2925             {
2926               /* The expression is generated in this block.
2927                  The only time we care about this is when the expression
2928                  is generated later in the block [and thus there's a loop].
2929                  We let the normal cse pass handle the other cases.  */
2930               if (INSN_CUID (insn) < INSN_CUID (occr->insn)
2931                   && expr_reaches_here_p (occr, expr, bb, 1))
2932                 {
2933                   can_reach++;
2934                   if (can_reach > 1)
2935                     return NULL;
2936
2937                   insn_computes_expr = occr->insn;
2938                 }
2939             }
2940           else if (expr_reaches_here_p (occr, expr, bb, 0))
2941             {
2942               can_reach++;
2943               if (can_reach > 1)
2944                 return NULL;
2945
2946               insn_computes_expr = occr->insn;
2947             }
2948         }
2949
2950       if (insn_computes_expr == NULL)
2951         abort ();
2952
2953       return insn_computes_expr;
2954     }
2955 }
2956
2957 /* Return non-zero if the definition in DEF_INSN can reach INSN.
2958    Only called by can_disregard_other_sets.  */
2959
2960 static int
2961 def_reaches_here_p (insn, def_insn)
2962      rtx insn, def_insn;
2963 {
2964   rtx reg;
2965
2966   if (TEST_BIT (reaching_defs[BLOCK_NUM (insn)], INSN_CUID (def_insn)))
2967     return 1;
2968
2969   if (BLOCK_NUM (insn) == BLOCK_NUM (def_insn))
2970     {
2971       if (INSN_CUID (def_insn) < INSN_CUID (insn))
2972         {
2973           if (GET_CODE (PATTERN (def_insn)) == PARALLEL)
2974             return 1;
2975           else if (GET_CODE (PATTERN (def_insn)) == CLOBBER)
2976             reg = XEXP (PATTERN (def_insn), 0);
2977           else if (GET_CODE (PATTERN (def_insn)) == SET)
2978             reg = SET_DEST (PATTERN (def_insn));
2979           else
2980             abort ();
2981
2982           return ! reg_set_between_p (reg, NEXT_INSN (def_insn), insn);
2983         }
2984       else
2985         return 0;
2986     }
2987
2988   return 0;
2989 }
2990
2991 /* Return non-zero if *ADDR_THIS_REG can only have one value at INSN.  The
2992    value returned is the number of definitions that reach INSN.  Returning a
2993    value of zero means that [maybe] more than one definition reaches INSN and
2994    the caller can't perform whatever optimization it is trying.  i.e. it is
2995    always safe to return zero.  */
2996
2997 static int
2998 can_disregard_other_sets (addr_this_reg, insn, for_combine)
2999      struct reg_set **addr_this_reg;
3000      rtx insn;
3001      int for_combine;
3002 {
3003   int number_of_reaching_defs = 0;
3004   struct reg_set *this_reg;
3005
3006   for (this_reg = *addr_this_reg; this_reg != 0; this_reg = this_reg->next)
3007     if (def_reaches_here_p (insn, this_reg->insn))
3008       {
3009         number_of_reaching_defs++;
3010         /* Ignore parallels for now.  */
3011         if (GET_CODE (PATTERN (this_reg->insn)) == PARALLEL)
3012           return 0;
3013
3014         if (!for_combine
3015             && (GET_CODE (PATTERN (this_reg->insn)) == CLOBBER
3016                 || ! rtx_equal_p (SET_SRC (PATTERN (this_reg->insn)),
3017                                   SET_SRC (PATTERN (insn)))))
3018           /* A setting of the reg to a different value reaches INSN.  */
3019           return 0;
3020
3021         if (number_of_reaching_defs > 1)
3022           {
3023             /* If in this setting the value the register is being set to is
3024                equal to the previous value the register was set to and this
3025                setting reaches the insn we are trying to do the substitution
3026                on then we are ok.  */
3027             if (GET_CODE (PATTERN (this_reg->insn)) == CLOBBER)
3028               return 0;
3029             else if (! rtx_equal_p (SET_SRC (PATTERN (this_reg->insn)),
3030                                     SET_SRC (PATTERN (insn))))
3031               return 0;
3032           }
3033
3034         *addr_this_reg = this_reg;
3035       }
3036
3037   return number_of_reaching_defs;
3038 }
3039
3040 /* Expression computed by insn is available and the substitution is legal,
3041    so try to perform the substitution.
3042
3043    The result is non-zero if any changes were made.  */
3044
3045 static int
3046 handle_avail_expr (insn, expr)
3047      rtx insn;
3048      struct expr *expr;
3049 {
3050   rtx pat, insn_computes_expr;
3051   rtx to;
3052   struct reg_set *this_reg;
3053   int found_setting, use_src;
3054   int changed = 0;
3055
3056   /* We only handle the case where one computation of the expression
3057      reaches this instruction.  */
3058   insn_computes_expr = computing_insn (expr, insn);
3059   if (insn_computes_expr == NULL)
3060     return 0;
3061
3062   found_setting = 0;
3063   use_src = 0;
3064
3065   /* At this point we know only one computation of EXPR outside of this
3066      block reaches this insn.  Now try to find a register that the
3067      expression is computed into.  */
3068   if (GET_CODE (SET_SRC (PATTERN (insn_computes_expr))) == REG)
3069     {
3070       /* This is the case when the available expression that reaches
3071          here has already been handled as an available expression.  */
3072       unsigned int regnum_for_replacing
3073         = REGNO (SET_SRC (PATTERN (insn_computes_expr)));
3074
3075       /* If the register was created by GCSE we can't use `reg_set_table',
3076          however we know it's set only once.  */
3077       if (regnum_for_replacing >= max_gcse_regno
3078           /* If the register the expression is computed into is set only once,
3079              or only one set reaches this insn, we can use it.  */
3080           || (((this_reg = reg_set_table[regnum_for_replacing]),
3081                this_reg->next == NULL)
3082               || can_disregard_other_sets (&this_reg, insn, 0)))
3083        {
3084          use_src = 1;
3085          found_setting = 1;
3086        }
3087     }
3088
3089   if (!found_setting)
3090     {
3091       unsigned int regnum_for_replacing
3092         = REGNO (SET_DEST (PATTERN (insn_computes_expr)));
3093
3094       /* This shouldn't happen.  */
3095       if (regnum_for_replacing >= max_gcse_regno)
3096         abort ();
3097
3098       this_reg = reg_set_table[regnum_for_replacing];
3099
3100       /* If the register the expression is computed into is set only once,
3101          or only one set reaches this insn, use it.  */
3102       if (this_reg->next == NULL
3103           || can_disregard_other_sets (&this_reg, insn, 0))
3104         found_setting = 1;
3105     }
3106
3107   if (found_setting)
3108     {
3109       pat = PATTERN (insn);
3110       if (use_src)
3111         to = SET_SRC (PATTERN (insn_computes_expr));
3112       else
3113         to = SET_DEST (PATTERN (insn_computes_expr));
3114       changed = validate_change (insn, &SET_SRC (pat), to, 0);
3115
3116       /* We should be able to ignore the return code from validate_change but
3117          to play it safe we check.  */
3118       if (changed)
3119         {
3120           gcse_subst_count++;
3121           if (gcse_file != NULL)
3122             {
3123               fprintf (gcse_file, "GCSE: Replacing the source in insn %d with",
3124                        INSN_UID (insn));
3125               fprintf (gcse_file, " reg %d %s insn %d\n",
3126                        REGNO (to), use_src ? "from" : "set in",
3127                        INSN_UID (insn_computes_expr));
3128             }
3129         }
3130     }
3131
3132   /* The register that the expr is computed into is set more than once.  */
3133   else if (1 /*expensive_op(this_pattrn->op) && do_expensive_gcse)*/)
3134     {
3135       /* Insert an insn after insnx that copies the reg set in insnx
3136          into a new pseudo register call this new register REGN.
3137          From insnb until end of basic block or until REGB is set
3138          replace all uses of REGB with REGN.  */
3139       rtx new_insn;
3140
3141       to = gen_reg_rtx (GET_MODE (SET_DEST (PATTERN (insn_computes_expr))));
3142
3143       /* Generate the new insn.  */
3144       /* ??? If the change fails, we return 0, even though we created
3145          an insn.  I think this is ok.  */
3146       new_insn
3147         = emit_insn_after (gen_rtx_SET (VOIDmode, to,
3148                                         SET_DEST (PATTERN
3149                                                   (insn_computes_expr))),
3150                            insn_computes_expr);
3151
3152       /* Keep block number table up to date.  */
3153       set_block_num (new_insn, BLOCK_NUM (insn_computes_expr));
3154
3155       /* Keep register set table up to date.  */
3156       record_one_set (REGNO (to), new_insn);
3157
3158       gcse_create_count++;
3159       if (gcse_file != NULL)
3160         {
3161           fprintf (gcse_file, "GCSE: Creating insn %d to copy value of reg %d",
3162                    INSN_UID (NEXT_INSN (insn_computes_expr)),
3163                    REGNO (SET_SRC (PATTERN (NEXT_INSN (insn_computes_expr)))));
3164           fprintf (gcse_file, ", computed in insn %d,\n",
3165                    INSN_UID (insn_computes_expr));
3166           fprintf (gcse_file, "      into newly allocated reg %d\n",
3167                    REGNO (to));
3168         }
3169
3170       pat = PATTERN (insn);
3171
3172       /* Do register replacement for INSN.  */
3173       changed = validate_change (insn, &SET_SRC (pat),
3174                                  SET_DEST (PATTERN
3175                                            (NEXT_INSN (insn_computes_expr))),
3176                                  0);
3177
3178       /* We should be able to ignore the return code from validate_change but
3179          to play it safe we check.  */
3180       if (changed)
3181         {
3182           gcse_subst_count++;
3183           if (gcse_file != NULL)
3184             {
3185               fprintf (gcse_file,
3186                        "GCSE: Replacing the source in insn %d with reg %d ",
3187                        INSN_UID (insn),
3188                        REGNO (SET_DEST (PATTERN (NEXT_INSN
3189                                                  (insn_computes_expr)))));
3190               fprintf (gcse_file, "set in insn %d\n",
3191                        INSN_UID (insn_computes_expr));
3192             }
3193         }
3194     }
3195
3196   return changed;
3197 }
3198
3199 /* Perform classic GCSE.  This is called by one_classic_gcse_pass after all
3200    the dataflow analysis has been done.
3201
3202    The result is non-zero if a change was made.  */
3203
3204 static int
3205 classic_gcse ()
3206 {
3207   int bb, changed;
3208   rtx insn;
3209
3210   /* Note we start at block 1.  */
3211
3212   changed = 0;
3213   for (bb = 1; bb < n_basic_blocks; bb++)
3214     {
3215       /* Reset tables used to keep track of what's still valid [since the
3216          start of the block].  */
3217       reset_opr_set_tables ();
3218
3219       for (insn = BLOCK_HEAD (bb);
3220            insn != NULL && insn != NEXT_INSN (BLOCK_END (bb));
3221            insn = NEXT_INSN (insn))
3222         {
3223           /* Is insn of form (set (pseudo-reg) ...)?  */
3224           if (GET_CODE (insn) == INSN
3225               && GET_CODE (PATTERN (insn)) == SET
3226               && GET_CODE (SET_DEST (PATTERN (insn))) == REG
3227               && REGNO (SET_DEST (PATTERN (insn))) >= FIRST_PSEUDO_REGISTER)
3228             {
3229               rtx pat = PATTERN (insn);
3230               rtx src = SET_SRC (pat);
3231               struct expr *expr;
3232
3233               if (want_to_gcse_p (src)
3234                   /* Is the expression recorded?  */
3235                   && ((expr = lookup_expr (src)) != NULL)
3236                   /* Is the expression available [at the start of the
3237                      block]?  */
3238                   && TEST_BIT (ae_in[bb], expr->bitmap_index)
3239                   /* Are the operands unchanged since the start of the
3240                      block?  */
3241                   && oprs_not_set_p (src, insn))
3242                 changed |= handle_avail_expr (insn, expr);
3243             }
3244
3245           /* Keep track of everything modified by this insn.  */
3246           /* ??? Need to be careful w.r.t. mods done to INSN.  */
3247           if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
3248             mark_oprs_set (insn);
3249         }
3250     }
3251
3252   return changed;
3253 }
3254
3255 /* Top level routine to perform one classic GCSE pass.
3256
3257    Return non-zero if a change was made.  */
3258
3259 static int
3260 one_classic_gcse_pass (pass)
3261      int pass;
3262 {
3263   int changed = 0;
3264
3265   gcse_subst_count = 0;
3266   gcse_create_count = 0;
3267
3268   alloc_expr_hash_table (max_cuid);
3269   alloc_rd_mem (n_basic_blocks, max_cuid);
3270   compute_expr_hash_table ();
3271   if (gcse_file)
3272     dump_hash_table (gcse_file, "Expression", expr_hash_table,
3273                      expr_hash_table_size, n_exprs);
3274
3275   if (n_exprs > 0)
3276     {
3277       compute_kill_rd ();
3278       compute_rd ();
3279       alloc_avail_expr_mem (n_basic_blocks, n_exprs);
3280       compute_ae_gen ();
3281       compute_ae_kill (ae_gen, ae_kill);
3282       compute_available (ae_gen, ae_kill, ae_out, ae_in);
3283       changed = classic_gcse ();
3284       free_avail_expr_mem ();
3285     }
3286
3287   free_rd_mem ();
3288   free_expr_hash_table ();
3289
3290   if (gcse_file)
3291     {
3292       fprintf (gcse_file, "\n");
3293       fprintf (gcse_file, "GCSE of %s, pass %d: %d bytes needed, %d substs,",
3294                current_function_name, pass, bytes_used, gcse_subst_count);
3295       fprintf (gcse_file, "%d insns created\n", gcse_create_count);
3296     }
3297
3298   return changed;
3299 }
3300 \f
3301 /* Compute copy/constant propagation working variables.  */
3302
3303 /* Local properties of assignments.  */
3304 static sbitmap *cprop_pavloc;
3305 static sbitmap *cprop_absaltered;
3306
3307 /* Global properties of assignments (computed from the local properties).  */
3308 static sbitmap *cprop_avin;
3309 static sbitmap *cprop_avout;
3310
3311 /* Allocate vars used for copy/const propagation.  N_BLOCKS is the number of
3312    basic blocks.  N_SETS is the number of sets.  */
3313
3314 static void
3315 alloc_cprop_mem (n_blocks, n_sets)
3316      int n_blocks, n_sets;
3317 {
3318   cprop_pavloc = sbitmap_vector_alloc (n_blocks, n_sets);
3319   cprop_absaltered = sbitmap_vector_alloc (n_blocks, n_sets);
3320
3321   cprop_avin = sbitmap_vector_alloc (n_blocks, n_sets);
3322   cprop_avout = sbitmap_vector_alloc (n_blocks, n_sets);
3323 }
3324
3325 /* Free vars used by copy/const propagation.  */
3326
3327 static void
3328 free_cprop_mem ()
3329 {
3330   free (cprop_pavloc);
3331   free (cprop_absaltered);
3332   free (cprop_avin);
3333   free (cprop_avout);
3334 }
3335
3336 /* For each block, compute whether X is transparent.  X is either an
3337    expression or an assignment [though we don't care which, for this context
3338    an assignment is treated as an expression].  For each block where an
3339    element of X is modified, set (SET_P == 1) or reset (SET_P == 0) the INDX
3340    bit in BMAP.  */
3341
3342 static void
3343 compute_transp (x, indx, bmap, set_p)
3344      rtx x;
3345      int indx;
3346      sbitmap *bmap;
3347      int set_p;
3348 {
3349   int bb, i, j;
3350   enum rtx_code code;
3351   reg_set *r;
3352   const char *fmt;
3353
3354   /* repeat is used to turn tail-recursion into iteration since GCC
3355      can't do it when there's no return value.  */
3356  repeat:
3357
3358   if (x == 0)
3359     return;
3360
3361   code = GET_CODE (x);
3362   switch (code)
3363     {
3364     case REG:
3365       if (set_p)
3366         {
3367           if (REGNO (x) < FIRST_PSEUDO_REGISTER)
3368             {
3369               for (bb = 0; bb < n_basic_blocks; bb++)
3370                 if (TEST_BIT (reg_set_in_block[bb], REGNO (x)))
3371                   SET_BIT (bmap[bb], indx);
3372             }
3373           else
3374             {
3375               for (r = reg_set_table[REGNO (x)]; r != NULL; r = r->next)
3376                 SET_BIT (bmap[BLOCK_NUM (r->insn)], indx);
3377             }
3378         }
3379       else
3380         {
3381           if (REGNO (x) < FIRST_PSEUDO_REGISTER)
3382             {
3383               for (bb = 0; bb < n_basic_blocks; bb++)
3384                 if (TEST_BIT (reg_set_in_block[bb], REGNO (x)))
3385                   RESET_BIT (bmap[bb], indx);
3386             }
3387           else
3388             {
3389               for (r = reg_set_table[REGNO (x)]; r != NULL; r = r->next)
3390                 RESET_BIT (bmap[BLOCK_NUM (r->insn)], indx);
3391             }
3392         }
3393
3394       return;
3395
3396     case MEM:
3397       if (set_p)
3398         {
3399           for (bb = 0; bb < n_basic_blocks; bb++)
3400             if (mem_set_in_block[bb])
3401               SET_BIT (bmap[bb], indx);
3402         }
3403       else
3404         {
3405           for (bb = 0; bb < n_basic_blocks; bb++)
3406             if (mem_set_in_block[bb])
3407               RESET_BIT (bmap[bb], indx);
3408         }
3409
3410       x = XEXP (x, 0);
3411       goto repeat;
3412
3413     case PC:
3414     case CC0: /*FIXME*/
3415     case CONST:
3416     case CONST_INT:
3417     case CONST_DOUBLE:
3418     case SYMBOL_REF:
3419     case LABEL_REF:
3420     case ADDR_VEC:
3421     case ADDR_DIFF_VEC:
3422       return;
3423
3424     default:
3425       break;
3426     }
3427
3428   for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
3429     {
3430       if (fmt[i] == 'e')
3431         {
3432           /* If we are about to do the last recursive call
3433              needed at this level, change it into iteration.
3434              This function is called enough to be worth it.  */
3435           if (i == 0)
3436             {
3437               x = XEXP (x, i);
3438               goto repeat;
3439             }
3440
3441           compute_transp (XEXP (x, i), indx, bmap, set_p);
3442         }
3443       else if (fmt[i] == 'E')
3444         for (j = 0; j < XVECLEN (x, i); j++)
3445           compute_transp (XVECEXP (x, i, j), indx, bmap, set_p);
3446     }
3447 }
3448
3449 /* Top level routine to do the dataflow analysis needed by copy/const
3450    propagation.  */
3451
3452 static void
3453 compute_cprop_data ()
3454 {
3455   compute_local_properties (cprop_absaltered, cprop_pavloc, NULL, 1);
3456   compute_available (cprop_pavloc, cprop_absaltered,
3457                      cprop_avout, cprop_avin);
3458 }
3459 \f
3460 /* Copy/constant propagation.  */
3461
3462 /* Maximum number of register uses in an insn that we handle.  */
3463 #define MAX_USES 8
3464
3465 /* Table of uses found in an insn.
3466    Allocated statically to avoid alloc/free complexity and overhead.  */
3467 static struct reg_use reg_use_table[MAX_USES];
3468
3469 /* Index into `reg_use_table' while building it.  */
3470 static int reg_use_count;
3471
3472 /* Set up a list of register numbers used in INSN.  The found uses are stored
3473    in `reg_use_table'.  `reg_use_count' is initialized to zero before entry,
3474    and contains the number of uses in the table upon exit.
3475
3476    ??? If a register appears multiple times we will record it multiple times.
3477    This doesn't hurt anything but it will slow things down.  */
3478
3479 static void
3480 find_used_regs (x)
3481      rtx x;
3482 {
3483   int i, j;
3484   enum rtx_code code;
3485   const char *fmt;
3486
3487   /* repeat is used to turn tail-recursion into iteration since GCC
3488      can't do it when there's no return value.  */
3489  repeat:
3490
3491   if (x == 0)
3492     return;
3493
3494   code = GET_CODE (x);
3495   switch (code)
3496     {
3497     case REG:
3498       if (reg_use_count == MAX_USES)
3499         return;
3500
3501       reg_use_table[reg_use_count].reg_rtx = x;
3502       reg_use_count++;
3503       return;
3504
3505     case MEM:
3506       x = XEXP (x, 0);
3507       goto repeat;
3508
3509     case PC:
3510     case CC0:
3511     case CONST:
3512     case CONST_INT:
3513     case CONST_DOUBLE:
3514     case SYMBOL_REF:
3515     case LABEL_REF:
3516     case CLOBBER:
3517     case ADDR_VEC:
3518     case ADDR_DIFF_VEC:
3519     case ASM_INPUT: /*FIXME*/
3520       return;
3521
3522     case SET:
3523       if (GET_CODE (SET_DEST (x)) == MEM)
3524         find_used_regs (SET_DEST (x));
3525       x = SET_SRC (x);
3526       goto repeat;
3527
3528     default:
3529       break;
3530     }
3531
3532   /* Recursively scan the operands of this expression.  */
3533
3534   for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
3535     {
3536       if (fmt[i] == 'e')
3537         {
3538           /* If we are about to do the last recursive call
3539              needed at this level, change it into iteration.
3540              This function is called enough to be worth it.  */
3541           if (i == 0)
3542             {
3543               x = XEXP (x, 0);
3544               goto repeat;
3545             }
3546
3547           find_used_regs (XEXP (x, i));
3548         }
3549       else if (fmt[i] == 'E')
3550         for (j = 0; j < XVECLEN (x, i); j++)
3551           find_used_regs (XVECEXP (x, i, j));
3552     }
3553 }
3554
3555 /* Try to replace all non-SET_DEST occurrences of FROM in INSN with TO.
3556    Returns non-zero is successful.  */
3557
3558 static int
3559 try_replace_reg (from, to, insn)
3560      rtx from, to, insn;
3561 {
3562   rtx note;
3563   rtx src;
3564   int success;
3565   rtx set;
3566
3567   note = find_reg_note (insn, REG_EQUAL, NULL_RTX);
3568
3569   if (!note)
3570     note = find_reg_note (insn, REG_EQUIV, NULL_RTX);
3571
3572   /* If this fails we could try to simplify the result of the
3573      replacement and attempt to recognize the simplified insn.
3574
3575      But we need a general simplify_rtx that doesn't have pass
3576      specific state variables.  I'm not aware of one at the moment.  */
3577
3578   success = validate_replace_src (from, to, insn);
3579   set = single_set (insn);
3580
3581   /* We've failed to do replacement. Try to add REG_EQUAL note to not loose
3582      information.  */
3583   if (!success && !note)
3584     {
3585       if (!set)
3586         return 0;
3587
3588       note = REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_EQUAL,
3589                                                    copy_rtx (SET_SRC (set)),
3590                                                    REG_NOTES (insn));
3591     }
3592
3593   /* Always do the replacement in REQ_EQUAL and REG_EQUIV notes.  Also
3594      try to simplify them.  */
3595   if (note)
3596     {
3597       rtx simplified;
3598
3599       src = XEXP (note, 0);
3600       replace_rtx (src, from, to);
3601
3602       /* Try to simplify resulting note. */
3603       simplified = simplify_rtx (src);
3604       if (simplified)
3605         {
3606           src = simplified;
3607           XEXP (note, 0) = src;
3608         }
3609
3610       /* REG_EQUAL may get simplified into register.
3611          We don't allow that. Remove that note. This code ought
3612          not to hapen, because previous code ought to syntetize
3613          reg-reg move, but be on the safe side.  */
3614       else if (REG_P (src))
3615         remove_note (insn, note);
3616     }
3617   return success;
3618 }
3619
3620 /* Find a set of REGNOs that are available on entry to INSN's block.  Returns
3621    NULL no such set is found.  */
3622
3623 static struct expr *
3624 find_avail_set (regno, insn)
3625      int regno;
3626      rtx insn;
3627 {
3628   /* SET1 contains the last set found that can be returned to the caller for
3629      use in a substitution.  */
3630   struct expr *set1 = 0;
3631
3632   /* Loops are not possible here.  To get a loop we would need two sets
3633      available at the start of the block containing INSN.  ie we would
3634      need two sets like this available at the start of the block:
3635
3636        (set (reg X) (reg Y))
3637        (set (reg Y) (reg X))
3638
3639      This can not happen since the set of (reg Y) would have killed the
3640      set of (reg X) making it unavailable at the start of this block.  */
3641   while (1)
3642      {
3643       rtx src;
3644       struct expr *set = lookup_set (regno, NULL_RTX);
3645
3646       /* Find a set that is available at the start of the block
3647          which contains INSN.  */
3648       while (set)
3649         {
3650           if (TEST_BIT (cprop_avin[BLOCK_NUM (insn)], set->bitmap_index))
3651             break;
3652           set = next_set (regno, set);
3653         }
3654
3655       /* If no available set was found we've reached the end of the
3656          (possibly empty) copy chain.  */
3657       if (set == 0)
3658         break;
3659
3660       if (GET_CODE (set->expr) != SET)
3661         abort ();
3662
3663       src = SET_SRC (set->expr);
3664
3665       /* We know the set is available.
3666          Now check that SRC is ANTLOC (i.e. none of the source operands
3667          have changed since the start of the block).
3668
3669          If the source operand changed, we may still use it for the next
3670          iteration of this loop, but we may not use it for substitutions.  */
3671
3672       if (CONSTANT_P (src) || oprs_not_set_p (src, insn))
3673         set1 = set;
3674
3675       /* If the source of the set is anything except a register, then
3676          we have reached the end of the copy chain.  */
3677       if (GET_CODE (src) != REG)
3678         break;
3679
3680       /* Follow the copy chain, ie start another iteration of the loop
3681          and see if we have an available copy into SRC.  */
3682       regno = REGNO (src);
3683      }
3684
3685   /* SET1 holds the last set that was available and anticipatable at
3686      INSN.  */
3687   return set1;
3688 }
3689
3690 /* Subroutine of cprop_insn that tries to propagate constants into
3691    JUMP_INSNS.  INSN must be a conditional jump; COPY is a copy of it
3692    that we can use for substitutions.
3693    REG_USED is the use we will try to replace, SRC is the constant we
3694    will try to substitute for it.
3695    Returns nonzero if a change was made.  */
3696
3697 static int
3698 cprop_jump (insn, copy, reg_used, src)
3699      rtx insn, copy;
3700      struct reg_use *reg_used;
3701      rtx src;
3702 {
3703   rtx set = PATTERN (copy);
3704   rtx temp;
3705
3706   /* Replace the register with the appropriate constant.  */
3707   replace_rtx (SET_SRC (set), reg_used->reg_rtx, src);
3708
3709   temp = simplify_ternary_operation (GET_CODE (SET_SRC (set)),
3710                                      GET_MODE (SET_SRC (set)),
3711                                      GET_MODE (XEXP (SET_SRC (set), 0)),
3712                                      XEXP (SET_SRC (set), 0),
3713                                      XEXP (SET_SRC (set), 1),
3714                                      XEXP (SET_SRC (set), 2));
3715
3716   /* If no simplification can be made, then try the next
3717      register.  */
3718   if (temp == 0)
3719     return 0;
3720
3721   SET_SRC (set) = temp;
3722
3723   /* That may have changed the structure of TEMP, so
3724      force it to be rerecognized if it has not turned
3725      into a nop or unconditional jump.  */
3726
3727   INSN_CODE (copy) = -1;
3728   if ((SET_DEST (set) == pc_rtx
3729        && (SET_SRC (set) == pc_rtx
3730            || GET_CODE (SET_SRC (set)) == LABEL_REF))
3731       || recog (PATTERN (copy), copy, NULL) >= 0)
3732     {
3733       /* This has either become an unconditional jump
3734          or a nop-jump.  We'd like to delete nop jumps
3735          here, but doing so confuses gcse.  So we just
3736          make the replacement and let later passes
3737          sort things out.  */
3738       PATTERN (insn) = set;
3739       INSN_CODE (insn) = -1;
3740
3741       /* One less use of the label this insn used to jump to
3742          if we turned this into a NOP jump.  */
3743       if (SET_SRC (set) == pc_rtx && JUMP_LABEL (insn) != 0)
3744         --LABEL_NUSES (JUMP_LABEL (insn));
3745
3746       /* If this has turned into an unconditional jump,
3747          then put a barrier after it so that the unreachable
3748          code will be deleted.  */
3749       if (GET_CODE (SET_SRC (set)) == LABEL_REF)
3750         emit_barrier_after (insn);
3751
3752       run_jump_opt_after_gcse = 1;
3753
3754       const_prop_count++;
3755       if (gcse_file != NULL)
3756         {
3757           fprintf (gcse_file,
3758                    "CONST-PROP: Replacing reg %d in insn %d with constant ",
3759                    REGNO (reg_used->reg_rtx), INSN_UID (insn));
3760           print_rtl (gcse_file, src);
3761           fprintf (gcse_file, "\n");
3762         }
3763
3764       return 1;
3765     }
3766   return 0;
3767 }
3768
3769 #ifdef HAVE_cc0
3770
3771 /* Subroutine of cprop_insn that tries to propagate constants into JUMP_INSNS
3772    for machines that have CC0.  INSN is a single set that stores into CC0;
3773    the insn following it is a conditional jump.  REG_USED is the use we will
3774    try to replace, SRC is the constant we will try to substitute for it.
3775    Returns nonzero if a change was made.  */
3776
3777 static int
3778 cprop_cc0_jump (insn, reg_used, src)
3779      rtx insn;
3780      struct reg_use *reg_used;
3781      rtx src;
3782 {
3783   rtx jump = NEXT_INSN (insn);
3784   rtx copy = copy_rtx (jump);
3785   rtx set = PATTERN (copy);
3786
3787   /* We need to copy the source of the cc0 setter, as cprop_jump is going to
3788      substitute into it.  */
3789   replace_rtx (SET_SRC (set), cc0_rtx, copy_rtx (SET_SRC (PATTERN (insn))));
3790   if (! cprop_jump (jump, copy, reg_used, src))
3791     return 0;
3792
3793   /* If we succeeded, delete the cc0 setter.  */
3794   PUT_CODE (insn, NOTE);
3795   NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
3796   NOTE_SOURCE_FILE (insn) = 0;
3797   return 1;
3798  }
3799 #endif
3800
3801 /* Perform constant and copy propagation on INSN.
3802    The result is non-zero if a change was made.  */
3803
3804 static int
3805 cprop_insn (insn, alter_jumps)
3806      rtx insn;
3807      int alter_jumps;
3808 {
3809   struct reg_use *reg_used;
3810   int changed = 0;
3811   rtx note;
3812
3813   /* Only propagate into SETs.  Note that a conditional jump is a
3814      SET with pc_rtx as the destination.  */
3815   if ((GET_CODE (insn) != INSN
3816        && GET_CODE (insn) != JUMP_INSN)
3817       || GET_CODE (PATTERN (insn)) != SET)
3818     return 0;
3819
3820   reg_use_count = 0;
3821   find_used_regs (PATTERN (insn));
3822
3823   note = find_reg_note (insn, REG_EQUIV, NULL_RTX);
3824   if (!note)
3825     note = find_reg_note (insn, REG_EQUAL, NULL_RTX);
3826
3827   /* We may win even when propagating constants into notes. */
3828   if (note)
3829     find_used_regs (XEXP (note, 0));
3830
3831   for (reg_used = &reg_use_table[0]; reg_use_count > 0;
3832        reg_used++, reg_use_count--)
3833     {
3834       unsigned int regno = REGNO (reg_used->reg_rtx);
3835       rtx pat, src;
3836       struct expr *set;
3837
3838       /* Ignore registers created by GCSE.
3839          We do this because ... */
3840       if (regno >= max_gcse_regno)
3841         continue;
3842
3843       /* If the register has already been set in this block, there's
3844          nothing we can do.  */
3845       if (! oprs_not_set_p (reg_used->reg_rtx, insn))
3846         continue;
3847
3848       /* Find an assignment that sets reg_used and is available
3849          at the start of the block.  */
3850       set = find_avail_set (regno, insn);
3851       if (! set)
3852         continue;
3853
3854       pat = set->expr;
3855       /* ??? We might be able to handle PARALLELs.  Later.  */
3856       if (GET_CODE (pat) != SET)
3857         abort ();
3858
3859       src = SET_SRC (pat);
3860
3861       /* Constant propagation.  */
3862       if (GET_CODE (src) == CONST_INT || GET_CODE (src) == CONST_DOUBLE
3863           || GET_CODE (src) == SYMBOL_REF)
3864         {
3865           /* Handle normal insns first.  */
3866           if (GET_CODE (insn) == INSN
3867               && try_replace_reg (reg_used->reg_rtx, src, insn))
3868             {
3869               changed = 1;
3870               const_prop_count++;
3871               if (gcse_file != NULL)
3872                 {
3873                   fprintf (gcse_file, "CONST-PROP: Replacing reg %d in ",
3874                            regno);
3875                   fprintf (gcse_file, "insn %d with constant ",
3876                            INSN_UID (insn));
3877                   print_rtl (gcse_file, src);
3878                   fprintf (gcse_file, "\n");
3879                 }
3880
3881               /* The original insn setting reg_used may or may not now be
3882                  deletable.  We leave the deletion to flow.  */
3883             }
3884
3885           /* Try to propagate a CONST_INT into a conditional jump.
3886              We're pretty specific about what we will handle in this
3887              code, we can extend this as necessary over time.
3888
3889              Right now the insn in question must look like
3890              (set (pc) (if_then_else ...))  */
3891           else if (alter_jumps
3892                    && GET_CODE (insn) == JUMP_INSN
3893                    && condjump_p (insn)
3894                    && ! simplejump_p (insn))
3895             changed |= cprop_jump (insn, copy_rtx (insn), reg_used, src);
3896 #ifdef HAVE_cc0
3897           /* Similar code for machines that use a pair of CC0 setter and
3898              conditional jump insn.  */
3899           else if (alter_jumps
3900                    && GET_CODE (PATTERN (insn)) == SET
3901                    && SET_DEST (PATTERN (insn)) == cc0_rtx
3902                    && GET_CODE (NEXT_INSN (insn)) == JUMP_INSN
3903                    && condjump_p (NEXT_INSN (insn))
3904                    && ! simplejump_p (NEXT_INSN (insn)))
3905             changed |= cprop_cc0_jump (insn, reg_used, src);
3906 #endif
3907         }
3908       else if (GET_CODE (src) == REG
3909                && REGNO (src) >= FIRST_PSEUDO_REGISTER
3910                && REGNO (src) != regno)
3911         {
3912           if (try_replace_reg (reg_used->reg_rtx, src, insn))
3913             {
3914               changed = 1;
3915               copy_prop_count++;
3916               if (gcse_file != NULL)
3917                 {
3918                   fprintf (gcse_file, "COPY-PROP: Replacing reg %d in insn %d",
3919                            regno, INSN_UID (insn));
3920                   fprintf (gcse_file, " with reg %d\n", REGNO (src));
3921                 }
3922
3923               /* The original insn setting reg_used may or may not now be
3924                  deletable.  We leave the deletion to flow.  */
3925               /* FIXME: If it turns out that the insn isn't deletable,
3926                  then we may have unnecessarily extended register lifetimes
3927                  and made things worse.  */
3928             }
3929         }
3930     }
3931
3932   return changed;
3933 }
3934
3935 /* Forward propagate copies.  This includes copies and constants.  Return
3936    non-zero if a change was made.  */
3937
3938 static int
3939 cprop (alter_jumps)
3940      int alter_jumps;
3941 {
3942   int bb, changed;
3943   rtx insn;
3944
3945   /* Note we start at block 1.  */
3946
3947   changed = 0;
3948   for (bb = 1; bb < n_basic_blocks; bb++)
3949     {
3950       /* Reset tables used to keep track of what's still valid [since the
3951          start of the block].  */
3952       reset_opr_set_tables ();
3953
3954       for (insn = BLOCK_HEAD (bb);
3955            insn != NULL && insn != NEXT_INSN (BLOCK_END (bb));
3956            insn = NEXT_INSN (insn))
3957         {
3958           if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
3959             {
3960               changed |= cprop_insn (insn, alter_jumps);
3961
3962               /* Keep track of everything modified by this insn.  */
3963               /* ??? Need to be careful w.r.t. mods done to INSN.  Don't
3964                  call mark_oprs_set if we turned the insn into a NOTE.  */
3965               if (GET_CODE (insn) != NOTE)
3966                 mark_oprs_set (insn);
3967             }
3968         }
3969     }
3970
3971   if (gcse_file != NULL)
3972     fprintf (gcse_file, "\n");
3973
3974   return changed;
3975 }
3976
3977 /* Perform one copy/constant propagation pass.
3978    F is the first insn in the function.
3979    PASS is the pass count.  */
3980
3981 static int
3982 one_cprop_pass (pass, alter_jumps)
3983      int pass;
3984      int alter_jumps;
3985 {
3986   int changed = 0;
3987
3988   const_prop_count = 0;
3989   copy_prop_count = 0;
3990
3991   alloc_set_hash_table (max_cuid);
3992   compute_set_hash_table ();
3993   if (gcse_file)
3994     dump_hash_table (gcse_file, "SET", set_hash_table, set_hash_table_size,
3995                      n_sets);
3996   if (n_sets > 0)
3997     {
3998       alloc_cprop_mem (n_basic_blocks, n_sets);
3999       compute_cprop_data ();
4000       changed = cprop (alter_jumps);
4001       free_cprop_mem ();
4002     }
4003
4004   free_set_hash_table ();
4005
4006   if (gcse_file)
4007     {
4008       fprintf (gcse_file, "CPROP of %s, pass %d: %d bytes needed, ",
4009                current_function_name, pass, bytes_used);
4010       fprintf (gcse_file, "%d const props, %d copy props\n\n",
4011                const_prop_count, copy_prop_count);
4012     }
4013
4014   return changed;
4015 }
4016 \f
4017 /* Compute PRE+LCM working variables.  */
4018
4019 /* Local properties of expressions.  */
4020 /* Nonzero for expressions that are transparent in the block.  */
4021 static sbitmap *transp;
4022
4023 /* Nonzero for expressions that are transparent at the end of the block.
4024    This is only zero for expressions killed by abnormal critical edge
4025    created by a calls.  */
4026 static sbitmap *transpout;
4027
4028 /* Nonzero for expressions that are computed (available) in the block.  */
4029 static sbitmap *comp;
4030
4031 /* Nonzero for expressions that are locally anticipatable in the block.  */
4032 static sbitmap *antloc;
4033
4034 /* Nonzero for expressions where this block is an optimal computation
4035    point.  */
4036 static sbitmap *pre_optimal;
4037
4038 /* Nonzero for expressions which are redundant in a particular block.  */
4039 static sbitmap *pre_redundant;
4040
4041 /* Nonzero for expressions which should be inserted on a specific edge.  */
4042 static sbitmap *pre_insert_map;
4043
4044 /* Nonzero for expressions which should be deleted in a specific block.  */
4045 static sbitmap *pre_delete_map;
4046
4047 /* Contains the edge_list returned by pre_edge_lcm.  */
4048 static struct edge_list *edge_list;
4049
4050 static sbitmap *temp_bitmap;
4051
4052 /* Redundant insns.  */
4053 static sbitmap pre_redundant_insns;
4054
4055 /* Allocate vars used for PRE analysis.  */
4056
4057 static void
4058 alloc_pre_mem (n_blocks, n_exprs)
4059      int n_blocks, n_exprs;
4060 {
4061   transp = sbitmap_vector_alloc (n_blocks, n_exprs);
4062   comp = sbitmap_vector_alloc (n_blocks, n_exprs);
4063   antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
4064   temp_bitmap = sbitmap_vector_alloc (n_blocks, n_exprs);
4065
4066   pre_optimal = NULL;
4067   pre_redundant = NULL;
4068   pre_insert_map = NULL;
4069   pre_delete_map = NULL;
4070   ae_in = NULL;
4071   ae_out = NULL;
4072   u_bitmap = NULL;
4073   transpout = sbitmap_vector_alloc (n_blocks, n_exprs);
4074   ae_kill = sbitmap_vector_alloc (n_blocks, n_exprs);
4075
4076   /* pre_insert and pre_delete are allocated later.  */
4077 }
4078
4079 /* Free vars used for PRE analysis.  */
4080
4081 static void
4082 free_pre_mem ()
4083 {
4084   free (transp);
4085   free (comp);
4086   free (antloc);
4087   free (temp_bitmap);
4088
4089   if (pre_optimal)
4090     free (pre_optimal);
4091   if (pre_redundant)
4092     free (pre_redundant);
4093   if (pre_insert_map)
4094     free (pre_insert_map);
4095   if (pre_delete_map)
4096     free (pre_delete_map);
4097   if (transpout)
4098     free (transpout);
4099
4100   if (ae_in)
4101     free (ae_in);
4102   if (ae_out)
4103     free (ae_out);
4104   if (ae_kill)
4105     free (ae_kill);
4106   if (u_bitmap)
4107     free (u_bitmap);
4108
4109   transp = comp = antloc = NULL;
4110   pre_optimal = pre_redundant = pre_insert_map = pre_delete_map = NULL;
4111   transpout = ae_in = ae_out = ae_kill = NULL;
4112   u_bitmap = NULL;
4113
4114 }
4115
4116 /* Top level routine to do the dataflow analysis needed by PRE.  */
4117
4118 static void
4119 compute_pre_data ()
4120 {
4121   compute_local_properties (transp, comp, antloc, 0);
4122   compute_transpout ();
4123   sbitmap_vector_zero (ae_kill, n_basic_blocks);
4124   compute_ae_kill (comp, ae_kill);
4125   edge_list = pre_edge_lcm (gcse_file, n_exprs, transp, comp, antloc,
4126                             ae_kill, &pre_insert_map, &pre_delete_map);
4127 }
4128 \f
4129 /* PRE utilities */
4130
4131 /* Return non-zero if an occurrence of expression EXPR in OCCR_BB would reach
4132    block BB.
4133
4134    VISITED is a pointer to a working buffer for tracking which BB's have
4135    been visited.  It is NULL for the top-level call.
4136
4137    We treat reaching expressions that go through blocks containing the same
4138    reaching expression as "not reaching".  E.g. if EXPR is generated in blocks
4139    2 and 3, INSN is in block 4, and 2->3->4, we treat the expression in block
4140    2 as not reaching.  The intent is to improve the probability of finding
4141    only one reaching expression and to reduce register lifetimes by picking
4142    the closest such expression.  */
4143
4144 static int
4145 pre_expr_reaches_here_p_work (occr_bb, expr, bb, visited)
4146      int occr_bb;
4147      struct expr *expr;
4148      int bb;
4149      char *visited;
4150 {
4151   edge pred;
4152
4153   for (pred = BASIC_BLOCK (bb)->pred; pred != NULL; pred = pred->pred_next)
4154     {
4155       int pred_bb = pred->src->index;
4156
4157       if (pred->src == ENTRY_BLOCK_PTR
4158           /* Has predecessor has already been visited?  */
4159           || visited[pred_bb])
4160         ;/* Nothing to do.  */
4161
4162       /* Does this predecessor generate this expression?  */
4163       else if (TEST_BIT (comp[pred_bb], expr->bitmap_index))
4164         {
4165           /* Is this the occurrence we're looking for?
4166              Note that there's only one generating occurrence per block
4167              so we just need to check the block number.  */
4168           if (occr_bb == pred_bb)
4169             return 1;
4170
4171           visited[pred_bb] = 1;
4172         }
4173       /* Ignore this predecessor if it kills the expression.  */
4174       else if (! TEST_BIT (transp[pred_bb], expr->bitmap_index))
4175         visited[pred_bb] = 1;
4176
4177       /* Neither gen nor kill.  */
4178       else
4179         {
4180           visited[pred_bb] = 1;
4181           if (pre_expr_reaches_here_p_work (occr_bb, expr, pred_bb, visited))
4182             return 1;
4183         }
4184     }
4185
4186   /* All paths have been checked.  */
4187   return 0;
4188 }
4189
4190 /* The wrapper for pre_expr_reaches_here_work that ensures that any
4191    memory allocated for that function is returned. */
4192
4193 static int
4194 pre_expr_reaches_here_p (occr_bb, expr, bb)
4195      int occr_bb;
4196      struct expr *expr;
4197      int bb;
4198 {
4199   int rval;
4200   char *visited = (char *) xcalloc (n_basic_blocks, 1);
4201
4202   rval = pre_expr_reaches_here_p_work(occr_bb, expr, bb, visited);
4203
4204   free (visited);
4205   return rval;
4206 }
4207 \f
4208
4209 /* Given an expr, generate RTL which we can insert at the end of a BB,
4210    or on an edge.  Set the block number of any insns generated to
4211    the value of BB.  */
4212
4213 static rtx
4214 process_insert_insn (expr)
4215      struct expr *expr;
4216 {
4217   rtx reg = expr->reaching_reg;
4218   rtx pat, copied_expr;
4219   rtx first_new_insn;
4220
4221   start_sequence ();
4222   copied_expr = copy_rtx (expr->expr);
4223   emit_move_insn (reg, copied_expr);
4224   first_new_insn = get_insns ();
4225   pat = gen_sequence ();
4226   end_sequence ();
4227
4228   return pat;
4229 }
4230
4231 /* Add EXPR to the end of basic block BB.
4232
4233    This is used by both the PRE and code hoisting.
4234
4235    For PRE, we want to verify that the expr is either transparent
4236    or locally anticipatable in the target block.  This check makes
4237    no sense for code hoisting.  */
4238
4239 static void
4240 insert_insn_end_bb (expr, bb, pre)
4241      struct expr *expr;
4242      int bb;
4243      int pre;
4244 {
4245   rtx insn = BLOCK_END (bb);
4246   rtx new_insn;
4247   rtx reg = expr->reaching_reg;
4248   int regno = REGNO (reg);
4249   rtx pat;
4250   int i;
4251
4252   pat = process_insert_insn (expr);
4253
4254   /* If the last insn is a jump, insert EXPR in front [taking care to
4255      handle cc0, etc. properly].  */
4256
4257   if (GET_CODE (insn) == JUMP_INSN)
4258     {
4259 #ifdef HAVE_cc0
4260       rtx note;
4261 #endif
4262
4263       /* If this is a jump table, then we can't insert stuff here.  Since
4264          we know the previous real insn must be the tablejump, we insert
4265          the new instruction just before the tablejump.  */
4266       if (GET_CODE (PATTERN (insn)) == ADDR_VEC
4267           || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
4268         insn = prev_real_insn (insn);
4269
4270 #ifdef HAVE_cc0
4271       /* FIXME: 'twould be nice to call prev_cc0_setter here but it aborts
4272          if cc0 isn't set.  */
4273       note = find_reg_note (insn, REG_CC_SETTER, NULL_RTX);
4274       if (note)
4275         insn = XEXP (note, 0);
4276       else
4277         {
4278           rtx maybe_cc0_setter = prev_nonnote_insn (insn);
4279           if (maybe_cc0_setter
4280               && GET_RTX_CLASS (GET_CODE (maybe_cc0_setter)) == 'i'
4281               && sets_cc0_p (PATTERN (maybe_cc0_setter)))
4282             insn = maybe_cc0_setter;
4283         }
4284 #endif
4285       /* FIXME: What if something in cc0/jump uses value set in new insn?  */
4286       new_insn = emit_block_insn_before (pat, insn, BASIC_BLOCK (bb));
4287     }
4288
4289   /* Likewise if the last insn is a call, as will happen in the presence
4290      of exception handling.  */
4291   else if (GET_CODE (insn) == CALL_INSN)
4292     {
4293       HARD_REG_SET parm_regs;
4294       int nparm_regs;
4295       rtx p;
4296
4297       /* Keeping in mind SMALL_REGISTER_CLASSES and parameters in registers,
4298          we search backward and place the instructions before the first
4299          parameter is loaded.  Do this for everyone for consistency and a
4300          presumtion that we'll get better code elsewhere as well.
4301
4302          It should always be the case that we can put these instructions
4303          anywhere in the basic block with performing PRE optimizations.
4304          Check this.  */
4305
4306       if (pre
4307           && !TEST_BIT (antloc[bb], expr->bitmap_index)
4308           && !TEST_BIT (transp[bb], expr->bitmap_index))
4309         abort ();
4310
4311       /* Since different machines initialize their parameter registers
4312          in different orders, assume nothing.  Collect the set of all
4313          parameter registers.  */
4314       CLEAR_HARD_REG_SET (parm_regs);
4315       nparm_regs = 0;
4316       for (p = CALL_INSN_FUNCTION_USAGE (insn); p ; p = XEXP (p, 1))
4317         if (GET_CODE (XEXP (p, 0)) == USE
4318             && GET_CODE (XEXP (XEXP (p, 0), 0)) == REG)
4319           {
4320             if (REGNO (XEXP (XEXP (p, 0), 0)) >= FIRST_PSEUDO_REGISTER)
4321               abort ();
4322
4323             SET_HARD_REG_BIT (parm_regs, REGNO (XEXP (XEXP (p, 0), 0)));
4324             nparm_regs++;
4325           }
4326
4327       /* Search backward for the first set of a register in this set.  */
4328       while (nparm_regs && BLOCK_HEAD (bb) != insn)
4329         {
4330           insn = PREV_INSN (insn);
4331           p = single_set (insn);
4332           if (p && GET_CODE (SET_DEST (p)) == REG
4333               && REGNO (SET_DEST (p)) < FIRST_PSEUDO_REGISTER
4334               && TEST_HARD_REG_BIT (parm_regs, REGNO (SET_DEST (p))))
4335             {
4336               CLEAR_HARD_REG_BIT (parm_regs, REGNO (SET_DEST (p)));
4337               nparm_regs--;
4338             }
4339         }
4340
4341       /* If we found all the parameter loads, then we want to insert
4342          before the first parameter load.
4343
4344          If we did not find all the parameter loads, then we might have
4345          stopped on the head of the block, which could be a CODE_LABEL.
4346          If we inserted before the CODE_LABEL, then we would be putting
4347          the insn in the wrong basic block.  In that case, put the insn
4348          after the CODE_LABEL.  Also, respect NOTE_INSN_BASIC_BLOCK.  */
4349       while (GET_CODE (insn) == CODE_LABEL
4350              || (GET_CODE (insn) == NOTE
4351                  && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK))
4352         insn = NEXT_INSN (insn);
4353
4354       new_insn = emit_block_insn_before (pat, insn, BASIC_BLOCK (bb));
4355     }
4356   else
4357     {
4358       new_insn = emit_insn_after (pat, insn);
4359       BLOCK_END (bb) = new_insn;
4360     }
4361
4362   /* Keep block number table up to date.
4363      Note, PAT could be a multiple insn sequence, we have to make
4364      sure that each insn in the sequence is handled.  */
4365   if (GET_CODE (pat) == SEQUENCE)
4366     {
4367       for (i = 0; i < XVECLEN (pat, 0); i++)
4368         {
4369           rtx insn = XVECEXP (pat, 0, i);
4370
4371           set_block_num (insn, bb);
4372           if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
4373             add_label_notes (PATTERN (insn), new_insn);
4374
4375           note_stores (PATTERN (insn), record_set_info, insn);
4376         }
4377     }
4378   else
4379     {
4380       add_label_notes (SET_SRC (pat), new_insn);
4381       set_block_num (new_insn, bb);
4382
4383       /* Keep register set table up to date.  */
4384       record_one_set (regno, new_insn);
4385     }
4386
4387   gcse_create_count++;
4388
4389   if (gcse_file)
4390     {
4391       fprintf (gcse_file, "PRE/HOIST: end of bb %d, insn %d, ",
4392                bb, INSN_UID (new_insn));
4393       fprintf (gcse_file, "copying expression %d to reg %d\n",
4394                expr->bitmap_index, regno);
4395     }
4396 }
4397
4398 /* Insert partially redundant expressions on edges in the CFG to make
4399    the expressions fully redundant.  */
4400
4401 static int
4402 pre_edge_insert (edge_list, index_map)
4403      struct edge_list *edge_list;
4404      struct expr **index_map;
4405 {
4406   int e, i, j, num_edges, set_size, did_insert = 0;
4407   sbitmap *inserted;
4408
4409   /* Where PRE_INSERT_MAP is nonzero, we add the expression on that edge
4410      if it reaches any of the deleted expressions.  */
4411
4412   set_size = pre_insert_map[0]->size;
4413   num_edges = NUM_EDGES (edge_list);
4414   inserted = sbitmap_vector_alloc (num_edges, n_exprs);
4415   sbitmap_vector_zero (inserted, num_edges);
4416
4417   for (e = 0; e < num_edges; e++)
4418     {
4419       int indx;
4420       basic_block pred = INDEX_EDGE_PRED_BB (edge_list, e);
4421       int bb = pred->index;
4422
4423       for (i = indx = 0; i < set_size; i++, indx += SBITMAP_ELT_BITS)
4424         {
4425           SBITMAP_ELT_TYPE insert = pre_insert_map[e]->elms[i];
4426
4427           for (j = indx; insert && j < n_exprs; j++, insert >>= 1)
4428             if ((insert & 1) != 0 && index_map[j]->reaching_reg != NULL_RTX)
4429               {
4430                 struct expr *expr = index_map[j];
4431                 struct occr *occr;
4432
4433                 /* Now look at each deleted occurence of this expression.  */
4434                 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
4435                   {
4436                     if (! occr->deleted_p)
4437                       continue;
4438
4439                     /* Insert this expression on this edge if if it would
4440                        reach the deleted occurence in BB.  */
4441                     if (!TEST_BIT (inserted[e], j))
4442                       {
4443                         rtx insn;
4444                         edge eg = INDEX_EDGE (edge_list, e);
4445
4446                         /* We can't insert anything on an abnormal and
4447                            critical edge, so we insert the insn at the end of
4448                            the previous block. There are several alternatives
4449                            detailed in Morgans book P277 (sec 10.5) for
4450                            handling this situation.  This one is easiest for
4451                            now.  */
4452
4453                         if ((eg->flags & EDGE_ABNORMAL) == EDGE_ABNORMAL)
4454                           insert_insn_end_bb (index_map[j], bb, 0);
4455                         else
4456                           {
4457                             insn = process_insert_insn (index_map[j]);
4458                             insert_insn_on_edge (insn, eg);
4459                           }
4460
4461                         if (gcse_file)
4462                           {
4463                             fprintf (gcse_file, "PRE/HOIST: edge (%d,%d), ",
4464                                      bb,
4465                                      INDEX_EDGE_SUCC_BB (edge_list, e)->index);
4466                             fprintf (gcse_file, "copy expression %d\n",
4467                                      expr->bitmap_index);
4468                           }
4469
4470                         SET_BIT (inserted[e], j);
4471                         did_insert = 1;
4472                         gcse_create_count++;
4473                       }
4474                   }
4475               }
4476         }
4477     }
4478
4479   free (inserted);
4480   return did_insert;
4481 }
4482
4483 /* Copy the result of INSN to REG.  INDX is the expression number.  */
4484
4485 static void
4486 pre_insert_copy_insn (expr, insn)
4487      struct expr *expr;
4488      rtx insn;
4489 {
4490   rtx reg = expr->reaching_reg;
4491   int regno = REGNO (reg);
4492   int indx = expr->bitmap_index;
4493   rtx set = single_set (insn);
4494   rtx new_insn;
4495   int bb = BLOCK_NUM (insn);
4496
4497   if (!set)
4498     abort ();
4499
4500   new_insn = emit_insn_after (gen_rtx_SET (VOIDmode, reg, SET_DEST (set)),
4501                               insn);
4502
4503   /* Keep block number table up to date.  */
4504   set_block_num (new_insn, bb);
4505
4506   /* Keep register set table up to date.  */
4507   record_one_set (regno, new_insn);
4508   if (insn == BLOCK_END (bb))
4509     BLOCK_END (bb) = new_insn;
4510
4511   gcse_create_count++;
4512
4513   if (gcse_file)
4514     fprintf (gcse_file,
4515              "PRE: bb %d, insn %d, copy expression %d in insn %d to reg %d\n",
4516               BLOCK_NUM (insn), INSN_UID (new_insn), indx,
4517               INSN_UID (insn), regno);
4518 }
4519
4520 /* Copy available expressions that reach the redundant expression
4521    to `reaching_reg'.  */
4522
4523 static void
4524 pre_insert_copies ()
4525 {
4526   unsigned int i;
4527   struct expr *expr;
4528   struct occr *occr;
4529   struct occr *avail;
4530
4531   /* For each available expression in the table, copy the result to
4532      `reaching_reg' if the expression reaches a deleted one.
4533
4534      ??? The current algorithm is rather brute force.
4535      Need to do some profiling.  */
4536
4537   for (i = 0; i < expr_hash_table_size; i++)
4538     for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
4539       {
4540         /* If the basic block isn't reachable, PPOUT will be TRUE.  However,
4541            we don't want to insert a copy here because the expression may not
4542            really be redundant.  So only insert an insn if the expression was
4543            deleted.  This test also avoids further processing if the
4544            expression wasn't deleted anywhere.  */
4545         if (expr->reaching_reg == NULL)
4546           continue;
4547
4548         for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
4549           {
4550             if (! occr->deleted_p)
4551               continue;
4552
4553             for (avail = expr->avail_occr; avail != NULL; avail = avail->next)
4554               {
4555                 rtx insn = avail->insn;
4556
4557                 /* No need to handle this one if handled already.  */
4558                 if (avail->copied_p)
4559                   continue;
4560
4561                 /* Don't handle this one if it's a redundant one.  */
4562                 if (TEST_BIT (pre_redundant_insns, INSN_CUID (insn)))
4563                   continue;
4564
4565                 /* Or if the expression doesn't reach the deleted one.  */
4566                 if (! pre_expr_reaches_here_p (BLOCK_NUM (avail->insn), expr,
4567                                                BLOCK_NUM (occr->insn)))
4568                   continue;
4569
4570                 /* Copy the result of avail to reaching_reg.  */
4571                 pre_insert_copy_insn (expr, insn);
4572                 avail->copied_p = 1;
4573               }
4574           }
4575       }
4576 }
4577
4578 /* Delete redundant computations.
4579    Deletion is done by changing the insn to copy the `reaching_reg' of
4580    the expression into the result of the SET.  It is left to later passes
4581    (cprop, cse2, flow, combine, regmove) to propagate the copy or eliminate it.
4582
4583    Returns non-zero if a change is made.  */
4584
4585 static int
4586 pre_delete ()
4587 {
4588   unsigned int i;
4589   int bb, changed;
4590   struct expr *expr;
4591   struct occr *occr;
4592
4593   /* Compute the expressions which are redundant and need to be replaced by
4594      copies from the reaching reg to the target reg.  */
4595   for (bb = 0; bb < n_basic_blocks; bb++)
4596     sbitmap_copy (temp_bitmap[bb], pre_delete_map[bb]);
4597
4598   changed = 0;
4599   for (i = 0; i < expr_hash_table_size; i++)
4600     for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
4601       {
4602         int indx = expr->bitmap_index;
4603
4604         /* We only need to search antic_occr since we require
4605            ANTLOC != 0.  */
4606
4607         for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
4608           {
4609             rtx insn = occr->insn;
4610             rtx set;
4611             int bb = BLOCK_NUM (insn);
4612
4613             if (TEST_BIT (temp_bitmap[bb], indx))
4614               {
4615                 set = single_set (insn);
4616                 if (! set)
4617                   abort ();
4618
4619                 /* Create a pseudo-reg to store the result of reaching
4620                    expressions into.  Get the mode for the new pseudo from
4621                    the mode of the original destination pseudo.  */
4622                 if (expr->reaching_reg == NULL)
4623                   expr->reaching_reg
4624                     = gen_reg_rtx (GET_MODE (SET_DEST (set)));
4625
4626                 /* In theory this should never fail since we're creating
4627                    a reg->reg copy.
4628
4629                    However, on the x86 some of the movXX patterns actually
4630                    contain clobbers of scratch regs.  This may cause the
4631                    insn created by validate_change to not match any pattern
4632                    and thus cause validate_change to fail.   */
4633                 if (validate_change (insn, &SET_SRC (set),
4634                                      expr->reaching_reg, 0))
4635                   {
4636                     occr->deleted_p = 1;
4637                     SET_BIT (pre_redundant_insns, INSN_CUID (insn));
4638                     changed = 1;
4639                     gcse_subst_count++;
4640                   }
4641
4642                 if (gcse_file)
4643                   {
4644                     fprintf (gcse_file,
4645                              "PRE: redundant insn %d (expression %d) in ",
4646                                INSN_UID (insn), indx);
4647                     fprintf (gcse_file, "bb %d, reaching reg is %d\n",
4648                              bb, REGNO (expr->reaching_reg));
4649                   }
4650               }
4651           }
4652       }
4653
4654   return changed;
4655 }
4656
4657 /* Perform GCSE optimizations using PRE.
4658    This is called by one_pre_gcse_pass after all the dataflow analysis
4659    has been done.
4660
4661    This is based on the original Morel-Renvoise paper Fred Chow's thesis, and
4662    lazy code motion from Knoop, Ruthing and Steffen as described in Advanced
4663    Compiler Design and Implementation.
4664
4665    ??? A new pseudo reg is created to hold the reaching expression.  The nice
4666    thing about the classical approach is that it would try to use an existing
4667    reg.  If the register can't be adequately optimized [i.e. we introduce
4668    reload problems], one could add a pass here to propagate the new register
4669    through the block.
4670
4671    ??? We don't handle single sets in PARALLELs because we're [currently] not
4672    able to copy the rest of the parallel when we insert copies to create full
4673    redundancies from partial redundancies.  However, there's no reason why we
4674    can't handle PARALLELs in the cases where there are no partial
4675    redundancies.  */
4676
4677 static int
4678 pre_gcse ()
4679 {
4680   unsigned int i;
4681   int did_insert, changed;
4682   struct expr **index_map;
4683   struct expr *expr;
4684
4685   /* Compute a mapping from expression number (`bitmap_index') to
4686      hash table entry.  */
4687
4688   index_map = (struct expr **) xcalloc (n_exprs, sizeof (struct expr *));
4689   for (i = 0; i < expr_hash_table_size; i++)
4690     for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
4691       index_map[expr->bitmap_index] = expr;
4692
4693   /* Reset bitmap used to track which insns are redundant.  */
4694   pre_redundant_insns = sbitmap_alloc (max_cuid);
4695   sbitmap_zero (pre_redundant_insns);
4696
4697   /* Delete the redundant insns first so that
4698      - we know what register to use for the new insns and for the other
4699        ones with reaching expressions
4700      - we know which insns are redundant when we go to create copies  */
4701
4702   changed = pre_delete ();
4703
4704   did_insert = pre_edge_insert (edge_list, index_map);
4705
4706   /* In other places with reaching expressions, copy the expression to the
4707      specially allocated pseudo-reg that reaches the redundant expr.  */
4708   pre_insert_copies ();
4709   if (did_insert)
4710     {
4711       commit_edge_insertions ();
4712       changed = 1;
4713     }
4714
4715   free (index_map);
4716   free (pre_redundant_insns);
4717   return changed;
4718 }
4719
4720 /* Top level routine to perform one PRE GCSE pass.
4721
4722    Return non-zero if a change was made.  */
4723
4724 static int
4725 one_pre_gcse_pass (pass)
4726      int pass;
4727 {
4728   int changed = 0;
4729
4730   gcse_subst_count = 0;
4731   gcse_create_count = 0;
4732
4733   alloc_expr_hash_table (max_cuid);
4734   add_noreturn_fake_exit_edges ();
4735   compute_expr_hash_table ();
4736   if (gcse_file)
4737     dump_hash_table (gcse_file, "Expression", expr_hash_table,
4738                      expr_hash_table_size, n_exprs);
4739
4740   if (n_exprs > 0)
4741     {
4742       alloc_pre_mem (n_basic_blocks, n_exprs);
4743       compute_pre_data ();
4744       changed |= pre_gcse ();
4745       free_edge_list (edge_list);
4746       free_pre_mem ();
4747     }
4748
4749   remove_fake_edges ();
4750   free_expr_hash_table ();
4751
4752   if (gcse_file)
4753     {
4754       fprintf (gcse_file, "\nPRE GCSE of %s, pass %d: %d bytes needed, ",
4755                current_function_name, pass, bytes_used);
4756       fprintf (gcse_file, "%d substs, %d insns created\n",
4757                gcse_subst_count, gcse_create_count);
4758     }
4759
4760   return changed;
4761 }
4762 \f
4763 /* If X contains any LABEL_REF's, add REG_LABEL notes for them to INSN.
4764    We have to add REG_LABEL notes, because the following loop optimization
4765    pass requires them.  */
4766
4767 /* ??? This is very similar to the loop.c add_label_notes function.  We
4768    could probably share code here.  */
4769
4770 /* ??? If there was a jump optimization pass after gcse and before loop,
4771    then we would not need to do this here, because jump would add the
4772    necessary REG_LABEL notes.  */
4773
4774 static void
4775 add_label_notes (x, insn)
4776      rtx x;
4777      rtx insn;
4778 {
4779   enum rtx_code code = GET_CODE (x);
4780   int i, j;
4781   const char *fmt;
4782
4783   if (code == LABEL_REF && !LABEL_REF_NONLOCAL_P (x))
4784     {
4785       /* This code used to ignore labels that referred to dispatch tables to
4786          avoid flow generating (slighly) worse code.
4787
4788          We no longer ignore such label references (see LABEL_REF handling in
4789          mark_jump_label for additional information).  */
4790
4791       REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_LABEL, XEXP (x, 0),
4792                                             REG_NOTES (insn));
4793       return;
4794     }
4795
4796   for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
4797     {
4798       if (fmt[i] == 'e')
4799         add_label_notes (XEXP (x, i), insn);
4800       else if (fmt[i] == 'E')
4801         for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4802           add_label_notes (XVECEXP (x, i, j), insn);
4803     }
4804 }
4805
4806 /* Compute transparent outgoing information for each block.
4807
4808    An expression is transparent to an edge unless it is killed by
4809    the edge itself.  This can only happen with abnormal control flow,
4810    when the edge is traversed through a call.  This happens with
4811    non-local labels and exceptions.
4812
4813    This would not be necessary if we split the edge.  While this is
4814    normally impossible for abnormal critical edges, with some effort
4815    it should be possible with exception handling, since we still have
4816    control over which handler should be invoked.  But due to increased
4817    EH table sizes, this may not be worthwhile.  */
4818
4819 static void
4820 compute_transpout ()
4821 {
4822   int bb;
4823   unsigned int i;
4824   struct expr *expr;
4825
4826   sbitmap_vector_ones (transpout, n_basic_blocks);
4827
4828   for (bb = 0; bb < n_basic_blocks; ++bb)
4829     {
4830       /* Note that flow inserted a nop a the end of basic blocks that
4831          end in call instructions for reasons other than abnormal
4832          control flow.  */
4833       if (GET_CODE (BLOCK_END (bb)) != CALL_INSN)
4834         continue;
4835
4836       for (i = 0; i < expr_hash_table_size; i++)
4837         for (expr = expr_hash_table[i]; expr ; expr = expr->next_same_hash)
4838           if (GET_CODE (expr->expr) == MEM)
4839             {
4840               if (GET_CODE (XEXP (expr->expr, 0)) == SYMBOL_REF
4841                   && CONSTANT_POOL_ADDRESS_P (XEXP (expr->expr, 0)))
4842                 continue;
4843
4844               /* ??? Optimally, we would use interprocedural alias
4845                  analysis to determine if this mem is actually killed
4846                  by this call.  */
4847               RESET_BIT (transpout[bb], expr->bitmap_index);
4848             }
4849     }
4850 }
4851
4852 /* Removal of useless null pointer checks */
4853
4854 /* Called via note_stores.  X is set by SETTER.  If X is a register we must
4855    invalidate nonnull_local and set nonnull_killed.  DATA is really a
4856    `null_pointer_info *'.
4857
4858    We ignore hard registers.  */
4859
4860 static void
4861 invalidate_nonnull_info (x, setter, data)
4862      rtx x;
4863      rtx setter ATTRIBUTE_UNUSED;
4864      void *data;
4865 {
4866   unsigned int regno;
4867   struct null_pointer_info *npi = (struct null_pointer_info *) data;
4868
4869   while (GET_CODE (x) == SUBREG)
4870     x = SUBREG_REG (x);
4871
4872   /* Ignore anything that is not a register or is a hard register.  */
4873   if (GET_CODE (x) != REG
4874       || REGNO (x) < npi->min_reg
4875       || REGNO (x) >= npi->max_reg)
4876     return;
4877
4878   regno = REGNO (x) - npi->min_reg;
4879
4880   RESET_BIT (npi->nonnull_local[npi->current_block], regno);
4881   SET_BIT (npi->nonnull_killed[npi->current_block], regno);
4882 }
4883
4884 /* Do null-pointer check elimination for the registers indicated in
4885    NPI.  NONNULL_AVIN and NONNULL_AVOUT are pre-allocated sbitmaps;
4886    they are not our responsibility to free.  */
4887
4888 static void
4889 delete_null_pointer_checks_1 (block_reg, nonnull_avin, nonnull_avout, npi)
4890      unsigned int *block_reg;
4891      sbitmap *nonnull_avin;
4892      sbitmap *nonnull_avout;
4893      struct null_pointer_info *npi;
4894 {
4895   int bb;
4896   int current_block;
4897   sbitmap *nonnull_local = npi->nonnull_local;
4898   sbitmap *nonnull_killed = npi->nonnull_killed;
4899
4900   /* Compute local properties, nonnull and killed.  A register will have
4901      the nonnull property if at the end of the current block its value is
4902      known to be nonnull.  The killed property indicates that somewhere in
4903      the block any information we had about the register is killed.
4904
4905      Note that a register can have both properties in a single block.  That
4906      indicates that it's killed, then later in the block a new value is
4907      computed.  */
4908   sbitmap_vector_zero (nonnull_local, n_basic_blocks);
4909   sbitmap_vector_zero (nonnull_killed, n_basic_blocks);
4910
4911   for (current_block = 0; current_block < n_basic_blocks; current_block++)
4912     {
4913       rtx insn, stop_insn;
4914
4915       /* Set the current block for invalidate_nonnull_info.  */
4916       npi->current_block = current_block;
4917
4918       /* Scan each insn in the basic block looking for memory references and
4919          register sets.  */
4920       stop_insn = NEXT_INSN (BLOCK_END (current_block));
4921       for (insn = BLOCK_HEAD (current_block);
4922            insn != stop_insn;
4923            insn = NEXT_INSN (insn))
4924         {
4925           rtx set;
4926           rtx reg;
4927
4928           /* Ignore anything that is not a normal insn.  */
4929           if (GET_RTX_CLASS (GET_CODE (insn)) != 'i')
4930             continue;
4931
4932           /* Basically ignore anything that is not a simple SET.  We do have
4933              to make sure to invalidate nonnull_local and set nonnull_killed
4934              for such insns though.  */
4935           set = single_set (insn);
4936           if (!set)
4937             {
4938               note_stores (PATTERN (insn), invalidate_nonnull_info, npi);
4939               continue;
4940             }
4941
4942           /* See if we've got a useable memory load.  We handle it first
4943              in case it uses its address register as a dest (which kills
4944              the nonnull property).  */
4945           if (GET_CODE (SET_SRC (set)) == MEM
4946               && GET_CODE ((reg = XEXP (SET_SRC (set), 0))) == REG
4947               && REGNO (reg) >= npi->min_reg
4948               && REGNO (reg) < npi->max_reg)
4949             SET_BIT (nonnull_local[current_block],
4950                      REGNO (reg) - npi->min_reg);
4951
4952           /* Now invalidate stuff clobbered by this insn.  */
4953           note_stores (PATTERN (insn), invalidate_nonnull_info, npi);
4954
4955           /* And handle stores, we do these last since any sets in INSN can
4956              not kill the nonnull property if it is derived from a MEM
4957              appearing in a SET_DEST.  */
4958           if (GET_CODE (SET_DEST (set)) == MEM
4959               && GET_CODE ((reg = XEXP (SET_DEST (set), 0))) == REG
4960               && REGNO (reg) >= npi->min_reg
4961               && REGNO (reg) < npi->max_reg)
4962             SET_BIT (nonnull_local[current_block],
4963                      REGNO (reg) - npi->min_reg);
4964         }
4965     }
4966
4967   /* Now compute global properties based on the local properties.   This
4968      is a classic global availablity algorithm.  */
4969   compute_available (nonnull_local, nonnull_killed,
4970                      nonnull_avout, nonnull_avin);
4971
4972   /* Now look at each bb and see if it ends with a compare of a value
4973      against zero.  */
4974   for (bb = 0; bb < n_basic_blocks; bb++)
4975     {
4976       rtx last_insn = BLOCK_END (bb);
4977       rtx condition, earliest;
4978       int compare_and_branch;
4979
4980       /* Since MIN_REG is always at least FIRST_PSEUDO_REGISTER, and
4981          since BLOCK_REG[BB] is zero if this block did not end with a
4982          comparison against zero, this condition works.  */
4983       if (block_reg[bb] < npi->min_reg
4984           || block_reg[bb] >= npi->max_reg)
4985         continue;
4986
4987       /* LAST_INSN is a conditional jump.  Get its condition.  */
4988       condition = get_condition (last_insn, &earliest);
4989
4990       /* If we can't determine the condition then skip.  */
4991       if (! condition)
4992         continue;
4993
4994       /* Is the register known to have a nonzero value?  */
4995       if (!TEST_BIT (nonnull_avout[bb], block_reg[bb] - npi->min_reg))
4996         continue;
4997
4998       /* Try to compute whether the compare/branch at the loop end is one or
4999          two instructions.  */
5000       if (earliest == last_insn)
5001         compare_and_branch = 1;
5002       else if (earliest == prev_nonnote_insn (last_insn))
5003         compare_and_branch = 2;
5004       else
5005         continue;
5006
5007       /* We know the register in this comparison is nonnull at exit from
5008          this block.  We can optimize this comparison.  */
5009       if (GET_CODE (condition) == NE)
5010         {
5011           rtx new_jump;
5012
5013           new_jump = emit_jump_insn_before (gen_jump (JUMP_LABEL (last_insn)),
5014                                             last_insn);
5015           JUMP_LABEL (new_jump) = JUMP_LABEL (last_insn);
5016           LABEL_NUSES (JUMP_LABEL (new_jump))++;
5017           emit_barrier_after (new_jump);
5018         }
5019       delete_insn (last_insn);
5020       if (compare_and_branch == 2)
5021         delete_insn (earliest);
5022
5023       /* Don't check this block again.  (Note that BLOCK_END is
5024          invalid here; we deleted the last instruction in the
5025          block.)  */
5026       block_reg[bb] = 0;
5027     }
5028 }
5029
5030 /* Find EQ/NE comparisons against zero which can be (indirectly) evaluated
5031    at compile time.
5032
5033    This is conceptually similar to global constant/copy propagation and
5034    classic global CSE (it even uses the same dataflow equations as cprop).
5035
5036    If a register is used as memory address with the form (mem (reg)), then we
5037    know that REG can not be zero at that point in the program.  Any instruction
5038    which sets REG "kills" this property.
5039
5040    So, if every path leading to a conditional branch has an available memory
5041    reference of that form, then we know the register can not have the value
5042    zero at the conditional branch.
5043
5044    So we merely need to compute the local properies and propagate that data
5045    around the cfg, then optimize where possible.
5046
5047    We run this pass two times.  Once before CSE, then again after CSE.  This
5048    has proven to be the most profitable approach.  It is rare for new
5049    optimization opportunities of this nature to appear after the first CSE
5050    pass.
5051
5052    This could probably be integrated with global cprop with a little work.  */
5053
5054 void
5055 delete_null_pointer_checks (f)
5056      rtx f ATTRIBUTE_UNUSED;
5057 {
5058   sbitmap *nonnull_avin, *nonnull_avout;
5059   unsigned int *block_reg;
5060   int bb;
5061   int reg;
5062   int regs_per_pass;
5063   int max_reg;
5064   struct null_pointer_info npi;
5065
5066   /* If we have only a single block, then there's nothing to do.  */
5067   if (n_basic_blocks <= 1)
5068     return;
5069
5070   /* Trying to perform global optimizations on flow graphs which have
5071      a high connectivity will take a long time and is unlikely to be
5072      particularly useful.
5073
5074      In normal circumstances a cfg should have about twice has many edges
5075      as blocks.  But we do not want to punish small functions which have
5076      a couple switch statements.  So we require a relatively large number
5077      of basic blocks and the ratio of edges to blocks to be high.  */
5078   if (n_basic_blocks > 1000 && n_edges / n_basic_blocks >= 20)
5079     return;
5080
5081   /* We need four bitmaps, each with a bit for each register in each
5082      basic block.  */
5083   max_reg = max_reg_num ();
5084   regs_per_pass = get_bitmap_width (4, n_basic_blocks, max_reg);
5085
5086   /* Allocate bitmaps to hold local and global properties.  */
5087   npi.nonnull_local = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);
5088   npi.nonnull_killed = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);
5089   nonnull_avin = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);
5090   nonnull_avout = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);
5091
5092   /* Go through the basic blocks, seeing whether or not each block
5093      ends with a conditional branch whose condition is a comparison
5094      against zero.  Record the register compared in BLOCK_REG.  */
5095   block_reg = (unsigned int *) xcalloc (n_basic_blocks, sizeof (int));
5096   for (bb = 0; bb < n_basic_blocks; bb++)
5097     {
5098       rtx last_insn = BLOCK_END (bb);
5099       rtx condition, earliest, reg;
5100
5101       /* We only want conditional branches.  */
5102       if (GET_CODE (last_insn) != JUMP_INSN
5103           || !any_condjump_p (last_insn)
5104           || !onlyjump_p (last_insn))
5105         continue;
5106
5107       /* LAST_INSN is a conditional jump.  Get its condition.  */
5108       condition = get_condition (last_insn, &earliest);
5109
5110       /* If we were unable to get the condition, or it is not a equality
5111          comparison against zero then there's nothing we can do.  */
5112       if (!condition
5113           || (GET_CODE (condition) != NE && GET_CODE (condition) != EQ)
5114           || GET_CODE (XEXP (condition, 1)) != CONST_INT
5115           || (XEXP (condition, 1)
5116               != CONST0_RTX (GET_MODE (XEXP (condition, 0)))))
5117         continue;
5118
5119       /* We must be checking a register against zero.  */
5120       reg = XEXP (condition, 0);
5121       if (GET_CODE (reg) != REG)
5122         continue;
5123
5124       block_reg[bb] = REGNO (reg);
5125     }
5126
5127   /* Go through the algorithm for each block of registers.  */
5128   for (reg = FIRST_PSEUDO_REGISTER; reg < max_reg; reg += regs_per_pass)
5129     {
5130       npi.min_reg = reg;
5131       npi.max_reg = MIN (reg + regs_per_pass, max_reg);
5132       delete_null_pointer_checks_1 (block_reg, nonnull_avin,
5133                                     nonnull_avout, &npi);
5134     }
5135
5136   /* Free the table of registers compared at the end of every block.  */
5137   free (block_reg);
5138
5139   /* Free bitmaps.  */
5140   free (npi.nonnull_local);
5141   free (npi.nonnull_killed);
5142   free (nonnull_avin);
5143   free (nonnull_avout);
5144 }
5145
5146 /* Code Hoisting variables and subroutines.  */
5147
5148 /* Very busy expressions.  */
5149 static sbitmap *hoist_vbein;
5150 static sbitmap *hoist_vbeout;
5151
5152 /* Hoistable expressions.  */
5153 static sbitmap *hoist_exprs;
5154
5155 /* Dominator bitmaps.  */
5156 static sbitmap *dominators;
5157
5158 /* ??? We could compute post dominators and run this algorithm in
5159    reverse to to perform tail merging, doing so would probably be
5160    more effective than the tail merging code in jump.c.
5161
5162    It's unclear if tail merging could be run in parallel with
5163    code hoisting.  It would be nice.  */
5164
5165 /* Allocate vars used for code hoisting analysis.  */
5166
5167 static void
5168 alloc_code_hoist_mem (n_blocks, n_exprs)
5169      int n_blocks, n_exprs;
5170 {
5171   antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
5172   transp = sbitmap_vector_alloc (n_blocks, n_exprs);
5173   comp = sbitmap_vector_alloc (n_blocks, n_exprs);
5174
5175   hoist_vbein = sbitmap_vector_alloc (n_blocks, n_exprs);
5176   hoist_vbeout = sbitmap_vector_alloc (n_blocks, n_exprs);
5177   hoist_exprs = sbitmap_vector_alloc (n_blocks, n_exprs);
5178   transpout = sbitmap_vector_alloc (n_blocks, n_exprs);
5179
5180   dominators = sbitmap_vector_alloc (n_blocks, n_blocks);
5181 }
5182
5183 /* Free vars used for code hoisting analysis.  */
5184
5185 static void
5186 free_code_hoist_mem ()
5187 {
5188   free (antloc);
5189   free (transp);
5190   free (comp);
5191
5192   free (hoist_vbein);
5193   free (hoist_vbeout);
5194   free (hoist_exprs);
5195   free (transpout);
5196
5197   free (dominators);
5198 }
5199
5200 /* Compute the very busy expressions at entry/exit from each block.
5201
5202    An expression is very busy if all paths from a given point
5203    compute the expression.  */
5204
5205 static void
5206 compute_code_hoist_vbeinout ()
5207 {
5208   int bb, changed, passes;
5209
5210   sbitmap_vector_zero (hoist_vbeout, n_basic_blocks);
5211   sbitmap_vector_zero (hoist_vbein, n_basic_blocks);
5212
5213   passes = 0;
5214   changed = 1;
5215
5216   while (changed)
5217     {
5218       changed = 0;
5219
5220       /* We scan the blocks in the reverse order to speed up
5221          the convergence.  */
5222       for (bb = n_basic_blocks - 1; bb >= 0; bb--)
5223         {
5224           changed |= sbitmap_a_or_b_and_c (hoist_vbein[bb], antloc[bb],
5225                                            hoist_vbeout[bb], transp[bb]);
5226           if (bb != n_basic_blocks - 1)
5227             sbitmap_intersection_of_succs (hoist_vbeout[bb], hoist_vbein, bb);
5228         }
5229
5230       passes++;
5231     }
5232
5233   if (gcse_file)
5234     fprintf (gcse_file, "hoisting vbeinout computation: %d passes\n", passes);
5235 }
5236
5237 /* Top level routine to do the dataflow analysis needed by code hoisting.  */
5238
5239 static void
5240 compute_code_hoist_data ()
5241 {
5242   compute_local_properties (transp, comp, antloc, 0);
5243   compute_transpout ();
5244   compute_code_hoist_vbeinout ();
5245   compute_flow_dominators (dominators, NULL);
5246   if (gcse_file)
5247     fprintf (gcse_file, "\n");
5248 }
5249
5250 /* Determine if the expression identified by EXPR_INDEX would
5251    reach BB unimpared if it was placed at the end of EXPR_BB.
5252
5253    It's unclear exactly what Muchnick meant by "unimpared".  It seems
5254    to me that the expression must either be computed or transparent in
5255    *every* block in the path(s) from EXPR_BB to BB.  Any other definition
5256    would allow the expression to be hoisted out of loops, even if
5257    the expression wasn't a loop invariant.
5258
5259    Contrast this to reachability for PRE where an expression is
5260    considered reachable if *any* path reaches instead of *all*
5261    paths.  */
5262
5263 static int
5264 hoist_expr_reaches_here_p (expr_bb, expr_index, bb, visited)
5265      int expr_bb;
5266      int expr_index;
5267      int bb;
5268      char *visited;
5269 {
5270   edge pred;
5271   int visited_allocated_locally = 0;
5272
5273
5274   if (visited == NULL)
5275     {
5276        visited_allocated_locally = 1;
5277        visited = xcalloc (n_basic_blocks, 1);
5278     }
5279
5280   visited[expr_bb] = 1;
5281   for (pred = BASIC_BLOCK (bb)->pred; pred != NULL; pred = pred->pred_next)
5282     {
5283       int pred_bb = pred->src->index;
5284
5285       if (pred->src == ENTRY_BLOCK_PTR)
5286         break;
5287       else if (visited[pred_bb])
5288         continue;
5289
5290       /* Does this predecessor generate this expression?  */
5291       else if (TEST_BIT (comp[pred_bb], expr_index))
5292         break;
5293       else if (! TEST_BIT (transp[pred_bb], expr_index))
5294         break;
5295
5296       /* Not killed.  */
5297       else
5298         {
5299           visited[pred_bb] = 1;
5300           if (! hoist_expr_reaches_here_p (expr_bb, expr_index,
5301                                            pred_bb, visited))
5302             break;
5303         }
5304     }
5305   if (visited_allocated_locally)
5306     free (visited);
5307
5308   return (pred == NULL);
5309 }
5310 \f
5311 /* Actually perform code hoisting.  */
5312
5313 static void
5314 hoist_code ()
5315 {
5316   int bb, dominated;
5317   unsigned int i;
5318   struct expr **index_map;
5319   struct expr *expr;
5320
5321   sbitmap_vector_zero (hoist_exprs, n_basic_blocks);
5322
5323   /* Compute a mapping from expression number (`bitmap_index') to
5324      hash table entry.  */
5325
5326   index_map = (struct expr **) xcalloc (n_exprs, sizeof (struct expr *));
5327   for (i = 0; i < expr_hash_table_size; i++)
5328     for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
5329       index_map[expr->bitmap_index] = expr;
5330
5331   /* Walk over each basic block looking for potentially hoistable
5332      expressions, nothing gets hoisted from the entry block.  */
5333   for (bb = 0; bb < n_basic_blocks; bb++)
5334     {
5335       int found = 0;
5336       int insn_inserted_p;
5337
5338       /* Examine each expression that is very busy at the exit of this
5339          block.  These are the potentially hoistable expressions.  */
5340       for (i = 0; i < hoist_vbeout[bb]->n_bits; i++)
5341         {
5342           int hoistable = 0;
5343
5344           if (TEST_BIT (hoist_vbeout[bb], i) && TEST_BIT (transpout[bb], i))
5345             {
5346               /* We've found a potentially hoistable expression, now
5347                  we look at every block BB dominates to see if it
5348                  computes the expression.  */
5349               for (dominated = 0; dominated < n_basic_blocks; dominated++)
5350                 {
5351                   /* Ignore self dominance.  */
5352                   if (bb == dominated
5353                       || ! TEST_BIT (dominators[dominated], bb))
5354                     continue;
5355
5356                   /* We've found a dominated block, now see if it computes
5357                      the busy expression and whether or not moving that
5358                      expression to the "beginning" of that block is safe.  */
5359                   if (!TEST_BIT (antloc[dominated], i))
5360                     continue;
5361
5362                   /* Note if the expression would reach the dominated block
5363                      unimpared if it was placed at the end of BB.
5364
5365                      Keep track of how many times this expression is hoistable
5366                      from a dominated block into BB.  */
5367                   if (hoist_expr_reaches_here_p (bb, i, dominated, NULL))
5368                     hoistable++;
5369                 }
5370
5371               /* If we found more than one hoistable occurence of this
5372                  expression, then note it in the bitmap of expressions to
5373                  hoist.  It makes no sense to hoist things which are computed
5374                  in only one BB, and doing so tends to pessimize register
5375                  allocation.  One could increase this value to try harder
5376                  to avoid any possible code expansion due to register
5377                  allocation issues; however experiments have shown that
5378                  the vast majority of hoistable expressions are only movable
5379                  from two successors, so raising this threshhold is likely
5380                  to nullify any benefit we get from code hoisting.  */
5381               if (hoistable > 1)
5382                 {
5383                   SET_BIT (hoist_exprs[bb], i);
5384                   found = 1;
5385                 }
5386             }
5387         }
5388
5389       /* If we found nothing to hoist, then quit now.  */
5390       if (! found)
5391         continue;
5392
5393       /* Loop over all the hoistable expressions.  */
5394       for (i = 0; i < hoist_exprs[bb]->n_bits; i++)
5395         {
5396           /* We want to insert the expression into BB only once, so
5397              note when we've inserted it.  */
5398           insn_inserted_p = 0;
5399
5400           /* These tests should be the same as the tests above.  */
5401           if (TEST_BIT (hoist_vbeout[bb], i))
5402             {
5403               /* We've found a potentially hoistable expression, now
5404                  we look at every block BB dominates to see if it
5405                  computes the expression.  */
5406               for (dominated = 0; dominated < n_basic_blocks; dominated++)
5407                 {
5408                   /* Ignore self dominance.  */
5409                   if (bb == dominated
5410                       || ! TEST_BIT (dominators[dominated], bb))
5411                     continue;
5412
5413                   /* We've found a dominated block, now see if it computes
5414                      the busy expression and whether or not moving that
5415                      expression to the "beginning" of that block is safe.  */
5416                   if (!TEST_BIT (antloc[dominated], i))
5417                     continue;
5418
5419                   /* The expression is computed in the dominated block and
5420                      it would be safe to compute it at the start of the
5421                      dominated block.  Now we have to determine if the
5422                      expresion would reach the dominated block if it was
5423                      placed at the end of BB.  */
5424                   if (hoist_expr_reaches_here_p (bb, i, dominated, NULL))
5425                     {
5426                       struct expr *expr = index_map[i];
5427                       struct occr *occr = expr->antic_occr;
5428                       rtx insn;
5429                       rtx set;
5430
5431                       /* Find the right occurence of this expression.  */
5432                       while (BLOCK_NUM (occr->insn) != dominated && occr)
5433                         occr = occr->next;
5434
5435                       /* Should never happen.  */
5436                       if (!occr)
5437                         abort ();
5438
5439                       insn = occr->insn;
5440
5441                       set = single_set (insn);
5442                       if (! set)
5443                         abort ();
5444
5445                       /* Create a pseudo-reg to store the result of reaching
5446                          expressions into.  Get the mode for the new pseudo
5447                          from the mode of the original destination pseudo.  */
5448                       if (expr->reaching_reg == NULL)
5449                         expr->reaching_reg
5450                           = gen_reg_rtx (GET_MODE (SET_DEST (set)));
5451
5452                       /* In theory this should never fail since we're creating
5453                          a reg->reg copy.
5454
5455                          However, on the x86 some of the movXX patterns
5456                          actually contain clobbers of scratch regs.  This may
5457                          cause the insn created by validate_change to not
5458                          match any pattern and thus cause validate_change to
5459                          fail.  */
5460                       if (validate_change (insn, &SET_SRC (set),
5461                                            expr->reaching_reg, 0))
5462                         {
5463                           occr->deleted_p = 1;
5464                           if (!insn_inserted_p)
5465                             {
5466                               insert_insn_end_bb (index_map[i], bb, 0);
5467                               insn_inserted_p = 1;
5468                             }
5469                         }
5470                     }
5471                 }
5472             }
5473         }
5474     }
5475
5476     free (index_map);
5477 }
5478
5479 /* Top level routine to perform one code hoisting (aka unification) pass
5480
5481    Return non-zero if a change was made.  */
5482
5483 static int
5484 one_code_hoisting_pass ()
5485 {
5486   int changed = 0;
5487
5488   alloc_expr_hash_table (max_cuid);
5489   compute_expr_hash_table ();
5490   if (gcse_file)
5491     dump_hash_table (gcse_file, "Code Hosting Expressions", expr_hash_table,
5492                      expr_hash_table_size, n_exprs);
5493
5494   if (n_exprs > 0)
5495     {
5496       alloc_code_hoist_mem (n_basic_blocks, n_exprs);
5497       compute_code_hoist_data ();
5498       hoist_code ();
5499       free_code_hoist_mem ();
5500     }
5501
5502   free_expr_hash_table ();
5503
5504   return changed;
5505 }