gcc/gcse.c

   1 /* Global common subexpression elimination/Partial redundancy elimination
   2    and global constant/copy propagation for GNU compiler.
   3    Copyright (C) 1997, 1998, 1999, 2000 Free Software Foundation, Inc.
   4
   5 This file is part of GNU CC.
   6
   7 GNU CC is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 2, or (at your option)
  10 any later version.
  11
  12 GNU CC is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GNU CC; see the file COPYING.  If not, write to
  19 the Free Software Foundation, 59 Temple Place - Suite 330,
  20 Boston, MA 02111-1307, USA.  */
  21
  22 /* TODO
  23    - reordering of memory allocation and freeing to be more space efficient
  24    - do rough calc of how many regs are needed in each block, and a rough
  25      calc of how many regs are available in each class and use that to
  26      throttle back the code in cases where RTX_COST is minimal.
  27    - dead store elimination
  28    - a store to the same address as a load does not kill the load if the
  29      source of the store is also the destination of the load.  Handling this
  30      allows more load motion, particularly out of loops.
  31    - ability to realloc sbitmap vectors would allow one initial computation
  32      of reg_set_in_block with only subsequent additions, rather than
  33      recomputing it for each pass
  34
  35 */
  36
  37 /* References searched while implementing this.
  38
  39    Compilers Principles, Techniques and Tools
  40    Aho, Sethi, Ullman
  41    Addison-Wesley, 1988
  42
  43    Global Optimization by Suppression of Partial Redundancies
  44    E. Morel, C. Renvoise
  45    communications of the acm, Vol. 22, Num. 2, Feb. 1979
  46
  47    A Portable Machine-Independent Global Optimizer - Design and Measurements
  48    Frederick Chow
  49    Stanford Ph.D. thesis, Dec. 1983
  50
  51    A Fast Algorithm for Code Movement Optimization
  52    D.M. Dhamdhere
  53    SIGPLAN Notices, Vol. 23, Num. 10, Oct. 1988
  54
  55    A Solution to a Problem with Morel and Renvoise's
  56    Global Optimization by Suppression of Partial Redundancies
  57    K-H Drechsler, M.P. Stadel
  58    ACM TOPLAS, Vol. 10, Num. 4, Oct. 1988
  59
  60    Practical Adaptation of the Global Optimization
  61    Algorithm of Morel and Renvoise
  62    D.M. Dhamdhere
  63    ACM TOPLAS, Vol. 13, Num. 2. Apr. 1991
  64
  65    Efficiently Computing Static Single Assignment Form and the Control
  66    Dependence Graph
  67    R. Cytron, J. Ferrante, B.K. Rosen, M.N. Wegman, and F.K. Zadeck
  68    ACM TOPLAS, Vol. 13, Num. 4, Oct. 1991
  69
  70    Lazy Code Motion
  71    J. Knoop, O. Ruthing, B. Steffen
  72    ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI
  73
  74    What's In a Region?  Or Computing Control Dependence Regions in Near-Linear
  75    Time for Reducible Flow Control
  76    Thomas Ball
  77    ACM Letters on Programming Languages and Systems,
  78    Vol. 2, Num. 1-4, Mar-Dec 1993
  79
  80    An Efficient Representation for Sparse Sets
  81    Preston Briggs, Linda Torczon
  82    ACM Letters on Programming Languages and Systems,
  83    Vol. 2, Num. 1-4, Mar-Dec 1993
  84
  85    A Variation of Knoop, Ruthing, and Steffen's Lazy Code Motion
  86    K-H Drechsler, M.P. Stadel
  87    ACM SIGPLAN Notices, Vol. 28, Num. 5, May 1993
  88
  89    Partial Dead Code Elimination
  90    J. Knoop, O. Ruthing, B. Steffen
  91    ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
  92
  93    Effective Partial Redundancy Elimination
  94    P. Briggs, K.D. Cooper
  95    ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
  96
  97    The Program Structure Tree: Computing Control Regions in Linear Time
  98    R. Johnson, D. Pearson, K. Pingali
  99    ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
 100
 101    Optimal Code Motion: Theory and Practice
 102    J. Knoop, O. Ruthing, B. Steffen
 103    ACM TOPLAS, Vol. 16, Num. 4, Jul. 1994
 104
 105    The power of assignment motion
 106    J. Knoop, O. Ruthing, B. Steffen
 107    ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI
 108
 109    Global code motion / global value numbering
 110    C. Click
 111    ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI
 112
 113    Value Driven Redundancy Elimination
 114    L.T. Simpson
 115    Rice University Ph.D. thesis, Apr. 1996
 116
 117    Value Numbering
 118    L.T. Simpson
 119    Massively Scalar Compiler Project, Rice University, Sep. 1996
 120
 121    High Performance Compilers for Parallel Computing
 122    Michael Wolfe
 123    Addison-Wesley, 1996
 124
 125    Advanced Compiler Design and Implementation
 126    Steven Muchnick
 127    Morgan Kaufmann, 1997
 128
 129    Building an Optimizing Compiler
 130    Robert Morgan
 131    Digital Press, 1998
 132
 133    People wishing to speed up the code here should read:
 134      Elimination Algorithms for Data Flow Analysis
 135      B.G. Ryder, M.C. Paull
 136      ACM Computing Surveys, Vol. 18, Num. 3, Sep. 1986
 137
 138      How to Analyze Large Programs Efficiently and Informatively
 139      D.M. Dhamdhere, B.K. Rosen, F.K. Zadeck
 140      ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI
 141
 142    People wishing to do something different can find various possibilities
 143    in the above papers and elsewhere.
 144 */
 145
 146 #include "config.h"
 147 #include "system.h"
 148 #include "toplev.h"
 149
 150 #include "rtl.h"
 151 #include "tm_p.h"
 152 #include "regs.h"
 153 #include "hard-reg-set.h"
 154 #include "flags.h"
 155 #include "real.h"
 156 #include "insn-config.h"
 157 #include "recog.h"
 158 #include "basic-block.h"
 159 #include "output.h"
 160 #include "function.h"
 161 #include "expr.h"
 162
 163 #include "obstack.h"
 164 #define obstack_chunk_alloc gmalloc
 165 #define obstack_chunk_free free
 166
 167 /* Maximum number of passes to perform.  */
 168 #define MAX_PASSES 1
 169
 170 /* Propagate flow information through back edges and thus enable PRE's
 171    moving loop invariant calculations out of loops.
 172
 173    Originally this tended to create worse overall code, but several
 174    improvements during the development of PRE seem to have made following
 175    back edges generally a win.
 176
 177    Note much of the loop invariant code motion done here would normally
 178    be done by loop.c, which has more heuristics for when to move invariants
 179    out of loops.  At some point we might need to move some of those
 180    heuristics into gcse.c.  */
 181 #define FOLLOW_BACK_EDGES 1
 182
 183 /* We support GCSE via Partial Redundancy Elimination.  PRE optimizations
 184    are a superset of those done by GCSE.
 185
 186    We perform the following steps:
 187
 188    1) Compute basic block information.
 189
 190    2) Compute table of places where registers are set.
 191
 192    3) Perform copy/constant propagation.
 193
 194    4) Perform global cse.
 195
 196    5) Perform another pass of copy/constant propagation.
 197
 198    Two passes of copy/constant propagation are done because the first one
 199    enables more GCSE and the second one helps to clean up the copies that
 200    GCSE creates.  This is needed more for PRE than for Classic because Classic
 201    GCSE will try to use an existing register containing the common
 202    subexpression rather than create a new one.  This is harder to do for PRE
 203    because of the code motion (which Classic GCSE doesn't do).
 204
 205    Expressions we are interested in GCSE-ing are of the form
 206    (set (pseudo-reg) (expression)).
 207    Function want_to_gcse_p says what these are.
 208
 209    PRE handles moving invariant expressions out of loops (by treating them as
 210    partially redundant).
 211
 212    Eventually it would be nice to replace cse.c/gcse.c with SSA (static single
 213    assignment) based GVN (global value numbering).  L. T. Simpson's paper
 214    (Rice University) on value numbering is a useful reference for this.
 215
 216    **********************
 217
 218    We used to support multiple passes but there are diminishing returns in
 219    doing so.  The first pass usually makes 90% of the changes that are doable.
 220    A second pass can make a few more changes made possible by the first pass.
 221    Experiments show any further passes don't make enough changes to justify
 222    the expense.
 223
 224    A study of spec92 using an unlimited number of passes:
 225    [1 pass] = 1208 substitutions, [2] = 577, [3] = 202, [4] = 192, [5] = 83,
 226    [6] = 34, [7] = 17, [8] = 9, [9] = 4, [10] = 4, [11] = 2,
 227    [12] = 2, [13] = 1, [15] = 1, [16] = 2, [41] = 1
 228
 229    It was found doing copy propagation between each pass enables further
 230    substitutions.
 231
 232    PRE is quite expensive in complicated functions because the DFA can take
 233    awhile to converge.  Hence we only perform one pass.  Macro MAX_PASSES can
 234    be modified if one wants to experiment.
 235
 236    **********************
 237
 238    The steps for PRE are:
 239
 240    1) Build the hash table of expressions we wish to GCSE (expr_hash_table).
 241
 242    2) Perform the data flow analysis for PRE.
 243
 244    3) Delete the redundant instructions
 245
 246    4) Insert the required copies [if any] that make the partially
 247       redundant instructions fully redundant.
 248
 249    5) For other reaching expressions, insert an instruction to copy the value
 250       to a newly created pseudo that will reach the redundant instruction.
 251
 252    The deletion is done first so that when we do insertions we
 253    know which pseudo reg to use.
 254
 255    Various papers have argued that PRE DFA is expensive (O(n^2)) and others
 256    argue it is not.  The number of iterations for the algorithm to converge
 257    is typically 2-4 so I don't view it as that expensive (relatively speaking).
 258
 259    PRE GCSE depends heavily on the second CSE pass to clean up the copies
 260    we create.  To make an expression reach the place where it's redundant,
 261    the result of the expression is copied to a new register, and the redundant
 262    expression is deleted by replacing it with this new register.  Classic GCSE
 263    doesn't have this problem as much as it computes the reaching defs of
 264    each register in each block and thus can try to use an existing register.
 265
 266    **********************
 267
 268    A fair bit of simplicity is created by creating small functions for simple
 269    tasks, even when the function is only called in one place.  This may
 270    measurably slow things down [or may not] by creating more function call
 271    overhead than is necessary.  The source is laid out so that it's trivial
 272    to make the affected functions inline so that one can measure what speed
 273    up, if any, can be achieved, and maybe later when things settle things can
 274    be rearranged.
 275
 276    Help stamp out big monolithic functions!  */
 277 \f
 278 /* GCSE global vars.  */
 279
 280 /* -dG dump file.  */
 281 static FILE *gcse_file;
 282
 283 /* Note whether or not we should run jump optimization after gcse.  We
 284    want to do this for two cases.
 285
 286     * If we changed any jumps via cprop.
 287
 288     * If we added any labels via edge splitting.  */
 289
 290 static int run_jump_opt_after_gcse;
 291
 292 /* Bitmaps are normally not included in debugging dumps.
 293    However it's useful to be able to print them from GDB.
 294    We could create special functions for this, but it's simpler to
 295    just allow passing stderr to the dump_foo fns.  Since stderr can
 296    be a macro, we store a copy here.  */
 297 static FILE *debug_stderr;
 298
 299 /* An obstack for our working variables.  */
 300 static struct obstack gcse_obstack;
 301
 302 /* Non-zero for each mode that supports (set (reg) (reg)).
 303    This is trivially true for integer and floating point values.
 304    It may or may not be true for condition codes.  */
 305 static char can_copy_p[(int) NUM_MACHINE_MODES];
 306
 307 /* Non-zero if can_copy_p has been initialized.  */
 308 static int can_copy_init_p;
 309
 310 struct reg_use {rtx reg_rtx; };
 311
 312 /* Hash table of expressions.  */
 313
 314 struct expr
 315 {
 316   /* The expression (SET_SRC for expressions, PATTERN for assignments).  */
 317   rtx expr;
 318   /* Index in the available expression bitmaps.  */
 319   int bitmap_index;
 320   /* Next entry with the same hash.  */
 321   struct expr *next_same_hash;
 322   /* List of anticipatable occurrences in basic blocks in the function.
 323      An "anticipatable occurrence" is one that is the first occurrence in the
 324      basic block, the operands are not modified in the basic block prior
 325      to the occurrence and the output is not used between the start of
 326      the block and the occurrence.  */
 327   struct occr *antic_occr;
 328   /* List of available occurrence in basic blocks in the function.
 329      An "available occurrence" is one that is the last occurrence in the
 330      basic block and the operands are not modified by following statements in
 331      the basic block [including this insn].  */
 332   struct occr *avail_occr;
 333   /* Non-null if the computation is PRE redundant.
 334      The value is the newly created pseudo-reg to record a copy of the
 335      expression in all the places that reach the redundant copy.  */
 336   rtx reaching_reg;
 337 };
 338
 339 /* Occurrence of an expression.
 340    There is one per basic block.  If a pattern appears more than once the
 341    last appearance is used [or first for anticipatable expressions].  */
 342
 343 struct occr
 344 {
 345   /* Next occurrence of this expression.  */
 346   struct occr *next;
 347   /* The insn that computes the expression.  */
 348   rtx insn;
 349   /* Non-zero if this [anticipatable] occurrence has been deleted.  */
 350   char deleted_p;
 351   /* Non-zero if this [available] occurrence has been copied to
 352      reaching_reg.  */
 353   /* ??? This is mutually exclusive with deleted_p, so they could share
 354      the same byte.  */
 355   char copied_p;
 356 };
 357
 358 /* Expression and copy propagation hash tables.
 359    Each hash table is an array of buckets.
 360    ??? It is known that if it were an array of entries, structure elements
 361    `next_same_hash' and `bitmap_index' wouldn't be necessary.  However, it is
 362    not clear whether in the final analysis a sufficient amount of memory would
 363    be saved as the size of the available expression bitmaps would be larger
 364    [one could build a mapping table without holes afterwards though].
 365    Someday I'll perform the computation and figure it out.  */
 366
 367 /* Total size of the expression hash table, in elements.  */
 368 static int expr_hash_table_size;
 369 /* The table itself.
 370    This is an array of `expr_hash_table_size' elements.  */
 371 static struct expr **expr_hash_table;
 372
 373 /* Total size of the copy propagation hash table, in elements.  */
 374 static int set_hash_table_size;
 375
 376 /* The table itself.
 377    This is an array of `set_hash_table_size' elements.  */
 378 static struct expr **set_hash_table;
 379
 380 /* Mapping of uids to cuids.
 381    Only real insns get cuids.  */
 382 static int *uid_cuid;
 383
 384 /* Highest UID in UID_CUID.  */
 385 static int max_uid;
 386
 387 /* Get the cuid of an insn.  */
 388 #define INSN_CUID(INSN) (uid_cuid[INSN_UID (INSN)])
 389
 390 /* Number of cuids.  */
 391 static int max_cuid;
 392
 393 /* Mapping of cuids to insns.  */
 394 static rtx *cuid_insn;
 395
 396 /* Get insn from cuid.  */
 397 #define CUID_INSN(CUID) (cuid_insn[CUID])
 398
 399 /* Maximum register number in function prior to doing gcse + 1.
 400    Registers created during this pass have regno >= max_gcse_regno.
 401    This is named with "gcse" to not collide with global of same name.  */
 402 static int max_gcse_regno;
 403
 404 /* Maximum number of cse-able expressions found.  */
 405 static int n_exprs;
 406
 407 /* Maximum number of assignments for copy propagation found.  */
 408 static int n_sets;
 409
 410 /* Table of registers that are modified.
 411
 412    For each register, each element is a list of places where the pseudo-reg
 413    is set.
 414
 415    For simplicity, GCSE is done on sets of pseudo-regs only.  PRE GCSE only
 416    requires knowledge of which blocks kill which regs [and thus could use
 417    a bitmap instead of the lists `reg_set_table' uses].
 418
 419    `reg_set_table' and could be turned into an array of bitmaps (num-bbs x
 420    num-regs) [however perhaps it may be useful to keep the data as is].  One
 421    advantage of recording things this way is that `reg_set_table' is fairly
 422    sparse with respect to pseudo regs but for hard regs could be fairly dense
 423    [relatively speaking].  And recording sets of pseudo-regs in lists speeds
 424    up functions like compute_transp since in the case of pseudo-regs we only
 425    need to iterate over the number of times a pseudo-reg is set, not over the
 426    number of basic blocks [clearly there is a bit of a slow down in the cases
 427    where a pseudo is set more than once in a block, however it is believed
 428    that the net effect is to speed things up].  This isn't done for hard-regs
 429    because recording call-clobbered hard-regs in `reg_set_table' at each
 430    function call can consume a fair bit of memory, and iterating over
 431    hard-regs stored this way in compute_transp will be more expensive.  */
 432
 433 typedef struct reg_set
 434 {
 435   /* The next setting of this register.  */
 436   struct reg_set *next;
 437   /* The insn where it was set.  */
 438   rtx insn;
 439 } reg_set;
 440
 441 static reg_set **reg_set_table;
 442
 443 /* Size of `reg_set_table'.
 444    The table starts out at max_gcse_regno + slop, and is enlarged as
 445    necessary.  */
 446 static int reg_set_table_size;
 447
 448 /* Amount to grow `reg_set_table' by when it's full.  */
 449 #define REG_SET_TABLE_SLOP 100
 450
 451 /* Bitmap containing one bit for each register in the program.
 452    Used when performing GCSE to track which registers have been set since
 453    the start of the basic block.  */
 454 static sbitmap reg_set_bitmap;
 455
 456 /* For each block, a bitmap of registers set in the block.
 457    This is used by expr_killed_p and compute_transp.
 458    It is computed during hash table computation and not by compute_sets
 459    as it includes registers added since the last pass (or between cprop and
 460    gcse) and it's currently not easy to realloc sbitmap vectors.  */
 461 static sbitmap *reg_set_in_block;
 462
 463 /* For each block, non-zero if memory is set in that block.
 464    This is computed during hash table computation and is used by
 465    expr_killed_p and compute_transp.
 466    ??? Handling of memory is very simple, we don't make any attempt
 467    to optimize things (later).
 468    ??? This can be computed by compute_sets since the information
 469    doesn't change.  */
 470 static char *mem_set_in_block;
 471
 472 /* Various variables for statistics gathering.  */
 473
 474 /* Memory used in a pass.
 475    This isn't intended to be absolutely precise.  Its intent is only
 476    to keep an eye on memory usage.  */
 477 static int bytes_used;
 478
 479 /* GCSE substitutions made.  */
 480 static int gcse_subst_count;
 481 /* Number of copy instructions created.  */
 482 static int gcse_create_count;
 483 /* Number of constants propagated.  */
 484 static int const_prop_count;
 485 /* Number of copys propagated.  */
 486 static int copy_prop_count;
 487 \f
 488 /* These variables are used by classic GCSE.
 489    Normally they'd be defined a bit later, but `rd_gen' needs to
 490    be declared sooner.  */
 491
 492 /* A bitmap of all ones for implementing the algorithm for available
 493    expressions and reaching definitions.  */
 494 /* ??? Available expression bitmaps have a different size than reaching
 495    definition bitmaps.  This should be the larger of the two, however, it
 496    is not currently used for reaching definitions.  */
 497 static sbitmap u_bitmap;
 498
 499 /* Each block has a bitmap of each type.
 500    The length of each blocks bitmap is:
 501
 502        max_cuid  - for reaching definitions
 503        n_exprs - for available expressions
 504
 505    Thus we view the bitmaps as 2 dimensional arrays.  i.e.
 506    rd_kill[block_num][cuid_num]
 507    ae_kill[block_num][expr_num]                  */
 508
 509 /* For reaching defs */
 510 static sbitmap *rd_kill, *rd_gen, *reaching_defs, *rd_out;
 511
 512 /* for available exprs */
 513 static sbitmap *ae_kill, *ae_gen, *ae_in, *ae_out;
 514
 515 /* Objects of this type are passed around by the null-pointer check
 516    removal routines.  */
 517 struct null_pointer_info
 518 {
 519   /* The basic block being processed.  */
 520   int current_block;
 521   /* The first register to be handled in this pass.  */
 522   int min_reg;
 523   /* One greater than the last register to be handled in this pass.  */
 524   int max_reg;
 525   sbitmap *nonnull_local;
 526   sbitmap *nonnull_killed;
 527 };
 528 \f
 529 static void compute_can_copy    PARAMS ((void));
 530 static char *gmalloc            PARAMS ((unsigned int));
 531 static char *grealloc           PARAMS ((char *, unsigned int));
 532 static char *gcse_alloc         PARAMS ((unsigned long));
 533 static void alloc_gcse_mem      PARAMS ((rtx));
 534 static void free_gcse_mem       PARAMS ((void));
 535 static void alloc_reg_set_mem   PARAMS ((int));
 536 static void free_reg_set_mem    PARAMS ((void));
 537 static int get_bitmap_width     PARAMS ((int, int, int));
 538 static void record_one_set      PARAMS ((int, rtx));
 539 static void record_set_info     PARAMS ((rtx, rtx, void *));
 540 static void compute_sets        PARAMS ((rtx));
 541 static void hash_scan_insn      PARAMS ((rtx, int, int));
 542 static void hash_scan_set       PARAMS ((rtx, rtx, int));
 543 static void hash_scan_clobber   PARAMS ((rtx, rtx));
 544 static void hash_scan_call      PARAMS ((rtx, rtx));
 545 static int want_to_gcse_p       PARAMS ((rtx));
 546 static int oprs_unchanged_p     PARAMS ((rtx, rtx, int));
 547 static int oprs_anticipatable_p PARAMS ((rtx, rtx));
 548 static int oprs_available_p     PARAMS ((rtx, rtx));
 549 static void insert_expr_in_table PARAMS ((rtx, enum machine_mode, rtx,
 550                                           int, int));
 551 static void insert_set_in_table PARAMS ((rtx, rtx));
 552 static unsigned int hash_expr   PARAMS ((rtx, enum machine_mode, int *, int));
 553 static unsigned int hash_expr_1 PARAMS ((rtx, enum machine_mode, int *));
 554 static unsigned int hash_set    PARAMS ((int, int));
 555 static int expr_equiv_p         PARAMS ((rtx, rtx));
 556 static void record_last_reg_set_info PARAMS ((rtx, int));
 557 static void record_last_mem_set_info PARAMS ((rtx));
 558 static void record_last_set_info PARAMS ((rtx, rtx, void *));
 559 static void compute_hash_table  PARAMS ((int));
 560 static void alloc_set_hash_table PARAMS ((int));
 561 static void free_set_hash_table PARAMS ((void));
 562 static void compute_set_hash_table PARAMS ((void));
 563 static void alloc_expr_hash_table PARAMS ((int));
 564 static void free_expr_hash_table PARAMS ((void));
 565 static void compute_expr_hash_table PARAMS ((void));
 566 static void dump_hash_table     PARAMS ((FILE *, const char *, struct expr **,
 567                                          int, int));
 568 static struct expr *lookup_expr PARAMS ((rtx));
 569 static struct expr *lookup_set  PARAMS ((int, rtx));
 570 static struct expr *next_set    PARAMS ((int, struct expr *));
 571 static void reset_opr_set_tables PARAMS ((void));
 572 static int oprs_not_set_p       PARAMS ((rtx, rtx));
 573 static void mark_call           PARAMS ((rtx));
 574 static void mark_set            PARAMS ((rtx, rtx));
 575 static void mark_clobber        PARAMS ((rtx, rtx));
 576 static void mark_oprs_set       PARAMS ((rtx));
 577 static void alloc_cprop_mem     PARAMS ((int, int));
 578 static void free_cprop_mem      PARAMS ((void));
 579 static void compute_transp      PARAMS ((rtx, int, sbitmap *, int));
 580 static void compute_transpout   PARAMS ((void));
 581 static void compute_local_properties PARAMS ((sbitmap *, sbitmap *, sbitmap *,
 582                                               int));
 583 static void compute_cprop_data  PARAMS ((void));
 584 static void find_used_regs      PARAMS ((rtx));
 585 static int try_replace_reg      PARAMS ((rtx, rtx, rtx));
 586 static struct expr *find_avail_set PARAMS ((int, rtx));
 587 static int cprop_jump           PARAMS ((rtx, rtx, struct reg_use *, rtx));
 588 #ifdef HAVE_cc0
 589 static int cprop_cc0_jump       PARAMS ((rtx, struct reg_use *, rtx));
 590 #endif
 591 static int cprop_insn           PARAMS ((rtx, int));
 592 static int cprop                PARAMS ((int));
 593 static int one_cprop_pass       PARAMS ((int, int));
 594 static void alloc_pre_mem       PARAMS ((int, int));
 595 static void free_pre_mem        PARAMS ((void));
 596 static void compute_pre_data    PARAMS ((void));
 597 static int pre_expr_reaches_here_p PARAMS ((int, struct expr *, int));
 598 static void insert_insn_end_bb  PARAMS ((struct expr *, int, int));
 599 static void pre_insert_copy_insn PARAMS ((struct expr *, rtx));
 600 static void pre_insert_copies   PARAMS ((void));
 601 static int pre_delete           PARAMS ((void));
 602 static int pre_gcse             PARAMS ((void));
 603 static int one_pre_gcse_pass    PARAMS ((int));
 604 static void add_label_notes     PARAMS ((rtx, rtx));
 605 static void alloc_code_hoist_mem PARAMS ((int, int));
 606 static void free_code_hoist_mem PARAMS ((void));
 607 static void compute_code_hoist_vbeinout PARAMS ((void));
 608 static void compute_code_hoist_data PARAMS ((void));
 609 static int hoist_expr_reaches_here_p PARAMS ((int, int, int, char *));
 610 static void hoist_code          PARAMS ((void));
 611 static int one_code_hoisting_pass PARAMS ((void));
 612 static void alloc_rd_mem        PARAMS ((int, int));
 613 static void free_rd_mem         PARAMS ((void));
 614 static void handle_rd_kill_set  PARAMS ((rtx, int, int));
 615 static void compute_kill_rd     PARAMS ((void));
 616 static void compute_rd          PARAMS ((void));
 617 static void alloc_avail_expr_mem PARAMS ((int, int));
 618 static void free_avail_expr_mem PARAMS ((void));
 619 static void compute_ae_gen      PARAMS ((void));
 620 static int expr_killed_p        PARAMS ((rtx, int));
 621 static void compute_ae_kill     PARAMS ((sbitmap *, sbitmap *));
 622 static int expr_reaches_here_p  PARAMS ((struct occr *, struct expr *,
 623                                          int, int));
 624 static rtx computing_insn       PARAMS ((struct expr *, rtx));
 625 static int def_reaches_here_p   PARAMS ((rtx, rtx));
 626 static int can_disregard_other_sets PARAMS ((struct reg_set **, rtx, int));
 627 static int handle_avail_expr    PARAMS ((rtx, struct expr *));
 628 static int classic_gcse         PARAMS ((void));
 629 static int one_classic_gcse_pass PARAMS ((int));
 630 static void invalidate_nonnull_info PARAMS ((rtx, rtx, void *));
 631 static void delete_null_pointer_checks_1 PARAMS ((int *, sbitmap *, sbitmap *,
 632                                                   struct null_pointer_info *));
 633 static rtx process_insert_insn  PARAMS ((struct expr *));
 634 static int pre_edge_insert      PARAMS ((struct edge_list *, struct expr **));
 635 static int expr_reaches_here_p_work PARAMS ((struct occr *, struct expr *,
 636                                              int, int, char *));
 637 static int pre_expr_reaches_here_p_work PARAMS ((int, struct expr *,
 638                                                  int, char *));
 639 \f
 640 /* Entry point for global common subexpression elimination.
 641    F is the first instruction in the function.  */
 642
 643 int
 644 gcse_main (f, file)
 645      rtx f;
 646      FILE *file;
 647 {
 648   int changed, pass;
 649   /* Bytes used at start of pass.  */
 650   int initial_bytes_used;
 651   /* Maximum number of bytes used by a pass.  */
 652   int max_pass_bytes;
 653   /* Point to release obstack data from for each pass.  */
 654   char *gcse_obstack_bottom;
 655
 656   /* We do not construct an accurate cfg in functions which call
 657      setjmp, so just punt to be safe.  */
 658   if (current_function_calls_setjmp)
 659     return 0;
 660
 661   /* Assume that we do not need to run jump optimizations after gcse.  */
 662   run_jump_opt_after_gcse = 0;
 663
 664   /* For calling dump_foo fns from gdb.  */
 665   debug_stderr = stderr;
 666   gcse_file = file;
 667
 668   /* Identify the basic block information for this function, including
 669      successors and predecessors.  */
 670   max_gcse_regno = max_reg_num ();
 671   find_basic_blocks (f, max_gcse_regno, file);
 672   cleanup_cfg (f);
 673
 674   if (file)
 675     dump_flow_info (file);
 676
 677   /* Return if there's nothing to do.  */
 678   if (n_basic_blocks <= 1)
 679     {
 680       free_basic_block_vars (0);
 681       return 0;
 682     }
 683
 684   /* Trying to perform global optimizations on flow graphs which have
 685      a high connectivity will take a long time and is unlikely to be
 686      particularly useful.
 687
 688      In normal circumstances a cfg should have about twice has many edges
 689      as blocks.  But we do not want to punish small functions which have
 690      a couple switch statements.  So we require a relatively large number
 691      of basic blocks and the ratio of edges to blocks to be high.  */
 692   if (n_basic_blocks > 1000 && n_edges / n_basic_blocks >= 20)
 693     {
 694       free_basic_block_vars (0);
 695       return 0;
 696     }
 697
 698   /* See what modes support reg/reg copy operations.  */
 699   if (! can_copy_init_p)
 700     {
 701       compute_can_copy ();
 702       can_copy_init_p = 1;
 703     }
 704
 705   gcc_obstack_init (&gcse_obstack);
 706   bytes_used = 0;
 707
 708   /* Record where pseudo-registers are set.  This data is kept accurate
 709      during each pass.  ??? We could also record hard-reg information here
 710      [since it's unchanging], however it is currently done during hash table
 711      computation.
 712
 713      It may be tempting to compute MEM set information here too, but MEM sets
 714      will be subject to code motion one day and thus we need to compute
 715      information about memory sets when we build the hash tables.  */
 716
 717   alloc_reg_set_mem (max_gcse_regno);
 718   compute_sets (f);
 719
 720   pass = 0;
 721   initial_bytes_used = bytes_used;
 722   max_pass_bytes = 0;
 723   gcse_obstack_bottom = gcse_alloc (1);
 724   changed = 1;
 725   while (changed && pass < MAX_PASSES)
 726     {
 727       changed = 0;
 728       if (file)
 729         fprintf (file, "GCSE pass %d\n\n", pass + 1);
 730
 731       /* Initialize bytes_used to the space for the pred/succ lists,
 732          and the reg_set_table data.  */
 733       bytes_used = initial_bytes_used;
 734
 735       /* Each pass may create new registers, so recalculate each time.  */
 736       max_gcse_regno = max_reg_num ();
 737
 738       alloc_gcse_mem (f);
 739
 740       /* Don't allow constant propagation to modify jumps
 741          during this pass.  */
 742       changed = one_cprop_pass (pass + 1, 0);
 743
 744       if (optimize_size)
 745         changed |= one_classic_gcse_pass (pass + 1);
 746       else
 747         {
 748           changed |= one_pre_gcse_pass (pass + 1);
 749           free_reg_set_mem ();
 750           alloc_reg_set_mem (max_reg_num ());
 751           compute_sets (f);
 752           run_jump_opt_after_gcse = 1;
 753         }
 754
 755       if (max_pass_bytes < bytes_used)
 756         max_pass_bytes = bytes_used;
 757
 758       /* Free up memory, then reallocate for code hoisting.  We can
 759          not re-use the existing allocated memory because the tables
 760          will not have info for the insns or registers created by
 761          partial redundancy elimination.  */
 762       free_gcse_mem ();
 763
 764       /* It does not make sense to run code hoisting unless we optimizing
 765          for code size -- it rarely makes programs faster, and can make
 766          them bigger if we did partial redundancy elimination (when optimizing
 767          for space, we use a classic gcse algorithm instead of partial
 768          redundancy algorithms).  */
 769       if (optimize_size)
 770         {
 771           max_gcse_regno = max_reg_num ();
 772           alloc_gcse_mem (f);
 773           changed |= one_code_hoisting_pass ();
 774           free_gcse_mem ();
 775
 776           if (max_pass_bytes < bytes_used)
 777             max_pass_bytes = bytes_used;
 778         }
 779
 780       if (file)
 781         {
 782           fprintf (file, "\n");
 783           fflush (file);
 784         }
 785
 786       obstack_free (&gcse_obstack, gcse_obstack_bottom);
 787       pass++;
 788     }
 789
 790   /* Do one last pass of copy propagation, including cprop into
 791      conditional jumps.  */
 792
 793   max_gcse_regno = max_reg_num ();
 794   alloc_gcse_mem (f);
 795   /* This time, go ahead and allow cprop to alter jumps.  */
 796   one_cprop_pass (pass + 1, 1);
 797   free_gcse_mem ();
 798
 799   if (file)
 800     {
 801       fprintf (file, "GCSE of %s: %d basic blocks, ",
 802                current_function_name, n_basic_blocks);
 803       fprintf (file, "%d pass%s, %d bytes\n\n",
 804                pass, pass > 1 ? "es" : "", max_pass_bytes);
 805     }
 806
 807   obstack_free (&gcse_obstack, NULL_PTR);
 808   free_reg_set_mem ();
 809   free_basic_block_vars (0);
 810   return run_jump_opt_after_gcse;
 811 }
 812 \f
 813 /* Misc. utilities.  */
 814
 815 /* Compute which modes support reg/reg copy operations.  */
 816
 817 static void
 818 compute_can_copy ()
 819 {
 820   int i;
 821 #ifndef AVOID_CCMODE_COPIES
 822   rtx reg,insn;
 823 #endif
 824   char *free_point = (char *) oballoc (1);
 825
 826   bzero (can_copy_p, NUM_MACHINE_MODES);
 827
 828   start_sequence ();
 829   for (i = 0; i < NUM_MACHINE_MODES; i++)
 830     if (GET_MODE_CLASS (i) == MODE_CC)
 831       {
 832 #ifdef AVOID_CCMODE_COPIES
 833         can_copy_p[i] = 0;
 834 #else
 835         reg = gen_rtx_REG ((enum machine_mode) i, LAST_VIRTUAL_REGISTER + 1);
 836         insn = emit_insn (gen_rtx_SET (VOIDmode, reg, reg));
 837         if (recog (PATTERN (insn), insn, NULL_PTR) >= 0)
 838           can_copy_p[i] = 1;
 839 #endif
 840         else
 841           can_copy_p[i] = 1;
 842       }
 843
 844   end_sequence ();
 845
 846   /* Free the objects we just allocated.  */
 847   obfree (free_point);
 848 }
 849 \f
 850 /* Cover function to xmalloc to record bytes allocated.  */
 851
 852 static char *
 853 gmalloc (size)
 854      unsigned int size;
 855 {
 856   bytes_used += size;
 857   return xmalloc (size);
 858 }
 859
 860 /* Cover function to xrealloc.
 861    We don't record the additional size since we don't know it.
 862    It won't affect memory usage stats much anyway.  */
 863
 864 static char *
 865 grealloc (ptr, size)
 866      char *ptr;
 867      unsigned int size;
 868 {
 869   return xrealloc (ptr, size);
 870 }
 871
 872 /* Cover function to obstack_alloc.
 873    We don't need to record the bytes allocated here since
 874    obstack_chunk_alloc is set to gmalloc.  */
 875
 876 static char *
 877 gcse_alloc (size)
 878      unsigned long size;
 879 {
 880   return (char *) obstack_alloc (&gcse_obstack, size);
 881 }
 882
 883 /* Allocate memory for the cuid mapping array,
 884    and reg/memory set tracking tables.
 885
 886    This is called at the start of each pass.  */
 887
 888 static void
 889 alloc_gcse_mem (f)
 890      rtx f;
 891 {
 892   int i,n;
 893   rtx insn;
 894
 895   /* Find the largest UID and create a mapping from UIDs to CUIDs.
 896      CUIDs are like UIDs except they increase monotonically, have no gaps,
 897      and only apply to real insns.  */
 898
 899   max_uid = get_max_uid ();
 900   n = (max_uid + 1) * sizeof (int);
 901   uid_cuid = (int *) gmalloc (n);
 902   bzero ((char *) uid_cuid, n);
 903   for (insn = f, i = 0; insn; insn = NEXT_INSN (insn))
 904     {
 905       if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
 906         INSN_CUID (insn) = i++;
 907       else
 908         INSN_CUID (insn) = i;
 909     }
 910
 911   /* Create a table mapping cuids to insns.  */
 912
 913   max_cuid = i;
 914   n = (max_cuid + 1) * sizeof (rtx);
 915   cuid_insn = (rtx *) gmalloc (n);
 916   bzero ((char *) cuid_insn, n);
 917   for (insn = f, i = 0; insn; insn = NEXT_INSN (insn))
 918     if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
 919       CUID_INSN (i++) = insn;
 920
 921   /* Allocate vars to track sets of regs.  */
 922   reg_set_bitmap = (sbitmap) sbitmap_alloc (max_gcse_regno);
 923
 924   /* Allocate vars to track sets of regs, memory per block.  */
 925   reg_set_in_block = (sbitmap *) sbitmap_vector_alloc (n_basic_blocks,
 926                                                        max_gcse_regno);
 927   mem_set_in_block = (char *) gmalloc (n_basic_blocks);
 928 }
 929
 930 /* Free memory allocated by alloc_gcse_mem.  */
 931
 932 static void
 933 free_gcse_mem ()
 934 {
 935   free (uid_cuid);
 936   free (cuid_insn);
 937
 938   free (reg_set_bitmap);
 939
 940   free (reg_set_in_block);
 941   free (mem_set_in_block);
 942 }
 943
 944 /* Many of the global optimization algorithms work by solving dataflow
 945    equations for various expressions.  Initially, some local value is
 946    computed for each expression in each block.  Then, the values across the
 947    various blocks are combined (by following flow graph edges) to arrive at
 948    global values.  Conceptually, each set of equations is independent.  We
 949    may therefore solve all the equations in parallel, solve them one at a
 950    time, or pick any intermediate approach.
 951
 952    When you're going to need N two-dimensional bitmaps, each X (say, the
 953    number of blocks) by Y (say, the number of expressions), call this
 954    function.  It's not important what X and Y represent; only that Y
 955    correspond to the things that can be done in parallel.  This function will
 956    return an appropriate chunking factor C; you should solve C sets of
 957    equations in parallel.  By going through this function, we can easily
 958    trade space against time; by solving fewer equations in parallel we use
 959    less space.  */
 960
 961 static int
 962 get_bitmap_width (n, x, y)
 963      int n;
 964      int x;
 965      int y;
 966 {
 967   /* It's not really worth figuring out *exactly* how much memory will
 968      be used by a particular choice.  The important thing is to get
 969      something approximately right.  */
 970   size_t max_bitmap_memory = 10 * 1024 * 1024;
 971
 972   /* The number of bytes we'd use for a single column of minimum
 973      width.  */
 974   size_t column_size = n * x * sizeof (SBITMAP_ELT_TYPE);
 975
 976   /* Often, it's reasonable just to solve all the equations in
 977      parallel.  */
 978   if (column_size * SBITMAP_SET_SIZE (y) <= max_bitmap_memory)
 979     return y;
 980
 981   /* Otherwise, pick the largest width we can, without going over the
 982      limit.  */
 983   return SBITMAP_ELT_BITS * ((max_bitmap_memory + column_size - 1)
 984                              / column_size);
 985 }
 986 \f
 987 /* Compute the local properties of each recorded expression.
 988
 989    Local properties are those that are defined by the block, irrespective of
 990    other blocks.
 991
 992    An expression is transparent in a block if its operands are not modified
 993    in the block.
 994
 995    An expression is computed (locally available) in a block if it is computed
 996    at least once and expression would contain the same value if the
 997    computation was moved to the end of the block.
 998
 999    An expression is locally anticipatable in a block if it is computed at
1000    least once and expression would contain the same value if the computation
1001    was moved to the beginning of the block.
1002
1003    We call this routine for cprop, pre and code hoisting.  They all compute
1004    basically the same information and thus can easily share this code.
1005
1006    TRANSP, COMP, and ANTLOC are destination sbitmaps for recording local
1007    properties.  If NULL, then it is not necessary to compute or record that
1008    particular property.
1009
1010    SETP controls which hash table to look at.  If zero, this routine looks at
1011    the expr hash table; if nonzero this routine looks at the set hash table.
1012    Additionally, TRANSP is computed as ~TRANSP, since this is really cprop's
1013    ABSALTERED.  */
1014
1015 static void
1016 compute_local_properties (transp, comp, antloc, setp)
1017      sbitmap *transp;
1018      sbitmap *comp;
1019      sbitmap *antloc;
1020      int setp;
1021 {
1022   int i, hash_table_size;
1023   struct expr **hash_table;
1024
1025   /* Initialize any bitmaps that were passed in.  */
1026   if (transp)
1027     {
1028       if (setp)
1029         sbitmap_vector_zero (transp, n_basic_blocks);
1030       else
1031         sbitmap_vector_ones (transp, n_basic_blocks);
1032     }
1033
1034   if (comp)
1035     sbitmap_vector_zero (comp, n_basic_blocks);
1036   if (antloc)
1037     sbitmap_vector_zero (antloc, n_basic_blocks);
1038
1039   /* We use the same code for cprop, pre and hoisting.  For cprop
1040      we care about the set hash table, for pre and hoisting we
1041      care about the expr hash table.  */
1042   hash_table_size = setp ? set_hash_table_size : expr_hash_table_size;
1043   hash_table = setp ? set_hash_table : expr_hash_table;
1044
1045   for (i = 0; i < hash_table_size; i++)
1046     {
1047       struct expr *expr;
1048
1049       for (expr = hash_table[i]; expr != NULL; expr = expr->next_same_hash)
1050         {
1051           int indx = expr->bitmap_index;
1052           struct occr *occr;
1053
1054           /* The expression is transparent in this block if it is not killed.
1055              We start by assuming all are transparent [none are killed], and
1056              then reset the bits for those that are.  */
1057           if (transp)
1058             compute_transp (expr->expr, indx, transp, setp);
1059
1060           /* The occurrences recorded in antic_occr are exactly those that
1061              we want to set to non-zero in ANTLOC.  */
1062           if (antloc)
1063             for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
1064               {
1065                 SET_BIT (antloc[BLOCK_NUM (occr->insn)], indx);
1066
1067                 /* While we're scanning the table, this is a good place to
1068                    initialize this.  */
1069                 occr->deleted_p = 0;
1070               }
1071
1072           /* The occurrences recorded in avail_occr are exactly those that
1073              we want to set to non-zero in COMP.  */
1074           if (comp)
1075             for (occr = expr->avail_occr; occr != NULL; occr = occr->next)
1076               {
1077                 SET_BIT (comp[BLOCK_NUM (occr->insn)], indx);
1078
1079                 /* While we're scanning the table, this is a good place to
1080                    initialize this.  */
1081                 occr->copied_p = 0;
1082               }
1083
1084           /* While we're scanning the table, this is a good place to
1085              initialize this.  */
1086           expr->reaching_reg = 0;
1087         }
1088     }
1089 }
1090 \f
1091 /* Register set information.
1092
1093    `reg_set_table' records where each register is set or otherwise
1094    modified.  */
1095
1096 static struct obstack reg_set_obstack;
1097
1098 static void
1099 alloc_reg_set_mem (n_regs)
1100      int n_regs;
1101 {
1102   unsigned int n;
1103
1104   reg_set_table_size = n_regs + REG_SET_TABLE_SLOP;
1105   n = reg_set_table_size * sizeof (struct reg_set *);
1106   reg_set_table = (struct reg_set **) gmalloc (n);
1107   bzero ((char *) reg_set_table, n);
1108
1109   gcc_obstack_init (&reg_set_obstack);
1110 }
1111
1112 static void
1113 free_reg_set_mem ()
1114 {
1115   free (reg_set_table);
1116   obstack_free (&reg_set_obstack, NULL_PTR);
1117 }
1118
1119 /* Record REGNO in the reg_set table.  */
1120
1121 static void
1122 record_one_set (regno, insn)
1123      int regno;
1124      rtx insn;
1125 {
1126   /* allocate a new reg_set element and link it onto the list */
1127   struct reg_set *new_reg_info, *reg_info_ptr1, *reg_info_ptr2;
1128
1129   /* If the table isn't big enough, enlarge it.  */
1130   if (regno >= reg_set_table_size)
1131     {
1132       int new_size = regno + REG_SET_TABLE_SLOP;
1133
1134       reg_set_table
1135         = (struct reg_set **) grealloc ((char *) reg_set_table,
1136                                         new_size * sizeof (struct reg_set *));
1137       bzero ((char *) (reg_set_table + reg_set_table_size),
1138              (new_size - reg_set_table_size) * sizeof (struct reg_set *));
1139       reg_set_table_size = new_size;
1140     }
1141
1142   new_reg_info = (struct reg_set *) obstack_alloc (&reg_set_obstack,
1143                                                    sizeof (struct reg_set));
1144   bytes_used += sizeof (struct reg_set);
1145   new_reg_info->insn = insn;
1146   new_reg_info->next = NULL;
1147   if (reg_set_table[regno] == NULL)
1148     reg_set_table[regno] = new_reg_info;
1149   else
1150     {
1151       reg_info_ptr1 = reg_info_ptr2 = reg_set_table[regno];
1152       /* ??? One could keep a "last" pointer to speed this up.  */
1153       while (reg_info_ptr1 != NULL)
1154         {
1155           reg_info_ptr2 = reg_info_ptr1;
1156           reg_info_ptr1 = reg_info_ptr1->next;
1157         }
1158
1159       reg_info_ptr2->next = new_reg_info;
1160     }
1161 }
1162
1163 /* Called from compute_sets via note_stores to handle one SET or CLOBBER in
1164    an insn.  The DATA is really the instruction in which the SET is
1165    occurring.  */
1166
1167 static void
1168 record_set_info (dest, setter, data)
1169      rtx dest, setter ATTRIBUTE_UNUSED;
1170      void *data;
1171 {
1172   rtx record_set_insn = (rtx) data;
1173
1174   if (GET_CODE (dest) == REG && REGNO (dest) >= FIRST_PSEUDO_REGISTER)
1175     record_one_set (REGNO (dest), record_set_insn);
1176 }
1177
1178 /* Scan the function and record each set of each pseudo-register.
1179
1180    This is called once, at the start of the gcse pass.  See the comments for
1181    `reg_set_table' for further documenation.  */
1182
1183 static void
1184 compute_sets (f)
1185      rtx f;
1186 {
1187   rtx insn;
1188
1189   for (insn = f; insn != 0; insn = NEXT_INSN (insn))
1190     if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
1191       note_stores (PATTERN (insn), record_set_info, insn);
1192 }
1193 \f
1194 /* Hash table support.  */
1195
1196 /* For each register, the cuid of the first/last insn in the block to set it,
1197    or -1 if not set.  */
1198 #define NEVER_SET -1
1199 static int *reg_first_set;
1200 static int *reg_last_set;
1201
1202 /* While computing "first/last set" info, this is the CUID of first/last insn
1203    to set memory or -1 if not set.  `mem_last_set' is also used when
1204    performing GCSE to record whether memory has been set since the beginning
1205    of the block.
1206
1207    Note that handling of memory is very simple, we don't make any attempt
1208    to optimize things (later).  */
1209 static int mem_first_set;
1210 static int mem_last_set;
1211
1212 /* Perform a quick check whether X, the source of a set, is something
1213    we want to consider for GCSE.  */
1214
1215 static int
1216 want_to_gcse_p (x)
1217      rtx x;
1218 {
1219   switch (GET_CODE (x))
1220     {
1221     case REG:
1222     case SUBREG:
1223     case CONST_INT:
1224     case CONST_DOUBLE:
1225     case CALL:
1226       return 0;
1227
1228     default:
1229       break;
1230     }
1231
1232   return 1;
1233 }
1234
1235 /* Return non-zero if the operands of expression X are unchanged from the
1236    start of INSN's basic block up to but not including INSN (if AVAIL_P == 0),
1237    or from INSN to the end of INSN's basic block (if AVAIL_P != 0).  */
1238
1239 static int
1240 oprs_unchanged_p (x, insn, avail_p)
1241      rtx x, insn;
1242      int avail_p;
1243 {
1244   int i, j;
1245   enum rtx_code code;
1246   const char *fmt;
1247
1248   if (x == 0)
1249     return 1;
1250
1251   code = GET_CODE (x);
1252   switch (code)
1253     {
1254     case REG:
1255       if (avail_p)
1256         return (reg_last_set[REGNO (x)] == NEVER_SET
1257                 || reg_last_set[REGNO (x)] < INSN_CUID (insn));
1258       else
1259         return (reg_first_set[REGNO (x)] == NEVER_SET
1260                 || reg_first_set[REGNO (x)] >= INSN_CUID (insn));
1261
1262     case MEM:
1263       if (avail_p && mem_last_set != NEVER_SET
1264           && mem_last_set >= INSN_CUID (insn))
1265         return 0;
1266       else if (! avail_p && mem_first_set != NEVER_SET
1267                && mem_first_set < INSN_CUID (insn))
1268         return 0;
1269       else
1270         return oprs_unchanged_p (XEXP (x, 0), insn, avail_p);
1271
1272     case PRE_DEC:
1273     case PRE_INC:
1274     case POST_DEC:
1275     case POST_INC:
1276       return 0;
1277
1278     case PC:
1279     case CC0: /*FIXME*/
1280     case CONST:
1281     case CONST_INT:
1282     case CONST_DOUBLE:
1283     case SYMBOL_REF:
1284     case LABEL_REF:
1285     case ADDR_VEC:
1286     case ADDR_DIFF_VEC:
1287       return 1;
1288
1289     default:
1290       break;
1291     }
1292
1293   for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
1294     {
1295       if (fmt[i] == 'e')
1296         {
1297           /* If we are about to do the last recursive call needed at this
1298              level, change it into iteration.  This function is called enough
1299              to be worth it.  */
1300           if (i == 0)
1301             return oprs_unchanged_p (XEXP (x, i), insn, avail_p);
1302
1303           else if (! oprs_unchanged_p (XEXP (x, i), insn, avail_p))
1304             return 0;
1305         }
1306       else if (fmt[i] == 'E')
1307         for (j = 0; j < XVECLEN (x, i); j++)
1308           if (! oprs_unchanged_p (XVECEXP (x, i, j), insn, avail_p))
1309             return 0;
1310     }
1311
1312   return 1;
1313 }
1314
1315 /* Return non-zero if the operands of expression X are unchanged from
1316    the start of INSN's basic block up to but not including INSN.  */
1317
1318 static int
1319 oprs_anticipatable_p (x, insn)
1320      rtx x, insn;
1321 {
1322   return oprs_unchanged_p (x, insn, 0);
1323 }
1324
1325 /* Return non-zero if the operands of expression X are unchanged from
1326    INSN to the end of INSN's basic block.  */
1327
1328 static int
1329 oprs_available_p (x, insn)
1330      rtx x, insn;
1331 {
1332   return oprs_unchanged_p (x, insn, 1);
1333 }
1334
1335 /* Hash expression X.
1336
1337    MODE is only used if X is a CONST_INT.  DO_NOT_RECORD_P is a boolean
1338    indicating if a volatile operand is found or if the expression contains
1339    something we don't want to insert in the table.
1340
1341    ??? One might want to merge this with canon_hash.  Later.  */
1342
1343 static unsigned int
1344 hash_expr (x, mode, do_not_record_p, hash_table_size)
1345      rtx x;
1346      enum machine_mode mode;
1347      int *do_not_record_p;
1348      int hash_table_size;
1349 {
1350   unsigned int hash;
1351
1352   *do_not_record_p = 0;
1353
1354   hash = hash_expr_1 (x, mode, do_not_record_p);
1355   return hash % hash_table_size;
1356 }
1357
1358 /* Subroutine of hash_expr to do the actual work.  */
1359
1360 static unsigned int
1361 hash_expr_1 (x, mode, do_not_record_p)
1362      rtx x;
1363      enum machine_mode mode;
1364      int *do_not_record_p;
1365 {
1366   int i, j;
1367   unsigned hash = 0;
1368   enum rtx_code code;
1369   const char *fmt;
1370
1371   /* Used to turn recursion into iteration.  We can't rely on GCC's
1372      tail-recursion eliminatio since we need to keep accumulating values
1373      in HASH.  */
1374
1375   if (x == 0)
1376     return hash;
1377
1378  repeat:
1379   code = GET_CODE (x);
1380   switch (code)
1381     {
1382     case REG:
1383       hash += ((unsigned int) REG << 7) + REGNO (x);
1384       return hash;
1385
1386     case CONST_INT:
1387       hash += (((unsigned int) CONST_INT << 7) + (unsigned int) mode
1388                + (unsigned int) INTVAL (x));
1389       return hash;
1390
1391     case CONST_DOUBLE:
1392       /* This is like the general case, except that it only counts
1393          the integers representing the constant.  */
1394       hash += (unsigned int) code + (unsigned int) GET_MODE (x);
1395       if (GET_MODE (x) != VOIDmode)
1396         for (i = 2; i < GET_RTX_LENGTH (CONST_DOUBLE); i++)
1397           hash += (unsigned int) XWINT (x, i);
1398       else
1399         hash += ((unsigned int) CONST_DOUBLE_LOW (x)
1400                  + (unsigned int) CONST_DOUBLE_HIGH (x));
1401       return hash;
1402
1403       /* Assume there is only one rtx object for any given label.  */
1404     case LABEL_REF:
1405       /* We don't hash on the address of the CODE_LABEL to avoid bootstrap
1406          differences and differences between each stage's debugging dumps.  */
1407       hash += (((unsigned int) LABEL_REF << 7)
1408                + CODE_LABEL_NUMBER (XEXP (x, 0)));
1409       return hash;
1410
1411     case SYMBOL_REF:
1412       {
1413         /* Don't hash on the symbol's address to avoid bootstrap differences.
1414            Different hash values may cause expressions to be recorded in
1415            different orders and thus different registers to be used in the
1416            final assembler.  This also avoids differences in the dump files
1417            between various stages.  */
1418         unsigned int h = 0;
1419         unsigned char *p = (unsigned char *) XSTR (x, 0);
1420
1421         while (*p)
1422           h += (h << 7) + *p++; /* ??? revisit */
1423
1424         hash += ((unsigned int) SYMBOL_REF << 7) + h;
1425         return hash;
1426       }
1427
1428     case MEM:
1429       if (MEM_VOLATILE_P (x))
1430         {
1431           *do_not_record_p = 1;
1432           return 0;
1433         }
1434
1435       hash += (unsigned int) MEM;
1436       hash += MEM_ALIAS_SET (x);
1437       x = XEXP (x, 0);
1438       goto repeat;
1439
1440     case PRE_DEC:
1441     case PRE_INC:
1442     case POST_DEC:
1443     case POST_INC:
1444     case PC:
1445     case CC0:
1446     case CALL:
1447     case UNSPEC_VOLATILE:
1448       *do_not_record_p = 1;
1449       return 0;
1450
1451     case ASM_OPERANDS:
1452       if (MEM_VOLATILE_P (x))
1453         {
1454           *do_not_record_p = 1;
1455           return 0;
1456         }
1457
1458     default:
1459       break;
1460     }
1461
1462   hash += (unsigned) code + (unsigned) GET_MODE (x);
1463   for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
1464     {
1465       if (fmt[i] == 'e')
1466         {
1467           /* If we are about to do the last recursive call
1468              needed at this level, change it into iteration.
1469              This function is called enough to be worth it.  */
1470           if (i == 0)
1471             {
1472               x = XEXP (x, i);
1473               goto repeat;
1474             }
1475
1476           hash += hash_expr_1 (XEXP (x, i), 0, do_not_record_p);
1477           if (*do_not_record_p)
1478             return 0;
1479         }
1480
1481       else if (fmt[i] == 'E')
1482         for (j = 0; j < XVECLEN (x, i); j++)
1483           {
1484             hash += hash_expr_1 (XVECEXP (x, i, j), 0, do_not_record_p);
1485             if (*do_not_record_p)
1486               return 0;
1487           }
1488
1489       else if (fmt[i] == 's')
1490         {
1491           register unsigned char *p = (unsigned char *) XSTR (x, i);
1492
1493           if (p)
1494             while (*p)
1495               hash += *p++;
1496         }
1497       else if (fmt[i] == 'i')
1498         hash += (unsigned int) XINT (x, i);
1499       else
1500         abort ();
1501     }
1502
1503   return hash;
1504 }
1505
1506 /* Hash a set of register REGNO.
1507
1508    Sets are hashed on the register that is set.  This simplifies the PRE copy
1509    propagation code.
1510
1511    ??? May need to make things more elaborate.  Later, as necessary.  */
1512
1513 static unsigned int
1514 hash_set (regno, hash_table_size)
1515      int regno;
1516      int hash_table_size;
1517 {
1518   unsigned int hash;
1519
1520   hash = regno;
1521   return hash % hash_table_size;
1522 }
1523
1524 /* Return non-zero if exp1 is equivalent to exp2.
1525    ??? Borrowed from cse.c.  Might want to remerge with cse.c.  Later.  */
1526
1527 static int
1528 expr_equiv_p (x, y)
1529      rtx x, y;
1530 {
1531   register int i, j;
1532   register enum rtx_code code;
1533   register const char *fmt;
1534
1535   if (x == y)
1536     return 1;
1537
1538   if (x == 0 || y == 0)
1539     return x == y;
1540
1541   code = GET_CODE (x);
1542   if (code != GET_CODE (y))
1543     return 0;
1544
1545   /* (MULT:SI x y) and (MULT:HI x y) are NOT equivalent.  */
1546   if (GET_MODE (x) != GET_MODE (y))
1547     return 0;
1548
1549   switch (code)
1550     {
1551     case PC:
1552     case CC0:
1553       return x == y;
1554
1555     case CONST_INT:
1556       return INTVAL (x) == INTVAL (y);
1557
1558     case LABEL_REF:
1559       return XEXP (x, 0) == XEXP (y, 0);
1560
1561     case SYMBOL_REF:
1562       return XSTR (x, 0) == XSTR (y, 0);
1563
1564     case REG:
1565       return REGNO (x) == REGNO (y);
1566
1567     case MEM:
1568       /* Can't merge two expressions in different alias sets, since we can
1569          decide that the expression is transparent in a block when it isn't,
1570          due to it being set with the different alias set.  */
1571       if (MEM_ALIAS_SET (x) != MEM_ALIAS_SET (y))
1572         return 0;
1573       break;
1574
1575     /*  For commutative operations, check both orders.  */
1576     case PLUS:
1577     case MULT:
1578     case AND:
1579     case IOR:
1580     case XOR:
1581     case NE:
1582     case EQ:
1583       return ((expr_equiv_p (XEXP (x, 0), XEXP (y, 0))
1584                && expr_equiv_p (XEXP (x, 1), XEXP (y, 1)))
1585               || (expr_equiv_p (XEXP (x, 0), XEXP (y, 1))
1586                   && expr_equiv_p (XEXP (x, 1), XEXP (y, 0))));
1587
1588     default:
1589       break;
1590     }
1591
1592   /* Compare the elements.  If any pair of corresponding elements
1593      fail to match, return 0 for the whole thing.  */
1594
1595   fmt = GET_RTX_FORMAT (code);
1596   for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
1597     {
1598       switch (fmt[i])
1599         {
1600         case 'e':
1601           if (! expr_equiv_p (XEXP (x, i), XEXP (y, i)))
1602             return 0;
1603           break;
1604
1605         case 'E':
1606           if (XVECLEN (x, i) != XVECLEN (y, i))
1607             return 0;
1608           for (j = 0; j < XVECLEN (x, i); j++)
1609             if (! expr_equiv_p (XVECEXP (x, i, j), XVECEXP (y, i, j)))
1610               return 0;
1611           break;
1612
1613         case 's':
1614           if (strcmp (XSTR (x, i), XSTR (y, i)))
1615             return 0;
1616           break;
1617
1618         case 'i':
1619           if (XINT (x, i) != XINT (y, i))
1620             return 0;
1621           break;
1622
1623         case 'w':
1624           if (XWINT (x, i) != XWINT (y, i))
1625             return 0;
1626         break;
1627
1628         case '0':
1629           break;
1630
1631         default:
1632           abort ();
1633         }
1634       }
1635
1636   return 1;
1637 }
1638
1639 /* Insert expression X in INSN in the hash table.
1640    If it is already present, record it as the last occurrence in INSN's
1641    basic block.
1642
1643    MODE is the mode of the value X is being stored into.
1644    It is only used if X is a CONST_INT.
1645
1646    ANTIC_P is non-zero if X is an anticipatable expression.
1647    AVAIL_P is non-zero if X is an available expression.  */
1648
1649 static void
1650 insert_expr_in_table (x, mode, insn, antic_p, avail_p)
1651      rtx x;
1652      enum machine_mode mode;
1653      rtx insn;
1654      int antic_p, avail_p;
1655 {
1656   int found, do_not_record_p;
1657   unsigned int hash;
1658   struct expr *cur_expr, *last_expr = NULL;
1659   struct occr *antic_occr, *avail_occr;
1660   struct occr *last_occr = NULL;
1661
1662   hash = hash_expr (x, mode, &do_not_record_p, expr_hash_table_size);
1663
1664   /* Do not insert expression in table if it contains volatile operands,
1665      or if hash_expr determines the expression is something we don't want
1666      to or can't handle.  */
1667   if (do_not_record_p)
1668     return;
1669
1670   cur_expr = expr_hash_table[hash];
1671   found = 0;
1672
1673   while (cur_expr && 0 == (found = expr_equiv_p (cur_expr->expr, x)))
1674     {
1675       /* If the expression isn't found, save a pointer to the end of
1676          the list.  */
1677       last_expr = cur_expr;
1678       cur_expr = cur_expr->next_same_hash;
1679     }
1680
1681   if (! found)
1682     {
1683       cur_expr = (struct expr *) gcse_alloc (sizeof (struct expr));
1684       bytes_used += sizeof (struct expr);
1685       if (expr_hash_table[hash] == NULL)
1686         /* This is the first pattern that hashed to this index.  */
1687         expr_hash_table[hash] = cur_expr;
1688       else
1689         /* Add EXPR to end of this hash chain.  */
1690         last_expr->next_same_hash = cur_expr;
1691
1692       /* Set the fields of the expr element.  */
1693       cur_expr->expr = x;
1694       cur_expr->bitmap_index = n_exprs++;
1695       cur_expr->next_same_hash = NULL;
1696       cur_expr->antic_occr = NULL;
1697       cur_expr->avail_occr = NULL;
1698     }
1699
1700   /* Now record the occurrence(s).  */
1701   if (antic_p)
1702     {
1703       antic_occr = cur_expr->antic_occr;
1704
1705       /* Search for another occurrence in the same basic block.  */
1706       while (antic_occr && BLOCK_NUM (antic_occr->insn) != BLOCK_NUM (insn))
1707         {
1708           /* If an occurrence isn't found, save a pointer to the end of
1709              the list.  */
1710           last_occr = antic_occr;
1711           antic_occr = antic_occr->next;
1712         }
1713
1714       if (antic_occr)
1715         /* Found another instance of the expression in the same basic block.
1716            Prefer the currently recorded one.  We want the first one in the
1717            block and the block is scanned from start to end.  */
1718         ; /* nothing to do */
1719       else
1720         {
1721           /* First occurrence of this expression in this basic block.  */
1722           antic_occr = (struct occr *) gcse_alloc (sizeof (struct occr));
1723           bytes_used += sizeof (struct occr);
1724           /* First occurrence of this expression in any block?  */
1725           if (cur_expr->antic_occr == NULL)
1726             cur_expr->antic_occr = antic_occr;
1727           else
1728             last_occr->next = antic_occr;
1729
1730           antic_occr->insn = insn;
1731           antic_occr->next = NULL;
1732         }
1733     }
1734
1735   if (avail_p)
1736     {
1737       avail_occr = cur_expr->avail_occr;
1738
1739       /* Search for another occurrence in the same basic block.  */
1740       while (avail_occr && BLOCK_NUM (avail_occr->insn) != BLOCK_NUM (insn))
1741         {
1742           /* If an occurrence isn't found, save a pointer to the end of
1743              the list.  */
1744           last_occr = avail_occr;
1745           avail_occr = avail_occr->next;
1746         }
1747
1748       if (avail_occr)
1749         /* Found another instance of the expression in the same basic block.
1750            Prefer this occurrence to the currently recorded one.  We want
1751            the last one in the block and the block is scanned from start
1752            to end.  */
1753         avail_occr->insn = insn;
1754       else
1755         {
1756           /* First occurrence of this expression in this basic block.  */
1757           avail_occr = (struct occr *) gcse_alloc (sizeof (struct occr));
1758           bytes_used += sizeof (struct occr);
1759
1760           /* First occurrence of this expression in any block?  */
1761           if (cur_expr->avail_occr == NULL)
1762             cur_expr->avail_occr = avail_occr;
1763           else
1764             last_occr->next = avail_occr;
1765
1766           avail_occr->insn = insn;
1767           avail_occr->next = NULL;
1768         }
1769     }
1770 }
1771
1772 /* Insert pattern X in INSN in the hash table.
1773    X is a SET of a reg to either another reg or a constant.
1774    If it is already present, record it as the last occurrence in INSN's
1775    basic block.  */
1776
1777 static void
1778 insert_set_in_table (x, insn)
1779      rtx x;
1780      rtx insn;
1781 {
1782   int found;
1783   unsigned int hash;
1784   struct expr *cur_expr, *last_expr = NULL;
1785   struct occr *cur_occr, *last_occr = NULL;
1786
1787   if (GET_CODE (x) != SET
1788       || GET_CODE (SET_DEST (x)) != REG)
1789     abort ();
1790
1791   hash = hash_set (REGNO (SET_DEST (x)), set_hash_table_size);
1792
1793   cur_expr = set_hash_table[hash];
1794   found = 0;
1795
1796   while (cur_expr && 0 == (found = expr_equiv_p (cur_expr->expr, x)))
1797     {
1798       /* If the expression isn't found, save a pointer to the end of
1799          the list.  */
1800       last_expr = cur_expr;
1801       cur_expr = cur_expr->next_same_hash;
1802     }
1803
1804   if (! found)
1805     {
1806       cur_expr = (struct expr *) gcse_alloc (sizeof (struct expr));
1807       bytes_used += sizeof (struct expr);
1808       if (set_hash_table[hash] == NULL)
1809         /* This is the first pattern that hashed to this index.  */
1810         set_hash_table[hash] = cur_expr;
1811       else
1812         /* Add EXPR to end of this hash chain.  */
1813         last_expr->next_same_hash = cur_expr;
1814
1815       /* Set the fields of the expr element.
1816          We must copy X because it can be modified when copy propagation is
1817          performed on its operands.  */
1818       /* ??? Should this go in a different obstack?  */
1819       cur_expr->expr = copy_rtx (x);
1820       cur_expr->bitmap_index = n_sets++;
1821       cur_expr->next_same_hash = NULL;
1822       cur_expr->antic_occr = NULL;
1823       cur_expr->avail_occr = NULL;
1824     }
1825
1826   /* Now record the occurrence.  */
1827   cur_occr = cur_expr->avail_occr;
1828
1829   /* Search for another occurrence in the same basic block.  */
1830   while (cur_occr && BLOCK_NUM (cur_occr->insn) != BLOCK_NUM (insn))
1831     {
1832       /* If an occurrence isn't found, save a pointer to the end of
1833          the list.  */
1834       last_occr = cur_occr;
1835       cur_occr = cur_occr->next;
1836     }
1837
1838   if (cur_occr)
1839     /* Found another instance of the expression in the same basic block.
1840        Prefer this occurrence to the currently recorded one.  We want the
1841        last one in the block and the block is scanned from start to end.  */
1842     cur_occr->insn = insn;
1843   else
1844     {
1845       /* First occurrence of this expression in this basic block.  */
1846       cur_occr = (struct occr *) gcse_alloc (sizeof (struct occr));
1847       bytes_used += sizeof (struct occr);
1848
1849       /* First occurrence of this expression in any block?  */
1850       if (cur_expr->avail_occr == NULL)
1851         cur_expr->avail_occr = cur_occr;
1852       else
1853         last_occr->next = cur_occr;
1854
1855       cur_occr->insn = insn;
1856       cur_occr->next = NULL;
1857     }
1858 }
1859
1860 /* Scan pattern PAT of INSN and add an entry to the hash table.  If SET_P is
1861    non-zero, this is for the assignment hash table, otherwise it is for the
1862    expression hash table.  */
1863
1864 static void
1865 hash_scan_set (pat, insn, set_p)
1866      rtx pat, insn;
1867      int set_p;
1868 {
1869   rtx src = SET_SRC (pat);
1870   rtx dest = SET_DEST (pat);
1871
1872   if (GET_CODE (src) == CALL)
1873     hash_scan_call (src, insn);
1874
1875   if (GET_CODE (dest) == REG)
1876     {
1877       int regno = REGNO (dest);
1878       rtx tmp;
1879
1880       /* Only record sets of pseudo-regs in the hash table.  */
1881       if (! set_p
1882           && regno >= FIRST_PSEUDO_REGISTER
1883           /* Don't GCSE something if we can't do a reg/reg copy.  */
1884           && can_copy_p [GET_MODE (dest)]
1885           /* Is SET_SRC something we want to gcse?  */
1886           && want_to_gcse_p (src))
1887         {
1888           /* An expression is not anticipatable if its operands are
1889              modified before this insn.  */
1890           int antic_p = oprs_anticipatable_p (src, insn);
1891           /* An expression is not available if its operands are
1892              subsequently modified, including this insn.  */
1893           int avail_p = oprs_available_p (src, insn);
1894
1895           insert_expr_in_table (src, GET_MODE (dest), insn, antic_p, avail_p);
1896         }
1897
1898       /* Record sets for constant/copy propagation.  */
1899       else if (set_p
1900                && regno >= FIRST_PSEUDO_REGISTER
1901                && ((GET_CODE (src) == REG
1902                     && REGNO (src) >= FIRST_PSEUDO_REGISTER
1903                     && can_copy_p [GET_MODE (dest)])
1904                    || GET_CODE (src) == CONST_INT
1905                    || GET_CODE (src) == SYMBOL_REF
1906                    || GET_CODE (src) == CONST_DOUBLE)
1907                /* A copy is not available if its src or dest is subsequently
1908                   modified.  Here we want to search from INSN+1 on, but
1909                   oprs_available_p searches from INSN on.  */
1910                && (insn == BLOCK_END (BLOCK_NUM (insn))
1911                    || ((tmp = next_nonnote_insn (insn)) != NULL_RTX
1912                        && oprs_available_p (pat, tmp))))
1913         insert_set_in_table (pat, insn);
1914     }
1915 }
1916
1917 static void
1918 hash_scan_clobber (x, insn)
1919      rtx x ATTRIBUTE_UNUSED, insn ATTRIBUTE_UNUSED;
1920 {
1921   /* Currently nothing to do.  */
1922 }
1923
1924 static void
1925 hash_scan_call (x, insn)
1926      rtx x ATTRIBUTE_UNUSED, insn ATTRIBUTE_UNUSED;
1927 {
1928   /* Currently nothing to do.  */
1929 }
1930
1931 /* Process INSN and add hash table entries as appropriate.
1932
1933    Only available expressions that set a single pseudo-reg are recorded.
1934
1935    Single sets in a PARALLEL could be handled, but it's an extra complication
1936    that isn't dealt with right now.  The trick is handling the CLOBBERs that
1937    are also in the PARALLEL.  Later.
1938
1939    If SET_P is non-zero, this is for the assignment hash table,
1940    otherwise it is for the expression hash table.
1941    If IN_LIBCALL_BLOCK nonzero, we are in a libcall block, and should
1942    not record any expressions.  */
1943
1944 static void
1945 hash_scan_insn (insn, set_p, in_libcall_block)
1946      rtx insn;
1947      int set_p;
1948      int in_libcall_block;
1949 {
1950   rtx pat = PATTERN (insn);
1951   int i;
1952
1953   /* Pick out the sets of INSN and for other forms of instructions record
1954      what's been modified.  */
1955
1956   if (GET_CODE (pat) == SET && ! in_libcall_block)
1957     {
1958       /* Ignore obvious no-ops.  */
1959       if (SET_SRC (pat) != SET_DEST (pat))
1960         hash_scan_set (pat, insn, set_p);
1961     }
1962   else if (GET_CODE (pat) == PARALLEL)
1963     for (i = 0; i < XVECLEN (pat, 0); i++)
1964       {
1965         rtx x = XVECEXP (pat, 0, i);
1966
1967         if (GET_CODE (x) == SET)
1968           {
1969             if (GET_CODE (SET_SRC (x)) == CALL)
1970               hash_scan_call (SET_SRC (x), insn);
1971           }
1972         else if (GET_CODE (x) == CLOBBER)
1973           hash_scan_clobber (x, insn);
1974         else if (GET_CODE (x) == CALL)
1975           hash_scan_call (x, insn);
1976       }
1977
1978   else if (GET_CODE (pat) == CLOBBER)
1979     hash_scan_clobber (pat, insn);
1980   else if (GET_CODE (pat) == CALL)
1981     hash_scan_call (pat, insn);
1982 }
1983
1984 static void
1985 dump_hash_table (file, name, table, table_size, total_size)
1986      FILE *file;
1987      const char *name;
1988      struct expr **table;
1989      int table_size, total_size;
1990 {
1991   int i;
1992   /* Flattened out table, so it's printed in proper order.  */
1993   struct expr **flat_table;
1994   unsigned int *hash_val;
1995   struct expr *expr;
1996
1997   flat_table
1998     = (struct expr **) xcalloc (total_size, sizeof (struct expr *));
1999   hash_val = (unsigned int *) xmalloc (total_size * sizeof (unsigned int));
2000
2001   for (i = 0; i < table_size; i++)
2002     for (expr = table[i]; expr != NULL; expr = expr->next_same_hash)
2003       {
2004         flat_table[expr->bitmap_index] = expr;
2005         hash_val[expr->bitmap_index] = i;
2006       }
2007
2008   fprintf (file, "%s hash table (%d buckets, %d entries)\n",
2009            name, table_size, total_size);
2010
2011   for (i = 0; i < total_size; i++)
2012     if (flat_table[i] != 0)
2013       {
2014         expr = flat_table[i];
2015         fprintf (file, "Index %d (hash value %d)\n  ",
2016                  expr->bitmap_index, hash_val[i]);
2017         print_rtl (file, expr->expr);
2018         fprintf (file, "\n");
2019       }
2020
2021   fprintf (file, "\n");
2022
2023   free (flat_table);
2024   free (hash_val);
2025 }
2026
2027 /* Record register first/last/block set information for REGNO in INSN.
2028
2029    reg_first_set records the first place in the block where the register
2030    is set and is used to compute "anticipatability".
2031
2032    reg_last_set records the last place in the block where the register
2033    is set and is used to compute "availability".
2034
2035    reg_set_in_block records whether the register is set in the block
2036    and is used to compute "transparency".  */
2037
2038 static void
2039 record_last_reg_set_info (insn, regno)
2040      rtx insn;
2041      int regno;
2042 {
2043   if (reg_first_set[regno] == NEVER_SET)
2044     reg_first_set[regno] = INSN_CUID (insn);
2045
2046   reg_last_set[regno] = INSN_CUID (insn);
2047   SET_BIT (reg_set_in_block[BLOCK_NUM (insn)], regno);
2048 }
2049
2050 /* Record memory first/last/block set information for INSN.  */
2051
2052 static void
2053 record_last_mem_set_info (insn)
2054      rtx insn;
2055 {
2056   if (mem_first_set == NEVER_SET)
2057     mem_first_set = INSN_CUID (insn);
2058
2059   mem_last_set = INSN_CUID (insn);
2060   mem_set_in_block[BLOCK_NUM (insn)] = 1;
2061 }
2062
2063 /* Called from compute_hash_table via note_stores to handle one
2064    SET or CLOBBER in an insn.  DATA is really the instruction in which
2065    the SET is taking place.  */
2066
2067 static void
2068 record_last_set_info (dest, setter, data)
2069      rtx dest, setter ATTRIBUTE_UNUSED;
2070      void *data;
2071 {
2072   rtx last_set_insn = (rtx) data;
2073
2074   if (GET_CODE (dest) == SUBREG)
2075     dest = SUBREG_REG (dest);
2076
2077   if (GET_CODE (dest) == REG)
2078     record_last_reg_set_info (last_set_insn, REGNO (dest));
2079   else if (GET_CODE (dest) == MEM
2080            /* Ignore pushes, they clobber nothing.  */
2081            && ! push_operand (dest, GET_MODE (dest)))
2082     record_last_mem_set_info (last_set_insn);
2083 }
2084
2085 /* Top level function to create an expression or assignment hash table.
2086
2087    Expression entries are placed in the hash table if
2088    - they are of the form (set (pseudo-reg) src),
2089    - src is something we want to perform GCSE on,
2090    - none of the operands are subsequently modified in the block
2091
2092    Assignment entries are placed in the hash table if
2093    - they are of the form (set (pseudo-reg) src),
2094    - src is something we want to perform const/copy propagation on,
2095    - none of the operands or target are subsequently modified in the block
2096
2097    Currently src must be a pseudo-reg or a const_int.
2098
2099    F is the first insn.
2100    SET_P is non-zero for computing the assignment hash table.  */
2101
2102 static void
2103 compute_hash_table (set_p)
2104      int set_p;
2105 {
2106   int bb;
2107
2108   /* While we compute the hash table we also compute a bit array of which
2109      registers are set in which blocks.
2110      We also compute which blocks set memory, in the absence of aliasing
2111      support [which is TODO].
2112      ??? This isn't needed during const/copy propagation, but it's cheap to
2113      compute.  Later.  */
2114   sbitmap_vector_zero (reg_set_in_block, n_basic_blocks);
2115   bzero ((char *) mem_set_in_block, n_basic_blocks);
2116
2117   /* Some working arrays used to track first and last set in each block.  */
2118   /* ??? One could use alloca here, but at some size a threshold is crossed
2119      beyond which one should use malloc.  Are we at that threshold here?  */
2120   reg_first_set = (int *) gmalloc (max_gcse_regno * sizeof (int));
2121   reg_last_set = (int *) gmalloc (max_gcse_regno * sizeof (int));
2122
2123   for (bb = 0; bb < n_basic_blocks; bb++)
2124     {
2125       rtx insn;
2126       int regno;
2127       int in_libcall_block;
2128       int i;
2129
2130       /* First pass over the instructions records information used to
2131          determine when registers and memory are first and last set.
2132          ??? The mem_set_in_block and hard-reg reg_set_in_block computation
2133          could be moved to compute_sets since they currently don't change.  */
2134
2135       for (i = 0; i < max_gcse_regno; i++)
2136         reg_first_set[i] = reg_last_set[i] = NEVER_SET;
2137       mem_first_set = NEVER_SET;
2138       mem_last_set = NEVER_SET;
2139
2140       for (insn = BLOCK_HEAD (bb);
2141            insn && insn != NEXT_INSN (BLOCK_END (bb));
2142            insn = NEXT_INSN (insn))
2143         {
2144 #ifdef NON_SAVING_SETJMP
2145           if (NON_SAVING_SETJMP && GET_CODE (insn) == NOTE
2146               && NOTE_LINE_NUMBER (insn) == NOTE_INSN_SETJMP)
2147             {
2148               for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2149                 record_last_reg_set_info (insn, regno);
2150               continue;
2151             }
2152 #endif
2153
2154           if (GET_RTX_CLASS (GET_CODE (insn)) != 'i')
2155             continue;
2156
2157           if (GET_CODE (insn) == CALL_INSN)
2158             {
2159               for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2160                 if ((call_used_regs[regno]
2161                      && regno != STACK_POINTER_REGNUM
2162 #if HARD_FRAME_POINTER_REGNUM != FRAME_POINTER_REGNUM
2163                      && regno != HARD_FRAME_POINTER_REGNUM
2164 #endif
2165 #if ARG_POINTER_REGNUM != FRAME_POINTER_REGNUM
2166                      && ! (regno == ARG_POINTER_REGNUM && fixed_regs[regno])
2167 #endif
2168 #if defined (PIC_OFFSET_TABLE_REGNUM) && !defined (PIC_OFFSET_TABLE_REG_CALL_CLOBBERED)
2169                      && ! (regno == PIC_OFFSET_TABLE_REGNUM && flag_pic)
2170 #endif
2171
2172                      && regno != FRAME_POINTER_REGNUM)
2173                     || global_regs[regno])
2174                   record_last_reg_set_info (insn, regno);
2175
2176               if (! CONST_CALL_P (insn))
2177                 record_last_mem_set_info (insn);
2178             }
2179
2180           note_stores (PATTERN (insn), record_last_set_info, insn);
2181         }
2182
2183       /* The next pass builds the hash table.  */
2184
2185       for (insn = BLOCK_HEAD (bb), in_libcall_block = 0;
2186            insn && insn != NEXT_INSN (BLOCK_END (bb));
2187            insn = NEXT_INSN (insn))
2188         if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
2189           {
2190             if (find_reg_note (insn, REG_LIBCALL, NULL_RTX))
2191               in_libcall_block = 1;
2192             else if (find_reg_note (insn, REG_RETVAL, NULL_RTX))
2193               in_libcall_block = 0;
2194             hash_scan_insn (insn, set_p, in_libcall_block);
2195         }
2196     }
2197
2198   free (reg_first_set);
2199   free (reg_last_set);
2200
2201   /* Catch bugs early.  */
2202   reg_first_set = reg_last_set = 0;
2203 }
2204
2205 /* Allocate space for the set hash table.
2206    N_INSNS is the number of instructions in the function.
2207    It is used to determine the number of buckets to use.  */
2208
2209 static void
2210 alloc_set_hash_table (n_insns)
2211      int n_insns;
2212 {
2213   int n;
2214
2215   set_hash_table_size = n_insns / 4;
2216   if (set_hash_table_size < 11)
2217     set_hash_table_size = 11;
2218
2219   /* Attempt to maintain efficient use of hash table.
2220      Making it an odd number is simplest for now.
2221      ??? Later take some measurements.  */
2222   set_hash_table_size |= 1;
2223   n = set_hash_table_size * sizeof (struct expr *);
2224   set_hash_table = (struct expr **) gmalloc (n);
2225 }
2226
2227 /* Free things allocated by alloc_set_hash_table.  */
2228
2229 static void
2230 free_set_hash_table ()
2231 {
2232   free (set_hash_table);
2233 }
2234
2235 /* Compute the hash table for doing copy/const propagation.  */
2236
2237 static void
2238 compute_set_hash_table ()
2239 {
2240   /* Initialize count of number of entries in hash table.  */
2241   n_sets = 0;
2242   bzero ((char *) set_hash_table,
2243          set_hash_table_size * sizeof (struct expr *));
2244
2245   compute_hash_table (1);
2246 }
2247
2248 /* Allocate space for the expression hash table.
2249    N_INSNS is the number of instructions in the function.
2250    It is used to determine the number of buckets to use.  */
2251
2252 static void
2253 alloc_expr_hash_table (n_insns)
2254      int n_insns;
2255 {
2256   int n;
2257
2258   expr_hash_table_size = n_insns / 2;
2259   /* Make sure the amount is usable.  */
2260   if (expr_hash_table_size < 11)
2261     expr_hash_table_size = 11;
2262
2263   /* Attempt to maintain efficient use of hash table.
2264      Making it an odd number is simplest for now.
2265      ??? Later take some measurements.  */
2266   expr_hash_table_size |= 1;
2267   n = expr_hash_table_size * sizeof (struct expr *);
2268   expr_hash_table = (struct expr **) gmalloc (n);
2269 }
2270
2271 /* Free things allocated by alloc_expr_hash_table.  */
2272
2273 static void
2274 free_expr_hash_table ()
2275 {
2276   free (expr_hash_table);
2277 }
2278
2279 /* Compute the hash table for doing GCSE.  */
2280
2281 static void
2282 compute_expr_hash_table ()
2283 {
2284   /* Initialize count of number of entries in hash table.  */
2285   n_exprs = 0;
2286   bzero ((char *) expr_hash_table,
2287          expr_hash_table_size * sizeof (struct expr *));
2288
2289   compute_hash_table (0);
2290 }
2291 \f
2292 /* Expression tracking support.  */
2293
2294 /* Lookup pattern PAT in the expression table.
2295    The result is a pointer to the table entry, or NULL if not found.  */
2296
2297 static struct expr *
2298 lookup_expr (pat)
2299      rtx pat;
2300 {
2301   int do_not_record_p;
2302   unsigned int hash = hash_expr (pat, GET_MODE (pat), &do_not_record_p,
2303                                  expr_hash_table_size);
2304   struct expr *expr;
2305
2306   if (do_not_record_p)
2307     return NULL;
2308
2309   expr = expr_hash_table[hash];
2310
2311   while (expr && ! expr_equiv_p (expr->expr, pat))
2312     expr = expr->next_same_hash;
2313
2314   return expr;
2315 }
2316
2317 /* Lookup REGNO in the set table.  If PAT is non-NULL look for the entry that
2318    matches it, otherwise return the first entry for REGNO.  The result is a
2319    pointer to the table entry, or NULL if not found.  */
2320
2321 static struct expr *
2322 lookup_set (regno, pat)
2323      int regno;
2324      rtx pat;
2325 {
2326   unsigned int hash = hash_set (regno, set_hash_table_size);
2327   struct expr *expr;
2328
2329   expr = set_hash_table[hash];
2330
2331   if (pat)
2332     {
2333       while (expr && ! expr_equiv_p (expr->expr, pat))
2334         expr = expr->next_same_hash;
2335     }
2336   else
2337     {
2338       while (expr && REGNO (SET_DEST (expr->expr)) != regno)
2339         expr = expr->next_same_hash;
2340     }
2341
2342   return expr;
2343 }
2344
2345 /* Return the next entry for REGNO in list EXPR.  */
2346
2347 static struct expr *
2348 next_set (regno, expr)
2349      int regno;
2350      struct expr *expr;
2351 {
2352   do
2353     expr = expr->next_same_hash;
2354   while (expr && REGNO (SET_DEST (expr->expr)) != regno);
2355
2356   return expr;
2357 }
2358
2359 /* Reset tables used to keep track of what's still available [since the
2360    start of the block].  */
2361
2362 static void
2363 reset_opr_set_tables ()
2364 {
2365   /* Maintain a bitmap of which regs have been set since beginning of
2366      the block.  */
2367   sbitmap_zero (reg_set_bitmap);
2368
2369   /* Also keep a record of the last instruction to modify memory.
2370      For now this is very trivial, we only record whether any memory
2371      location has been modified.  */
2372   mem_last_set = 0;
2373 }
2374
2375 /* Return non-zero if the operands of X are not set before INSN in
2376    INSN's basic block.  */
2377
2378 static int
2379 oprs_not_set_p (x, insn)
2380      rtx x, insn;
2381 {
2382   int i, j;
2383   enum rtx_code code;
2384   const char *fmt;
2385
2386   if (x == 0)
2387     return 1;
2388
2389   code = GET_CODE (x);
2390   switch (code)
2391     {
2392     case PC:
2393     case CC0:
2394     case CONST:
2395     case CONST_INT:
2396     case CONST_DOUBLE:
2397     case SYMBOL_REF:
2398     case LABEL_REF:
2399     case ADDR_VEC:
2400     case ADDR_DIFF_VEC:
2401       return 1;
2402
2403     case MEM:
2404       if (mem_last_set != 0)
2405         return 0;
2406       else
2407         return oprs_not_set_p (XEXP (x, 0), insn);
2408
2409     case REG:
2410       return ! TEST_BIT (reg_set_bitmap, REGNO (x));
2411
2412     default:
2413       break;
2414     }
2415
2416   for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
2417     {
2418       if (fmt[i] == 'e')
2419         {
2420           /* If we are about to do the last recursive call
2421              needed at this level, change it into iteration.
2422              This function is called enough to be worth it.  */
2423           if (i == 0)
2424             return oprs_not_set_p (XEXP (x, i), insn);
2425
2426           if (! oprs_not_set_p (XEXP (x, i), insn))
2427             return 0;
2428         }
2429       else if (fmt[i] == 'E')
2430         for (j = 0; j < XVECLEN (x, i); j++)
2431           if (! oprs_not_set_p (XVECEXP (x, i, j), insn))
2432             return 0;
2433     }
2434
2435   return 1;
2436 }
2437
2438 /* Mark things set by a CALL.  */
2439
2440 static void
2441 mark_call (insn)
2442      rtx insn;
2443 {
2444   mem_last_set = INSN_CUID (insn);
2445 }
2446
2447 /* Mark things set by a SET.  */
2448
2449 static void
2450 mark_set (pat, insn)
2451      rtx pat, insn;
2452 {
2453   rtx dest = SET_DEST (pat);
2454
2455   while (GET_CODE (dest) == SUBREG
2456          || GET_CODE (dest) == ZERO_EXTRACT
2457          || GET_CODE (dest) == SIGN_EXTRACT
2458          || GET_CODE (dest) == STRICT_LOW_PART)
2459     dest = XEXP (dest, 0);
2460
2461   if (GET_CODE (dest) == REG)
2462     SET_BIT (reg_set_bitmap, REGNO (dest));
2463   else if (GET_CODE (dest) == MEM)
2464     mem_last_set = INSN_CUID (insn);
2465
2466   if (GET_CODE (SET_SRC (pat)) == CALL)
2467     mark_call (insn);
2468 }
2469
2470 /* Record things set by a CLOBBER.  */
2471
2472 static void
2473 mark_clobber (pat, insn)
2474      rtx pat, insn;
2475 {
2476   rtx clob = XEXP (pat, 0);
2477
2478   while (GET_CODE (clob) == SUBREG || GET_CODE (clob) == STRICT_LOW_PART)
2479     clob = XEXP (clob, 0);
2480
2481   if (GET_CODE (clob) == REG)
2482     SET_BIT (reg_set_bitmap, REGNO (clob));
2483   else
2484     mem_last_set = INSN_CUID (insn);
2485 }
2486
2487 /* Record things set by INSN.
2488    This data is used by oprs_not_set_p.  */
2489
2490 static void
2491 mark_oprs_set (insn)
2492      rtx insn;
2493 {
2494   rtx pat = PATTERN (insn);
2495   int i;
2496
2497   if (GET_CODE (pat) == SET)
2498     mark_set (pat, insn);
2499   else if (GET_CODE (pat) == PARALLEL)
2500     for (i = 0; i < XVECLEN (pat, 0); i++)
2501       {
2502         rtx x = XVECEXP (pat, 0, i);
2503
2504         if (GET_CODE (x) == SET)
2505           mark_set (x, insn);
2506         else if (GET_CODE (x) == CLOBBER)
2507           mark_clobber (x, insn);
2508         else if (GET_CODE (x) == CALL)
2509           mark_call (insn);
2510       }
2511
2512   else if (GET_CODE (pat) == CLOBBER)
2513     mark_clobber (pat, insn);
2514   else if (GET_CODE (pat) == CALL)
2515     mark_call (insn);
2516 }
2517
2518 \f
2519 /* Classic GCSE reaching definition support.  */
2520
2521 /* Allocate reaching def variables.  */
2522
2523 static void
2524 alloc_rd_mem (n_blocks, n_insns)
2525      int n_blocks, n_insns;
2526 {
2527   rd_kill = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2528   sbitmap_vector_zero (rd_kill, n_basic_blocks);
2529
2530   rd_gen = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2531   sbitmap_vector_zero (rd_gen, n_basic_blocks);
2532
2533   reaching_defs = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2534   sbitmap_vector_zero (reaching_defs, n_basic_blocks);
2535
2536   rd_out = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2537   sbitmap_vector_zero (rd_out, n_basic_blocks);
2538 }
2539
2540 /* Free reaching def variables.  */
2541
2542 static void
2543 free_rd_mem ()
2544 {
2545   free (rd_kill);
2546   free (rd_gen);
2547   free (reaching_defs);
2548   free (rd_out);
2549 }
2550
2551 /* Add INSN to the kills of BB.  REGNO, set in BB, is killed by INSN.  */
2552
2553 static void
2554 handle_rd_kill_set (insn, regno, bb)
2555      rtx insn;
2556      int regno, bb;
2557 {
2558   struct reg_set *this_reg;
2559
2560   for (this_reg = reg_set_table[regno]; this_reg; this_reg = this_reg ->next)
2561     if (BLOCK_NUM (this_reg->insn) != BLOCK_NUM (insn))
2562       SET_BIT (rd_kill[bb], INSN_CUID (this_reg->insn));
2563 }
2564
2565 /* Compute the set of kill's for reaching definitions.  */
2566
2567 static void
2568 compute_kill_rd ()
2569 {
2570   int bb, cuid;
2571   int regno, i;
2572
2573   /* For each block
2574        For each set bit in `gen' of the block (i.e each insn which
2575            generates a definition in the block)
2576          Call the reg set by the insn corresponding to that bit regx
2577          Look at the linked list starting at reg_set_table[regx]
2578          For each setting of regx in the linked list, which is not in
2579              this block
2580            Set the bit in `kill' corresponding to that insn.   */
2581   for (bb = 0; bb < n_basic_blocks; bb++)
2582     for (cuid = 0; cuid < max_cuid; cuid++)
2583       if (TEST_BIT (rd_gen[bb], cuid))
2584         {
2585           rtx insn = CUID_INSN (cuid);
2586           rtx pat = PATTERN (insn);
2587
2588           if (GET_CODE (insn) == CALL_INSN)
2589             {
2590               for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2591                 {
2592                   if ((call_used_regs[regno]
2593                        && regno != STACK_POINTER_REGNUM
2594 #if HARD_FRAME_POINTER_REGNUM != FRAME_POINTER_REGNUM
2595                        && regno != HARD_FRAME_POINTER_REGNUM
2596 #endif
2597 #if ARG_POINTER_REGNUM != FRAME_POINTER_REGNUM
2598                        && ! (regno == ARG_POINTER_REGNUM
2599                              && fixed_regs[regno])
2600 #endif
2601 #if defined (PIC_OFFSET_TABLE_REGNUM) && !defined (PIC_OFFSET_TABLE_REG_CALL_CLOBBERED)
2602                        && ! (regno == PIC_OFFSET_TABLE_REGNUM && flag_pic)
2603 #endif
2604                        && regno != FRAME_POINTER_REGNUM)
2605                       || global_regs[regno])
2606                     handle_rd_kill_set (insn, regno, bb);
2607                 }
2608             }
2609
2610           if (GET_CODE (pat) == PARALLEL)
2611             {
2612               for (i = XVECLEN (pat, 0) - 1; i >= 0; i--)
2613                 {
2614                   enum rtx_code code = GET_CODE (XVECEXP (pat, 0, i));
2615
2616                   if ((code == SET || code == CLOBBER)
2617                       && GET_CODE (XEXP (XVECEXP (pat, 0, i), 0)) == REG)
2618                     handle_rd_kill_set (insn,
2619                                         REGNO (XEXP (XVECEXP (pat, 0, i), 0)),
2620                                         bb);
2621                 }
2622             }
2623           else if (GET_CODE (pat) == SET && GET_CODE (SET_DEST (pat)) == REG)
2624             /* Each setting of this register outside of this block
2625                must be marked in the set of kills in this block.  */
2626             handle_rd_kill_set (insn, REGNO (SET_DEST (pat)), bb);
2627         }
2628 }
2629
2630 /* Compute the reaching definitions as in
2631    Compilers Principles, Techniques, and Tools. Aho, Sethi, Ullman,
2632    Chapter 10.  It is the same algorithm as used for computing available
2633    expressions but applied to the gens and kills of reaching definitions.  */
2634
2635 static void
2636 compute_rd ()
2637 {
2638   int bb, changed, passes;
2639
2640   for (bb = 0; bb < n_basic_blocks; bb++)
2641     sbitmap_copy (rd_out[bb] /*dst*/, rd_gen[bb] /*src*/);
2642
2643   passes = 0;
2644   changed = 1;
2645   while (changed)
2646     {
2647       changed = 0;
2648       for (bb = 0; bb < n_basic_blocks; bb++)
2649         {
2650           sbitmap_union_of_preds (reaching_defs[bb], rd_out, bb);
2651           changed |= sbitmap_union_of_diff (rd_out[bb], rd_gen[bb],
2652                                             reaching_defs[bb], rd_kill[bb]);
2653         }
2654       passes++;
2655     }
2656
2657   if (gcse_file)
2658     fprintf (gcse_file, "reaching def computation: %d passes\n", passes);
2659 }
2660 \f
2661 /* Classic GCSE available expression support.  */
2662
2663 /* Allocate memory for available expression computation.  */
2664
2665 static void
2666 alloc_avail_expr_mem (n_blocks, n_exprs)
2667      int n_blocks, n_exprs;
2668 {
2669   ae_kill = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2670   sbitmap_vector_zero (ae_kill, n_basic_blocks);
2671
2672   ae_gen = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2673   sbitmap_vector_zero (ae_gen, n_basic_blocks);
2674
2675   ae_in = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2676   sbitmap_vector_zero (ae_in, n_basic_blocks);
2677
2678   ae_out = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2679   sbitmap_vector_zero (ae_out, n_basic_blocks);
2680
2681   u_bitmap = (sbitmap) sbitmap_alloc (n_exprs);
2682   sbitmap_ones (u_bitmap);
2683 }
2684
2685 static void
2686 free_avail_expr_mem ()
2687 {
2688   free (ae_kill);
2689   free (ae_gen);
2690   free (ae_in);
2691   free (ae_out);
2692   free (u_bitmap);
2693 }
2694
2695 /* Compute the set of available expressions generated in each basic block.  */
2696
2697 static void
2698 compute_ae_gen ()
2699 {
2700   int i;
2701   struct expr *expr;
2702   struct occr *occr;
2703
2704   /* For each recorded occurrence of each expression, set ae_gen[bb][expr].
2705      This is all we have to do because an expression is not recorded if it
2706      is not available, and the only expressions we want to work with are the
2707      ones that are recorded.  */
2708   for (i = 0; i < expr_hash_table_size; i++)
2709     for (expr = expr_hash_table[i]; expr != 0; expr = expr->next_same_hash)
2710       for (occr = expr->avail_occr; occr != 0; occr = occr->next)
2711         SET_BIT (ae_gen[BLOCK_NUM (occr->insn)], expr->bitmap_index);
2712 }
2713
2714 /* Return non-zero if expression X is killed in BB.  */
2715
2716 static int
2717 expr_killed_p (x, bb)
2718      rtx x;
2719      int bb;
2720 {
2721   int i, j;
2722   enum rtx_code code;
2723   const char *fmt;
2724
2725   if (x == 0)
2726     return 1;
2727
2728   code = GET_CODE (x);
2729   switch (code)
2730     {
2731     case REG:
2732       return TEST_BIT (reg_set_in_block[bb], REGNO (x));
2733
2734     case MEM:
2735       if (mem_set_in_block[bb])
2736         return 1;
2737       else
2738         return expr_killed_p (XEXP (x, 0), bb);
2739
2740     case PC:
2741     case CC0: /*FIXME*/
2742     case CONST:
2743     case CONST_INT:
2744     case CONST_DOUBLE:
2745     case SYMBOL_REF:
2746     case LABEL_REF:
2747     case ADDR_VEC:
2748     case ADDR_DIFF_VEC:
2749       return 0;
2750
2751     default:
2752       break;
2753     }
2754
2755   for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
2756     {
2757       if (fmt[i] == 'e')
2758         {
2759           /* If we are about to do the last recursive call
2760              needed at this level, change it into iteration.
2761              This function is called enough to be worth it.  */
2762           if (i == 0)
2763             return expr_killed_p (XEXP (x, i), bb);
2764           else if (expr_killed_p (XEXP (x, i), bb))
2765             return 1;
2766         }
2767       else if (fmt[i] == 'E')
2768         for (j = 0; j < XVECLEN (x, i); j++)
2769           if (expr_killed_p (XVECEXP (x, i, j), bb))
2770             return 1;
2771     }
2772
2773   return 0;
2774 }
2775
2776 /* Compute the set of available expressions killed in each basic block.  */
2777
2778 static void
2779 compute_ae_kill (ae_gen, ae_kill)
2780      sbitmap *ae_gen, *ae_kill;
2781 {
2782   int bb, i;
2783   struct expr *expr;
2784
2785   for (bb = 0; bb < n_basic_blocks; bb++)
2786     for (i = 0; i < expr_hash_table_size; i++)
2787       for (expr = expr_hash_table[i]; expr; expr = expr->next_same_hash)
2788         {
2789           /* Skip EXPR if generated in this block.  */
2790           if (TEST_BIT (ae_gen[bb], expr->bitmap_index))
2791             continue;
2792
2793           if (expr_killed_p (expr->expr, bb))
2794             SET_BIT (ae_kill[bb], expr->bitmap_index);
2795         }
2796 }
2797 \f
2798 /* Actually perform the Classic GCSE optimizations.  */
2799
2800 /* Return non-zero if occurrence OCCR of expression EXPR reaches block BB.
2801
2802    CHECK_SELF_LOOP is non-zero if we should consider a block reaching itself
2803    as a positive reach.  We want to do this when there are two computations
2804    of the expression in the block.
2805
2806    VISITED is a pointer to a working buffer for tracking which BB's have
2807    been visited.  It is NULL for the top-level call.
2808
2809    We treat reaching expressions that go through blocks containing the same
2810    reaching expression as "not reaching".  E.g. if EXPR is generated in blocks
2811    2 and 3, INSN is in block 4, and 2->3->4, we treat the expression in block
2812    2 as not reaching.  The intent is to improve the probability of finding
2813    only one reaching expression and to reduce register lifetimes by picking
2814    the closest such expression.  */
2815
2816 static int
2817 expr_reaches_here_p_work (occr, expr, bb, check_self_loop, visited)
2818      struct occr *occr;
2819      struct expr *expr;
2820      int bb;
2821      int check_self_loop;
2822      char *visited;
2823 {
2824   edge pred;
2825
2826   for (pred = BASIC_BLOCK(bb)->pred; pred != NULL; pred = pred->pred_next)
2827     {
2828       int pred_bb = pred->src->index;
2829
2830       if (visited[pred_bb])
2831         /* This predecessor has already been visited. Nothing to do.  */
2832           ;
2833       else if (pred_bb == bb)
2834         {
2835           /* BB loops on itself.  */
2836           if (check_self_loop
2837               && TEST_BIT (ae_gen[pred_bb], expr->bitmap_index)
2838               && BLOCK_NUM (occr->insn) == pred_bb)
2839             return 1;
2840
2841           visited[pred_bb] = 1;
2842         }
2843
2844       /* Ignore this predecessor if it kills the expression.  */
2845       else if (TEST_BIT (ae_kill[pred_bb], expr->bitmap_index))
2846         visited[pred_bb] = 1;
2847
2848       /* Does this predecessor generate this expression?  */
2849       else if (TEST_BIT (ae_gen[pred_bb], expr->bitmap_index))
2850         {
2851           /* Is this the occurrence we're looking for?
2852              Note that there's only one generating occurrence per block
2853              so we just need to check the block number.  */
2854           if (BLOCK_NUM (occr->insn) == pred_bb)
2855             return 1;
2856
2857           visited[pred_bb] = 1;
2858         }
2859
2860       /* Neither gen nor kill.  */
2861       else
2862         {
2863           visited[pred_bb] = 1;
2864           if (expr_reaches_here_p_work (occr, expr, pred_bb, check_self_loop,
2865               visited))
2866
2867             return 1;
2868         }
2869     }
2870
2871   /* All paths have been checked.  */
2872   return 0;
2873 }
2874
2875 /* This wrapper for expr_reaches_here_p_work() is to ensure that any
2876    memory allocated for that function is returned. */
2877
2878 static int
2879 expr_reaches_here_p (occr, expr, bb, check_self_loop)
2880      struct occr *occr;
2881      struct expr *expr;
2882      int bb;
2883      int check_self_loop;
2884 {
2885   int rval;
2886   char *visited = (char *) xcalloc (n_basic_blocks, 1);
2887
2888   rval = expr_reaches_here_p_work (occr, expr, bb, check_self_loop, visited);
2889
2890   free (visited);
2891   return rval;
2892 }
2893
2894 /* Return the instruction that computes EXPR that reaches INSN's basic block.
2895    If there is more than one such instruction, return NULL.
2896
2897    Called only by handle_avail_expr.  */
2898
2899 static rtx
2900 computing_insn (expr, insn)
2901      struct expr *expr;
2902      rtx insn;
2903 {
2904   int bb = BLOCK_NUM (insn);
2905
2906   if (expr->avail_occr->next == NULL)
2907     {
2908       if (BLOCK_NUM (expr->avail_occr->insn) == bb)
2909         /* The available expression is actually itself
2910            (i.e. a loop in the flow graph) so do nothing.  */
2911         return NULL;
2912
2913       /* (FIXME) Case that we found a pattern that was created by
2914          a substitution that took place.  */
2915       return expr->avail_occr->insn;
2916     }
2917   else
2918     {
2919       /* Pattern is computed more than once.
2920          Search backwards from this insn to see how many of these
2921          computations actually reach this insn.  */
2922       struct occr *occr;
2923       rtx insn_computes_expr = NULL;
2924       int can_reach = 0;
2925
2926       for (occr = expr->avail_occr; occr != NULL; occr = occr->next)
2927         {
2928           if (BLOCK_NUM (occr->insn) == bb)
2929             {
2930               /* The expression is generated in this block.
2931                  The only time we care about this is when the expression
2932                  is generated later in the block [and thus there's a loop].
2933                  We let the normal cse pass handle the other cases.  */
2934               if (INSN_CUID (insn) < INSN_CUID (occr->insn)
2935                   && expr_reaches_here_p (occr, expr, bb, 1))
2936                 {
2937                   can_reach++;
2938                   if (can_reach > 1)
2939                     return NULL;
2940
2941                   insn_computes_expr = occr->insn;
2942                 }
2943             }
2944           else if (expr_reaches_here_p (occr, expr, bb, 0))
2945             {
2946               can_reach++;
2947               if (can_reach > 1)
2948                 return NULL;
2949
2950               insn_computes_expr = occr->insn;
2951             }
2952         }
2953
2954       if (insn_computes_expr == NULL)
2955         abort ();
2956
2957       return insn_computes_expr;
2958     }
2959 }
2960
2961 /* Return non-zero if the definition in DEF_INSN can reach INSN.
2962    Only called by can_disregard_other_sets.  */
2963
2964 static int
2965 def_reaches_here_p (insn, def_insn)
2966      rtx insn, def_insn;
2967 {
2968   rtx reg;
2969
2970   if (TEST_BIT (reaching_defs[BLOCK_NUM (insn)], INSN_CUID (def_insn)))
2971     return 1;
2972
2973   if (BLOCK_NUM (insn) == BLOCK_NUM (def_insn))
2974     {
2975       if (INSN_CUID (def_insn) < INSN_CUID (insn))
2976         {
2977           if (GET_CODE (PATTERN (def_insn)) == PARALLEL)
2978             return 1;
2979           else if (GET_CODE (PATTERN (def_insn)) == CLOBBER)
2980             reg = XEXP (PATTERN (def_insn), 0);
2981           else if (GET_CODE (PATTERN (def_insn)) == SET)
2982             reg = SET_DEST (PATTERN (def_insn));
2983           else
2984             abort ();
2985
2986           return ! reg_set_between_p (reg, NEXT_INSN (def_insn), insn);
2987         }
2988       else
2989         return 0;
2990     }
2991
2992   return 0;
2993 }
2994
2995 /* Return non-zero if *ADDR_THIS_REG can only have one value at INSN.  The
2996    value returned is the number of definitions that reach INSN.  Returning a
2997    value of zero means that [maybe] more than one definition reaches INSN and
2998    the caller can't perform whatever optimization it is trying.  i.e. it is
2999    always safe to return zero.  */
3000
3001 static int
3002 can_disregard_other_sets (addr_this_reg, insn, for_combine)
3003      struct reg_set **addr_this_reg;
3004      rtx insn;
3005      int for_combine;
3006 {
3007   int number_of_reaching_defs = 0;
3008   struct reg_set *this_reg;
3009
3010   for (this_reg = *addr_this_reg; this_reg != 0; this_reg = this_reg->next)
3011     if (def_reaches_here_p (insn, this_reg->insn))
3012       {
3013         number_of_reaching_defs++;
3014         /* Ignore parallels for now.  */
3015         if (GET_CODE (PATTERN (this_reg->insn)) == PARALLEL)
3016           return 0;
3017
3018         if (!for_combine
3019             && (GET_CODE (PATTERN (this_reg->insn)) == CLOBBER
3020                 || ! rtx_equal_p (SET_SRC (PATTERN (this_reg->insn)),
3021                                   SET_SRC (PATTERN (insn)))))
3022           /* A setting of the reg to a different value reaches INSN.  */
3023           return 0;
3024
3025         if (number_of_reaching_defs > 1)
3026           {
3027             /* If in this setting the value the register is being set to is
3028                equal to the previous value the register was set to and this
3029                setting reaches the insn we are trying to do the substitution
3030                on then we are ok.  */
3031             if (GET_CODE (PATTERN (this_reg->insn)) == CLOBBER)
3032               return 0;
3033             else if (! rtx_equal_p (SET_SRC (PATTERN (this_reg->insn)),
3034                                     SET_SRC (PATTERN (insn))))
3035               return 0;
3036           }
3037
3038         *addr_this_reg = this_reg;
3039       }
3040
3041   return number_of_reaching_defs;
3042 }
3043
3044 /* Expression computed by insn is available and the substitution is legal,
3045    so try to perform the substitution.
3046
3047    The result is non-zero if any changes were made.  */
3048
3049 static int
3050 handle_avail_expr (insn, expr)
3051      rtx insn;
3052      struct expr *expr;
3053 {
3054   rtx pat, insn_computes_expr;
3055   rtx to;
3056   struct reg_set *this_reg;
3057   int found_setting, use_src;
3058   int changed = 0;
3059
3060   /* We only handle the case where one computation of the expression
3061      reaches this instruction.  */
3062   insn_computes_expr = computing_insn (expr, insn);
3063   if (insn_computes_expr == NULL)
3064     return 0;
3065
3066   found_setting = 0;
3067   use_src = 0;
3068
3069   /* At this point we know only one computation of EXPR outside of this
3070      block reaches this insn.  Now try to find a register that the
3071      expression is computed into.  */
3072   if (GET_CODE (SET_SRC (PATTERN (insn_computes_expr))) == REG)
3073     {
3074       /* This is the case when the available expression that reaches
3075          here has already been handled as an available expression.  */
3076       int regnum_for_replacing
3077         = REGNO (SET_SRC (PATTERN (insn_computes_expr)));
3078
3079       /* If the register was created by GCSE we can't use `reg_set_table',
3080          however we know it's set only once.  */
3081       if (regnum_for_replacing >= max_gcse_regno
3082           /* If the register the expression is computed into is set only once,
3083              or only one set reaches this insn, we can use it.  */
3084           || (((this_reg = reg_set_table[regnum_for_replacing]),
3085                this_reg->next == NULL)
3086               || can_disregard_other_sets (&this_reg, insn, 0)))
3087        {
3088          use_src = 1;
3089          found_setting = 1;
3090        }
3091     }
3092
3093   if (!found_setting)
3094     {
3095       int regnum_for_replacing
3096         = REGNO (SET_DEST (PATTERN (insn_computes_expr)));
3097
3098       /* This shouldn't happen.  */
3099       if (regnum_for_replacing >= max_gcse_regno)
3100         abort ();
3101
3102       this_reg = reg_set_table[regnum_for_replacing];
3103
3104       /* If the register the expression is computed into is set only once,
3105          or only one set reaches this insn, use it.  */
3106       if (this_reg->next == NULL
3107           || can_disregard_other_sets (&this_reg, insn, 0))
3108         found_setting = 1;
3109     }
3110
3111   if (found_setting)
3112     {
3113       pat = PATTERN (insn);
3114       if (use_src)
3115         to = SET_SRC (PATTERN (insn_computes_expr));
3116       else
3117         to = SET_DEST (PATTERN (insn_computes_expr));
3118       changed = validate_change (insn, &SET_SRC (pat), to, 0);
3119
3120       /* We should be able to ignore the return code from validate_change but
3121          to play it safe we check.  */
3122       if (changed)
3123         {
3124           gcse_subst_count++;
3125           if (gcse_file != NULL)
3126             {
3127               fprintf (gcse_file, "GCSE: Replacing the source in insn %d with",
3128                        INSN_UID (insn));
3129               fprintf (gcse_file, " reg %d %s insn %d\n",
3130                        REGNO (to), use_src ? "from" : "set in",
3131                        INSN_UID (insn_computes_expr));
3132             }
3133         }
3134     }
3135
3136   /* The register that the expr is computed into is set more than once.  */
3137   else if (1 /*expensive_op(this_pattrn->op) && do_expensive_gcse)*/)
3138     {
3139       /* Insert an insn after insnx that copies the reg set in insnx
3140          into a new pseudo register call this new register REGN.
3141          From insnb until end of basic block or until REGB is set
3142          replace all uses of REGB with REGN.  */
3143       rtx new_insn;
3144
3145       to = gen_reg_rtx (GET_MODE (SET_DEST (PATTERN (insn_computes_expr))));
3146
3147       /* Generate the new insn.  */
3148       /* ??? If the change fails, we return 0, even though we created
3149          an insn.  I think this is ok.  */
3150       new_insn
3151         = emit_insn_after (gen_rtx_SET (VOIDmode, to,
3152                                         SET_DEST (PATTERN
3153                                                   (insn_computes_expr))),
3154                            insn_computes_expr);
3155
3156       /* Keep block number table up to date.  */
3157       set_block_num (new_insn, BLOCK_NUM (insn_computes_expr));
3158
3159       /* Keep register set table up to date.  */
3160       record_one_set (REGNO (to), new_insn);
3161
3162       gcse_create_count++;
3163       if (gcse_file != NULL)
3164         {
3165           fprintf (gcse_file, "GCSE: Creating insn %d to copy value of reg %d",
3166                    INSN_UID (NEXT_INSN (insn_computes_expr)),
3167                    REGNO (SET_SRC (PATTERN (NEXT_INSN (insn_computes_expr)))));
3168           fprintf (gcse_file, ", computed in insn %d,\n",
3169                    INSN_UID (insn_computes_expr));
3170           fprintf (gcse_file, "      into newly allocated reg %d\n",
3171                    REGNO (to));
3172         }
3173
3174       pat = PATTERN (insn);
3175
3176       /* Do register replacement for INSN.  */
3177       changed = validate_change (insn, &SET_SRC (pat),
3178                                  SET_DEST (PATTERN
3179                                            (NEXT_INSN (insn_computes_expr))),
3180                                  0);
3181
3182       /* We should be able to ignore the return code from validate_change but
3183          to play it safe we check.  */
3184       if (changed)
3185         {
3186           gcse_subst_count++;
3187           if (gcse_file != NULL)
3188             {
3189               fprintf (gcse_file,
3190                        "GCSE: Replacing the source in insn %d with reg %d ",
3191                        INSN_UID (insn),
3192                        REGNO (SET_DEST (PATTERN (NEXT_INSN
3193                                                  (insn_computes_expr)))));
3194               fprintf (gcse_file, "set in insn %d\n",
3195                        INSN_UID (insn_computes_expr));
3196             }
3197         }
3198     }
3199
3200   return changed;
3201 }
3202
3203 /* Perform classic GCSE.  This is called by one_classic_gcse_pass after all
3204    the dataflow analysis has been done.
3205
3206    The result is non-zero if a change was made.  */
3207
3208 static int
3209 classic_gcse ()
3210 {
3211   int bb, changed;
3212   rtx insn;
3213
3214   /* Note we start at block 1.  */
3215
3216   changed = 0;
3217   for (bb = 1; bb < n_basic_blocks; bb++)
3218     {
3219       /* Reset tables used to keep track of what's still valid [since the
3220          start of the block].  */
3221       reset_opr_set_tables ();
3222
3223       for (insn = BLOCK_HEAD (bb);
3224            insn != NULL && insn != NEXT_INSN (BLOCK_END (bb));
3225            insn = NEXT_INSN (insn))
3226         {
3227           /* Is insn of form (set (pseudo-reg) ...)?  */
3228           if (GET_CODE (insn) == INSN
3229               && GET_CODE (PATTERN (insn)) == SET
3230               && GET_CODE (SET_DEST (PATTERN (insn))) == REG
3231               && REGNO (SET_DEST (PATTERN (insn))) >= FIRST_PSEUDO_REGISTER)
3232             {
3233               rtx pat = PATTERN (insn);
3234               rtx src = SET_SRC (pat);
3235               struct expr *expr;
3236
3237               if (want_to_gcse_p (src)
3238                   /* Is the expression recorded?  */
3239                   && ((expr = lookup_expr (src)) != NULL)
3240                   /* Is the expression available [at the start of the
3241                      block]?  */
3242                   && TEST_BIT (ae_in[bb], expr->bitmap_index)
3243                   /* Are the operands unchanged since the start of the
3244                      block?  */
3245                   && oprs_not_set_p (src, insn))
3246                 changed |= handle_avail_expr (insn, expr);
3247             }
3248
3249           /* Keep track of everything modified by this insn.  */
3250           /* ??? Need to be careful w.r.t. mods done to INSN.  */
3251           if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
3252             mark_oprs_set (insn);
3253         }
3254     }
3255
3256   return changed;
3257 }
3258
3259 /* Top level routine to perform one classic GCSE pass.
3260
3261    Return non-zero if a change was made.  */
3262
3263 static int
3264 one_classic_gcse_pass (pass)
3265      int pass;
3266 {
3267   int changed = 0;
3268
3269   gcse_subst_count = 0;
3270   gcse_create_count = 0;
3271
3272   alloc_expr_hash_table (max_cuid);
3273   alloc_rd_mem (n_basic_blocks, max_cuid);
3274   compute_expr_hash_table ();
3275   if (gcse_file)
3276     dump_hash_table (gcse_file, "Expression", expr_hash_table,
3277                      expr_hash_table_size, n_exprs);
3278
3279   if (n_exprs > 0)
3280     {
3281       compute_kill_rd ();
3282       compute_rd ();
3283       alloc_avail_expr_mem (n_basic_blocks, n_exprs);
3284       compute_ae_gen ();
3285       compute_ae_kill (ae_gen, ae_kill);
3286       compute_available (ae_gen, ae_kill, ae_out, ae_in);
3287       changed = classic_gcse ();
3288       free_avail_expr_mem ();
3289     }
3290
3291   free_rd_mem ();
3292   free_expr_hash_table ();
3293
3294   if (gcse_file)
3295     {
3296       fprintf (gcse_file, "\n");
3297       fprintf (gcse_file, "GCSE of %s, pass %d: %d bytes needed, %d substs,",
3298                current_function_name, pass, bytes_used, gcse_subst_count);
3299       fprintf (gcse_file, "%d insns created\n", gcse_create_count);
3300     }
3301
3302   return changed;
3303 }
3304 \f
3305 /* Compute copy/constant propagation working variables.  */
3306
3307 /* Local properties of assignments.  */
3308 static sbitmap *cprop_pavloc;
3309 static sbitmap *cprop_absaltered;
3310
3311 /* Global properties of assignments (computed from the local properties).  */
3312 static sbitmap *cprop_avin;
3313 static sbitmap *cprop_avout;
3314
3315 /* Allocate vars used for copy/const propagation.  N_BLOCKS is the number of
3316    basic blocks.  N_SETS is the number of sets.  */
3317
3318 static void
3319 alloc_cprop_mem (n_blocks, n_sets)
3320      int n_blocks, n_sets;
3321 {
3322   cprop_pavloc = sbitmap_vector_alloc (n_blocks, n_sets);
3323   cprop_absaltered = sbitmap_vector_alloc (n_blocks, n_sets);
3324
3325   cprop_avin = sbitmap_vector_alloc (n_blocks, n_sets);
3326   cprop_avout = sbitmap_vector_alloc (n_blocks, n_sets);
3327 }
3328
3329 /* Free vars used by copy/const propagation.  */
3330
3331 static void
3332 free_cprop_mem ()
3333 {
3334   free (cprop_pavloc);
3335   free (cprop_absaltered);
3336   free (cprop_avin);
3337   free (cprop_avout);
3338 }
3339
3340 /* For each block, compute whether X is transparent.  X is either an
3341    expression or an assignment [though we don't care which, for this context
3342    an assignment is treated as an expression].  For each block where an
3343    element of X is modified, set (SET_P == 1) or reset (SET_P == 0) the INDX
3344    bit in BMAP.  */
3345
3346 static void
3347 compute_transp (x, indx, bmap, set_p)
3348      rtx x;
3349      int indx;
3350      sbitmap *bmap;
3351      int set_p;
3352 {
3353   int bb, i, j;
3354   enum rtx_code code;
3355   reg_set *r;
3356   const char *fmt;
3357
3358   /* repeat is used to turn tail-recursion into iteration since GCC
3359      can't do it when there's no return value.  */
3360  repeat:
3361
3362   if (x == 0)
3363     return;
3364
3365   code = GET_CODE (x);
3366   switch (code)
3367     {
3368     case REG:
3369       if (set_p)
3370         {
3371           if (REGNO (x) < FIRST_PSEUDO_REGISTER)
3372             {
3373               for (bb = 0; bb < n_basic_blocks; bb++)
3374                 if (TEST_BIT (reg_set_in_block[bb], REGNO (x)))
3375                   SET_BIT (bmap[bb], indx);
3376             }
3377           else
3378             {
3379               for (r = reg_set_table[REGNO (x)]; r != NULL; r = r->next)
3380                 SET_BIT (bmap[BLOCK_NUM (r->insn)], indx);
3381             }
3382         }
3383       else
3384         {
3385           if (REGNO (x) < FIRST_PSEUDO_REGISTER)
3386             {
3387               for (bb = 0; bb < n_basic_blocks; bb++)
3388                 if (TEST_BIT (reg_set_in_block[bb], REGNO (x)))
3389                   RESET_BIT (bmap[bb], indx);
3390             }
3391           else
3392             {
3393               for (r = reg_set_table[REGNO (x)]; r != NULL; r = r->next)
3394                 RESET_BIT (bmap[BLOCK_NUM (r->insn)], indx);
3395             }
3396         }
3397
3398       return;
3399
3400     case MEM:
3401       if (set_p)
3402         {
3403           for (bb = 0; bb < n_basic_blocks; bb++)
3404             if (mem_set_in_block[bb])
3405               SET_BIT (bmap[bb], indx);
3406         }
3407       else
3408         {
3409           for (bb = 0; bb < n_basic_blocks; bb++)
3410             if (mem_set_in_block[bb])
3411               RESET_BIT (bmap[bb], indx);
3412         }
3413
3414       x = XEXP (x, 0);
3415       goto repeat;
3416
3417     case PC:
3418     case CC0: /*FIXME*/
3419     case CONST:
3420     case CONST_INT:
3421     case CONST_DOUBLE:
3422     case SYMBOL_REF:
3423     case LABEL_REF:
3424     case ADDR_VEC:
3425     case ADDR_DIFF_VEC:
3426       return;
3427
3428     default:
3429       break;
3430     }
3431
3432   for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
3433     {
3434       if (fmt[i] == 'e')
3435         {
3436           /* If we are about to do the last recursive call
3437              needed at this level, change it into iteration.
3438              This function is called enough to be worth it.  */
3439           if (i == 0)
3440             {
3441               x = XEXP (x, i);
3442               goto repeat;
3443             }
3444
3445           compute_transp (XEXP (x, i), indx, bmap, set_p);
3446         }
3447       else if (fmt[i] == 'E')
3448         for (j = 0; j < XVECLEN (x, i); j++)
3449           compute_transp (XVECEXP (x, i, j), indx, bmap, set_p);
3450     }
3451 }
3452
3453 /* Top level routine to do the dataflow analysis needed by copy/const
3454    propagation.  */
3455
3456 static void
3457 compute_cprop_data ()
3458 {
3459   compute_local_properties (cprop_absaltered, cprop_pavloc, NULL, 1);
3460   compute_available (cprop_pavloc, cprop_absaltered,
3461                      cprop_avout, cprop_avin);
3462 }
3463 \f
3464 /* Copy/constant propagation.  */
3465
3466 /* Maximum number of register uses in an insn that we handle.  */
3467 #define MAX_USES 8
3468
3469 /* Table of uses found in an insn.
3470    Allocated statically to avoid alloc/free complexity and overhead.  */
3471 static struct reg_use reg_use_table[MAX_USES];
3472
3473 /* Index into `reg_use_table' while building it.  */
3474 static int reg_use_count;
3475
3476 /* Set up a list of register numbers used in INSN.  The found uses are stored
3477    in `reg_use_table'.  `reg_use_count' is initialized to zero before entry,
3478    and contains the number of uses in the table upon exit.
3479
3480    ??? If a register appears multiple times we will record it multiple times.
3481    This doesn't hurt anything but it will slow things down.  */
3482
3483 static void
3484 find_used_regs (x)
3485      rtx x;
3486 {
3487   int i, j;
3488   enum rtx_code code;
3489   const char *fmt;
3490
3491   /* repeat is used to turn tail-recursion into iteration since GCC
3492      can't do it when there's no return value.  */
3493  repeat:
3494
3495   if (x == 0)
3496     return;
3497
3498   code = GET_CODE (x);
3499   switch (code)
3500     {
3501     case REG:
3502       if (reg_use_count == MAX_USES)
3503         return;
3504
3505       reg_use_table[reg_use_count].reg_rtx = x;
3506       reg_use_count++;
3507       return;
3508
3509     case MEM:
3510       x = XEXP (x, 0);
3511       goto repeat;
3512
3513     case PC:
3514     case CC0:
3515     case CONST:
3516     case CONST_INT:
3517     case CONST_DOUBLE:
3518     case SYMBOL_REF:
3519     case LABEL_REF:
3520     case CLOBBER:
3521     case ADDR_VEC:
3522     case ADDR_DIFF_VEC:
3523     case ASM_INPUT: /*FIXME*/
3524       return;
3525
3526     case SET:
3527       if (GET_CODE (SET_DEST (x)) == MEM)
3528         find_used_regs (SET_DEST (x));
3529       x = SET_SRC (x);
3530       goto repeat;
3531
3532     default:
3533       break;
3534     }
3535
3536   /* Recursively scan the operands of this expression.  */
3537
3538   for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
3539     {
3540       if (fmt[i] == 'e')
3541         {
3542           /* If we are about to do the last recursive call
3543              needed at this level, change it into iteration.
3544              This function is called enough to be worth it.  */
3545           if (i == 0)
3546             {
3547               x = XEXP (x, 0);
3548               goto repeat;
3549             }
3550
3551           find_used_regs (XEXP (x, i));
3552         }
3553       else if (fmt[i] == 'E')
3554         for (j = 0; j < XVECLEN (x, i); j++)
3555           find_used_regs (XVECEXP (x, i, j));
3556     }
3557 }
3558
3559 /* Try to replace all non-SET_DEST occurrences of FROM in INSN with TO.
3560    Returns non-zero is successful.  */
3561
3562 static int
3563 try_replace_reg (from, to, insn)
3564      rtx from, to, insn;
3565 {
3566   rtx note;
3567   rtx src;
3568   int success;
3569   rtx set;
3570
3571   note = find_reg_note (insn, REG_EQUAL, NULL_RTX);
3572
3573   if (!note)
3574     note = find_reg_note (insn, REG_EQUIV, NULL_RTX);
3575
3576   /* If this fails we could try to simplify the result of the
3577      replacement and attempt to recognize the simplified insn.
3578
3579      But we need a general simplify_rtx that doesn't have pass
3580      specific state variables.  I'm not aware of one at the moment.  */
3581
3582   success = validate_replace_src (from, to, insn);
3583   set = single_set (insn);
3584
3585   /* We've failed to do replacement. Try to add REG_EQUAL note to not loose
3586      information.  */
3587   if (!success && !note)
3588     {
3589       if (!set)
3590         return 0;
3591
3592       note = REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_EQUAL,
3593                                                    copy_rtx (SET_SRC (set)),
3594                                                    REG_NOTES (insn));
3595     }
3596
3597   /* Always do the replacement in REQ_EQUAL and REG_EQUIV notes.  Also
3598      try to simplify them.  */
3599   if (note)
3600     {
3601       rtx simplified;
3602
3603       src = XEXP (note, 0);
3604       replace_rtx (src, from, to);
3605
3606       /* Try to simplify resulting note. */
3607       simplified = simplify_rtx (src);
3608       if (simplified)
3609         {
3610           src = simplified;
3611           XEXP (note, 0) = src;
3612         }
3613
3614       /* REG_EQUAL may get simplified into register.
3615          We don't allow that. Remove that note. This code ought
3616          not to hapen, because previous code ought to syntetize
3617          reg-reg move, but be on the safe side.  */
3618       else if (REG_P (src))
3619         remove_note (insn, note);
3620     }
3621   return success;
3622 }
3623
3624 /* Find a set of REGNOs that are available on entry to INSN's block.  Returns
3625    NULL no such set is found.  */
3626
3627 static struct expr *
3628 find_avail_set (regno, insn)
3629      int regno;
3630      rtx insn;
3631 {
3632   /* SET1 contains the last set found that can be returned to the caller for
3633      use in a substitution.  */
3634   struct expr *set1 = 0;
3635
3636   /* Loops are not possible here.  To get a loop we would need two sets
3637      available at the start of the block containing INSN.  ie we would
3638      need two sets like this available at the start of the block:
3639
3640        (set (reg X) (reg Y))
3641        (set (reg Y) (reg X))
3642
3643      This can not happen since the set of (reg Y) would have killed the
3644      set of (reg X) making it unavailable at the start of this block.  */
3645   while (1)
3646      {
3647       rtx src;
3648       struct expr *set = lookup_set (regno, NULL_RTX);
3649
3650       /* Find a set that is available at the start of the block
3651          which contains INSN.  */
3652       while (set)
3653         {
3654           if (TEST_BIT (cprop_avin[BLOCK_NUM (insn)], set->bitmap_index))
3655             break;
3656           set = next_set (regno, set);
3657         }
3658
3659       /* If no available set was found we've reached the end of the
3660          (possibly empty) copy chain.  */
3661       if (set == 0)
3662         break;
3663
3664       if (GET_CODE (set->expr) != SET)
3665         abort ();
3666
3667       src = SET_SRC (set->expr);
3668
3669       /* We know the set is available.
3670          Now check that SRC is ANTLOC (i.e. none of the source operands
3671          have changed since the start of the block).
3672
3673          If the source operand changed, we may still use it for the next
3674          iteration of this loop, but we may not use it for substitutions.  */
3675
3676       if (CONSTANT_P (src) || oprs_not_set_p (src, insn))
3677         set1 = set;
3678
3679       /* If the source of the set is anything except a register, then
3680          we have reached the end of the copy chain.  */
3681       if (GET_CODE (src) != REG)
3682         break;
3683
3684       /* Follow the copy chain, ie start another iteration of the loop
3685          and see if we have an available copy into SRC.  */
3686       regno = REGNO (src);
3687      }
3688
3689   /* SET1 holds the last set that was available and anticipatable at
3690      INSN.  */
3691   return set1;
3692 }
3693
3694 /* Subroutine of cprop_insn that tries to propagate constants into
3695    JUMP_INSNS.  INSN must be a conditional jump; COPY is a copy of it
3696    that we can use for substitutions.
3697    REG_USED is the use we will try to replace, SRC is the constant we
3698    will try to substitute for it.
3699    Returns nonzero if a change was made.  */
3700
3701 static int
3702 cprop_jump (insn, copy, reg_used, src)
3703      rtx insn, copy;
3704      struct reg_use *reg_used;
3705      rtx src;
3706 {
3707   rtx set = PATTERN (copy);
3708   rtx temp;
3709
3710   /* Replace the register with the appropriate constant.  */
3711   replace_rtx (SET_SRC (set), reg_used->reg_rtx, src);
3712
3713   temp = simplify_ternary_operation (GET_CODE (SET_SRC (set)),
3714                                      GET_MODE (SET_SRC (set)),
3715                                      GET_MODE (XEXP (SET_SRC (set), 0)),
3716                                      XEXP (SET_SRC (set), 0),
3717                                      XEXP (SET_SRC (set), 1),
3718                                      XEXP (SET_SRC (set), 2));
3719
3720   /* If no simplification can be made, then try the next
3721      register.  */
3722   if (temp == 0)
3723     return 0;
3724
3725   SET_SRC (set) = temp;
3726
3727   /* That may have changed the structure of TEMP, so
3728      force it to be rerecognized if it has not turned
3729      into a nop or unconditional jump.  */
3730
3731   INSN_CODE (copy) = -1;
3732   if ((SET_DEST (set) == pc_rtx
3733        && (SET_SRC (set) == pc_rtx
3734            || GET_CODE (SET_SRC (set)) == LABEL_REF))
3735       || recog (PATTERN (copy), copy, NULL) >= 0)
3736     {
3737       /* This has either become an unconditional jump
3738          or a nop-jump.  We'd like to delete nop jumps
3739          here, but doing so confuses gcse.  So we just
3740          make the replacement and let later passes
3741          sort things out.  */
3742       PATTERN (insn) = set;
3743       INSN_CODE (insn) = -1;
3744
3745       /* One less use of the label this insn used to jump to
3746          if we turned this into a NOP jump.  */
3747       if (SET_SRC (set) == pc_rtx && JUMP_LABEL (insn) != 0)
3748         --LABEL_NUSES (JUMP_LABEL (insn));
3749
3750       /* If this has turned into an unconditional jump,
3751          then put a barrier after it so that the unreachable
3752          code will be deleted.  */
3753       if (GET_CODE (SET_SRC (set)) == LABEL_REF)
3754         emit_barrier_after (insn);
3755
3756       run_jump_opt_after_gcse = 1;
3757
3758       const_prop_count++;
3759       if (gcse_file != NULL)
3760         {
3761           fprintf (gcse_file,
3762                    "CONST-PROP: Replacing reg %d in insn %d with constant ",
3763                    REGNO (reg_used->reg_rtx), INSN_UID (insn));
3764           print_rtl (gcse_file, src);
3765           fprintf (gcse_file, "\n");
3766         }
3767
3768       return 1;
3769     }
3770   return 0;
3771 }
3772
3773 #ifdef HAVE_cc0
3774
3775 /* Subroutine of cprop_insn that tries to propagate constants into JUMP_INSNS
3776    for machines that have CC0.  INSN is a single set that stores into CC0;
3777    the insn following it is a conditional jump.  REG_USED is the use we will
3778    try to replace, SRC is the constant we will try to substitute for it.
3779    Returns nonzero if a change was made.  */
3780
3781 static int
3782 cprop_cc0_jump (insn, reg_used, src)
3783      rtx insn;
3784      struct reg_use *reg_used;
3785      rtx src;
3786 {
3787   rtx jump = NEXT_INSN (insn);
3788   rtx copy = copy_rtx (jump);
3789   rtx set = PATTERN (copy);
3790
3791   /* We need to copy the source of the cc0 setter, as cprop_jump is going to
3792      substitute into it.  */
3793   replace_rtx (SET_SRC (set), cc0_rtx, copy_rtx (SET_SRC (PATTERN (insn))));
3794   if (! cprop_jump (jump, copy, reg_used, src))
3795     return 0;
3796
3797   /* If we succeeded, delete the cc0 setter.  */
3798   PUT_CODE (insn, NOTE);
3799   NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
3800   NOTE_SOURCE_FILE (insn) = 0;
3801   return 1;
3802  }
3803 #endif
3804
3805 /* Perform constant and copy propagation on INSN.
3806    The result is non-zero if a change was made.  */
3807
3808 static int
3809 cprop_insn (insn, alter_jumps)
3810      rtx insn;
3811      int alter_jumps;
3812 {
3813   struct reg_use *reg_used;
3814   int changed = 0;
3815   rtx note;
3816
3817   /* Only propagate into SETs.  Note that a conditional jump is a
3818      SET with pc_rtx as the destination.  */
3819   if ((GET_CODE (insn) != INSN
3820        && GET_CODE (insn) != JUMP_INSN)
3821       || GET_CODE (PATTERN (insn)) != SET)
3822     return 0;
3823
3824   reg_use_count = 0;
3825   find_used_regs (PATTERN (insn));
3826
3827   note = find_reg_note (insn, REG_EQUIV, NULL_RTX);
3828   if (!note)
3829     note = find_reg_note (insn, REG_EQUAL, NULL_RTX);
3830
3831   /* We may win even when propagating constants into notes. */
3832   if (note)
3833     find_used_regs (XEXP (note, 0));
3834
3835   for (reg_used = &reg_use_table[0]; reg_use_count > 0;
3836        reg_used++, reg_use_count--)
3837     {
3838       int regno = REGNO (reg_used->reg_rtx);
3839       rtx pat, src;
3840       struct expr *set;
3841
3842       /* Ignore registers created by GCSE.
3843          We do this because ... */
3844       if (regno >= max_gcse_regno)
3845         continue;
3846
3847       /* If the register has already been set in this block, there's
3848          nothing we can do.  */
3849       if (! oprs_not_set_p (reg_used->reg_rtx, insn))
3850         continue;
3851
3852       /* Find an assignment that sets reg_used and is available
3853          at the start of the block.  */
3854       set = find_avail_set (regno, insn);
3855       if (! set)
3856         continue;
3857
3858       pat = set->expr;
3859       /* ??? We might be able to handle PARALLELs.  Later.  */
3860       if (GET_CODE (pat) != SET)
3861         abort ();
3862
3863       src = SET_SRC (pat);
3864
3865       /* Constant propagation.  */
3866       if (GET_CODE (src) == CONST_INT || GET_CODE (src) == CONST_DOUBLE
3867           || GET_CODE (src) == SYMBOL_REF)
3868         {
3869           /* Handle normal insns first.  */
3870           if (GET_CODE (insn) == INSN
3871               && try_replace_reg (reg_used->reg_rtx, src, insn))
3872             {
3873               changed = 1;
3874               const_prop_count++;
3875               if (gcse_file != NULL)
3876                 {
3877                   fprintf (gcse_file, "CONST-PROP: Replacing reg %d in ",
3878                            regno);
3879                   fprintf (gcse_file, "insn %d with constant ",
3880                            INSN_UID (insn));
3881                   print_rtl (gcse_file, src);
3882                   fprintf (gcse_file, "\n");
3883                 }
3884
3885               /* The original insn setting reg_used may or may not now be
3886                  deletable.  We leave the deletion to flow.  */
3887             }
3888
3889           /* Try to propagate a CONST_INT into a conditional jump.
3890              We're pretty specific about what we will handle in this
3891              code, we can extend this as necessary over time.
3892
3893              Right now the insn in question must look like
3894              (set (pc) (if_then_else ...))  */
3895           else if (alter_jumps
3896                    && GET_CODE (insn) == JUMP_INSN
3897                    && condjump_p (insn)
3898                    && ! simplejump_p (insn))
3899             changed |= cprop_jump (insn, copy_rtx (insn), reg_used, src);
3900 #ifdef HAVE_cc0
3901           /* Similar code for machines that use a pair of CC0 setter and
3902              conditional jump insn.  */
3903           else if (alter_jumps
3904                    && GET_CODE (PATTERN (insn)) == SET
3905                    && SET_DEST (PATTERN (insn)) == cc0_rtx
3906                    && GET_CODE (NEXT_INSN (insn)) == JUMP_INSN
3907                    && condjump_p (NEXT_INSN (insn))
3908                    && ! simplejump_p (NEXT_INSN (insn)))
3909             changed |= cprop_cc0_jump (insn, reg_used, src);
3910 #endif
3911         }
3912       else if (GET_CODE (src) == REG
3913                && REGNO (src) >= FIRST_PSEUDO_REGISTER
3914                && REGNO (src) != regno)
3915         {
3916           if (try_replace_reg (reg_used->reg_rtx, src, insn))
3917             {
3918               changed = 1;
3919               copy_prop_count++;
3920               if (gcse_file != NULL)
3921                 {
3922                   fprintf (gcse_file, "COPY-PROP: Replacing reg %d in insn %d",
3923                            regno, INSN_UID (insn));
3924                   fprintf (gcse_file, " with reg %d\n", REGNO (src));
3925                 }
3926
3927               /* The original insn setting reg_used may or may not now be
3928                  deletable.  We leave the deletion to flow.  */
3929               /* FIXME: If it turns out that the insn isn't deletable,
3930                  then we may have unnecessarily extended register lifetimes
3931                  and made things worse.  */
3932             }
3933         }
3934     }
3935
3936   return changed;
3937 }
3938
3939 /* Forward propagate copies.  This includes copies and constants.  Return
3940    non-zero if a change was made.  */
3941
3942 static int
3943 cprop (alter_jumps)
3944      int alter_jumps;
3945 {
3946   int bb, changed;
3947   rtx insn;
3948
3949   /* Note we start at block 1.  */
3950
3951   changed = 0;
3952   for (bb = 1; bb < n_basic_blocks; bb++)
3953     {
3954       /* Reset tables used to keep track of what's still valid [since the
3955          start of the block].  */
3956       reset_opr_set_tables ();
3957
3958       for (insn = BLOCK_HEAD (bb);
3959            insn != NULL && insn != NEXT_INSN (BLOCK_END (bb));
3960            insn = NEXT_INSN (insn))
3961         {
3962           if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
3963             {
3964               changed |= cprop_insn (insn, alter_jumps);
3965
3966               /* Keep track of everything modified by this insn.  */
3967               /* ??? Need to be careful w.r.t. mods done to INSN.  Don't
3968                  call mark_oprs_set if we turned the insn into a NOTE.  */
3969               if (GET_CODE (insn) != NOTE)
3970                 mark_oprs_set (insn);
3971             }
3972         }
3973     }
3974
3975   if (gcse_file != NULL)
3976     fprintf (gcse_file, "\n");
3977
3978   return changed;
3979 }
3980
3981 /* Perform one copy/constant propagation pass.
3982    F is the first insn in the function.
3983    PASS is the pass count.  */
3984
3985 static int
3986 one_cprop_pass (pass, alter_jumps)
3987      int pass;
3988      int alter_jumps;
3989 {
3990   int changed = 0;
3991
3992   const_prop_count = 0;
3993   copy_prop_count = 0;
3994
3995   alloc_set_hash_table (max_cuid);
3996   compute_set_hash_table ();
3997   if (gcse_file)
3998     dump_hash_table (gcse_file, "SET", set_hash_table, set_hash_table_size,
3999                      n_sets);
4000   if (n_sets > 0)
4001     {
4002       alloc_cprop_mem (n_basic_blocks, n_sets);
4003       compute_cprop_data ();
4004       changed = cprop (alter_jumps);
4005       free_cprop_mem ();
4006     }
4007
4008   free_set_hash_table ();
4009
4010   if (gcse_file)
4011     {
4012       fprintf (gcse_file, "CPROP of %s, pass %d: %d bytes needed, ",
4013                current_function_name, pass, bytes_used);
4014       fprintf (gcse_file, "%d const props, %d copy props\n\n",
4015                const_prop_count, copy_prop_count);
4016     }
4017
4018   return changed;
4019 }
4020 \f
4021 /* Compute PRE+LCM working variables.  */
4022
4023 /* Local properties of expressions.  */
4024 /* Nonzero for expressions that are transparent in the block.  */
4025 static sbitmap *transp;
4026
4027 /* Nonzero for expressions that are transparent at the end of the block.
4028    This is only zero for expressions killed by abnormal critical edge
4029    created by a calls.  */
4030 static sbitmap *transpout;
4031
4032 /* Nonzero for expressions that are computed (available) in the block.  */
4033 static sbitmap *comp;
4034
4035 /* Nonzero for expressions that are locally anticipatable in the block.  */
4036 static sbitmap *antloc;
4037
4038 /* Nonzero for expressions where this block is an optimal computation
4039    point.  */
4040 static sbitmap *pre_optimal;
4041
4042 /* Nonzero for expressions which are redundant in a particular block.  */
4043 static sbitmap *pre_redundant;
4044
4045 /* Nonzero for expressions which should be inserted on a specific edge.  */
4046 static sbitmap *pre_insert_map;
4047
4048 /* Nonzero for expressions which should be deleted in a specific block.  */
4049 static sbitmap *pre_delete_map;
4050
4051 /* Contains the edge_list returned by pre_edge_lcm.  */
4052 static struct edge_list *edge_list;
4053
4054 static sbitmap *temp_bitmap;
4055
4056 /* Redundant insns.  */
4057 static sbitmap pre_redundant_insns;
4058
4059 /* Allocate vars used for PRE analysis.  */
4060
4061 static void
4062 alloc_pre_mem (n_blocks, n_exprs)
4063      int n_blocks, n_exprs;
4064 {
4065   transp = sbitmap_vector_alloc (n_blocks, n_exprs);
4066   comp = sbitmap_vector_alloc (n_blocks, n_exprs);
4067   antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
4068   temp_bitmap = sbitmap_vector_alloc (n_blocks, n_exprs);
4069
4070   pre_optimal = NULL;
4071   pre_redundant = NULL;
4072   pre_insert_map = NULL;
4073   pre_delete_map = NULL;
4074   ae_in = NULL;
4075   ae_out = NULL;
4076   u_bitmap = NULL;
4077   transpout = sbitmap_vector_alloc (n_blocks, n_exprs);
4078   ae_kill = sbitmap_vector_alloc (n_blocks, n_exprs);
4079
4080   /* pre_insert and pre_delete are allocated later.  */
4081 }
4082
4083 /* Free vars used for PRE analysis.  */
4084
4085 static void
4086 free_pre_mem ()
4087 {
4088   free (transp);
4089   free (comp);
4090   free (antloc);
4091   free (temp_bitmap);
4092
4093   if (pre_optimal)
4094     free (pre_optimal);
4095   if (pre_redundant)
4096     free (pre_redundant);
4097   if (pre_insert_map)
4098     free (pre_insert_map);
4099   if (pre_delete_map)
4100     free (pre_delete_map);
4101   if (transpout)
4102     free (transpout);
4103
4104   if (ae_in)
4105     free (ae_in);
4106   if (ae_out)
4107     free (ae_out);
4108   if (ae_kill)
4109     free (ae_kill);
4110   if (u_bitmap)
4111     free (u_bitmap);
4112
4113   transp = comp = antloc = NULL;
4114   pre_optimal = pre_redundant = pre_insert_map = pre_delete_map = NULL;
4115   transpout = ae_in = ae_out = ae_kill = NULL;
4116   u_bitmap = NULL;
4117
4118 }
4119
4120 /* Top level routine to do the dataflow analysis needed by PRE.  */
4121
4122 static void
4123 compute_pre_data ()
4124 {
4125   compute_local_properties (transp, comp, antloc, 0);
4126   compute_transpout ();
4127   sbitmap_vector_zero (ae_kill, n_basic_blocks);
4128   compute_ae_kill (comp, ae_kill);
4129   edge_list = pre_edge_lcm (gcse_file, n_exprs, transp, comp, antloc,
4130                             ae_kill, &pre_insert_map, &pre_delete_map);
4131 }
4132 \f
4133 /* PRE utilities */
4134
4135 /* Return non-zero if an occurrence of expression EXPR in OCCR_BB would reach
4136    block BB.
4137
4138    VISITED is a pointer to a working buffer for tracking which BB's have
4139    been visited.  It is NULL for the top-level call.
4140
4141    We treat reaching expressions that go through blocks containing the same
4142    reaching expression as "not reaching".  E.g. if EXPR is generated in blocks
4143    2 and 3, INSN is in block 4, and 2->3->4, we treat the expression in block
4144    2 as not reaching.  The intent is to improve the probability of finding
4145    only one reaching expression and to reduce register lifetimes by picking
4146    the closest such expression.  */
4147
4148 static int
4149 pre_expr_reaches_here_p_work (occr_bb, expr, bb, visited)
4150      int occr_bb;
4151      struct expr *expr;
4152      int bb;
4153      char *visited;
4154 {
4155   edge pred;
4156
4157   for (pred = BASIC_BLOCK (bb)->pred; pred != NULL; pred = pred->pred_next)
4158     {
4159       int pred_bb = pred->src->index;
4160
4161       if (pred->src == ENTRY_BLOCK_PTR
4162           /* Has predecessor has already been visited?  */
4163           || visited[pred_bb])
4164         ;/* Nothing to do.  */
4165
4166       /* Does this predecessor generate this expression?  */
4167       else if (TEST_BIT (comp[pred_bb], expr->bitmap_index))
4168         {
4169           /* Is this the occurrence we're looking for?
4170              Note that there's only one generating occurrence per block
4171              so we just need to check the block number.  */
4172           if (occr_bb == pred_bb)
4173             return 1;
4174
4175           visited[pred_bb] = 1;
4176         }
4177       /* Ignore this predecessor if it kills the expression.  */
4178       else if (! TEST_BIT (transp[pred_bb], expr->bitmap_index))
4179         visited[pred_bb] = 1;
4180
4181       /* Neither gen nor kill.  */
4182       else
4183         {
4184           visited[pred_bb] = 1;
4185           if (pre_expr_reaches_here_p_work (occr_bb, expr, pred_bb, visited))
4186             return 1;
4187         }
4188     }
4189
4190   /* All paths have been checked.  */
4191   return 0;
4192 }
4193
4194 /* The wrapper for pre_expr_reaches_here_work that ensures that any
4195    memory allocated for that function is returned. */
4196
4197 static int
4198 pre_expr_reaches_here_p (occr_bb, expr, bb)
4199      int occr_bb;
4200      struct expr *expr;
4201      int bb;
4202 {
4203   int rval;
4204   char *visited = (char *) xcalloc (n_basic_blocks, 1);
4205
4206   rval = pre_expr_reaches_here_p_work(occr_bb, expr, bb, visited);
4207
4208   free (visited);
4209   return rval;
4210 }
4211 \f
4212
4213 /* Given an expr, generate RTL which we can insert at the end of a BB,
4214    or on an edge.  Set the block number of any insns generated to
4215    the value of BB.  */
4216
4217 static rtx
4218 process_insert_insn (expr)
4219      struct expr *expr;
4220 {
4221   rtx reg = expr->reaching_reg;
4222   rtx pat, copied_expr;
4223   rtx first_new_insn;
4224
4225   start_sequence ();
4226   copied_expr = copy_rtx (expr->expr);
4227   emit_move_insn (reg, copied_expr);
4228   first_new_insn = get_insns ();
4229   pat = gen_sequence ();
4230   end_sequence ();
4231
4232   return pat;
4233 }
4234
4235 /* Add EXPR to the end of basic block BB.
4236
4237    This is used by both the PRE and code hoisting.
4238
4239    For PRE, we want to verify that the expr is either transparent
4240    or locally anticipatable in the target block.  This check makes
4241    no sense for code hoisting.  */
4242
4243 static void
4244 insert_insn_end_bb (expr, bb, pre)
4245      struct expr *expr;
4246      int bb;
4247      int pre;
4248 {
4249   rtx insn = BLOCK_END (bb);
4250   rtx new_insn;
4251   rtx reg = expr->reaching_reg;
4252   int regno = REGNO (reg);
4253   rtx pat;
4254   int i;
4255
4256   pat = process_insert_insn (expr);
4257
4258   /* If the last insn is a jump, insert EXPR in front [taking care to
4259      handle cc0, etc. properly].  */
4260
4261   if (GET_CODE (insn) == JUMP_INSN)
4262     {
4263 #ifdef HAVE_cc0
4264       rtx note;
4265 #endif
4266
4267       /* If this is a jump table, then we can't insert stuff here.  Since
4268          we know the previous real insn must be the tablejump, we insert
4269          the new instruction just before the tablejump.  */
4270       if (GET_CODE (PATTERN (insn)) == ADDR_VEC
4271           || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
4272         insn = prev_real_insn (insn);
4273
4274 #ifdef HAVE_cc0
4275       /* FIXME: 'twould be nice to call prev_cc0_setter here but it aborts
4276          if cc0 isn't set.  */
4277       note = find_reg_note (insn, REG_CC_SETTER, NULL_RTX);
4278       if (note)
4279         insn = XEXP (note, 0);
4280       else
4281         {
4282           rtx maybe_cc0_setter = prev_nonnote_insn (insn);
4283           if (maybe_cc0_setter
4284               && GET_RTX_CLASS (GET_CODE (maybe_cc0_setter)) == 'i'
4285               && sets_cc0_p (PATTERN (maybe_cc0_setter)))
4286             insn = maybe_cc0_setter;
4287         }
4288 #endif
4289       /* FIXME: What if something in cc0/jump uses value set in new insn?  */
4290       new_insn = emit_block_insn_before (pat, insn, BASIC_BLOCK (bb));
4291     }
4292
4293   /* Likewise if the last insn is a call, as will happen in the presence
4294      of exception handling.  */
4295   else if (GET_CODE (insn) == CALL_INSN)
4296     {
4297       HARD_REG_SET parm_regs;
4298       int nparm_regs;
4299       rtx p;
4300
4301       /* Keeping in mind SMALL_REGISTER_CLASSES and parameters in registers,
4302          we search backward and place the instructions before the first
4303          parameter is loaded.  Do this for everyone for consistency and a
4304          presumtion that we'll get better code elsewhere as well.
4305
4306          It should always be the case that we can put these instructions
4307          anywhere in the basic block with performing PRE optimizations.
4308          Check this.  */
4309
4310       if (pre
4311           && !TEST_BIT (antloc[bb], expr->bitmap_index)
4312           && !TEST_BIT (transp[bb], expr->bitmap_index))
4313         abort ();
4314
4315       /* Since different machines initialize their parameter registers
4316          in different orders, assume nothing.  Collect the set of all
4317          parameter registers.  */
4318       CLEAR_HARD_REG_SET (parm_regs);
4319       nparm_regs = 0;
4320       for (p = CALL_INSN_FUNCTION_USAGE (insn); p ; p = XEXP (p, 1))
4321         if (GET_CODE (XEXP (p, 0)) == USE
4322             && GET_CODE (XEXP (XEXP (p, 0), 0)) == REG)
4323           {
4324             if (REGNO (XEXP (XEXP (p, 0), 0)) >= FIRST_PSEUDO_REGISTER)
4325               abort ();
4326
4327             SET_HARD_REG_BIT (parm_regs, REGNO (XEXP (XEXP (p, 0), 0)));
4328             nparm_regs++;
4329           }
4330
4331       /* Search backward for the first set of a register in this set.  */
4332       while (nparm_regs && BLOCK_HEAD (bb) != insn)
4333         {
4334           insn = PREV_INSN (insn);
4335           p = single_set (insn);
4336           if (p && GET_CODE (SET_DEST (p)) == REG
4337               && REGNO (SET_DEST (p)) < FIRST_PSEUDO_REGISTER
4338               && TEST_HARD_REG_BIT (parm_regs, REGNO (SET_DEST (p))))
4339             {
4340               CLEAR_HARD_REG_BIT (parm_regs, REGNO (SET_DEST (p)));
4341               nparm_regs--;
4342             }
4343         }
4344
4345       /* If we found all the parameter loads, then we want to insert
4346          before the first parameter load.
4347
4348          If we did not find all the parameter loads, then we might have
4349          stopped on the head of the block, which could be a CODE_LABEL.
4350          If we inserted before the CODE_LABEL, then we would be putting
4351          the insn in the wrong basic block.  In that case, put the insn
4352          after the CODE_LABEL.  Also, respect NOTE_INSN_BASIC_BLOCK.  */
4353       if (GET_CODE (insn) == CODE_LABEL)
4354         insn = NEXT_INSN (insn);
4355       else if (GET_CODE (insn) == NOTE
4356                && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
4357         insn = NEXT_INSN (insn);
4358
4359       new_insn = emit_block_insn_before (pat, insn, BASIC_BLOCK (bb));
4360     }
4361   else
4362     {
4363       new_insn = emit_insn_after (pat, insn);
4364       BLOCK_END (bb) = new_insn;
4365     }
4366
4367   /* Keep block number table up to date.
4368      Note, PAT could be a multiple insn sequence, we have to make
4369      sure that each insn in the sequence is handled.  */
4370   if (GET_CODE (pat) == SEQUENCE)
4371     {
4372       for (i = 0; i < XVECLEN (pat, 0); i++)
4373         {
4374           rtx insn = XVECEXP (pat, 0, i);
4375
4376           set_block_num (insn, bb);
4377           if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
4378             add_label_notes (PATTERN (insn), new_insn);
4379
4380           note_stores (PATTERN (insn), record_set_info, insn);
4381         }
4382     }
4383   else
4384     {
4385       add_label_notes (SET_SRC (pat), new_insn);
4386       set_block_num (new_insn, bb);
4387
4388       /* Keep register set table up to date.  */
4389       record_one_set (regno, new_insn);
4390     }
4391
4392   gcse_create_count++;
4393
4394   if (gcse_file)
4395     {
4396       fprintf (gcse_file, "PRE/HOIST: end of bb %d, insn %d, ",
4397                bb, INSN_UID (new_insn));
4398       fprintf (gcse_file, "copying expression %d to reg %d\n",
4399                expr->bitmap_index, regno);
4400     }
4401 }
4402
4403 /* Insert partially redundant expressions on edges in the CFG to make
4404    the expressions fully redundant.  */
4405
4406 static int
4407 pre_edge_insert (edge_list, index_map)
4408      struct edge_list *edge_list;
4409      struct expr **index_map;
4410 {
4411   int e, i, j, num_edges, set_size, did_insert = 0;
4412   sbitmap *inserted;
4413
4414   /* Where PRE_INSERT_MAP is nonzero, we add the expression on that edge
4415      if it reaches any of the deleted expressions.  */
4416
4417   set_size = pre_insert_map[0]->size;
4418   num_edges = NUM_EDGES (edge_list);
4419   inserted = sbitmap_vector_alloc (num_edges, n_exprs);
4420   sbitmap_vector_zero (inserted, num_edges);
4421
4422   for (e = 0; e < num_edges; e++)
4423     {
4424       int indx;
4425       basic_block pred = INDEX_EDGE_PRED_BB (edge_list, e);
4426       int bb = pred->index;
4427
4428       for (i = indx = 0; i < set_size; i++, indx += SBITMAP_ELT_BITS)
4429         {
4430           SBITMAP_ELT_TYPE insert = pre_insert_map[e]->elms[i];
4431
4432           for (j = indx; insert && j < n_exprs; j++, insert >>= 1)
4433             if ((insert & 1) != 0 && index_map[j]->reaching_reg != NULL_RTX)
4434               {
4435                 struct expr *expr = index_map[j];
4436                 struct occr *occr;
4437
4438                 /* Now look at each deleted occurence of this expression.  */
4439                 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
4440                   {
4441                     if (! occr->deleted_p)
4442                       continue;
4443
4444                     /* Insert this expression on this edge if if it would
4445                        reach the deleted occurence in BB.  */
4446                     if (!TEST_BIT (inserted[e], j))
4447                       {
4448                         rtx insn;
4449                         edge eg = INDEX_EDGE (edge_list, e);
4450
4451                         /* We can't insert anything on an abnormal and
4452                            critical edge, so we insert the insn at the end of
4453                            the previous block. There are several alternatives
4454                            detailed in Morgans book P277 (sec 10.5) for
4455                            handling this situation.  This one is easiest for
4456                            now.  */
4457
4458                         if ((eg->flags & EDGE_ABNORMAL) == EDGE_ABNORMAL)
4459                           insert_insn_end_bb (index_map[j], bb, 0);
4460                         else
4461                           {
4462                             insn = process_insert_insn (index_map[j]);
4463                             insert_insn_on_edge (insn, eg);
4464                           }
4465
4466                         if (gcse_file)
4467                           {
4468                             fprintf (gcse_file, "PRE/HOIST: edge (%d,%d), ",
4469                                      bb,
4470                                      INDEX_EDGE_SUCC_BB (edge_list, e)->index);
4471                             fprintf (gcse_file, "copy expression %d\n",
4472                                      expr->bitmap_index);
4473                           }
4474
4475                         SET_BIT (inserted[e], j);
4476                         did_insert = 1;
4477                         gcse_create_count++;
4478                       }
4479                   }
4480               }
4481         }
4482     }
4483
4484   free (inserted);
4485   return did_insert;
4486 }
4487
4488 /* Copy the result of INSN to REG.  INDX is the expression number.  */
4489
4490 static void
4491 pre_insert_copy_insn (expr, insn)
4492      struct expr *expr;
4493      rtx insn;
4494 {
4495   rtx reg = expr->reaching_reg;
4496   int regno = REGNO (reg);
4497   int indx = expr->bitmap_index;
4498   rtx set = single_set (insn);
4499   rtx new_insn;
4500   int bb = BLOCK_NUM (insn);
4501
4502   if (!set)
4503     abort ();
4504
4505   new_insn = emit_insn_after (gen_rtx_SET (VOIDmode, reg, SET_DEST (set)),
4506                               insn);
4507
4508   /* Keep block number table up to date.  */
4509   set_block_num (new_insn, bb);
4510
4511   /* Keep register set table up to date.  */
4512   record_one_set (regno, new_insn);
4513   if (insn == BLOCK_END (bb))
4514     BLOCK_END (bb) = new_insn;
4515
4516   gcse_create_count++;
4517
4518   if (gcse_file)
4519     fprintf (gcse_file,
4520              "PRE: bb %d, insn %d, copy expression %d in insn %d to reg %d\n",
4521               BLOCK_NUM (insn), INSN_UID (new_insn), indx,
4522               INSN_UID (insn), regno);
4523 }
4524
4525 /* Copy available expressions that reach the redundant expression
4526    to `reaching_reg'.  */
4527
4528 static void
4529 pre_insert_copies ()
4530 {
4531   int i;
4532   struct expr *expr;
4533   struct occr *occr;
4534   struct occr *avail;
4535
4536   /* For each available expression in the table, copy the result to
4537      `reaching_reg' if the expression reaches a deleted one.
4538
4539      ??? The current algorithm is rather brute force.
4540      Need to do some profiling.  */
4541
4542   for (i = 0; i < expr_hash_table_size; i++)
4543     for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
4544       {
4545         /* If the basic block isn't reachable, PPOUT will be TRUE.  However,
4546            we don't want to insert a copy here because the expression may not
4547            really be redundant.  So only insert an insn if the expression was
4548            deleted.  This test also avoids further processing if the
4549            expression wasn't deleted anywhere.  */
4550         if (expr->reaching_reg == NULL)
4551           continue;
4552
4553         for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
4554           {
4555             if (! occr->deleted_p)
4556               continue;
4557
4558             for (avail = expr->avail_occr; avail != NULL; avail = avail->next)
4559               {
4560                 rtx insn = avail->insn;
4561
4562                 /* No need to handle this one if handled already.  */
4563                 if (avail->copied_p)
4564                   continue;
4565
4566                 /* Don't handle this one if it's a redundant one.  */
4567                 if (TEST_BIT (pre_redundant_insns, INSN_CUID (insn)))
4568                   continue;
4569
4570                 /* Or if the expression doesn't reach the deleted one.  */
4571                 if (! pre_expr_reaches_here_p (BLOCK_NUM (avail->insn), expr,
4572                                                BLOCK_NUM (occr->insn)))
4573                   continue;
4574
4575                 /* Copy the result of avail to reaching_reg.  */
4576                 pre_insert_copy_insn (expr, insn);
4577                 avail->copied_p = 1;
4578               }
4579           }
4580       }
4581 }
4582
4583 /* Delete redundant computations.
4584    Deletion is done by changing the insn to copy the `reaching_reg' of
4585    the expression into the result of the SET.  It is left to later passes
4586    (cprop, cse2, flow, combine, regmove) to propagate the copy or eliminate it.
4587
4588    Returns non-zero if a change is made.  */
4589
4590 static int
4591 pre_delete ()
4592 {
4593   int i, bb, changed;
4594   struct expr *expr;
4595   struct occr *occr;
4596
4597   /* Compute the expressions which are redundant and need to be replaced by
4598      copies from the reaching reg to the target reg.  */
4599   for (bb = 0; bb < n_basic_blocks; bb++)
4600     sbitmap_copy (temp_bitmap[bb], pre_delete_map[bb]);
4601
4602   changed = 0;
4603   for (i = 0; i < expr_hash_table_size; i++)
4604     for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
4605       {
4606         int indx = expr->bitmap_index;
4607
4608         /* We only need to search antic_occr since we require
4609            ANTLOC != 0.  */
4610
4611         for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
4612           {
4613             rtx insn = occr->insn;
4614             rtx set;
4615             int bb = BLOCK_NUM (insn);
4616
4617             if (TEST_BIT (temp_bitmap[bb], indx))
4618               {
4619                 set = single_set (insn);
4620                 if (! set)
4621                   abort ();
4622
4623                 /* Create a pseudo-reg to store the result of reaching
4624                    expressions into.  Get the mode for the new pseudo from
4625                    the mode of the original destination pseudo.  */
4626                 if (expr->reaching_reg == NULL)
4627                   expr->reaching_reg
4628                     = gen_reg_rtx (GET_MODE (SET_DEST (set)));
4629
4630                 /* In theory this should never fail since we're creating
4631                    a reg->reg copy.
4632
4633                    However, on the x86 some of the movXX patterns actually
4634                    contain clobbers of scratch regs.  This may cause the
4635                    insn created by validate_change to not match any pattern
4636                    and thus cause validate_change to fail.   */
4637                 if (validate_change (insn, &SET_SRC (set),
4638                                      expr->reaching_reg, 0))
4639                   {
4640                     occr->deleted_p = 1;
4641                     SET_BIT (pre_redundant_insns, INSN_CUID (insn));
4642                     changed = 1;
4643                     gcse_subst_count++;
4644                   }
4645
4646                 if (gcse_file)
4647                   {
4648                     fprintf (gcse_file,
4649                              "PRE: redundant insn %d (expression %d) in ",
4650                                INSN_UID (insn), indx);
4651                     fprintf (gcse_file, "bb %d, reaching reg is %d\n",
4652                              bb, REGNO (expr->reaching_reg));
4653                   }
4654               }
4655           }
4656       }
4657
4658   return changed;
4659 }
4660
4661 /* Perform GCSE optimizations using PRE.
4662    This is called by one_pre_gcse_pass after all the dataflow analysis
4663    has been done.
4664
4665    This is based on the original Morel-Renvoise paper Fred Chow's thesis, and
4666    lazy code motion from Knoop, Ruthing and Steffen as described in Advanced
4667    Compiler Design and Implementation.
4668
4669    ??? A new pseudo reg is created to hold the reaching expression.  The nice
4670    thing about the classical approach is that it would try to use an existing
4671    reg.  If the register can't be adequately optimized [i.e. we introduce
4672    reload problems], one could add a pass here to propagate the new register
4673    through the block.
4674
4675    ??? We don't handle single sets in PARALLELs because we're [currently] not
4676    able to copy the rest of the parallel when we insert copies to create full
4677    redundancies from partial redundancies.  However, there's no reason why we
4678    can't handle PARALLELs in the cases where there are no partial
4679    redundancies.  */
4680
4681 static int
4682 pre_gcse ()
4683 {
4684   int i, did_insert;
4685   int changed;
4686   struct expr **index_map;
4687   struct expr *expr;
4688
4689   /* Compute a mapping from expression number (`bitmap_index') to
4690      hash table entry.  */
4691
4692   index_map = (struct expr **) xcalloc (n_exprs, sizeof (struct expr *));
4693   for (i = 0; i < expr_hash_table_size; i++)
4694     for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
4695       index_map[expr->bitmap_index] = expr;
4696
4697   /* Reset bitmap used to track which insns are redundant.  */
4698   pre_redundant_insns = sbitmap_alloc (max_cuid);
4699   sbitmap_zero (pre_redundant_insns);
4700
4701   /* Delete the redundant insns first so that
4702      - we know what register to use for the new insns and for the other
4703        ones with reaching expressions
4704      - we know which insns are redundant when we go to create copies  */
4705
4706   changed = pre_delete ();
4707
4708   did_insert = pre_edge_insert (edge_list, index_map);
4709
4710   /* In other places with reaching expressions, copy the expression to the
4711      specially allocated pseudo-reg that reaches the redundant expr.  */
4712   pre_insert_copies ();
4713   if (did_insert)
4714     {
4715       commit_edge_insertions ();
4716       changed = 1;
4717     }
4718
4719   free (index_map);
4720   free (pre_redundant_insns);
4721   return changed;
4722 }
4723
4724 /* Top level routine to perform one PRE GCSE pass.
4725
4726    Return non-zero if a change was made.  */
4727
4728 static int
4729 one_pre_gcse_pass (pass)
4730      int pass;
4731 {
4732   int changed = 0;
4733
4734   gcse_subst_count = 0;
4735   gcse_create_count = 0;
4736
4737   alloc_expr_hash_table (max_cuid);
4738   add_noreturn_fake_exit_edges ();
4739   compute_expr_hash_table ();
4740   if (gcse_file)
4741     dump_hash_table (gcse_file, "Expression", expr_hash_table,
4742                      expr_hash_table_size, n_exprs);
4743
4744   if (n_exprs > 0)
4745     {
4746       alloc_pre_mem (n_basic_blocks, n_exprs);
4747       compute_pre_data ();
4748       changed |= pre_gcse ();
4749       free_edge_list (edge_list);
4750       free_pre_mem ();
4751     }
4752
4753   remove_fake_edges ();
4754   free_expr_hash_table ();
4755
4756   if (gcse_file)
4757     {
4758       fprintf (gcse_file, "\nPRE GCSE of %s, pass %d: %d bytes needed, ",
4759                current_function_name, pass, bytes_used);
4760       fprintf (gcse_file, "%d substs, %d insns created\n",
4761                gcse_subst_count, gcse_create_count);
4762     }
4763
4764   return changed;
4765 }
4766 \f
4767 /* If X contains any LABEL_REF's, add REG_LABEL notes for them to INSN.
4768    We have to add REG_LABEL notes, because the following loop optimization
4769    pass requires them.  */
4770
4771 /* ??? This is very similar to the loop.c add_label_notes function.  We
4772    could probably share code here.  */
4773
4774 /* ??? If there was a jump optimization pass after gcse and before loop,
4775    then we would not need to do this here, because jump would add the
4776    necessary REG_LABEL notes.  */
4777
4778 static void
4779 add_label_notes (x, insn)
4780      rtx x;
4781      rtx insn;
4782 {
4783   enum rtx_code code = GET_CODE (x);
4784   int i, j;
4785   const char *fmt;
4786
4787   if (code == LABEL_REF && !LABEL_REF_NONLOCAL_P (x))
4788     {
4789       /* This code used to ignore labels that referred to dispatch tables to
4790          avoid flow generating (slighly) worse code.
4791
4792          We no longer ignore such label references (see LABEL_REF handling in
4793          mark_jump_label for additional information).  */
4794
4795       REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_LABEL, XEXP (x, 0),
4796                                             REG_NOTES (insn));
4797       return;
4798     }
4799
4800   for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
4801     {
4802       if (fmt[i] == 'e')
4803         add_label_notes (XEXP (x, i), insn);
4804       else if (fmt[i] == 'E')
4805         for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4806           add_label_notes (XVECEXP (x, i, j), insn);
4807     }
4808 }
4809
4810 /* Compute transparent outgoing information for each block.
4811
4812    An expression is transparent to an edge unless it is killed by
4813    the edge itself.  This can only happen with abnormal control flow,
4814    when the edge is traversed through a call.  This happens with
4815    non-local labels and exceptions.
4816
4817    This would not be necessary if we split the edge.  While this is
4818    normally impossible for abnormal critical edges, with some effort
4819    it should be possible with exception handling, since we still have
4820    control over which handler should be invoked.  But due to increased
4821    EH table sizes, this may not be worthwhile.  */
4822
4823 static void
4824 compute_transpout ()
4825 {
4826   int bb;
4827   int i;
4828   struct expr *expr;
4829
4830   sbitmap_vector_ones (transpout, n_basic_blocks);
4831
4832   for (bb = 0; bb < n_basic_blocks; ++bb)
4833     {
4834       /* Note that flow inserted a nop a the end of basic blocks that
4835          end in call instructions for reasons other than abnormal
4836          control flow.  */
4837       if (GET_CODE (BLOCK_END (bb)) != CALL_INSN)
4838         continue;
4839
4840       for (i = 0; i < expr_hash_table_size; i++)
4841         for (expr = expr_hash_table[i]; expr ; expr = expr->next_same_hash)
4842           if (GET_CODE (expr->expr) == MEM)
4843             {
4844               if (GET_CODE (XEXP (expr->expr, 0)) == SYMBOL_REF
4845                   && CONSTANT_POOL_ADDRESS_P (XEXP (expr->expr, 0)))
4846                 continue;
4847
4848               /* ??? Optimally, we would use interprocedural alias
4849                  analysis to determine if this mem is actually killed
4850                  by this call.  */
4851               RESET_BIT (transpout[bb], expr->bitmap_index);
4852             }
4853     }
4854 }
4855
4856 /* Removal of useless null pointer checks */
4857
4858 /* Called via note_stores.  X is set by SETTER.  If X is a register we must
4859    invalidate nonnull_local and set nonnull_killed.  DATA is really a
4860    `null_pointer_info *'.
4861
4862    We ignore hard registers.  */
4863
4864 static void
4865 invalidate_nonnull_info (x, setter, data)
4866      rtx x;
4867      rtx setter ATTRIBUTE_UNUSED;
4868      void *data;
4869 {
4870   int offset, regno;
4871   struct null_pointer_info* npi = (struct null_pointer_info *) data;
4872
4873   offset = 0;
4874
4875   while (GET_CODE (x) == SUBREG)
4876     x = SUBREG_REG (x);
4877
4878   /* Ignore anything that is not a register or is a hard register.  */
4879   if (GET_CODE (x) != REG
4880       || REGNO (x) < npi->min_reg
4881       || REGNO (x) >= npi->max_reg)
4882     return;
4883
4884   regno = REGNO (x) - npi->min_reg;
4885
4886   RESET_BIT (npi->nonnull_local[npi->current_block], regno);
4887   SET_BIT (npi->nonnull_killed[npi->current_block], regno);
4888 }
4889
4890 /* Do null-pointer check elimination for the registers indicated in
4891    NPI.  NONNULL_AVIN and NONNULL_AVOUT are pre-allocated sbitmaps;
4892    they are not our responsibility to free.  */
4893
4894 static void
4895 delete_null_pointer_checks_1 (block_reg, nonnull_avin, nonnull_avout, npi)
4896      int *block_reg;
4897      sbitmap *nonnull_avin;
4898      sbitmap *nonnull_avout;
4899      struct null_pointer_info *npi;
4900 {
4901   int bb;
4902   int current_block;
4903   sbitmap *nonnull_local = npi->nonnull_local;
4904   sbitmap *nonnull_killed = npi->nonnull_killed;
4905
4906   /* Compute local properties, nonnull and killed.  A register will have
4907      the nonnull property if at the end of the current block its value is
4908      known to be nonnull.  The killed property indicates that somewhere in
4909      the block any information we had about the register is killed.
4910
4911      Note that a register can have both properties in a single block.  That
4912      indicates that it's killed, then later in the block a new value is
4913      computed.  */
4914   sbitmap_vector_zero (nonnull_local, n_basic_blocks);
4915   sbitmap_vector_zero (nonnull_killed, n_basic_blocks);
4916
4917   for (current_block = 0; current_block < n_basic_blocks; current_block++)
4918     {
4919       rtx insn, stop_insn;
4920
4921       /* Set the current block for invalidate_nonnull_info.  */
4922       npi->current_block = current_block;
4923
4924       /* Scan each insn in the basic block looking for memory references and
4925          register sets.  */
4926       stop_insn = NEXT_INSN (BLOCK_END (current_block));
4927       for (insn = BLOCK_HEAD (current_block);
4928            insn != stop_insn;
4929            insn = NEXT_INSN (insn))
4930         {
4931           rtx set;
4932           rtx reg;
4933
4934           /* Ignore anything that is not a normal insn.  */
4935           if (GET_RTX_CLASS (GET_CODE (insn)) != 'i')
4936             continue;
4937
4938           /* Basically ignore anything that is not a simple SET.  We do have
4939              to make sure to invalidate nonnull_local and set nonnull_killed
4940              for such insns though.  */
4941           set = single_set (insn);
4942           if (!set)
4943             {
4944               note_stores (PATTERN (insn), invalidate_nonnull_info, npi);
4945               continue;
4946             }
4947
4948           /* See if we've got a useable memory load.  We handle it first
4949              in case it uses its address register as a dest (which kills
4950              the nonnull property).  */
4951           if (GET_CODE (SET_SRC (set)) == MEM
4952               && GET_CODE ((reg = XEXP (SET_SRC (set), 0))) == REG
4953               && REGNO (reg) >= npi->min_reg
4954               && REGNO (reg) < npi->max_reg)
4955             SET_BIT (nonnull_local[current_block],
4956                      REGNO (reg) - npi->min_reg);
4957
4958           /* Now invalidate stuff clobbered by this insn.  */
4959           note_stores (PATTERN (insn), invalidate_nonnull_info, npi);
4960
4961           /* And handle stores, we do these last since any sets in INSN can
4962              not kill the nonnull property if it is derived from a MEM
4963              appearing in a SET_DEST.  */
4964           if (GET_CODE (SET_DEST (set)) == MEM
4965               && GET_CODE ((reg = XEXP (SET_DEST (set), 0))) == REG
4966               && REGNO (reg) >= npi->min_reg
4967               && REGNO (reg) < npi->max_reg)
4968             SET_BIT (nonnull_local[current_block],
4969                      REGNO (reg) - npi->min_reg);
4970         }
4971     }
4972
4973   /* Now compute global properties based on the local properties.   This
4974      is a classic global availablity algorithm.  */
4975   compute_available (nonnull_local, nonnull_killed,
4976                      nonnull_avout, nonnull_avin);
4977
4978   /* Now look at each bb and see if it ends with a compare of a value
4979      against zero.  */
4980   for (bb = 0; bb < n_basic_blocks; bb++)
4981     {
4982       rtx last_insn = BLOCK_END (bb);
4983       rtx condition, earliest;
4984       int compare_and_branch;
4985
4986       /* Since MIN_REG is always at least FIRST_PSEUDO_REGISTER, and
4987          since BLOCK_REG[BB] is zero if this block did not end with a
4988          comparison against zero, this condition works.  */
4989       if (block_reg[bb] < npi->min_reg
4990           || block_reg[bb] >= npi->max_reg)
4991         continue;
4992
4993       /* LAST_INSN is a conditional jump.  Get its condition.  */
4994       condition = get_condition (last_insn, &earliest);
4995
4996       /* If we can't determine the condition then skip.  */
4997       if (! condition)
4998         continue;
4999
5000       /* Is the register known to have a nonzero value?  */
5001       if (!TEST_BIT (nonnull_avout[bb], block_reg[bb] - npi->min_reg))
5002         continue;
5003
5004       /* Try to compute whether the compare/branch at the loop end is one or
5005          two instructions.  */
5006       if (earliest == last_insn)
5007         compare_and_branch = 1;
5008       else if (earliest == prev_nonnote_insn (last_insn))
5009         compare_and_branch = 2;
5010       else
5011         continue;
5012
5013       /* We know the register in this comparison is nonnull at exit from
5014          this block.  We can optimize this comparison.  */
5015       if (GET_CODE (condition) == NE)
5016         {
5017           rtx new_jump;
5018
5019           new_jump = emit_jump_insn_before (gen_jump (JUMP_LABEL (last_insn)),
5020                                             last_insn);
5021           JUMP_LABEL (new_jump) = JUMP_LABEL (last_insn);
5022           LABEL_NUSES (JUMP_LABEL (new_jump))++;
5023           emit_barrier_after (new_jump);
5024         }
5025       delete_insn (last_insn);
5026       if (compare_and_branch == 2)
5027         delete_insn (earliest);
5028
5029       /* Don't check this block again.  (Note that BLOCK_END is
5030          invalid here; we deleted the last instruction in the
5031          block.)  */
5032       block_reg[bb] = 0;
5033     }
5034 }
5035
5036 /* Find EQ/NE comparisons against zero which can be (indirectly) evaluated
5037    at compile time.
5038
5039    This is conceptually similar to global constant/copy propagation and
5040    classic global CSE (it even uses the same dataflow equations as cprop).
5041
5042    If a register is used as memory address with the form (mem (reg)), then we
5043    know that REG can not be zero at that point in the program.  Any instruction
5044    which sets REG "kills" this property.
5045
5046    So, if every path leading to a conditional branch has an available memory
5047    reference of that form, then we know the register can not have the value
5048    zero at the conditional branch.
5049
5050    So we merely need to compute the local properies and propagate that data
5051    around the cfg, then optimize where possible.
5052
5053    We run this pass two times.  Once before CSE, then again after CSE.  This
5054    has proven to be the most profitable approach.  It is rare for new
5055    optimization opportunities of this nature to appear after the first CSE
5056    pass.
5057
5058    This could probably be integrated with global cprop with a little work.  */
5059
5060 void
5061 delete_null_pointer_checks (f)
5062      rtx f;
5063 {
5064   sbitmap *nonnull_avin, *nonnull_avout;
5065   int *block_reg;
5066   int bb;
5067   int reg;
5068   int regs_per_pass;
5069   int max_reg;
5070   struct null_pointer_info npi;
5071
5072   /* First break the program into basic blocks.  */
5073   find_basic_blocks (f, max_reg_num (), NULL);
5074   cleanup_cfg (f);
5075
5076   /* If we have only a single block, then there's nothing to do.  */
5077   if (n_basic_blocks <= 1)
5078     {
5079       /* Free storage allocated by find_basic_blocks.  */
5080       free_basic_block_vars (0);
5081       return;
5082     }
5083
5084   /* Trying to perform global optimizations on flow graphs which have
5085      a high connectivity will take a long time and is unlikely to be
5086      particularly useful.
5087
5088      In normal circumstances a cfg should have about twice has many edges
5089      as blocks.  But we do not want to punish small functions which have
5090      a couple switch statements.  So we require a relatively large number
5091      of basic blocks and the ratio of edges to blocks to be high.  */
5092   if (n_basic_blocks > 1000 && n_edges / n_basic_blocks >= 20)
5093     {
5094       /* Free storage allocated by find_basic_blocks.  */
5095       free_basic_block_vars (0);
5096       return;
5097     }
5098
5099   /* We need four bitmaps, each with a bit for each register in each
5100      basic block.  */
5101   max_reg = max_reg_num ();
5102   regs_per_pass = get_bitmap_width (4, n_basic_blocks, max_reg);
5103
5104   /* Allocate bitmaps to hold local and global properties.  */
5105   npi.nonnull_local = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);
5106   npi.nonnull_killed = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);
5107   nonnull_avin = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);
5108   nonnull_avout = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);
5109
5110   /* Go through the basic blocks, seeing whether or not each block
5111      ends with a conditional branch whose condition is a comparison
5112      against zero.  Record the register compared in BLOCK_REG.  */
5113   block_reg = (int *) xcalloc (n_basic_blocks, sizeof (int));
5114   for (bb = 0; bb < n_basic_blocks; bb++)
5115     {
5116       rtx last_insn = BLOCK_END (bb);
5117       rtx condition, earliest, reg;
5118
5119       /* We only want conditional branches.  */
5120       if (GET_CODE (last_insn) != JUMP_INSN
5121           || !condjump_p (last_insn)
5122           || simplejump_p (last_insn))
5123         continue;
5124
5125       /* LAST_INSN is a conditional jump.  Get its condition.  */
5126       condition = get_condition (last_insn, &earliest);
5127
5128       /* If we were unable to get the condition, or it is not a equality
5129          comparison against zero then there's nothing we can do.  */
5130       if (!condition
5131           || (GET_CODE (condition) != NE && GET_CODE (condition) != EQ)
5132           || GET_CODE (XEXP (condition, 1)) != CONST_INT
5133           || (XEXP (condition, 1)
5134               != CONST0_RTX (GET_MODE (XEXP (condition, 0)))))
5135         continue;
5136
5137       /* We must be checking a register against zero.  */
5138       reg = XEXP (condition, 0);
5139       if (GET_CODE (reg) != REG)
5140         continue;
5141
5142       block_reg[bb] = REGNO (reg);
5143     }
5144
5145   /* Go through the algorithm for each block of registers.  */
5146   for (reg = FIRST_PSEUDO_REGISTER; reg < max_reg; reg += regs_per_pass)
5147     {
5148       npi.min_reg = reg;
5149       npi.max_reg = MIN (reg + regs_per_pass, max_reg);
5150       delete_null_pointer_checks_1 (block_reg, nonnull_avin,
5151                                     nonnull_avout, &npi);
5152     }
5153
5154   /* Free storage allocated by find_basic_blocks.  */
5155   free_basic_block_vars (0);
5156
5157   /* Free the table of registers compared at the end of every block.  */
5158   free (block_reg);
5159
5160   /* Free bitmaps.  */
5161   free (npi.nonnull_local);
5162   free (npi.nonnull_killed);
5163   free (nonnull_avin);
5164   free (nonnull_avout);
5165 }
5166
5167 /* Code Hoisting variables and subroutines.  */
5168
5169 /* Very busy expressions.  */
5170 static sbitmap *hoist_vbein;
5171 static sbitmap *hoist_vbeout;
5172
5173 /* Hoistable expressions.  */
5174 static sbitmap *hoist_exprs;
5175
5176 /* Dominator bitmaps.  */
5177 static sbitmap *dominators;
5178
5179 /* ??? We could compute post dominators and run this algorithm in
5180    reverse to to perform tail merging, doing so would probably be
5181    more effective than the tail merging code in jump.c.
5182
5183    It's unclear if tail merging could be run in parallel with
5184    code hoisting.  It would be nice.  */
5185
5186 /* Allocate vars used for code hoisting analysis.  */
5187
5188 static void
5189 alloc_code_hoist_mem (n_blocks, n_exprs)
5190      int n_blocks, n_exprs;
5191 {
5192   antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
5193   transp = sbitmap_vector_alloc (n_blocks, n_exprs);
5194   comp = sbitmap_vector_alloc (n_blocks, n_exprs);
5195
5196   hoist_vbein = sbitmap_vector_alloc (n_blocks, n_exprs);
5197   hoist_vbeout = sbitmap_vector_alloc (n_blocks, n_exprs);
5198   hoist_exprs = sbitmap_vector_alloc (n_blocks, n_exprs);
5199   transpout = sbitmap_vector_alloc (n_blocks, n_exprs);
5200
5201   dominators = sbitmap_vector_alloc (n_blocks, n_blocks);
5202 }
5203
5204 /* Free vars used for code hoisting analysis.  */
5205
5206 static void
5207 free_code_hoist_mem ()
5208 {
5209   free (antloc);
5210   free (transp);
5211   free (comp);
5212
5213   free (hoist_vbein);
5214   free (hoist_vbeout);
5215   free (hoist_exprs);
5216   free (transpout);
5217
5218   free (dominators);
5219 }
5220
5221 /* Compute the very busy expressions at entry/exit from each block.
5222
5223    An expression is very busy if all paths from a given point
5224    compute the expression.  */
5225
5226 static void
5227 compute_code_hoist_vbeinout ()
5228 {
5229   int bb, changed, passes;
5230
5231   sbitmap_vector_zero (hoist_vbeout, n_basic_blocks);
5232   sbitmap_vector_zero (hoist_vbein, n_basic_blocks);
5233
5234   passes = 0;
5235   changed = 1;
5236
5237   while (changed)
5238     {
5239       changed = 0;
5240
5241       /* We scan the blocks in the reverse order to speed up
5242          the convergence.  */
5243       for (bb = n_basic_blocks - 1; bb >= 0; bb--)
5244         {
5245           changed |= sbitmap_a_or_b_and_c (hoist_vbein[bb], antloc[bb],
5246                                            hoist_vbeout[bb], transp[bb]);
5247           if (bb != n_basic_blocks - 1)
5248             sbitmap_intersection_of_succs (hoist_vbeout[bb], hoist_vbein, bb);
5249         }
5250
5251       passes++;
5252     }
5253
5254   if (gcse_file)
5255     fprintf (gcse_file, "hoisting vbeinout computation: %d passes\n", passes);
5256 }
5257
5258 /* Top level routine to do the dataflow analysis needed by code hoisting.  */
5259
5260 static void
5261 compute_code_hoist_data ()
5262 {
5263   compute_local_properties (transp, comp, antloc, 0);
5264   compute_transpout ();
5265   compute_code_hoist_vbeinout ();
5266   compute_flow_dominators (dominators, NULL);
5267   if (gcse_file)
5268     fprintf (gcse_file, "\n");
5269 }
5270
5271 /* Determine if the expression identified by EXPR_INDEX would
5272    reach BB unimpared if it was placed at the end of EXPR_BB.
5273
5274    It's unclear exactly what Muchnick meant by "unimpared".  It seems
5275    to me that the expression must either be computed or transparent in
5276    *every* block in the path(s) from EXPR_BB to BB.  Any other definition
5277    would allow the expression to be hoisted out of loops, even if
5278    the expression wasn't a loop invariant.
5279
5280    Contrast this to reachability for PRE where an expression is
5281    considered reachable if *any* path reaches instead of *all*
5282    paths.  */
5283
5284 static int
5285 hoist_expr_reaches_here_p (expr_bb, expr_index, bb, visited)
5286      int expr_bb;
5287      int expr_index;
5288      int bb;
5289      char *visited;
5290 {
5291   edge pred;
5292   int visited_allocated_locally = 0;
5293
5294
5295   if (visited == NULL)
5296     {
5297        visited_allocated_locally = 1;
5298        visited = xcalloc (n_basic_blocks, 1);
5299     }
5300
5301   visited[expr_bb] = 1;
5302   for (pred = BASIC_BLOCK (bb)->pred; pred != NULL; pred = pred->pred_next)
5303     {
5304       int pred_bb = pred->src->index;
5305
5306       if (pred->src == ENTRY_BLOCK_PTR)
5307         break;
5308       else if (visited[pred_bb])
5309         continue;
5310
5311       /* Does this predecessor generate this expression?  */
5312       else if (TEST_BIT (comp[pred_bb], expr_index))
5313         break;
5314       else if (! TEST_BIT (transp[pred_bb], expr_index))
5315         break;
5316
5317       /* Not killed.  */
5318       else
5319         {
5320           visited[pred_bb] = 1;
5321           if (! hoist_expr_reaches_here_p (expr_bb, expr_index,
5322                                            pred_bb, visited))
5323             break;
5324         }
5325     }
5326   if (visited_allocated_locally)
5327     free (visited);
5328
5329   return (pred == NULL);
5330 }
5331 \f
5332 /* Actually perform code hoisting.  */
5333
5334 static void
5335 hoist_code ()
5336 {
5337   int bb, dominated, i;
5338   struct expr **index_map;
5339   struct expr *expr;
5340
5341   sbitmap_vector_zero (hoist_exprs, n_basic_blocks);
5342
5343   /* Compute a mapping from expression number (`bitmap_index') to
5344      hash table entry.  */
5345
5346   index_map = (struct expr **) xcalloc (n_exprs, sizeof (struct expr *));
5347   for (i = 0; i < expr_hash_table_size; i++)
5348     for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
5349       index_map[expr->bitmap_index] = expr;
5350
5351   /* Walk over each basic block looking for potentially hoistable
5352      expressions, nothing gets hoisted from the entry block.  */
5353   for (bb = 0; bb < n_basic_blocks; bb++)
5354     {
5355       int found = 0;
5356       int insn_inserted_p;
5357
5358       /* Examine each expression that is very busy at the exit of this
5359          block.  These are the potentially hoistable expressions.  */
5360       for (i = 0; i < hoist_vbeout[bb]->n_bits; i++)
5361         {
5362           int hoistable = 0;
5363
5364           if (TEST_BIT (hoist_vbeout[bb], i) && TEST_BIT (transpout[bb], i))
5365             {
5366               /* We've found a potentially hoistable expression, now
5367                  we look at every block BB dominates to see if it
5368                  computes the expression.  */
5369               for (dominated = 0; dominated < n_basic_blocks; dominated++)
5370                 {
5371                   /* Ignore self dominance.  */
5372                   if (bb == dominated
5373                       || ! TEST_BIT (dominators[dominated], bb))
5374                     continue;
5375
5376                   /* We've found a dominated block, now see if it computes
5377                      the busy expression and whether or not moving that
5378                      expression to the "beginning" of that block is safe.  */
5379                   if (!TEST_BIT (antloc[dominated], i))
5380                     continue;
5381
5382                   /* Note if the expression would reach the dominated block
5383                      unimpared if it was placed at the end of BB.
5384
5385                      Keep track of how many times this expression is hoistable
5386                      from a dominated block into BB.  */
5387                   if (hoist_expr_reaches_here_p (bb, i, dominated, NULL))
5388                     hoistable++;
5389                 }
5390
5391               /* If we found more than one hoistable occurence of this
5392                  expression, then note it in the bitmap of expressions to
5393                  hoist.  It makes no sense to hoist things which are computed
5394                  in only one BB, and doing so tends to pessimize register
5395                  allocation.  One could increase this value to try harder
5396                  to avoid any possible code expansion due to register
5397                  allocation issues; however experiments have shown that
5398                  the vast majority of hoistable expressions are only movable
5399                  from two successors, so raising this threshhold is likely
5400                  to nullify any benefit we get from code hoisting.  */
5401               if (hoistable > 1)
5402                 {
5403                   SET_BIT (hoist_exprs[bb], i);
5404                   found = 1;
5405                 }
5406             }
5407         }
5408
5409       /* If we found nothing to hoist, then quit now.  */
5410       if (! found)
5411         continue;
5412
5413       /* Loop over all the hoistable expressions.  */
5414       for (i = 0; i < hoist_exprs[bb]->n_bits; i++)
5415         {
5416           /* We want to insert the expression into BB only once, so
5417              note when we've inserted it.  */
5418           insn_inserted_p = 0;
5419
5420           /* These tests should be the same as the tests above.  */
5421           if (TEST_BIT (hoist_vbeout[bb], i))
5422             {
5423               /* We've found a potentially hoistable expression, now
5424                  we look at every block BB dominates to see if it
5425                  computes the expression.  */
5426               for (dominated = 0; dominated < n_basic_blocks; dominated++)
5427                 {
5428                   /* Ignore self dominance.  */
5429                   if (bb == dominated
5430                       || ! TEST_BIT (dominators[dominated], bb))
5431                     continue;
5432
5433                   /* We've found a dominated block, now see if it computes
5434                      the busy expression and whether or not moving that
5435                      expression to the "beginning" of that block is safe.  */
5436                   if (!TEST_BIT (antloc[dominated], i))
5437                     continue;
5438
5439                   /* The expression is computed in the dominated block and
5440                      it would be safe to compute it at the start of the
5441                      dominated block.  Now we have to determine if the
5442                      expresion would reach the dominated block if it was
5443                      placed at the end of BB.  */
5444                   if (hoist_expr_reaches_here_p (bb, i, dominated, NULL))
5445                     {
5446                       struct expr *expr = index_map[i];
5447                       struct occr *occr = expr->antic_occr;
5448                       rtx insn;
5449                       rtx set;
5450
5451                       /* Find the right occurence of this expression.  */
5452                       while (BLOCK_NUM (occr->insn) != dominated && occr)
5453                         occr = occr->next;
5454
5455                       /* Should never happen.  */
5456                       if (!occr)
5457                         abort ();
5458
5459                       insn = occr->insn;
5460
5461                       set = single_set (insn);
5462                       if (! set)
5463                         abort ();
5464
5465                       /* Create a pseudo-reg to store the result of reaching
5466                          expressions into.  Get the mode for the new pseudo
5467                          from the mode of the original destination pseudo.  */
5468                       if (expr->reaching_reg == NULL)
5469                         expr->reaching_reg
5470                           = gen_reg_rtx (GET_MODE (SET_DEST (set)));
5471
5472                       /* In theory this should never fail since we're creating
5473                          a reg->reg copy.
5474
5475                          However, on the x86 some of the movXX patterns
5476                          actually contain clobbers of scratch regs.  This may
5477                          cause the insn created by validate_change to not
5478                          match any pattern and thus cause validate_change to
5479                          fail.  */
5480                       if (validate_change (insn, &SET_SRC (set),
5481                                            expr->reaching_reg, 0))
5482                         {
5483                           occr->deleted_p = 1;
5484                           if (!insn_inserted_p)
5485                             {
5486                               insert_insn_end_bb (index_map[i], bb, 0);
5487                               insn_inserted_p = 1;
5488                             }
5489                         }
5490                     }
5491                 }
5492             }
5493         }
5494     }
5495
5496     free (index_map);
5497 }
5498
5499 /* Top level routine to perform one code hoisting (aka unification) pass
5500
5501    Return non-zero if a change was made.  */
5502
5503 static int
5504 one_code_hoisting_pass ()
5505 {
5506   int changed = 0;
5507
5508   alloc_expr_hash_table (max_cuid);
5509   compute_expr_hash_table ();
5510   if (gcse_file)
5511     dump_hash_table (gcse_file, "Code Hosting Expressions", expr_hash_table,
5512                      expr_hash_table_size, n_exprs);
5513
5514   if (n_exprs > 0)
5515     {
5516       alloc_code_hoist_mem (n_basic_blocks, n_exprs);
5517       compute_code_hoist_data ();
5518       hoist_code ();
5519       free_code_hoist_mem ();
5520     }
5521
5522   free_expr_hash_table ();
5523
5524   return changed;
5525 }