gcc/gcse.c

   1 /* Global common subexpression elimination/Partial redundancy elimination
   2    and global constant/copy propagation for GNU compiler.
   3    Copyright (C) 1997, 1998, 1999, 2000 Free Software Foundation, Inc.
   4
   5 This file is part of GNU CC.
   6
   7 GNU CC is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 2, or (at your option)
  10 any later version.
  11
  12 GNU CC is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GNU CC; see the file COPYING.  If not, write to
  19 the Free Software Foundation, 59 Temple Place - Suite 330,
  20 Boston, MA 02111-1307, USA.  */
  21
  22 /* TODO
  23    - reordering of memory allocation and freeing to be more space efficient
  24    - do rough calc of how many regs are needed in each block, and a rough
  25      calc of how many regs are available in each class and use that to
  26      throttle back the code in cases where RTX_COST is minimal.
  27    - dead store elimination
  28    - a store to the same address as a load does not kill the load if the
  29      source of the store is also the destination of the load.  Handling this
  30      allows more load motion, particularly out of loops.
  31    - ability to realloc sbitmap vectors would allow one initial computation
  32      of reg_set_in_block with only subsequent additions, rather than
  33      recomputing it for each pass
  34
  35 */
  36
  37 /* References searched while implementing this.
  38
  39    Compilers Principles, Techniques and Tools
  40    Aho, Sethi, Ullman
  41    Addison-Wesley, 1988
  42
  43    Global Optimization by Suppression of Partial Redundancies
  44    E. Morel, C. Renvoise
  45    Communications of the ACM, Vol. 22, Num. 2, Feb. 1979
  46
  47    A Portable Machine-Independent Global Optimizer - Design and Measurements
  48    Frederick Chow
  49    Stanford Ph.D. thesis, Dec. 1983
  50
  51    A Fast Algorithm for Code Movement Optimization
  52    D.M. Dhamdhere
  53    SIGPLAN Notices, Vol. 23, Num. 10, Oct. 1988
  54
  55    A Solution to a Problem with Morel and Renvoise's
  56    Global Optimization by Suppression of Partial Redundancies
  57    K-H Drechsler, M.P. Stadel
  58    ACM TOPLAS, Vol. 10, Num. 4, Oct. 1988
  59
  60    Practical Adaptation of the Global Optimization
  61    Algorithm of Morel and Renvoise
  62    D.M. Dhamdhere
  63    ACM TOPLAS, Vol. 13, Num. 2, Apr. 1991
  64
  65    Efficiently Computing Static Single Assignment Form and the Control
  66    Dependence Graph
  67    R. Cytron, J. Ferrante, B.K. Rosen, M.N. Wegman, and F.K. Zadeck
  68    ACM TOPLAS, Vol. 13, Num. 4, Oct. 1991
  69
  70    Lazy Code Motion
  71    J. Knoop, O. Ruthing, B. Steffen
  72    ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI
  73
  74    What's In a Region?  Or Computing Control Dependence Regions in Near-Linear
  75    Time for Reducible Flow Control
  76    Thomas Ball
  77    ACM Letters on Programming Languages and Systems,
  78    Vol. 2, Num. 1-4, Mar-Dec 1993
  79
  80    An Efficient Representation for Sparse Sets
  81    Preston Briggs, Linda Torczon
  82    ACM Letters on Programming Languages and Systems,
  83    Vol. 2, Num. 1-4, Mar-Dec 1993
  84
  85    A Variation of Knoop, Ruthing, and Steffen's Lazy Code Motion
  86    K-H Drechsler, M.P. Stadel
  87    ACM SIGPLAN Notices, Vol. 28, Num. 5, May 1993
  88
  89    Partial Dead Code Elimination
  90    J. Knoop, O. Ruthing, B. Steffen
  91    ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
  92
  93    Effective Partial Redundancy Elimination
  94    P. Briggs, K.D. Cooper
  95    ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
  96
  97    The Program Structure Tree: Computing Control Regions in Linear Time
  98    R. Johnson, D. Pearson, K. Pingali
  99    ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
 100
 101    Optimal Code Motion: Theory and Practice
 102    J. Knoop, O. Ruthing, B. Steffen
 103    ACM TOPLAS, Vol. 16, Num. 4, Jul. 1994
 104
 105    The power of assignment motion
 106    J. Knoop, O. Ruthing, B. Steffen
 107    ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI
 108
 109    Global code motion / global value numbering
 110    C. Click
 111    ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI
 112
 113    Value Driven Redundancy Elimination
 114    L.T. Simpson
 115    Rice University Ph.D. thesis, Apr. 1996
 116
 117    Value Numbering
 118    L.T. Simpson
 119    Massively Scalar Compiler Project, Rice University, Sep. 1996
 120
 121    High Performance Compilers for Parallel Computing
 122    Michael Wolfe
 123    Addison-Wesley, 1996
 124
 125    Advanced Compiler Design and Implementation
 126    Steven Muchnick
 127    Morgan Kaufmann, 1997
 128
 129    Building an Optimizing Compiler
 130    Robert Morgan
 131    Digital Press, 1998
 132
 133    People wishing to speed up the code here should read:
 134      Elimination Algorithms for Data Flow Analysis
 135      B.G. Ryder, M.C. Paull
 136      ACM Computing Surveys, Vol. 18, Num. 3, Sep. 1986
 137
 138      How to Analyze Large Programs Efficiently and Informatively
 139      D.M. Dhamdhere, B.K. Rosen, F.K. Zadeck
 140      ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI
 141
 142    People wishing to do something different can find various possibilities
 143    in the above papers and elsewhere.
 144 */
 145
 146 #include "config.h"
 147 #include "system.h"
 148 #include "toplev.h"
 149
 150 #include "rtl.h"
 151 #include "tm_p.h"
 152 #include "regs.h"
 153 #include "hard-reg-set.h"
 154 #include "flags.h"
 155 #include "real.h"
 156 #include "insn-config.h"
 157 #include "recog.h"
 158 #include "basic-block.h"
 159 #include "output.h"
 160 #include "function.h"
 161 #include "expr.h"
 162
 163 #include "obstack.h"
 164 #define obstack_chunk_alloc gmalloc
 165 #define obstack_chunk_free free
 166
 167 /* Maximum number of passes to perform; bounds the pass loop in gcse_main.  */
 168 #define MAX_PASSES 1
 169
 170 /* Propagate flow information through back edges and thus enable PRE's
 171    moving loop invariant calculations out of loops.
 172
 173    Originally this tended to create worse overall code, but several
 174    improvements during the development of PRE seem to have made following
 175    back edges generally a win.
 176
 177    Note that much of the loop invariant code motion done here would normally
 178    be done by loop.c, which has more heuristics for when to move invariants
 179    out of loops.  At some point we might need to move some of those
 180    heuristics into gcse.c.  */
 181 #define FOLLOW_BACK_EDGES 1
 182
 183 /* We support GCSE via Partial Redundancy Elimination.  PRE optimizations
 184    are a superset of those done by GCSE.
 185
 186    We perform the following steps:
 187
 188    1) Compute basic block information.
 189
 190    2) Compute table of places where registers are set.
 191
 192    3) Perform copy/constant propagation.
 193
 194    4) Perform global cse.
 195
 196    5) Perform another pass of copy/constant propagation.
 197
 198    Two passes of copy/constant propagation are done because the first one
 199    enables more GCSE and the second one helps to clean up the copies that
 200    GCSE creates.  This is needed more for PRE than for Classic because Classic
 201    GCSE will try to use an existing register containing the common
 202    subexpression rather than create a new one.  This is harder to do for PRE
 203    because of the code motion (which Classic GCSE doesn't do).
 204
 205    Expressions we are interested in GCSE-ing are of the form
 206    (set (pseudo-reg) (expression)).
 207    Function want_to_gcse_p says what these are.
 208
 209    PRE handles moving invariant expressions out of loops (by treating them as
 210    partially redundant).
 211
 212    Eventually it would be nice to replace cse.c/gcse.c with SSA (static single
 213    assignment) based GVN (global value numbering).  L. T. Simpson's paper
 214    (Rice University) on value numbering is a useful reference for this.
 215
 216    **********************
 217
 218    We used to support multiple passes but there are diminishing returns in
 219    doing so.  The first pass usually makes 90% of the changes that are doable.
 220    A second pass can make a few more changes made possible by the first pass.
 221    Experiments show any further passes don't make enough changes to justify
 222    the expense.
 223
 224    A study of spec92 using an unlimited number of passes:
 225    [1 pass] = 1208 substitutions, [2] = 577, [3] = 202, [4] = 192, [5] = 83,
 226    [6] = 34, [7] = 17, [8] = 9, [9] = 4, [10] = 4, [11] = 2,
 227    [12] = 2, [13] = 1, [15] = 1, [16] = 2, [41] = 1
 228
 229    It was found doing copy propagation between each pass enables further
 230    substitutions.
 231
 232    PRE is quite expensive in complicated functions because the DFA can take
 233    a while to converge.  Hence we only perform one pass.  Macro MAX_PASSES can
 234    be modified if one wants to experiment.
 235
 236    **********************
 237
 238    The steps for PRE are:
 239
 240    1) Build the hash table of expressions we wish to GCSE (expr_hash_table).
 241
 242    2) Perform the data flow analysis for PRE.
 243
 244    3) Delete the redundant instructions
 245
 246    4) Insert the required copies [if any] that make the partially
 247       redundant instructions fully redundant.
 248
 249    5) For other reaching expressions, insert an instruction to copy the value
 250       to a newly created pseudo that will reach the redundant instruction.
 251
 252    The deletion is done first so that when we do insertions we
 253    know which pseudo reg to use.
 254
 255    Various papers have argued that PRE DFA is expensive (O(n^2)) and others
 256    argue it is not.  The number of iterations for the algorithm to converge
 257    is typically 2-4 so I don't view it as that expensive (relatively speaking).
 258
 259    PRE GCSE depends heavily on the second CSE pass to clean up the copies
 260    we create.  To make an expression reach the place where it's redundant,
 261    the result of the expression is copied to a new register, and the redundant
 262    expression is deleted by replacing it with this new register.  Classic GCSE
 263    doesn't have this problem as much as it computes the reaching defs of
 264    each register in each block and thus can try to use an existing register.
 265
 266    **********************
 267
 268    A fair bit of simplicity is created by creating small functions for simple
 269    tasks, even when the function is only called in one place.  This may
 270    measurably slow things down [or may not] by creating more function call
 271    overhead than is necessary.  The source is laid out so that it's trivial
 272    to make the affected functions inline so that one can measure what speed
 273    up, if any, can be achieved, and maybe later when things settle things can
 274    be rearranged.
 275
 276    Help stamp out big monolithic functions!  */
 277 \f
 278 /* GCSE global vars.  */
 279
 280 /* -dG dump file; gcse_main stores its FILE argument here.  */
 281 static FILE *gcse_file;
 282
 283 /* Note whether or not we should run jump optimization after gcse.  We
 284    want to do this in two cases:
 285
 286     * If we changed any jumps via cprop.
 287
 288     * If we added any labels via edge splitting.  */
 289
 290 static int run_jump_opt_after_gcse;
 291
 292 /* Bitmaps are normally not included in debugging dumps.
 293    However it's useful to be able to print them from GDB.
 294    We could create special functions for this, but it's simpler to
 295    just allow passing stderr to the dump_foo fns.  Since stderr can
 296    be a macro, we store a copy here.  */
 297 static FILE *debug_stderr;
 298
 299 /* An obstack for our working variables.  */
 300 static struct obstack gcse_obstack;
 301
 302 /* Non-zero for each mode that supports (set (reg) (reg)).
 303    This is trivially true for integer and floating point values.
 304    It may or may not be true for condition codes.  */
 305 static char can_copy_p[(int) NUM_MACHINE_MODES];
 306
 307 /* Non-zero once gcse_main has called compute_can_copy to set up can_copy_p.  */
 308 static int can_copy_init_p;
 309
 310 struct reg_use {rtx reg_rtx; };  /* Holds a single rtx, `reg_rtx'.  Passed by
                                         pointer to cprop_jump and, under
                                         HAVE_cc0, to cprop_cc0_jump.  */
 311
 312 /* An entry in the expression and copy propagation (set) hash tables.  */
 313
 314 struct expr
 315 {
 316   /* The expression (SET_SRC for expressions, PATTERN for assignments).  */
 317   rtx expr;
 318   /* Index in the available expression bitmaps.  */
 319   int bitmap_index;
 320   /* Next entry with the same hash.  */
 321   struct expr *next_same_hash;
 322   /* List of anticipatable occurrences in basic blocks in the function.
 323      An "anticipatable occurrence" is one that is the first occurrence in the
 324      basic block, the operands are not modified in the basic block prior
 325      to the occurrence and the output is not used between the start of
 326      the block and the occurrence.  */
 327   struct occr *antic_occr;
 328   /* List of available occurrences in basic blocks in the function.
 329      An "available occurrence" is one that is the last occurrence in the
 330      basic block and the operands are not modified by following statements in
 331      the basic block [including this insn].  */
 332   struct occr *avail_occr;
 333   /* Non-null if the computation is PRE redundant.
 334      The value is the newly created pseudo-reg to record a copy of the
 335      expression in all the places that reach the redundant copy.  */
 336   rtx reaching_reg;
 337 };
 338
 339 /* Occurrence of an expression (element of an antic_occr/avail_occr list).
 340    There is one per basic block.  If a pattern appears more than once the
 341    last appearance is used [or first for anticipatable expressions].  */
 342
 343 struct occr
 344 {
 345   /* Next occurrence of this expression.  */
 346   struct occr *next;
 347   /* The insn that computes the expression.  */
 348   rtx insn;
 349   /* Non-zero if this [anticipatable] occurrence has been deleted.  */
 350   char deleted_p;
 351   /* Non-zero if this [available] occurrence has been copied to
 352      reaching_reg.  */
 353   /* ??? This is mutually exclusive with deleted_p, so they could share
 354      the same byte.  */
 355   char copied_p;
 356 };
 357
 358 /* Expression and copy propagation hash tables.
 359    Each hash table is an array of buckets.
 360    ??? It is known that if it were an array of entries, structure elements
 361    `next_same_hash' and `bitmap_index' wouldn't be necessary.  However, it is
 362    not clear whether in the final analysis a sufficient amount of memory would
 363    be saved as the size of the available expression bitmaps would be larger
 364    [one could build a mapping table without holes afterwards though].
 365    Someday I'll perform the computation and figure it out.  */
 366
 367 /* Total size of the expression hash table, in elements.  */
 368 static unsigned int expr_hash_table_size;
 369
 370 /* The table itself.
 371    This is an array of `expr_hash_table_size' elements.  */
 372 static struct expr **expr_hash_table;
 373
 374 /* Total size of the copy propagation hash table, in elements.  */
 375 static int set_hash_table_size;  /* NOTE(review): signed, whereas
                                         expr_hash_table_size is unsigned;
                                         confirm whether that is intended.  */
 376
 377 /* The table itself.
 378    This is an array of `set_hash_table_size' elements.  */
 379 static struct expr **set_hash_table;
 380
 381 /* Mapping of uids to cuids.
 382    Only real insns get cuids.  */
 383 static int *uid_cuid;
 384
 385 /* Highest UID in UID_CUID.  */
 386 static int max_uid;
 387
 388 /* Get the cuid of an insn.  Under ENABLE_CHECKING this aborts if the insn's
        uid exceeds max_uid; that variant evaluates INSN more than once.  */
 389 #ifdef ENABLE_CHECKING
 390 #define INSN_CUID(INSN) (INSN_UID (INSN) > max_uid ? (abort (), 0) : uid_cuid[INSN_UID (INSN)])
 391 #else
 392 #define INSN_CUID(INSN) (uid_cuid[INSN_UID (INSN)])
 393 #endif /* ENABLE_CHECKING */
 394
 395 /* Number of cuids.  */
 396 static int max_cuid;
 397
 398 /* Mapping of cuids to insns.  */
 399 static rtx *cuid_insn;
 400
 401 /* Get insn from cuid.  No bounds check is made on CUID.  */
 402 #define CUID_INSN(CUID) (cuid_insn[CUID])
 403
 404 /* Maximum register number in function prior to doing gcse + 1.
 405    Registers created during this pass have regno >= max_gcse_regno.
 406    This is named with "gcse" to not collide with global of same name.  */
 407 static unsigned int max_gcse_regno;
 408
 409 /* Maximum number of cse-able expressions found.  */
 410 static int n_exprs;
 411
 412 /* Maximum number of assignments for copy propagation found.  */
 413 static int n_sets;
 414
 415 /* Table of registers that are modified.
 416
 417    For each register, each element is a list of places where the pseudo-reg
 418    is set.
 419
 420    For simplicity, GCSE is done on sets of pseudo-regs only.  PRE GCSE only
 421    requires knowledge of which blocks kill which regs [and thus could use
 422    a bitmap instead of the lists `reg_set_table' uses].
 423
 424    `reg_set_table' could be turned into an array of bitmaps (num-bbs x
 425    num-regs) [however perhaps it may be useful to keep the data as is].  One
 426    advantage of recording things this way is that `reg_set_table' is fairly
 427    sparse with respect to pseudo regs but for hard regs could be fairly dense
 428    [relatively speaking].  And recording sets of pseudo-regs in lists speeds
 429    up functions like compute_transp since in the case of pseudo-regs we only
 430    need to iterate over the number of times a pseudo-reg is set, not over the
 431    number of basic blocks [clearly there is a bit of a slow down in the cases
 432    where a pseudo is set more than once in a block, however it is believed
 433    that the net effect is to speed things up].  This isn't done for hard-regs
 434    because recording call-clobbered hard-regs in `reg_set_table' at each
 435    function call can consume a fair bit of memory, and iterating over
 436    hard-regs stored this way in compute_transp will be more expensive.  */
 437
 438 typedef struct reg_set
 439 {
 440   /* The next setting of this register.  */
 441   struct reg_set *next;
 442   /* The insn where it was set.  */
 443   rtx insn;
 444 } reg_set;
 445
 446 static reg_set **reg_set_table;
 447
 448 /* Size of `reg_set_table'.
 449    The table starts out at max_gcse_regno + slop, and is enlarged as
 450    necessary.  */
 451 static int reg_set_table_size;
 452
 453 /* Amount to grow `reg_set_table' by when it's full.  */
 454 #define REG_SET_TABLE_SLOP 100
 455
 456 /* Bitmap containing one bit for each register in the program.
 457    Used when performing GCSE to track which registers have been set since
 458    the start of the basic block.  */
 459 static sbitmap reg_set_bitmap;
 460
 461 /* For each block, a bitmap of registers set in the block.
 462    This is used by expr_killed_p and compute_transp.
 463    It is computed during hash table computation and not by compute_sets
 464    as it includes registers added since the last pass (or between cprop and
 465    gcse) and it's currently not easy to realloc sbitmap vectors.  */
 466 static sbitmap *reg_set_in_block;
 467
 468 /* For each block, non-zero if memory is set in that block.
 469    This is computed during hash table computation and is used by
 470    expr_killed_p and compute_transp.
 471    ??? Handling of memory is very simple; we don't make any attempt
 472    to optimize things (later).
 473    ??? This can be computed by compute_sets since the information
 474    doesn't change.  */
 475 static char *mem_set_in_block;
 476
 477 /* Various variables for statistics gathering.  */
 478
 479 /* Memory used in a pass.
 480    This isn't intended to be absolutely precise.  Its intent is only
 481    to keep an eye on memory usage.  */
 482 static int bytes_used;
 483
 484 /* GCSE substitutions made.  */
 485 static int gcse_subst_count;
 486 /* Number of copy instructions created.  */
 487 static int gcse_create_count;
 488 /* Number of constants propagated.  */
 489 static int const_prop_count;
 490 /* Number of copies propagated.  */
 491 static int copy_prop_count;
 492 \f
 493 /* These variables are used by classic GCSE.
 494    Normally they'd be defined a bit later, but `rd_gen' needs to
 495    be declared sooner.  */
 496
 497 /* Each block has a bitmap of each type.
 498    The length of each block's bitmap is:
 499
 500        max_cuid  - for reaching definitions
 501        n_exprs - for available expressions
 502
 503    Thus we view the bitmaps as 2 dimensional arrays, i.e.
 504    rd_kill[block_num][cuid_num]
 505    ae_kill[block_num][expr_num]                  */
 506
 507 /* For reaching defs.  */
 508 static sbitmap *rd_kill, *rd_gen, *reaching_defs, *rd_out;
 509
 510 /* For available exprs.  */
 511 static sbitmap *ae_kill, *ae_gen, *ae_in, *ae_out;
 512
 513 /* Objects of this type are passed around by the null-pointer check
 514    removal routines (delete_null_pointer_checks_1 takes a pointer to one).  */
 515 struct null_pointer_info
 516 {
 517   /* The basic block being processed.  */
 518   int current_block;
 519   /* The first register to be handled in this pass.  */
 520   unsigned int min_reg;
 521   /* One greater than the last register to be handled in this pass.  */
 522   unsigned int max_reg;
 523   sbitmap *nonnull_local;   /* NOTE(review): undocumented sbitmap vector;
                                  presumably per-block "known non-null"
                                  information -- confirm against the users.  */
 524   sbitmap *nonnull_killed;  /* NOTE(review): undocumented sbitmap vector;
                                  presumably per-block kills of the above --
                                  confirm against the users.  */
 525 };
 526 \f
 527 static void compute_can_copy    PARAMS ((void));
 528 static char *gmalloc            PARAMS ((unsigned int));
 529 static char *grealloc           PARAMS ((char *, unsigned int));
 530 static char *gcse_alloc         PARAMS ((unsigned long)); /* NOTE(review):
        takes unsigned long, whereas gmalloc/grealloc take unsigned int.  */
 531 static void alloc_gcse_mem      PARAMS ((rtx));
 532 static void free_gcse_mem       PARAMS ((void));
 533 static void alloc_reg_set_mem   PARAMS ((int));
 534 static void free_reg_set_mem    PARAMS ((void));
 535 static int get_bitmap_width     PARAMS ((int, int, int));
 536 static void record_one_set      PARAMS ((int, rtx));
 537 static void record_set_info     PARAMS ((rtx, rtx, void *));
 538 static void compute_sets        PARAMS ((rtx));
 539 static void hash_scan_insn      PARAMS ((rtx, int, int));
 540 static void hash_scan_set       PARAMS ((rtx, rtx, int));
 541 static void hash_scan_clobber   PARAMS ((rtx, rtx));
 542 static void hash_scan_call      PARAMS ((rtx, rtx));
 543 static int want_to_gcse_p       PARAMS ((rtx));
 544 static int oprs_unchanged_p     PARAMS ((rtx, rtx, int));
 545 static int oprs_anticipatable_p PARAMS ((rtx, rtx));
 546 static int oprs_available_p     PARAMS ((rtx, rtx));
 547 static void insert_expr_in_table PARAMS ((rtx, enum machine_mode, rtx,
 548                                           int, int));
 549 static void insert_set_in_table PARAMS ((rtx, rtx));
 550 static unsigned int hash_expr   PARAMS ((rtx, enum machine_mode, int *, int));
 551 static unsigned int hash_expr_1 PARAMS ((rtx, enum machine_mode, int *));
 552 static unsigned int hash_set    PARAMS ((int, int));
 553 static int expr_equiv_p         PARAMS ((rtx, rtx));
 554 static void record_last_reg_set_info PARAMS ((rtx, int));
 555 static void record_last_mem_set_info PARAMS ((rtx));
 556 static void record_last_set_info PARAMS ((rtx, rtx, void *));
 557 static void compute_hash_table  PARAMS ((int));
 558 static void alloc_set_hash_table PARAMS ((int));
 559 static void free_set_hash_table PARAMS ((void));
 560 static void compute_set_hash_table PARAMS ((void));
 561 static void alloc_expr_hash_table PARAMS ((unsigned int)); /* NOTE(review):
        unsigned int here but int for alloc_set_hash_table above.  */
 562 static void free_expr_hash_table PARAMS ((void));
 563 static void compute_expr_hash_table PARAMS ((void));
 564 static void dump_hash_table     PARAMS ((FILE *, const char *, struct expr **,
 565                                          int, int));
 566 static struct expr *lookup_expr PARAMS ((rtx));
 567 static struct expr *lookup_set  PARAMS ((unsigned int, rtx));
 568 static struct expr *next_set    PARAMS ((unsigned int, struct expr *));
 569 static void reset_opr_set_tables PARAMS ((void));
 570 static int oprs_not_set_p       PARAMS ((rtx, rtx));
 571 static void mark_call           PARAMS ((rtx));
 572 static void mark_set            PARAMS ((rtx, rtx));
 573 static void mark_clobber        PARAMS ((rtx, rtx));
 574 static void mark_oprs_set       PARAMS ((rtx));
 575 static void alloc_cprop_mem     PARAMS ((int, int));
 576 static void free_cprop_mem      PARAMS ((void));
 577 static void compute_transp      PARAMS ((rtx, int, sbitmap *, int));
 578 static void compute_transpout   PARAMS ((void));
 579 static void compute_local_properties PARAMS ((sbitmap *, sbitmap *, sbitmap *,
 580                                               int));
 581 static void compute_cprop_data  PARAMS ((void));
 582 static void find_used_regs      PARAMS ((rtx));
 583 static int try_replace_reg      PARAMS ((rtx, rtx, rtx));
 584 static struct expr *find_avail_set PARAMS ((int, rtx));
 585 static int cprop_jump           PARAMS ((rtx, rtx, struct reg_use *, rtx));
 586 #ifdef HAVE_cc0
 587 static int cprop_cc0_jump       PARAMS ((rtx, struct reg_use *, rtx));
 588 #endif /* HAVE_cc0 */
 589 static int cprop_insn           PARAMS ((rtx, int));
 590 static int cprop                PARAMS ((int));
 591 static int one_cprop_pass       PARAMS ((int, int));
 592 static void alloc_pre_mem       PARAMS ((int, int));
 593 static void free_pre_mem        PARAMS ((void));
 594 static void compute_pre_data    PARAMS ((void));
 595 static int pre_expr_reaches_here_p PARAMS ((int, struct expr *, int));
 596 static void insert_insn_end_bb  PARAMS ((struct expr *, int, int));
 597 static void pre_insert_copy_insn PARAMS ((struct expr *, rtx));
 598 static void pre_insert_copies   PARAMS ((void));
 599 static int pre_delete           PARAMS ((void));
 600 static int pre_gcse             PARAMS ((void));
 601 static int one_pre_gcse_pass    PARAMS ((int));
 602 static void add_label_notes     PARAMS ((rtx, rtx));
 603 static void alloc_code_hoist_mem PARAMS ((int, int));
 604 static void free_code_hoist_mem PARAMS ((void));
 605 static void compute_code_hoist_vbeinout PARAMS ((void));
 606 static void compute_code_hoist_data PARAMS ((void));
 607 static int hoist_expr_reaches_here_p PARAMS ((int, int, int, char *));
 608 static void hoist_code          PARAMS ((void));
 609 static int one_code_hoisting_pass PARAMS ((void));
 610 static void alloc_rd_mem        PARAMS ((int, int));
 611 static void free_rd_mem         PARAMS ((void));
 612 static void handle_rd_kill_set  PARAMS ((rtx, int, int));
 613 static void compute_kill_rd     PARAMS ((void));
 614 static void compute_rd          PARAMS ((void));
 615 static void alloc_avail_expr_mem PARAMS ((int, int));
 616 static void free_avail_expr_mem PARAMS ((void));
 617 static void compute_ae_gen      PARAMS ((void));
 618 static int expr_killed_p        PARAMS ((rtx, int));
 619 static void compute_ae_kill     PARAMS ((sbitmap *, sbitmap *));
 620 static int expr_reaches_here_p  PARAMS ((struct occr *, struct expr *,
 621                                          int, int));
 622 static rtx computing_insn       PARAMS ((struct expr *, rtx));
 623 static int def_reaches_here_p   PARAMS ((rtx, rtx));
 624 static int can_disregard_other_sets PARAMS ((struct reg_set **, rtx, int));
 625 static int handle_avail_expr    PARAMS ((rtx, struct expr *));
 626 static int classic_gcse         PARAMS ((void));
 627 static int one_classic_gcse_pass PARAMS ((int));
 628 static void invalidate_nonnull_info PARAMS ((rtx, rtx, void *));
 629 static void delete_null_pointer_checks_1 PARAMS ((unsigned int *, sbitmap *,
 630                                                   sbitmap *,
 631                                                   struct null_pointer_info *));
 632 static rtx process_insert_insn  PARAMS ((struct expr *));
 633 static int pre_edge_insert      PARAMS ((struct edge_list *, struct expr **));
 634 static int expr_reaches_here_p_work PARAMS ((struct occr *, struct expr *,
 635                                              int, int, char *));
 636 static int pre_expr_reaches_here_p_work PARAMS ((int, struct expr *,
 637                                                  int, char *));
 638 \f
 639 /* Entry point for global common subexpression elimination.
 640    F is the first instruction in the function.  */
 641
 642 int
 643 gcse_main (f, file)
 644      rtx f;
 645      FILE *file;
 646 {
 647   int changed, pass;
 648   /* Bytes used at start of pass.  */
 649   int initial_bytes_used;
 650   /* Maximum number of bytes used by a pass.  */
 651   int max_pass_bytes;
 652   /* Point to release obstack data from for each pass.  */
 653   char *gcse_obstack_bottom;
 654
 655   /* We do not construct an accurate cfg in functions which call
 656      setjmp, so just punt to be safe.  */
 657   if (current_function_calls_setjmp)
 658     return 0;
 659
 660   /* Assume that we do not need to run jump optimizations after gcse.  */
 661   run_jump_opt_after_gcse = 0;
 662
 663   /* For calling dump_foo fns from gdb.  */
 664   debug_stderr = stderr;
 665   gcse_file = file;
 666
 667   /* Identify the basic block information for this function, including
 668      successors and predecessors.  */
 669   max_gcse_regno = max_reg_num ();
 670
 671   if (file)
 672     dump_flow_info (file);
 673
 674   /* Return if there's nothing to do.  */
 675   if (n_basic_blocks <= 1)
 676     return 0;
 677
 678   /* Trying to perform global optimizations on flow graphs which have
 679      a high connectivity will take a long time and is unlikely to be
 680      particularly useful.
 681
 682      In normal circumstances a cfg should have about twice as many edges
 683      as blocks.  But we do not want to punish small functions which have
 684      a couple switch statements.  So we require a relatively large number
 685      of basic blocks and the ratio of edges to blocks to be high.  */
 686   if (n_basic_blocks > 1000 && n_edges / n_basic_blocks >= 20)
 687     return 0;
 688
 689   /* See what modes support reg/reg copy operations.  */
 690   if (! can_copy_init_p)
 691     {
 692       compute_can_copy ();
 693       can_copy_init_p = 1;
 694     }
 695
 696   gcc_obstack_init (&gcse_obstack);
 697   bytes_used = 0;
 698
 699   /* Record where pseudo-registers are set.  This data is kept accurate
 700      during each pass.  ??? We could also record hard-reg information here
 701      [since it's unchanging], however it is currently done during hash table
 702      computation.
 703
 704      It may be tempting to compute MEM set information here too, but MEM sets
 705      will be subject to code motion one day and thus we need to compute
 706      information about memory sets when we build the hash tables.  */
 707
 708   alloc_reg_set_mem (max_gcse_regno);
 709   compute_sets (f);
 710
 711   pass = 0;
 712   initial_bytes_used = bytes_used;
 713   max_pass_bytes = 0;
 714   gcse_obstack_bottom = gcse_alloc (1);
 715   changed = 1;
 716   while (changed && pass < MAX_PASSES)
 717     {
 718       changed = 0;
 719       if (file)
 720         fprintf (file, "GCSE pass %d\n\n", pass + 1);
 721
 722       /* Initialize bytes_used to the space for the pred/succ lists,
 723          and the reg_set_table data.  */
 724       bytes_used = initial_bytes_used;
 725
 726       /* Each pass may create new registers, so recalculate each time.  */
 727       max_gcse_regno = max_reg_num ();
 728
 729       alloc_gcse_mem (f);
 730
 731       /* Don't allow constant propagation to modify jumps
 732          during this pass.  */
 733       changed = one_cprop_pass (pass + 1, 0);
 734
 735       if (optimize_size)
 736         changed |= one_classic_gcse_pass (pass + 1);
 737       else
 738         {
 739           changed |= one_pre_gcse_pass (pass + 1);
 740           free_reg_set_mem ();
 741           alloc_reg_set_mem (max_reg_num ());
 742           compute_sets (f);
 743           run_jump_opt_after_gcse = 1;
 744         }
 745
 746       if (max_pass_bytes < bytes_used)
 747         max_pass_bytes = bytes_used;
 748
 749       /* Free up memory, then reallocate for code hoisting.  We can
 750          not re-use the existing allocated memory because the tables
 751          will not have info for the insns or registers created by
 752          partial redundancy elimination.  */
 753       free_gcse_mem ();
 754
 755       /* It does not make sense to run code hoisting unless we optimizing
 756          for code size -- it rarely makes programs faster, and can make
 757          them bigger if we did partial redundancy elimination (when optimizing
 758          for space, we use a classic gcse algorithm instead of partial
 759          redundancy algorithms).  */
 760       if (optimize_size)
 761         {
 762           max_gcse_regno = max_reg_num ();
 763           alloc_gcse_mem (f);
 764           changed |= one_code_hoisting_pass ();
 765           free_gcse_mem ();
 766
 767           if (max_pass_bytes < bytes_used)
 768             max_pass_bytes = bytes_used;
 769         }
 770
 771       if (file)
 772         {
 773           fprintf (file, "\n");
 774           fflush (file);
 775         }
 776
 777       obstack_free (&gcse_obstack, gcse_obstack_bottom);
 778       pass++;
 779     }
 780
 781   /* Do one last pass of copy propagation, including cprop into
 782      conditional jumps.  */
 783
 784   max_gcse_regno = max_reg_num ();
 785   alloc_gcse_mem (f);
 786   /* This time, go ahead and allow cprop to alter jumps.  */
 787   one_cprop_pass (pass + 1, 1);
 788   free_gcse_mem ();
 789
 790   if (file)
 791     {
 792       fprintf (file, "GCSE of %s: %d basic blocks, ",
 793                current_function_name, n_basic_blocks);
 794       fprintf (file, "%d pass%s, %d bytes\n\n",
 795                pass, pass > 1 ? "es" : "", max_pass_bytes);
 796     }
 797
 798   obstack_free (&gcse_obstack, NULL_PTR);
 799   free_reg_set_mem ();
 800   return run_jump_opt_after_gcse;
 801 }
 802 \f
 803 /* Misc. utilities.  */
 804
 805 /* Compute which modes support reg/reg copy operations.  */
 806
 807 static void
 808 compute_can_copy ()
 809 {
 810   int i;
 811 #ifndef AVOID_CCMODE_COPIES
 812   rtx reg,insn;
 813 #endif
 814   char *free_point = (char *) oballoc (1);
 815
 816   bzero (can_copy_p, NUM_MACHINE_MODES);
 817
 818   start_sequence ();
 819   for (i = 0; i < NUM_MACHINE_MODES; i++)
 820     if (GET_MODE_CLASS (i) == MODE_CC)
 821       {
 822 #ifdef AVOID_CCMODE_COPIES
 823         can_copy_p[i] = 0;
 824 #else
 825         reg = gen_rtx_REG ((enum machine_mode) i, LAST_VIRTUAL_REGISTER + 1);
 826         insn = emit_insn (gen_rtx_SET (VOIDmode, reg, reg));
 827         if (recog (PATTERN (insn), insn, NULL_PTR) >= 0)
 828           can_copy_p[i] = 1;
 829 #endif
 830       }
 831     else
 832       can_copy_p[i] = 1;
 833
 834   end_sequence ();
 835
 836   /* Free the objects we just allocated.  */
 837   obfree (free_point);
 838 }
 839 \f
 840 /* Cover function to xmalloc to record bytes allocated.  */
 841
 842 static char *
 843 gmalloc (size)
 844      unsigned int size;
 845 {
 846   bytes_used += size;
 847   return xmalloc (size);
 848 }
 849
 850 /* Cover function to xrealloc.
 851    We don't record the additional size since we don't know it.
 852    It won't affect memory usage stats much anyway.  */
 853
 854 static char *
 855 grealloc (ptr, size)
 856      char *ptr;
 857      unsigned int size;
 858 {
 859   return xrealloc (ptr, size);
 860 }
 861
 862 /* Cover function to obstack_alloc.
 863    We don't need to record the bytes allocated here since
 864    obstack_chunk_alloc is set to gmalloc.  */
 865
 866 static char *
 867 gcse_alloc (size)
 868      unsigned long size;
 869 {
 870   return (char *) obstack_alloc (&gcse_obstack, size);
 871 }
 872
 873 /* Allocate memory for the cuid mapping array,
 874    and reg/memory set tracking tables.
 875
 876    This is called at the start of each pass.  */
 877
 878 static void
 879 alloc_gcse_mem (f)
 880      rtx f;
 881 {
 882   int i,n;
 883   rtx insn;
 884
 885   /* Find the largest UID and create a mapping from UIDs to CUIDs.
 886      CUIDs are like UIDs except they increase monotonically, have no gaps,
 887      and only apply to real insns.  */
 888
 889   max_uid = get_max_uid ();
 890   n = (max_uid + 1) * sizeof (int);
 891   uid_cuid = (int *) gmalloc (n);
 892   bzero ((char *) uid_cuid, n);
 893   for (insn = f, i = 0; insn; insn = NEXT_INSN (insn))
 894     {
 895       if (INSN_P (insn))
 896         uid_cuid[INSN_UID (insn)] = i++;
 897       else
 898         uid_cuid[INSN_UID (insn)] = i;
 899     }
 900
 901   /* Create a table mapping cuids to insns.  */
 902
 903   max_cuid = i;
 904   n = (max_cuid + 1) * sizeof (rtx);
 905   cuid_insn = (rtx *) gmalloc (n);
 906   bzero ((char *) cuid_insn, n);
 907   for (insn = f, i = 0; insn; insn = NEXT_INSN (insn))
 908     if (INSN_P (insn))
 909       CUID_INSN (i++) = insn;
 910
 911   /* Allocate vars to track sets of regs.  */
 912   reg_set_bitmap = (sbitmap) sbitmap_alloc (max_gcse_regno);
 913
 914   /* Allocate vars to track sets of regs, memory per block.  */
 915   reg_set_in_block = (sbitmap *) sbitmap_vector_alloc (n_basic_blocks,
 916                                                        max_gcse_regno);
 917   mem_set_in_block = (char *) gmalloc (n_basic_blocks);
 918 }
 919
 920 /* Free memory allocated by alloc_gcse_mem.  */
 921
 922 static void
 923 free_gcse_mem ()
 924 {
 925   free (uid_cuid);
 926   free (cuid_insn);
 927
 928   free (reg_set_bitmap);
 929
 930   free (reg_set_in_block);
 931   free (mem_set_in_block);
 932 }
 933
 934 /* Many of the global optimization algorithms work by solving dataflow
 935    equations for various expressions.  Initially, some local value is
 936    computed for each expression in each block.  Then, the values across the
 937    various blocks are combined (by following flow graph edges) to arrive at
 938    global values.  Conceptually, each set of equations is independent.  We
 939    may therefore solve all the equations in parallel, solve them one at a
 940    time, or pick any intermediate approach.
 941
 942    When you're going to need N two-dimensional bitmaps, each X (say, the
 943    number of blocks) by Y (say, the number of expressions), call this
 944    function.  It's not important what X and Y represent; only that Y
 945    correspond to the things that can be done in parallel.  This function will
 946    return an appropriate chunking factor C; you should solve C sets of
 947    equations in parallel.  By going through this function, we can easily
 948    trade space against time; by solving fewer equations in parallel we use
 949    less space.  */
 950
 951 static int
 952 get_bitmap_width (n, x, y)
 953      int n;
 954      int x;
 955      int y;
 956 {
 957   /* It's not really worth figuring out *exactly* how much memory will
 958      be used by a particular choice.  The important thing is to get
 959      something approximately right.  */
 960   size_t max_bitmap_memory = 10 * 1024 * 1024;
 961
 962   /* The number of bytes we'd use for a single column of minimum
 963      width.  */
 964   size_t column_size = n * x * sizeof (SBITMAP_ELT_TYPE);
 965
 966   /* Often, it's reasonable just to solve all the equations in
 967      parallel.  */
 968   if (column_size * SBITMAP_SET_SIZE (y) <= max_bitmap_memory)
 969     return y;
 970
 971   /* Otherwise, pick the largest width we can, without going over the
 972      limit.  */
 973   return SBITMAP_ELT_BITS * ((max_bitmap_memory + column_size - 1)
 974                              / column_size);
 975 }
 976 \f
 977 /* Compute the local properties of each recorded expression.
 978
 979    Local properties are those that are defined by the block, irrespective of
 980    other blocks.
 981
 982    An expression is transparent in a block if its operands are not modified
 983    in the block.
 984
 985    An expression is computed (locally available) in a block if it is computed
 986    at least once and expression would contain the same value if the
 987    computation was moved to the end of the block.
 988
 989    An expression is locally anticipatable in a block if it is computed at
 990    least once and expression would contain the same value if the computation
 991    was moved to the beginning of the block.
 992
 993    We call this routine for cprop, pre and code hoisting.  They all compute
 994    basically the same information and thus can easily share this code.
 995
 996    TRANSP, COMP, and ANTLOC are destination sbitmaps for recording local
 997    properties.  If NULL, then it is not necessary to compute or record that
 998    particular property.
 999
1000    SETP controls which hash table to look at.  If zero, this routine looks at
1001    the expr hash table; if nonzero this routine looks at the set hash table.
1002    Additionally, TRANSP is computed as ~TRANSP, since this is really cprop's
1003    ABSALTERED.  */
1004
1005 static void
1006 compute_local_properties (transp, comp, antloc, setp)
1007      sbitmap *transp;
1008      sbitmap *comp;
1009      sbitmap *antloc;
1010      int setp;
1011 {
1012   unsigned int i, hash_table_size;
1013   struct expr **hash_table;
1014
1015   /* Initialize any bitmaps that were passed in.  */
1016   if (transp)
1017     {
1018       if (setp)
1019         sbitmap_vector_zero (transp, n_basic_blocks);
1020       else
1021         sbitmap_vector_ones (transp, n_basic_blocks);
1022     }
1023
1024   if (comp)
1025     sbitmap_vector_zero (comp, n_basic_blocks);
1026   if (antloc)
1027     sbitmap_vector_zero (antloc, n_basic_blocks);
1028
1029   /* We use the same code for cprop, pre and hoisting.  For cprop
1030      we care about the set hash table, for pre and hoisting we
1031      care about the expr hash table.  */
1032   hash_table_size = setp ? set_hash_table_size : expr_hash_table_size;
1033   hash_table = setp ? set_hash_table : expr_hash_table;
1034
1035   for (i = 0; i < hash_table_size; i++)
1036     {
1037       struct expr *expr;
1038
1039       for (expr = hash_table[i]; expr != NULL; expr = expr->next_same_hash)
1040         {
1041           int indx = expr->bitmap_index;
1042           struct occr *occr;
1043
1044           /* The expression is transparent in this block if it is not killed.
1045              We start by assuming all are transparent [none are killed], and
1046              then reset the bits for those that are.  */
1047           if (transp)
1048             compute_transp (expr->expr, indx, transp, setp);
1049
1050           /* The occurrences recorded in antic_occr are exactly those that
1051              we want to set to non-zero in ANTLOC.  */
1052           if (antloc)
1053             for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
1054               {
1055                 SET_BIT (antloc[BLOCK_NUM (occr->insn)], indx);
1056
1057                 /* While we're scanning the table, this is a good place to
1058                    initialize this.  */
1059                 occr->deleted_p = 0;
1060               }
1061
1062           /* The occurrences recorded in avail_occr are exactly those that
1063              we want to set to non-zero in COMP.  */
1064           if (comp)
1065             for (occr = expr->avail_occr; occr != NULL; occr = occr->next)
1066               {
1067                 SET_BIT (comp[BLOCK_NUM (occr->insn)], indx);
1068
1069                 /* While we're scanning the table, this is a good place to
1070                    initialize this.  */
1071                 occr->copied_p = 0;
1072               }
1073
1074           /* While we're scanning the table, this is a good place to
1075              initialize this.  */
1076           expr->reaching_reg = 0;
1077         }
1078     }
1079 }
1080 \f
1081 /* Register set information.
1082
1083    `reg_set_table' records where each register is set or otherwise
1084    modified.  */
1085
1086 static struct obstack reg_set_obstack;
1087
1088 static void
1089 alloc_reg_set_mem (n_regs)
1090      int n_regs;
1091 {
1092   unsigned int n;
1093
1094   reg_set_table_size = n_regs + REG_SET_TABLE_SLOP;
1095   n = reg_set_table_size * sizeof (struct reg_set *);
1096   reg_set_table = (struct reg_set **) gmalloc (n);
1097   bzero ((char *) reg_set_table, n);
1098
1099   gcc_obstack_init (&reg_set_obstack);
1100 }
1101
1102 static void
1103 free_reg_set_mem ()
1104 {
1105   free (reg_set_table);
1106   obstack_free (&reg_set_obstack, NULL_PTR);
1107 }
1108
1109 /* Record REGNO in the reg_set table.  */
1110
1111 static void
1112 record_one_set (regno, insn)
1113      int regno;
1114      rtx insn;
1115 {
1116   /* allocate a new reg_set element and link it onto the list */
1117   struct reg_set *new_reg_info;
1118
1119   /* If the table isn't big enough, enlarge it.  */
1120   if (regno >= reg_set_table_size)
1121     {
1122       int new_size = regno + REG_SET_TABLE_SLOP;
1123
1124       reg_set_table
1125         = (struct reg_set **) grealloc ((char *) reg_set_table,
1126                                         new_size * sizeof (struct reg_set *));
1127       bzero ((char *) (reg_set_table + reg_set_table_size),
1128              (new_size - reg_set_table_size) * sizeof (struct reg_set *));
1129       reg_set_table_size = new_size;
1130     }
1131
1132   new_reg_info = (struct reg_set *) obstack_alloc (&reg_set_obstack,
1133                                                    sizeof (struct reg_set));
1134   bytes_used += sizeof (struct reg_set);
1135   new_reg_info->insn = insn;
1136   new_reg_info->next = reg_set_table[regno];
1137   reg_set_table[regno] = new_reg_info;
1138 }
1139
1140 /* Called from compute_sets via note_stores to handle one SET or CLOBBER in
1141    an insn.  The DATA is really the instruction in which the SET is
1142    occurring.  */
1143
1144 static void
1145 record_set_info (dest, setter, data)
1146      rtx dest, setter ATTRIBUTE_UNUSED;
1147      void *data;
1148 {
1149   rtx record_set_insn = (rtx) data;
1150
1151   if (GET_CODE (dest) == REG && REGNO (dest) >= FIRST_PSEUDO_REGISTER)
1152     record_one_set (REGNO (dest), record_set_insn);
1153 }
1154
1155 /* Scan the function and record each set of each pseudo-register.
1156
1157    This is called once, at the start of the gcse pass.  See the comments for
1158    `reg_set_table' for further documenation.  */
1159
1160 static void
1161 compute_sets (f)
1162      rtx f;
1163 {
1164   rtx insn;
1165
1166   for (insn = f; insn != 0; insn = NEXT_INSN (insn))
1167     if (INSN_P (insn))
1168       note_stores (PATTERN (insn), record_set_info, insn);
1169 }
1170 \f
/* Hash table support.  */

/* Marker stored in the "first/last set" variables below when no set has
   been seen.  The replacement list is parenthesized so that the negative
   constant stays a single operand wherever the macro is expanded.  */
#define NEVER_SET (-1)

/* For each register, the cuid of the first/last insn in the block to set it,
   or NEVER_SET if not set.  */
static int *reg_first_set;
static int *reg_last_set;

/* While computing "first/last set" info, this is the CUID of first/last insn
   to set memory or NEVER_SET if not set.  `mem_last_set' is also used when
   performing GCSE to record whether memory has been set since the beginning
   of the block.

   Note that handling of memory is very simple, we don't make any attempt
   to optimize things (later).  */
static int mem_first_set;
static int mem_last_set;
1188
1189 /* Perform a quick check whether X, the source of a set, is something
1190    we want to consider for GCSE.  */
1191
1192 static int
1193 want_to_gcse_p (x)
1194      rtx x;
1195 {
1196   switch (GET_CODE (x))
1197     {
1198     case REG:
1199     case SUBREG:
1200     case CONST_INT:
1201     case CONST_DOUBLE:
1202     case CALL:
1203       return 0;
1204
1205     default:
1206       break;
1207     }
1208
1209   return 1;
1210 }
1211
1212 /* Return non-zero if the operands of expression X are unchanged from the
1213    start of INSN's basic block up to but not including INSN (if AVAIL_P == 0),
1214    or from INSN to the end of INSN's basic block (if AVAIL_P != 0).  */
1215
1216 static int
1217 oprs_unchanged_p (x, insn, avail_p)
1218      rtx x, insn;
1219      int avail_p;
1220 {
1221   int i, j;
1222   enum rtx_code code;
1223   const char *fmt;
1224
1225   if (x == 0)
1226     return 1;
1227
1228   code = GET_CODE (x);
1229   switch (code)
1230     {
1231     case REG:
1232       if (avail_p)
1233         return (reg_last_set[REGNO (x)] == NEVER_SET
1234                 || reg_last_set[REGNO (x)] < INSN_CUID (insn));
1235       else
1236         return (reg_first_set[REGNO (x)] == NEVER_SET
1237                 || reg_first_set[REGNO (x)] >= INSN_CUID (insn));
1238
1239     case MEM:
1240       if (avail_p && mem_last_set != NEVER_SET
1241           && mem_last_set >= INSN_CUID (insn))
1242         return 0;
1243       else if (! avail_p && mem_first_set != NEVER_SET
1244                && mem_first_set < INSN_CUID (insn))
1245         return 0;
1246       else
1247         return oprs_unchanged_p (XEXP (x, 0), insn, avail_p);
1248
1249     case PRE_DEC:
1250     case PRE_INC:
1251     case POST_DEC:
1252     case POST_INC:
1253     case PRE_MODIFY:
1254     case POST_MODIFY:
1255       return 0;
1256
1257     case PC:
1258     case CC0: /*FIXME*/
1259     case CONST:
1260     case CONST_INT:
1261     case CONST_DOUBLE:
1262     case SYMBOL_REF:
1263     case LABEL_REF:
1264     case ADDR_VEC:
1265     case ADDR_DIFF_VEC:
1266       return 1;
1267
1268     default:
1269       break;
1270     }
1271
1272   for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
1273     {
1274       if (fmt[i] == 'e')
1275         {
1276           /* If we are about to do the last recursive call needed at this
1277              level, change it into iteration.  This function is called enough
1278              to be worth it.  */
1279           if (i == 0)
1280             return oprs_unchanged_p (XEXP (x, i), insn, avail_p);
1281
1282           else if (! oprs_unchanged_p (XEXP (x, i), insn, avail_p))
1283             return 0;
1284         }
1285       else if (fmt[i] == 'E')
1286         for (j = 0; j < XVECLEN (x, i); j++)
1287           if (! oprs_unchanged_p (XVECEXP (x, i, j), insn, avail_p))
1288             return 0;
1289     }
1290
1291   return 1;
1292 }
1293
1294 /* Return non-zero if the operands of expression X are unchanged from
1295    the start of INSN's basic block up to but not including INSN.  */
1296
1297 static int
1298 oprs_anticipatable_p (x, insn)
1299      rtx x, insn;
1300 {
1301   return oprs_unchanged_p (x, insn, 0);
1302 }
1303
1304 /* Return non-zero if the operands of expression X are unchanged from
1305    INSN to the end of INSN's basic block.  */
1306
1307 static int
1308 oprs_available_p (x, insn)
1309      rtx x, insn;
1310 {
1311   return oprs_unchanged_p (x, insn, 1);
1312 }
1313
1314 /* Hash expression X.
1315
1316    MODE is only used if X is a CONST_INT.  DO_NOT_RECORD_P is a boolean
1317    indicating if a volatile operand is found or if the expression contains
1318    something we don't want to insert in the table.
1319
1320    ??? One might want to merge this with canon_hash.  Later.  */
1321
1322 static unsigned int
1323 hash_expr (x, mode, do_not_record_p, hash_table_size)
1324      rtx x;
1325      enum machine_mode mode;
1326      int *do_not_record_p;
1327      int hash_table_size;
1328 {
1329   unsigned int hash;
1330
1331   *do_not_record_p = 0;
1332
1333   hash = hash_expr_1 (x, mode, do_not_record_p);
1334   return hash % hash_table_size;
1335 }
1336
1337 /* Subroutine of hash_expr to do the actual work.  */
1338
1339 static unsigned int
1340 hash_expr_1 (x, mode, do_not_record_p)
1341      rtx x;
1342      enum machine_mode mode;
1343      int *do_not_record_p;
1344 {
1345   int i, j;
1346   unsigned hash = 0;
1347   enum rtx_code code;
1348   const char *fmt;
1349
1350   /* Used to turn recursion into iteration.  We can't rely on GCC's
1351      tail-recursion eliminatio since we need to keep accumulating values
1352      in HASH.  */
1353
1354   if (x == 0)
1355     return hash;
1356
1357  repeat:
1358   code = GET_CODE (x);
1359   switch (code)
1360     {
1361     case REG:
1362       hash += ((unsigned int) REG << 7) + REGNO (x);
1363       return hash;
1364
1365     case CONST_INT:
1366       hash += (((unsigned int) CONST_INT << 7) + (unsigned int) mode
1367                + (unsigned int) INTVAL (x));
1368       return hash;
1369
1370     case CONST_DOUBLE:
1371       /* This is like the general case, except that it only counts
1372          the integers representing the constant.  */
1373       hash += (unsigned int) code + (unsigned int) GET_MODE (x);
1374       if (GET_MODE (x) != VOIDmode)
1375         for (i = 2; i < GET_RTX_LENGTH (CONST_DOUBLE); i++)
1376           hash += (unsigned int) XWINT (x, i);
1377       else
1378         hash += ((unsigned int) CONST_DOUBLE_LOW (x)
1379                  + (unsigned int) CONST_DOUBLE_HIGH (x));
1380       return hash;
1381
1382       /* Assume there is only one rtx object for any given label.  */
1383     case LABEL_REF:
1384       /* We don't hash on the address of the CODE_LABEL to avoid bootstrap
1385          differences and differences between each stage's debugging dumps.  */
1386       hash += (((unsigned int) LABEL_REF << 7)
1387                + CODE_LABEL_NUMBER (XEXP (x, 0)));
1388       return hash;
1389
1390     case SYMBOL_REF:
1391       {
1392         /* Don't hash on the symbol's address to avoid bootstrap differences.
1393            Different hash values may cause expressions to be recorded in
1394            different orders and thus different registers to be used in the
1395            final assembler.  This also avoids differences in the dump files
1396            between various stages.  */
1397         unsigned int h = 0;
1398         const unsigned char *p = (const unsigned char *) XSTR (x, 0);
1399
1400         while (*p)
1401           h += (h << 7) + *p++; /* ??? revisit */
1402
1403         hash += ((unsigned int) SYMBOL_REF << 7) + h;
1404         return hash;
1405       }
1406
1407     case MEM:
1408       if (MEM_VOLATILE_P (x))
1409         {
1410           *do_not_record_p = 1;
1411           return 0;
1412         }
1413
1414       hash += (unsigned int) MEM;
1415       hash += MEM_ALIAS_SET (x);
1416       x = XEXP (x, 0);
1417       goto repeat;
1418
1419     case PRE_DEC:
1420     case PRE_INC:
1421     case POST_DEC:
1422     case POST_INC:
1423     case PC:
1424     case CC0:
1425     case CALL:
1426     case UNSPEC_VOLATILE:
1427       *do_not_record_p = 1;
1428       return 0;
1429
1430     case ASM_OPERANDS:
1431       if (MEM_VOLATILE_P (x))
1432         {
1433           *do_not_record_p = 1;
1434           return 0;
1435         }
1436
1437     default:
1438       break;
1439     }
1440
1441   hash += (unsigned) code + (unsigned) GET_MODE (x);
1442   for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
1443     {
1444       if (fmt[i] == 'e')
1445         {
1446           /* If we are about to do the last recursive call
1447              needed at this level, change it into iteration.
1448              This function is called enough to be worth it.  */
1449           if (i == 0)
1450             {
1451               x = XEXP (x, i);
1452               goto repeat;
1453             }
1454
1455           hash += hash_expr_1 (XEXP (x, i), 0, do_not_record_p);
1456           if (*do_not_record_p)
1457             return 0;
1458         }
1459
1460       else if (fmt[i] == 'E')
1461         for (j = 0; j < XVECLEN (x, i); j++)
1462           {
1463             hash += hash_expr_1 (XVECEXP (x, i, j), 0, do_not_record_p);
1464             if (*do_not_record_p)
1465               return 0;
1466           }
1467
1468       else if (fmt[i] == 's')
1469         {
1470           register const unsigned char *p =
1471             (const unsigned char *) XSTR (x, i);
1472
1473           if (p)
1474             while (*p)
1475               hash += *p++;
1476         }
1477       else if (fmt[i] == 'i')
1478         hash += (unsigned int) XINT (x, i);
1479       else
1480         abort ();
1481     }
1482
1483   return hash;
1484 }
1485
/* Hash a set of register REGNO and return a bucket index for a table of
   HASH_TABLE_SIZE buckets.

   Sets are hashed on the register that is set, which simplifies the PRE
   copy propagation code.  The index is REGNO, taken as an unsigned value,
   modulo HASH_TABLE_SIZE.

   ??? May need to make things more elaborate.  Later, as necessary.  */

static unsigned int
hash_set (regno, hash_table_size)
     int regno;
     int hash_table_size;
{
  return (unsigned int) regno % hash_table_size;
}
1503
1504 /* Return non-zero if exp1 is equivalent to exp2.
1505    ??? Borrowed from cse.c.  Might want to remerge with cse.c.  Later.  */
1506
1507 static int
1508 expr_equiv_p (x, y)
1509      rtx x, y;
1510 {
1511   register int i, j;
1512   register enum rtx_code code;
1513   register const char *fmt;
1514
1515   if (x == y)
1516     return 1;
1517
1518   if (x == 0 || y == 0)
1519     return x == y;
1520
1521   code = GET_CODE (x);
1522   if (code != GET_CODE (y))
1523     return 0;
1524
1525   /* (MULT:SI x y) and (MULT:HI x y) are NOT equivalent.  */
1526   if (GET_MODE (x) != GET_MODE (y))
1527     return 0;
1528
1529   switch (code)
1530     {
1531     case PC:
1532     case CC0:
1533       return x == y;
1534
1535     case CONST_INT:
1536       return INTVAL (x) == INTVAL (y);
1537
1538     case LABEL_REF:
1539       return XEXP (x, 0) == XEXP (y, 0);
1540
1541     case SYMBOL_REF:
1542       return XSTR (x, 0) == XSTR (y, 0);
1543
1544     case REG:
1545       return REGNO (x) == REGNO (y);
1546
1547     case MEM:
1548       /* Can't merge two expressions in different alias sets, since we can
1549          decide that the expression is transparent in a block when it isn't,
1550          due to it being set with the different alias set.  */
1551       if (MEM_ALIAS_SET (x) != MEM_ALIAS_SET (y))
1552         return 0;
1553       break;
1554
1555     /*  For commutative operations, check both orders.  */
1556     case PLUS:
1557     case MULT:
1558     case AND:
1559     case IOR:
1560     case XOR:
1561     case NE:
1562     case EQ:
1563       return ((expr_equiv_p (XEXP (x, 0), XEXP (y, 0))
1564                && expr_equiv_p (XEXP (x, 1), XEXP (y, 1)))
1565               || (expr_equiv_p (XEXP (x, 0), XEXP (y, 1))
1566                   && expr_equiv_p (XEXP (x, 1), XEXP (y, 0))));
1567
1568     default:
1569       break;
1570     }
1571
1572   /* Compare the elements.  If any pair of corresponding elements
1573      fail to match, return 0 for the whole thing.  */
1574
1575   fmt = GET_RTX_FORMAT (code);
1576   for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
1577     {
1578       switch (fmt[i])
1579         {
1580         case 'e':
1581           if (! expr_equiv_p (XEXP (x, i), XEXP (y, i)))
1582             return 0;
1583           break;
1584
1585         case 'E':
1586           if (XVECLEN (x, i) != XVECLEN (y, i))
1587             return 0;
1588           for (j = 0; j < XVECLEN (x, i); j++)
1589             if (! expr_equiv_p (XVECEXP (x, i, j), XVECEXP (y, i, j)))
1590               return 0;
1591           break;
1592
1593         case 's':
1594           if (strcmp (XSTR (x, i), XSTR (y, i)))
1595             return 0;
1596           break;
1597
1598         case 'i':
1599           if (XINT (x, i) != XINT (y, i))
1600             return 0;
1601           break;
1602
1603         case 'w':
1604           if (XWINT (x, i) != XWINT (y, i))
1605             return 0;
1606         break;
1607
1608         case '0':
1609           break;
1610
1611         default:
1612           abort ();
1613         }
1614       }
1615
1616   return 1;
1617 }
1618
1619 /* Insert expression X in INSN in the hash table.
1620    If it is already present, record it as the last occurrence in INSN's
1621    basic block.
1622
1623    MODE is the mode of the value X is being stored into.
1624    It is only used if X is a CONST_INT.
1625
1626    ANTIC_P is non-zero if X is an anticipatable expression.
1627    AVAIL_P is non-zero if X is an available expression.  */
1628
1629 static void
1630 insert_expr_in_table (x, mode, insn, antic_p, avail_p)
1631      rtx x;
1632      enum machine_mode mode;
1633      rtx insn;
1634      int antic_p, avail_p;
1635 {
1636   int found, do_not_record_p;
1637   unsigned int hash;
1638   struct expr *cur_expr, *last_expr = NULL;
1639   struct occr *antic_occr, *avail_occr;
1640   struct occr *last_occr = NULL;
1641
1642   hash = hash_expr (x, mode, &do_not_record_p, expr_hash_table_size);
1643
1644   /* Do not insert expression in table if it contains volatile operands,
1645      or if hash_expr determines the expression is something we don't want
1646      to or can't handle.  */
1647   if (do_not_record_p)
1648     return;
1649
1650   cur_expr = expr_hash_table[hash];
1651   found = 0;
1652
1653   while (cur_expr && 0 == (found = expr_equiv_p (cur_expr->expr, x)))
1654     {
1655       /* If the expression isn't found, save a pointer to the end of
1656          the list.  */
1657       last_expr = cur_expr;
1658       cur_expr = cur_expr->next_same_hash;
1659     }
1660
1661   if (! found)
1662     {
1663       cur_expr = (struct expr *) gcse_alloc (sizeof (struct expr));
1664       bytes_used += sizeof (struct expr);
1665       if (expr_hash_table[hash] == NULL)
1666         /* This is the first pattern that hashed to this index.  */
1667         expr_hash_table[hash] = cur_expr;
1668       else
1669         /* Add EXPR to end of this hash chain.  */
1670         last_expr->next_same_hash = cur_expr;
1671
1672       /* Set the fields of the expr element.  */
1673       cur_expr->expr = x;
1674       cur_expr->bitmap_index = n_exprs++;
1675       cur_expr->next_same_hash = NULL;
1676       cur_expr->antic_occr = NULL;
1677       cur_expr->avail_occr = NULL;
1678     }
1679
1680   /* Now record the occurrence(s).  */
1681   if (antic_p)
1682     {
1683       antic_occr = cur_expr->antic_occr;
1684
1685       /* Search for another occurrence in the same basic block.  */
1686       while (antic_occr && BLOCK_NUM (antic_occr->insn) != BLOCK_NUM (insn))
1687         {
1688           /* If an occurrence isn't found, save a pointer to the end of
1689              the list.  */
1690           last_occr = antic_occr;
1691           antic_occr = antic_occr->next;
1692         }
1693
1694       if (antic_occr)
1695         /* Found another instance of the expression in the same basic block.
1696            Prefer the currently recorded one.  We want the first one in the
1697            block and the block is scanned from start to end.  */
1698         ; /* nothing to do */
1699       else
1700         {
1701           /* First occurrence of this expression in this basic block.  */
1702           antic_occr = (struct occr *) gcse_alloc (sizeof (struct occr));
1703           bytes_used += sizeof (struct occr);
1704           /* First occurrence of this expression in any block?  */
1705           if (cur_expr->antic_occr == NULL)
1706             cur_expr->antic_occr = antic_occr;
1707           else
1708             last_occr->next = antic_occr;
1709
1710           antic_occr->insn = insn;
1711           antic_occr->next = NULL;
1712         }
1713     }
1714
1715   if (avail_p)
1716     {
1717       avail_occr = cur_expr->avail_occr;
1718
1719       /* Search for another occurrence in the same basic block.  */
1720       while (avail_occr && BLOCK_NUM (avail_occr->insn) != BLOCK_NUM (insn))
1721         {
1722           /* If an occurrence isn't found, save a pointer to the end of
1723              the list.  */
1724           last_occr = avail_occr;
1725           avail_occr = avail_occr->next;
1726         }
1727
1728       if (avail_occr)
1729         /* Found another instance of the expression in the same basic block.
1730            Prefer this occurrence to the currently recorded one.  We want
1731            the last one in the block and the block is scanned from start
1732            to end.  */
1733         avail_occr->insn = insn;
1734       else
1735         {
1736           /* First occurrence of this expression in this basic block.  */
1737           avail_occr = (struct occr *) gcse_alloc (sizeof (struct occr));
1738           bytes_used += sizeof (struct occr);
1739
1740           /* First occurrence of this expression in any block?  */
1741           if (cur_expr->avail_occr == NULL)
1742             cur_expr->avail_occr = avail_occr;
1743           else
1744             last_occr->next = avail_occr;
1745
1746           avail_occr->insn = insn;
1747           avail_occr->next = NULL;
1748         }
1749     }
1750 }
1751
1752 /* Insert pattern X in INSN in the hash table.
1753    X is a SET of a reg to either another reg or a constant.
1754    If it is already present, record it as the last occurrence in INSN's
1755    basic block.  */
1756
1757 static void
1758 insert_set_in_table (x, insn)
1759      rtx x;
1760      rtx insn;
1761 {
1762   int found;
1763   unsigned int hash;
1764   struct expr *cur_expr, *last_expr = NULL;
1765   struct occr *cur_occr, *last_occr = NULL;
1766
1767   if (GET_CODE (x) != SET
1768       || GET_CODE (SET_DEST (x)) != REG)
1769     abort ();
1770
1771   hash = hash_set (REGNO (SET_DEST (x)), set_hash_table_size);
1772
1773   cur_expr = set_hash_table[hash];
1774   found = 0;
1775
1776   while (cur_expr && 0 == (found = expr_equiv_p (cur_expr->expr, x)))
1777     {
1778       /* If the expression isn't found, save a pointer to the end of
1779          the list.  */
1780       last_expr = cur_expr;
1781       cur_expr = cur_expr->next_same_hash;
1782     }
1783
1784   if (! found)
1785     {
1786       cur_expr = (struct expr *) gcse_alloc (sizeof (struct expr));
1787       bytes_used += sizeof (struct expr);
1788       if (set_hash_table[hash] == NULL)
1789         /* This is the first pattern that hashed to this index.  */
1790         set_hash_table[hash] = cur_expr;
1791       else
1792         /* Add EXPR to end of this hash chain.  */
1793         last_expr->next_same_hash = cur_expr;
1794
1795       /* Set the fields of the expr element.
1796          We must copy X because it can be modified when copy propagation is
1797          performed on its operands.  */
1798       /* ??? Should this go in a different obstack?  */
1799       cur_expr->expr = copy_rtx (x);
1800       cur_expr->bitmap_index = n_sets++;
1801       cur_expr->next_same_hash = NULL;
1802       cur_expr->antic_occr = NULL;
1803       cur_expr->avail_occr = NULL;
1804     }
1805
1806   /* Now record the occurrence.  */
1807   cur_occr = cur_expr->avail_occr;
1808
1809   /* Search for another occurrence in the same basic block.  */
1810   while (cur_occr && BLOCK_NUM (cur_occr->insn) != BLOCK_NUM (insn))
1811     {
1812       /* If an occurrence isn't found, save a pointer to the end of
1813          the list.  */
1814       last_occr = cur_occr;
1815       cur_occr = cur_occr->next;
1816     }
1817
1818   if (cur_occr)
1819     /* Found another instance of the expression in the same basic block.
1820        Prefer this occurrence to the currently recorded one.  We want the
1821        last one in the block and the block is scanned from start to end.  */
1822     cur_occr->insn = insn;
1823   else
1824     {
1825       /* First occurrence of this expression in this basic block.  */
1826       cur_occr = (struct occr *) gcse_alloc (sizeof (struct occr));
1827       bytes_used += sizeof (struct occr);
1828
1829       /* First occurrence of this expression in any block?  */
1830       if (cur_expr->avail_occr == NULL)
1831         cur_expr->avail_occr = cur_occr;
1832       else
1833         last_occr->next = cur_occr;
1834
1835       cur_occr->insn = insn;
1836       cur_occr->next = NULL;
1837     }
1838 }
1839
1840 /* Scan pattern PAT of INSN and add an entry to the hash table.  If SET_P is
1841    non-zero, this is for the assignment hash table, otherwise it is for the
1842    expression hash table.  */
1843
1844 static void
1845 hash_scan_set (pat, insn, set_p)
1846      rtx pat, insn;
1847      int set_p;
1848 {
1849   rtx src = SET_SRC (pat);
1850   rtx dest = SET_DEST (pat);
1851
1852   if (GET_CODE (src) == CALL)
1853     hash_scan_call (src, insn);
1854
1855   if (GET_CODE (dest) == REG)
1856     {
1857       int regno = REGNO (dest);
1858       rtx tmp;
1859
1860       /* Only record sets of pseudo-regs in the hash table.  */
1861       if (! set_p
1862           && regno >= FIRST_PSEUDO_REGISTER
1863           /* Don't GCSE something if we can't do a reg/reg copy.  */
1864           && can_copy_p [GET_MODE (dest)]
1865           /* Is SET_SRC something we want to gcse?  */
1866           && want_to_gcse_p (src))
1867         {
1868           /* An expression is not anticipatable if its operands are
1869              modified before this insn.  */
1870           int antic_p = oprs_anticipatable_p (src, insn);
1871           /* An expression is not available if its operands are
1872              subsequently modified, including this insn.  */
1873           int avail_p = oprs_available_p (src, insn);
1874
1875           insert_expr_in_table (src, GET_MODE (dest), insn, antic_p, avail_p);
1876         }
1877
1878       /* Record sets for constant/copy propagation.  */
1879       else if (set_p
1880                && regno >= FIRST_PSEUDO_REGISTER
1881                && ((GET_CODE (src) == REG
1882                     && REGNO (src) >= FIRST_PSEUDO_REGISTER
1883                     && can_copy_p [GET_MODE (dest)])
1884                    || GET_CODE (src) == CONST_INT
1885                    || GET_CODE (src) == SYMBOL_REF
1886                    || GET_CODE (src) == CONST_DOUBLE)
1887                /* A copy is not available if its src or dest is subsequently
1888                   modified.  Here we want to search from INSN+1 on, but
1889                   oprs_available_p searches from INSN on.  */
1890                && (insn == BLOCK_END (BLOCK_NUM (insn))
1891                    || ((tmp = next_nonnote_insn (insn)) != NULL_RTX
1892                        && oprs_available_p (pat, tmp))))
1893         insert_set_in_table (pat, insn);
1894     }
1895 }
1896
1897 static void
1898 hash_scan_clobber (x, insn)
1899      rtx x ATTRIBUTE_UNUSED, insn ATTRIBUTE_UNUSED;
1900 {
1901   /* Currently nothing to do.  */
1902 }
1903
1904 static void
1905 hash_scan_call (x, insn)
1906      rtx x ATTRIBUTE_UNUSED, insn ATTRIBUTE_UNUSED;
1907 {
1908   /* Currently nothing to do.  */
1909 }
1910
1911 /* Process INSN and add hash table entries as appropriate.
1912
1913    Only available expressions that set a single pseudo-reg are recorded.
1914
1915    Single sets in a PARALLEL could be handled, but it's an extra complication
1916    that isn't dealt with right now.  The trick is handling the CLOBBERs that
1917    are also in the PARALLEL.  Later.
1918
1919    If SET_P is non-zero, this is for the assignment hash table,
1920    otherwise it is for the expression hash table.
1921    If IN_LIBCALL_BLOCK nonzero, we are in a libcall block, and should
1922    not record any expressions.  */
1923
1924 static void
1925 hash_scan_insn (insn, set_p, in_libcall_block)
1926      rtx insn;
1927      int set_p;
1928      int in_libcall_block;
1929 {
1930   rtx pat = PATTERN (insn);
1931   int i;
1932
1933   /* Pick out the sets of INSN and for other forms of instructions record
1934      what's been modified.  */
1935
1936   if (GET_CODE (pat) == SET && ! in_libcall_block)
1937     {
1938       /* Ignore obvious no-ops.  */
1939       if (SET_SRC (pat) != SET_DEST (pat))
1940         hash_scan_set (pat, insn, set_p);
1941     }
1942   else if (GET_CODE (pat) == PARALLEL)
1943     for (i = 0; i < XVECLEN (pat, 0); i++)
1944       {
1945         rtx x = XVECEXP (pat, 0, i);
1946
1947         if (GET_CODE (x) == SET)
1948           {
1949             if (GET_CODE (SET_SRC (x)) == CALL)
1950               hash_scan_call (SET_SRC (x), insn);
1951           }
1952         else if (GET_CODE (x) == CLOBBER)
1953           hash_scan_clobber (x, insn);
1954         else if (GET_CODE (x) == CALL)
1955           hash_scan_call (x, insn);
1956       }
1957
1958   else if (GET_CODE (pat) == CLOBBER)
1959     hash_scan_clobber (pat, insn);
1960   else if (GET_CODE (pat) == CALL)
1961     hash_scan_call (pat, insn);
1962 }
1963
1964 static void
1965 dump_hash_table (file, name, table, table_size, total_size)
1966      FILE *file;
1967      const char *name;
1968      struct expr **table;
1969      int table_size, total_size;
1970 {
1971   int i;
1972   /* Flattened out table, so it's printed in proper order.  */
1973   struct expr **flat_table;
1974   unsigned int *hash_val;
1975   struct expr *expr;
1976
1977   flat_table
1978     = (struct expr **) xcalloc (total_size, sizeof (struct expr *));
1979   hash_val = (unsigned int *) xmalloc (total_size * sizeof (unsigned int));
1980
1981   for (i = 0; i < table_size; i++)
1982     for (expr = table[i]; expr != NULL; expr = expr->next_same_hash)
1983       {
1984         flat_table[expr->bitmap_index] = expr;
1985         hash_val[expr->bitmap_index] = i;
1986       }
1987
1988   fprintf (file, "%s hash table (%d buckets, %d entries)\n",
1989            name, table_size, total_size);
1990
1991   for (i = 0; i < total_size; i++)
1992     if (flat_table[i] != 0)
1993       {
1994         expr = flat_table[i];
1995         fprintf (file, "Index %d (hash value %d)\n  ",
1996                  expr->bitmap_index, hash_val[i]);
1997         print_rtl (file, expr->expr);
1998         fprintf (file, "\n");
1999       }
2000
2001   fprintf (file, "\n");
2002
2003   free (flat_table);
2004   free (hash_val);
2005 }
2006
2007 /* Record register first/last/block set information for REGNO in INSN.
2008
2009    reg_first_set records the first place in the block where the register
2010    is set and is used to compute "anticipatability".
2011
2012    reg_last_set records the last place in the block where the register
2013    is set and is used to compute "availability".
2014
2015    reg_set_in_block records whether the register is set in the block
2016    and is used to compute "transparency".  */
2017
2018 static void
2019 record_last_reg_set_info (insn, regno)
2020      rtx insn;
2021      int regno;
2022 {
2023   if (reg_first_set[regno] == NEVER_SET)
2024     reg_first_set[regno] = INSN_CUID (insn);
2025
2026   reg_last_set[regno] = INSN_CUID (insn);
2027   SET_BIT (reg_set_in_block[BLOCK_NUM (insn)], regno);
2028 }
2029
2030 /* Record memory first/last/block set information for INSN.  */
2031
2032 static void
2033 record_last_mem_set_info (insn)
2034      rtx insn;
2035 {
2036   if (mem_first_set == NEVER_SET)
2037     mem_first_set = INSN_CUID (insn);
2038
2039   mem_last_set = INSN_CUID (insn);
2040   mem_set_in_block[BLOCK_NUM (insn)] = 1;
2041 }
2042
2043 /* Called from compute_hash_table via note_stores to handle one
2044    SET or CLOBBER in an insn.  DATA is really the instruction in which
2045    the SET is taking place.  */
2046
2047 static void
2048 record_last_set_info (dest, setter, data)
2049      rtx dest, setter ATTRIBUTE_UNUSED;
2050      void *data;
2051 {
2052   rtx last_set_insn = (rtx) data;
2053
2054   if (GET_CODE (dest) == SUBREG)
2055     dest = SUBREG_REG (dest);
2056
2057   if (GET_CODE (dest) == REG)
2058     record_last_reg_set_info (last_set_insn, REGNO (dest));
2059   else if (GET_CODE (dest) == MEM
2060            /* Ignore pushes, they clobber nothing.  */
2061            && ! push_operand (dest, GET_MODE (dest)))
2062     record_last_mem_set_info (last_set_insn);
2063 }
2064
2065 /* Top level function to create an expression or assignment hash table.
2066
2067    Expression entries are placed in the hash table if
2068    - they are of the form (set (pseudo-reg) src),
2069    - src is something we want to perform GCSE on,
2070    - none of the operands are subsequently modified in the block
2071
2072    Assignment entries are placed in the hash table if
2073    - they are of the form (set (pseudo-reg) src),
2074    - src is something we want to perform const/copy propagation on,
2075    - none of the operands or target are subsequently modified in the block
2076
2077    Currently src must be a pseudo-reg or a const_int.
2078
2079    F is the first insn.
2080    SET_P is non-zero for computing the assignment hash table.  */
2081
2082 static void
2083 compute_hash_table (set_p)
2084      int set_p;
2085 {
2086   int bb;
2087
2088   /* While we compute the hash table we also compute a bit array of which
2089      registers are set in which blocks.
2090      We also compute which blocks set memory, in the absence of aliasing
2091      support [which is TODO].
2092      ??? This isn't needed during const/copy propagation, but it's cheap to
2093      compute.  Later.  */
2094   sbitmap_vector_zero (reg_set_in_block, n_basic_blocks);
2095   bzero ((char *) mem_set_in_block, n_basic_blocks);
2096
2097   /* Some working arrays used to track first and last set in each block.  */
2098   /* ??? One could use alloca here, but at some size a threshold is crossed
2099      beyond which one should use malloc.  Are we at that threshold here?  */
2100   reg_first_set = (int *) gmalloc (max_gcse_regno * sizeof (int));
2101   reg_last_set = (int *) gmalloc (max_gcse_regno * sizeof (int));
2102
2103   for (bb = 0; bb < n_basic_blocks; bb++)
2104     {
2105       rtx insn;
2106       unsigned int regno;
2107       int in_libcall_block;
2108       unsigned int i;
2109
2110       /* First pass over the instructions records information used to
2111          determine when registers and memory are first and last set.
2112          ??? The mem_set_in_block and hard-reg reg_set_in_block computation
2113          could be moved to compute_sets since they currently don't change.  */
2114
2115       for (i = 0; i < max_gcse_regno; i++)
2116         reg_first_set[i] = reg_last_set[i] = NEVER_SET;
2117
2118       mem_first_set = NEVER_SET;
2119       mem_last_set = NEVER_SET;
2120
2121       for (insn = BLOCK_HEAD (bb);
2122            insn && insn != NEXT_INSN (BLOCK_END (bb));
2123            insn = NEXT_INSN (insn))
2124         {
2125 #ifdef NON_SAVING_SETJMP
2126           if (NON_SAVING_SETJMP && GET_CODE (insn) == NOTE
2127               && NOTE_LINE_NUMBER (insn) == NOTE_INSN_SETJMP)
2128             {
2129               for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2130                 record_last_reg_set_info (insn, regno);
2131               continue;
2132             }
2133 #endif
2134
2135           if (! INSN_P (insn))
2136             continue;
2137
2138           if (GET_CODE (insn) == CALL_INSN)
2139             {
2140               for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2141                 if ((call_used_regs[regno]
2142                      && regno != STACK_POINTER_REGNUM
2143 #if HARD_FRAME_POINTER_REGNUM != FRAME_POINTER_REGNUM
2144                      && regno != HARD_FRAME_POINTER_REGNUM
2145 #endif
2146 #if ARG_POINTER_REGNUM != FRAME_POINTER_REGNUM
2147                      && ! (regno == ARG_POINTER_REGNUM && fixed_regs[regno])
2148 #endif
2149 #if defined (PIC_OFFSET_TABLE_REGNUM) && !defined (PIC_OFFSET_TABLE_REG_CALL_CLOBBERED)
2150                      && ! (regno == PIC_OFFSET_TABLE_REGNUM && flag_pic)
2151 #endif
2152
2153                      && regno != FRAME_POINTER_REGNUM)
2154                     || global_regs[regno])
2155                   record_last_reg_set_info (insn, regno);
2156
2157               if (! CONST_CALL_P (insn))
2158                 record_last_mem_set_info (insn);
2159             }
2160
2161           note_stores (PATTERN (insn), record_last_set_info, insn);
2162         }
2163
2164       /* The next pass builds the hash table.  */
2165
2166       for (insn = BLOCK_HEAD (bb), in_libcall_block = 0;
2167            insn && insn != NEXT_INSN (BLOCK_END (bb));
2168            insn = NEXT_INSN (insn))
2169         if (INSN_P (insn))
2170           {
2171             if (find_reg_note (insn, REG_LIBCALL, NULL_RTX))
2172               in_libcall_block = 1;
2173             else if (find_reg_note (insn, REG_RETVAL, NULL_RTX))
2174               in_libcall_block = 0;
2175             hash_scan_insn (insn, set_p, in_libcall_block);
2176         }
2177     }
2178
2179   free (reg_first_set);
2180   free (reg_last_set);
2181
2182   /* Catch bugs early.  */
2183   reg_first_set = reg_last_set = 0;
2184 }
2185
2186 /* Allocate space for the set hash table.
2187    N_INSNS is the number of instructions in the function.
2188    It is used to determine the number of buckets to use.  */
2189
2190 static void
2191 alloc_set_hash_table (n_insns)
2192      int n_insns;
2193 {
2194   int n;
2195
2196   set_hash_table_size = n_insns / 4;
2197   if (set_hash_table_size < 11)
2198     set_hash_table_size = 11;
2199
2200   /* Attempt to maintain efficient use of hash table.
2201      Making it an odd number is simplest for now.
2202      ??? Later take some measurements.  */
2203   set_hash_table_size |= 1;
2204   n = set_hash_table_size * sizeof (struct expr *);
2205   set_hash_table = (struct expr **) gmalloc (n);
2206 }
2207
2208 /* Free things allocated by alloc_set_hash_table.  */
2209
2210 static void
2211 free_set_hash_table ()
2212 {
2213   free (set_hash_table);
2214 }
2215
2216 /* Compute the hash table for doing copy/const propagation.  */
2217
2218 static void
2219 compute_set_hash_table ()
2220 {
2221   /* Initialize count of number of entries in hash table.  */
2222   n_sets = 0;
2223   bzero ((char *) set_hash_table,
2224          set_hash_table_size * sizeof (struct expr *));
2225
2226   compute_hash_table (1);
2227 }
2228
2229 /* Allocate space for the expression hash table.
2230    N_INSNS is the number of instructions in the function.
2231    It is used to determine the number of buckets to use.  */
2232
2233 static void
2234 alloc_expr_hash_table (n_insns)
2235      unsigned int n_insns;
2236 {
2237   int n;
2238
2239   expr_hash_table_size = n_insns / 2;
2240   /* Make sure the amount is usable.  */
2241   if (expr_hash_table_size < 11)
2242     expr_hash_table_size = 11;
2243
2244   /* Attempt to maintain efficient use of hash table.
2245      Making it an odd number is simplest for now.
2246      ??? Later take some measurements.  */
2247   expr_hash_table_size |= 1;
2248   n = expr_hash_table_size * sizeof (struct expr *);
2249   expr_hash_table = (struct expr **) gmalloc (n);
2250 }
2251
2252 /* Free things allocated by alloc_expr_hash_table.  */
2253
2254 static void
2255 free_expr_hash_table ()
2256 {
2257   free (expr_hash_table);
2258 }
2259
2260 /* Compute the hash table for doing GCSE.  */
2261
2262 static void
2263 compute_expr_hash_table ()
2264 {
2265   /* Initialize count of number of entries in hash table.  */
2266   n_exprs = 0;
2267   bzero ((char *) expr_hash_table,
2268          expr_hash_table_size * sizeof (struct expr *));
2269
2270   compute_hash_table (0);
2271 }
2272 \f
2273 /* Expression tracking support.  */
2274
2275 /* Lookup pattern PAT in the expression table.
2276    The result is a pointer to the table entry, or NULL if not found.  */
2277
2278 static struct expr *
2279 lookup_expr (pat)
2280      rtx pat;
2281 {
2282   int do_not_record_p;
2283   unsigned int hash = hash_expr (pat, GET_MODE (pat), &do_not_record_p,
2284                                  expr_hash_table_size);
2285   struct expr *expr;
2286
2287   if (do_not_record_p)
2288     return NULL;
2289
2290   expr = expr_hash_table[hash];
2291
2292   while (expr && ! expr_equiv_p (expr->expr, pat))
2293     expr = expr->next_same_hash;
2294
2295   return expr;
2296 }
2297
2298 /* Lookup REGNO in the set table.  If PAT is non-NULL look for the entry that
2299    matches it, otherwise return the first entry for REGNO.  The result is a
2300    pointer to the table entry, or NULL if not found.  */
2301
2302 static struct expr *
2303 lookup_set (regno, pat)
2304      unsigned int regno;
2305      rtx pat;
2306 {
2307   unsigned int hash = hash_set (regno, set_hash_table_size);
2308   struct expr *expr;
2309
2310   expr = set_hash_table[hash];
2311
2312   if (pat)
2313     {
2314       while (expr && ! expr_equiv_p (expr->expr, pat))
2315         expr = expr->next_same_hash;
2316     }
2317   else
2318     {
2319       while (expr && REGNO (SET_DEST (expr->expr)) != regno)
2320         expr = expr->next_same_hash;
2321     }
2322
2323   return expr;
2324 }
2325
2326 /* Return the next entry for REGNO in list EXPR.  */
2327
2328 static struct expr *
2329 next_set (regno, expr)
2330      unsigned int regno;
2331      struct expr *expr;
2332 {
2333   do
2334     expr = expr->next_same_hash;
2335   while (expr && REGNO (SET_DEST (expr->expr)) != regno);
2336
2337   return expr;
2338 }
2339
2340 /* Reset tables used to keep track of what's still available [since the
2341    start of the block].  */
2342
2343 static void
2344 reset_opr_set_tables ()
2345 {
2346   /* Maintain a bitmap of which regs have been set since beginning of
2347      the block.  */
2348   sbitmap_zero (reg_set_bitmap);
2349
2350   /* Also keep a record of the last instruction to modify memory.
2351      For now this is very trivial, we only record whether any memory
2352      location has been modified.  */
2353   mem_last_set = 0;
2354 }
2355
2356 /* Return non-zero if the operands of X are not set before INSN in
2357    INSN's basic block.  */
2358
2359 static int
2360 oprs_not_set_p (x, insn)
2361      rtx x, insn;
2362 {
2363   int i, j;
2364   enum rtx_code code;
2365   const char *fmt;
2366
2367   if (x == 0)
2368     return 1;
2369
2370   code = GET_CODE (x);
2371   switch (code)
2372     {
2373     case PC:
2374     case CC0:
2375     case CONST:
2376     case CONST_INT:
2377     case CONST_DOUBLE:
2378     case SYMBOL_REF:
2379     case LABEL_REF:
2380     case ADDR_VEC:
2381     case ADDR_DIFF_VEC:
2382       return 1;
2383
2384     case MEM:
2385       if (mem_last_set != 0)
2386         return 0;
2387       else
2388         return oprs_not_set_p (XEXP (x, 0), insn);
2389
2390     case REG:
2391       return ! TEST_BIT (reg_set_bitmap, REGNO (x));
2392
2393     default:
2394       break;
2395     }
2396
2397   for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
2398     {
2399       if (fmt[i] == 'e')
2400         {
2401           /* If we are about to do the last recursive call
2402              needed at this level, change it into iteration.
2403              This function is called enough to be worth it.  */
2404           if (i == 0)
2405             return oprs_not_set_p (XEXP (x, i), insn);
2406
2407           if (! oprs_not_set_p (XEXP (x, i), insn))
2408             return 0;
2409         }
2410       else if (fmt[i] == 'E')
2411         for (j = 0; j < XVECLEN (x, i); j++)
2412           if (! oprs_not_set_p (XVECEXP (x, i, j), insn))
2413             return 0;
2414     }
2415
2416   return 1;
2417 }
2418
2419 /* Mark things set by a CALL.  */
2420
2421 static void
2422 mark_call (insn)
2423      rtx insn;
2424 {
2425   mem_last_set = INSN_CUID (insn);
2426 }
2427
2428 /* Mark things set by a SET.  */
2429
2430 static void
2431 mark_set (pat, insn)
2432      rtx pat, insn;
2433 {
2434   rtx dest = SET_DEST (pat);
2435
2436   while (GET_CODE (dest) == SUBREG
2437          || GET_CODE (dest) == ZERO_EXTRACT
2438          || GET_CODE (dest) == SIGN_EXTRACT
2439          || GET_CODE (dest) == STRICT_LOW_PART)
2440     dest = XEXP (dest, 0);
2441
2442   if (GET_CODE (dest) == REG)
2443     SET_BIT (reg_set_bitmap, REGNO (dest));
2444   else if (GET_CODE (dest) == MEM)
2445     mem_last_set = INSN_CUID (insn);
2446
2447   if (GET_CODE (SET_SRC (pat)) == CALL)
2448     mark_call (insn);
2449 }
2450
2451 /* Record things set by a CLOBBER.  */
2452
2453 static void
2454 mark_clobber (pat, insn)
2455      rtx pat, insn;
2456 {
2457   rtx clob = XEXP (pat, 0);
2458
2459   while (GET_CODE (clob) == SUBREG || GET_CODE (clob) == STRICT_LOW_PART)
2460     clob = XEXP (clob, 0);
2461
2462   if (GET_CODE (clob) == REG)
2463     SET_BIT (reg_set_bitmap, REGNO (clob));
2464   else
2465     mem_last_set = INSN_CUID (insn);
2466 }
2467
2468 /* Record things set by INSN.
2469    This data is used by oprs_not_set_p.  */
2470
2471 static void
2472 mark_oprs_set (insn)
2473      rtx insn;
2474 {
2475   rtx pat = PATTERN (insn);
2476   int i;
2477
2478   if (GET_CODE (pat) == SET)
2479     mark_set (pat, insn);
2480   else if (GET_CODE (pat) == PARALLEL)
2481     for (i = 0; i < XVECLEN (pat, 0); i++)
2482       {
2483         rtx x = XVECEXP (pat, 0, i);
2484
2485         if (GET_CODE (x) == SET)
2486           mark_set (x, insn);
2487         else if (GET_CODE (x) == CLOBBER)
2488           mark_clobber (x, insn);
2489         else if (GET_CODE (x) == CALL)
2490           mark_call (insn);
2491       }
2492
2493   else if (GET_CODE (pat) == CLOBBER)
2494     mark_clobber (pat, insn);
2495   else if (GET_CODE (pat) == CALL)
2496     mark_call (insn);
2497 }
2498
2499 \f
2500 /* Classic GCSE reaching definition support.  */
2501
2502 /* Allocate reaching def variables.  */
2503
2504 static void
2505 alloc_rd_mem (n_blocks, n_insns)
2506      int n_blocks, n_insns;
2507 {
2508   rd_kill = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2509   sbitmap_vector_zero (rd_kill, n_basic_blocks);
2510
2511   rd_gen = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2512   sbitmap_vector_zero (rd_gen, n_basic_blocks);
2513
2514   reaching_defs = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2515   sbitmap_vector_zero (reaching_defs, n_basic_blocks);
2516
2517   rd_out = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2518   sbitmap_vector_zero (rd_out, n_basic_blocks);
2519 }
2520
2521 /* Free reaching def variables.  */
2522
2523 static void
2524 free_rd_mem ()
2525 {
2526   free (rd_kill);
2527   free (rd_gen);
2528   free (reaching_defs);
2529   free (rd_out);
2530 }
2531
2532 /* Add INSN to the kills of BB.  REGNO, set in BB, is killed by INSN.  */
2533
2534 static void
2535 handle_rd_kill_set (insn, regno, bb)
2536      rtx insn;
2537      int regno, bb;
2538 {
2539   struct reg_set *this_reg;
2540
2541   for (this_reg = reg_set_table[regno]; this_reg; this_reg = this_reg ->next)
2542     if (BLOCK_NUM (this_reg->insn) != BLOCK_NUM (insn))
2543       SET_BIT (rd_kill[bb], INSN_CUID (this_reg->insn));
2544 }
2545
2546 /* Compute the set of kill's for reaching definitions.  */
2547
2548 static void
2549 compute_kill_rd ()
2550 {
2551   int bb, cuid;
2552   int regno, i;
2553
2554   /* For each block
2555        For each set bit in `gen' of the block (i.e each insn which
2556            generates a definition in the block)
2557          Call the reg set by the insn corresponding to that bit regx
2558          Look at the linked list starting at reg_set_table[regx]
2559          For each setting of regx in the linked list, which is not in
2560              this block
2561            Set the bit in `kill' corresponding to that insn.   */
2562   for (bb = 0; bb < n_basic_blocks; bb++)
2563     for (cuid = 0; cuid < max_cuid; cuid++)
2564       if (TEST_BIT (rd_gen[bb], cuid))
2565         {
2566           rtx insn = CUID_INSN (cuid);
2567           rtx pat = PATTERN (insn);
2568
2569           if (GET_CODE (insn) == CALL_INSN)
2570             {
2571               for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2572                 {
2573                   if ((call_used_regs[regno]
2574                        && regno != STACK_POINTER_REGNUM
2575 #if HARD_FRAME_POINTER_REGNUM != FRAME_POINTER_REGNUM
2576                        && regno != HARD_FRAME_POINTER_REGNUM
2577 #endif
2578 #if ARG_POINTER_REGNUM != FRAME_POINTER_REGNUM
2579                        && ! (regno == ARG_POINTER_REGNUM
2580                              && fixed_regs[regno])
2581 #endif
2582 #if defined (PIC_OFFSET_TABLE_REGNUM) && !defined (PIC_OFFSET_TABLE_REG_CALL_CLOBBERED)
2583                        && ! (regno == PIC_OFFSET_TABLE_REGNUM && flag_pic)
2584 #endif
2585                        && regno != FRAME_POINTER_REGNUM)
2586                       || global_regs[regno])
2587                     handle_rd_kill_set (insn, regno, bb);
2588                 }
2589             }
2590
2591           if (GET_CODE (pat) == PARALLEL)
2592             {
2593               for (i = XVECLEN (pat, 0) - 1; i >= 0; i--)
2594                 {
2595                   enum rtx_code code = GET_CODE (XVECEXP (pat, 0, i));
2596
2597                   if ((code == SET || code == CLOBBER)
2598                       && GET_CODE (XEXP (XVECEXP (pat, 0, i), 0)) == REG)
2599                     handle_rd_kill_set (insn,
2600                                         REGNO (XEXP (XVECEXP (pat, 0, i), 0)),
2601                                         bb);
2602                 }
2603             }
2604           else if (GET_CODE (pat) == SET && GET_CODE (SET_DEST (pat)) == REG)
2605             /* Each setting of this register outside of this block
2606                must be marked in the set of kills in this block.  */
2607             handle_rd_kill_set (insn, REGNO (SET_DEST (pat)), bb);
2608         }
2609 }
2610
2611 /* Compute the reaching definitions as in
2612    Compilers Principles, Techniques, and Tools. Aho, Sethi, Ullman,
2613    Chapter 10.  It is the same algorithm as used for computing available
2614    expressions but applied to the gens and kills of reaching definitions.  */
2615
2616 static void
2617 compute_rd ()
2618 {
2619   int bb, changed, passes;
2620
2621   for (bb = 0; bb < n_basic_blocks; bb++)
2622     sbitmap_copy (rd_out[bb] /*dst*/, rd_gen[bb] /*src*/);
2623
2624   passes = 0;
2625   changed = 1;
2626   while (changed)
2627     {
2628       changed = 0;
2629       for (bb = 0; bb < n_basic_blocks; bb++)
2630         {
2631           sbitmap_union_of_preds (reaching_defs[bb], rd_out, bb);
2632           changed |= sbitmap_union_of_diff (rd_out[bb], rd_gen[bb],
2633                                             reaching_defs[bb], rd_kill[bb]);
2634         }
2635       passes++;
2636     }
2637
2638   if (gcse_file)
2639     fprintf (gcse_file, "reaching def computation: %d passes\n", passes);
2640 }
2641 \f
2642 /* Classic GCSE available expression support.  */
2643
2644 /* Allocate memory for available expression computation.  */
2645
2646 static void
2647 alloc_avail_expr_mem (n_blocks, n_exprs)
2648      int n_blocks, n_exprs;
2649 {
2650   ae_kill = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2651   sbitmap_vector_zero (ae_kill, n_basic_blocks);
2652
2653   ae_gen = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2654   sbitmap_vector_zero (ae_gen, n_basic_blocks);
2655
2656   ae_in = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2657   sbitmap_vector_zero (ae_in, n_basic_blocks);
2658
2659   ae_out = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2660   sbitmap_vector_zero (ae_out, n_basic_blocks);
2661 }
2662
2663 static void
2664 free_avail_expr_mem ()
2665 {
2666   free (ae_kill);
2667   free (ae_gen);
2668   free (ae_in);
2669   free (ae_out);
2670 }
2671
2672 /* Compute the set of available expressions generated in each basic block.  */
2673
2674 static void
2675 compute_ae_gen ()
2676 {
2677   unsigned int i;
2678   struct expr *expr;
2679   struct occr *occr;
2680
2681   /* For each recorded occurrence of each expression, set ae_gen[bb][expr].
2682      This is all we have to do because an expression is not recorded if it
2683      is not available, and the only expressions we want to work with are the
2684      ones that are recorded.  */
2685   for (i = 0; i < expr_hash_table_size; i++)
2686     for (expr = expr_hash_table[i]; expr != 0; expr = expr->next_same_hash)
2687       for (occr = expr->avail_occr; occr != 0; occr = occr->next)
2688         SET_BIT (ae_gen[BLOCK_NUM (occr->insn)], expr->bitmap_index);
2689 }
2690
2691 /* Return non-zero if expression X is killed in BB.  */
2692
2693 static int
2694 expr_killed_p (x, bb)
2695      rtx x;
2696      int bb;
2697 {
2698   int i, j;
2699   enum rtx_code code;
2700   const char *fmt;
2701
2702   if (x == 0)
2703     return 1;
2704
2705   code = GET_CODE (x);
2706   switch (code)
2707     {
2708     case REG:
2709       return TEST_BIT (reg_set_in_block[bb], REGNO (x));
2710
2711     case MEM:
2712       if (mem_set_in_block[bb])
2713         return 1;
2714       else
2715         return expr_killed_p (XEXP (x, 0), bb);
2716
2717     case PC:
2718     case CC0: /*FIXME*/
2719     case CONST:
2720     case CONST_INT:
2721     case CONST_DOUBLE:
2722     case SYMBOL_REF:
2723     case LABEL_REF:
2724     case ADDR_VEC:
2725     case ADDR_DIFF_VEC:
2726       return 0;
2727
2728     default:
2729       break;
2730     }
2731
2732   for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
2733     {
2734       if (fmt[i] == 'e')
2735         {
2736           /* If we are about to do the last recursive call
2737              needed at this level, change it into iteration.
2738              This function is called enough to be worth it.  */
2739           if (i == 0)
2740             return expr_killed_p (XEXP (x, i), bb);
2741           else if (expr_killed_p (XEXP (x, i), bb))
2742             return 1;
2743         }
2744       else if (fmt[i] == 'E')
2745         for (j = 0; j < XVECLEN (x, i); j++)
2746           if (expr_killed_p (XVECEXP (x, i, j), bb))
2747             return 1;
2748     }
2749
2750   return 0;
2751 }
2752
2753 /* Compute the set of available expressions killed in each basic block.  */
2754
2755 static void
2756 compute_ae_kill (ae_gen, ae_kill)
2757      sbitmap *ae_gen, *ae_kill;
2758 {
2759   int bb;
2760   unsigned int i;
2761   struct expr *expr;
2762
2763   for (bb = 0; bb < n_basic_blocks; bb++)
2764     for (i = 0; i < expr_hash_table_size; i++)
2765       for (expr = expr_hash_table[i]; expr; expr = expr->next_same_hash)
2766         {
2767           /* Skip EXPR if generated in this block.  */
2768           if (TEST_BIT (ae_gen[bb], expr->bitmap_index))
2769             continue;
2770
2771           if (expr_killed_p (expr->expr, bb))
2772             SET_BIT (ae_kill[bb], expr->bitmap_index);
2773         }
2774 }
2775 \f
2776 /* Actually perform the Classic GCSE optimizations.  */
2777
2778 /* Return non-zero if occurrence OCCR of expression EXPR reaches block BB.
2779
2780    CHECK_SELF_LOOP is non-zero if we should consider a block reaching itself
2781    as a positive reach.  We want to do this when there are two computations
2782    of the expression in the block.
2783
2784    VISITED is a pointer to a working buffer for tracking which BB's have
2785    been visited.  It is NULL for the top-level call.
2786
2787    We treat reaching expressions that go through blocks containing the same
2788    reaching expression as "not reaching".  E.g. if EXPR is generated in blocks
2789    2 and 3, INSN is in block 4, and 2->3->4, we treat the expression in block
2790    2 as not reaching.  The intent is to improve the probability of finding
2791    only one reaching expression and to reduce register lifetimes by picking
2792    the closest such expression.  */
2793
2794 static int
2795 expr_reaches_here_p_work (occr, expr, bb, check_self_loop, visited)
2796      struct occr *occr;
2797      struct expr *expr;
2798      int bb;
2799      int check_self_loop;
2800      char *visited;
2801 {
2802   edge pred;
2803
2804   for (pred = BASIC_BLOCK(bb)->pred; pred != NULL; pred = pred->pred_next)
2805     {
2806       int pred_bb = pred->src->index;
2807
2808       if (visited[pred_bb])
2809         /* This predecessor has already been visited. Nothing to do.  */
2810           ;
2811       else if (pred_bb == bb)
2812         {
2813           /* BB loops on itself.  */
2814           if (check_self_loop
2815               && TEST_BIT (ae_gen[pred_bb], expr->bitmap_index)
2816               && BLOCK_NUM (occr->insn) == pred_bb)
2817             return 1;
2818
2819           visited[pred_bb] = 1;
2820         }
2821
2822       /* Ignore this predecessor if it kills the expression.  */
2823       else if (TEST_BIT (ae_kill[pred_bb], expr->bitmap_index))
2824         visited[pred_bb] = 1;
2825
2826       /* Does this predecessor generate this expression?  */
2827       else if (TEST_BIT (ae_gen[pred_bb], expr->bitmap_index))
2828         {
2829           /* Is this the occurrence we're looking for?
2830              Note that there's only one generating occurrence per block
2831              so we just need to check the block number.  */
2832           if (BLOCK_NUM (occr->insn) == pred_bb)
2833             return 1;
2834
2835           visited[pred_bb] = 1;
2836         }
2837
2838       /* Neither gen nor kill.  */
2839       else
2840         {
2841           visited[pred_bb] = 1;
2842           if (expr_reaches_here_p_work (occr, expr, pred_bb, check_self_loop,
2843               visited))
2844
2845             return 1;
2846         }
2847     }
2848
2849   /* All paths have been checked.  */
2850   return 0;
2851 }
2852
2853 /* This wrapper for expr_reaches_here_p_work() is to ensure that any
2854    memory allocated for that function is returned. */
2855
2856 static int
2857 expr_reaches_here_p (occr, expr, bb, check_self_loop)
2858      struct occr *occr;
2859      struct expr *expr;
2860      int bb;
2861      int check_self_loop;
2862 {
2863   int rval;
2864   char *visited = (char *) xcalloc (n_basic_blocks, 1);
2865
2866   rval = expr_reaches_here_p_work (occr, expr, bb, check_self_loop, visited);
2867
2868   free (visited);
2869   return rval;
2870 }
2871
2872 /* Return the instruction that computes EXPR that reaches INSN's basic block.
2873    If there is more than one such instruction, return NULL.
2874
2875    Called only by handle_avail_expr.  */
2876
2877 static rtx
2878 computing_insn (expr, insn)
2879      struct expr *expr;
2880      rtx insn;
2881 {
2882   int bb = BLOCK_NUM (insn);
2883
2884   if (expr->avail_occr->next == NULL)
2885     {
2886       if (BLOCK_NUM (expr->avail_occr->insn) == bb)
2887         /* The available expression is actually itself
2888            (i.e. a loop in the flow graph) so do nothing.  */
2889         return NULL;
2890
2891       /* (FIXME) Case that we found a pattern that was created by
2892          a substitution that took place.  */
2893       return expr->avail_occr->insn;
2894     }
2895   else
2896     {
2897       /* Pattern is computed more than once.
2898          Search backwards from this insn to see how many of these
2899          computations actually reach this insn.  */
2900       struct occr *occr;
2901       rtx insn_computes_expr = NULL;
2902       int can_reach = 0;
2903
2904       for (occr = expr->avail_occr; occr != NULL; occr = occr->next)
2905         {
2906           if (BLOCK_NUM (occr->insn) == bb)
2907             {
2908               /* The expression is generated in this block.
2909                  The only time we care about this is when the expression
2910                  is generated later in the block [and thus there's a loop].
2911                  We let the normal cse pass handle the other cases.  */
2912               if (INSN_CUID (insn) < INSN_CUID (occr->insn)
2913                   && expr_reaches_here_p (occr, expr, bb, 1))
2914                 {
2915                   can_reach++;
2916                   if (can_reach > 1)
2917                     return NULL;
2918
2919                   insn_computes_expr = occr->insn;
2920                 }
2921             }
2922           else if (expr_reaches_here_p (occr, expr, bb, 0))
2923             {
2924               can_reach++;
2925               if (can_reach > 1)
2926                 return NULL;
2927
2928               insn_computes_expr = occr->insn;
2929             }
2930         }
2931
2932       if (insn_computes_expr == NULL)
2933         abort ();
2934
2935       return insn_computes_expr;
2936     }
2937 }
2938
2939 /* Return non-zero if the definition in DEF_INSN can reach INSN.
2940    Only called by can_disregard_other_sets.  */
2941
2942 static int
2943 def_reaches_here_p (insn, def_insn)
2944      rtx insn, def_insn;
2945 {
2946   rtx reg;
2947
2948   if (TEST_BIT (reaching_defs[BLOCK_NUM (insn)], INSN_CUID (def_insn)))
2949     return 1;
2950
2951   if (BLOCK_NUM (insn) == BLOCK_NUM (def_insn))
2952     {
2953       if (INSN_CUID (def_insn) < INSN_CUID (insn))
2954         {
2955           if (GET_CODE (PATTERN (def_insn)) == PARALLEL)
2956             return 1;
2957           else if (GET_CODE (PATTERN (def_insn)) == CLOBBER)
2958             reg = XEXP (PATTERN (def_insn), 0);
2959           else if (GET_CODE (PATTERN (def_insn)) == SET)
2960             reg = SET_DEST (PATTERN (def_insn));
2961           else
2962             abort ();
2963
2964           return ! reg_set_between_p (reg, NEXT_INSN (def_insn), insn);
2965         }
2966       else
2967         return 0;
2968     }
2969
2970   return 0;
2971 }
2972
2973 /* Return non-zero if *ADDR_THIS_REG can only have one value at INSN.  The
2974    value returned is the number of definitions that reach INSN.  Returning a
2975    value of zero means that [maybe] more than one definition reaches INSN and
2976    the caller can't perform whatever optimization it is trying.  i.e. it is
2977    always safe to return zero.  */
2978
2979 static int
2980 can_disregard_other_sets (addr_this_reg, insn, for_combine)
2981      struct reg_set **addr_this_reg;
2982      rtx insn;
2983      int for_combine;
2984 {
2985   int number_of_reaching_defs = 0;
2986   struct reg_set *this_reg;
2987
2988   for (this_reg = *addr_this_reg; this_reg != 0; this_reg = this_reg->next)
2989     if (def_reaches_here_p (insn, this_reg->insn))
2990       {
2991         number_of_reaching_defs++;
2992         /* Ignore parallels for now.  */
2993         if (GET_CODE (PATTERN (this_reg->insn)) == PARALLEL)
2994           return 0;
2995
2996         if (!for_combine
2997             && (GET_CODE (PATTERN (this_reg->insn)) == CLOBBER
2998                 || ! rtx_equal_p (SET_SRC (PATTERN (this_reg->insn)),
2999                                   SET_SRC (PATTERN (insn)))))
3000           /* A setting of the reg to a different value reaches INSN.  */
3001           return 0;
3002
3003         if (number_of_reaching_defs > 1)
3004           {
3005             /* If in this setting the value the register is being set to is
3006                equal to the previous value the register was set to and this
3007                setting reaches the insn we are trying to do the substitution
3008                on then we are ok.  */
3009             if (GET_CODE (PATTERN (this_reg->insn)) == CLOBBER)
3010               return 0;
3011             else if (! rtx_equal_p (SET_SRC (PATTERN (this_reg->insn)),
3012                                     SET_SRC (PATTERN (insn))))
3013               return 0;
3014           }
3015
3016         *addr_this_reg = this_reg;
3017       }
3018
3019   return number_of_reaching_defs;
3020 }
3021
3022 /* Expression computed by insn is available and the substitution is legal,
3023    so try to perform the substitution.
3024
3025    The result is non-zero if any changes were made.  */
3026
3027 static int
3028 handle_avail_expr (insn, expr)
3029      rtx insn;
3030      struct expr *expr;
3031 {
3032   rtx pat, insn_computes_expr;
3033   rtx to;
3034   struct reg_set *this_reg;
3035   int found_setting, use_src;
3036   int changed = 0;
3037
3038   /* We only handle the case where one computation of the expression
3039      reaches this instruction.  */
3040   insn_computes_expr = computing_insn (expr, insn);
3041   if (insn_computes_expr == NULL)
3042     return 0;
3043
3044   found_setting = 0;
3045   use_src = 0;
3046
3047   /* At this point we know only one computation of EXPR outside of this
3048      block reaches this insn.  Now try to find a register that the
3049      expression is computed into.  */
3050   if (GET_CODE (SET_SRC (PATTERN (insn_computes_expr))) == REG)
3051     {
3052       /* This is the case when the available expression that reaches
3053          here has already been handled as an available expression.  */
3054       unsigned int regnum_for_replacing
3055         = REGNO (SET_SRC (PATTERN (insn_computes_expr)));
3056
3057       /* If the register was created by GCSE we can't use `reg_set_table',
3058          however we know it's set only once.  */
3059       if (regnum_for_replacing >= max_gcse_regno
3060           /* If the register the expression is computed into is set only once,
3061              or only one set reaches this insn, we can use it.  */
3062           || (((this_reg = reg_set_table[regnum_for_replacing]),
3063                this_reg->next == NULL)
3064               || can_disregard_other_sets (&this_reg, insn, 0)))
3065        {
3066          use_src = 1;
3067          found_setting = 1;
3068        }
3069     }
3070
3071   if (!found_setting)
3072     {
3073       unsigned int regnum_for_replacing
3074         = REGNO (SET_DEST (PATTERN (insn_computes_expr)));
3075
3076       /* This shouldn't happen.  */
3077       if (regnum_for_replacing >= max_gcse_regno)
3078         abort ();
3079
3080       this_reg = reg_set_table[regnum_for_replacing];
3081
3082       /* If the register the expression is computed into is set only once,
3083          or only one set reaches this insn, use it.  */
3084       if (this_reg->next == NULL
3085           || can_disregard_other_sets (&this_reg, insn, 0))
3086         found_setting = 1;
3087     }
3088
3089   if (found_setting)
3090     {
3091       pat = PATTERN (insn);
3092       if (use_src)
3093         to = SET_SRC (PATTERN (insn_computes_expr));
3094       else
3095         to = SET_DEST (PATTERN (insn_computes_expr));
3096       changed = validate_change (insn, &SET_SRC (pat), to, 0);
3097
3098       /* We should be able to ignore the return code from validate_change but
3099          to play it safe we check.  */
3100       if (changed)
3101         {
3102           gcse_subst_count++;
3103           if (gcse_file != NULL)
3104             {
3105               fprintf (gcse_file, "GCSE: Replacing the source in insn %d with",
3106                        INSN_UID (insn));
3107               fprintf (gcse_file, " reg %d %s insn %d\n",
3108                        REGNO (to), use_src ? "from" : "set in",
3109                        INSN_UID (insn_computes_expr));
3110             }
3111         }
3112     }
3113
3114   /* The register that the expr is computed into is set more than once.  */
3115   else if (1 /*expensive_op(this_pattrn->op) && do_expensive_gcse)*/)
3116     {
3117       /* Insert an insn after insnx that copies the reg set in insnx
3118          into a new pseudo register call this new register REGN.
3119          From insnb until end of basic block or until REGB is set
3120          replace all uses of REGB with REGN.  */
3121       rtx new_insn;
3122
3123       to = gen_reg_rtx (GET_MODE (SET_DEST (PATTERN (insn_computes_expr))));
3124
3125       /* Generate the new insn.  */
3126       /* ??? If the change fails, we return 0, even though we created
3127          an insn.  I think this is ok.  */
3128       new_insn
3129         = emit_insn_after (gen_rtx_SET (VOIDmode, to,
3130                                         SET_DEST (PATTERN
3131                                                   (insn_computes_expr))),
3132                            insn_computes_expr);
3133
3134       /* Keep block number table up to date.  */
3135       set_block_num (new_insn, BLOCK_NUM (insn_computes_expr));
3136
3137       /* Keep register set table up to date.  */
3138       record_one_set (REGNO (to), new_insn);
3139
3140       gcse_create_count++;
3141       if (gcse_file != NULL)
3142         {
3143           fprintf (gcse_file, "GCSE: Creating insn %d to copy value of reg %d",
3144                    INSN_UID (NEXT_INSN (insn_computes_expr)),
3145                    REGNO (SET_SRC (PATTERN (NEXT_INSN (insn_computes_expr)))));
3146           fprintf (gcse_file, ", computed in insn %d,\n",
3147                    INSN_UID (insn_computes_expr));
3148           fprintf (gcse_file, "      into newly allocated reg %d\n",
3149                    REGNO (to));
3150         }
3151
3152       pat = PATTERN (insn);
3153
3154       /* Do register replacement for INSN.  */
3155       changed = validate_change (insn, &SET_SRC (pat),
3156                                  SET_DEST (PATTERN
3157                                            (NEXT_INSN (insn_computes_expr))),
3158                                  0);
3159
3160       /* We should be able to ignore the return code from validate_change but
3161          to play it safe we check.  */
3162       if (changed)
3163         {
3164           gcse_subst_count++;
3165           if (gcse_file != NULL)
3166             {
3167               fprintf (gcse_file,
3168                        "GCSE: Replacing the source in insn %d with reg %d ",
3169                        INSN_UID (insn),
3170                        REGNO (SET_DEST (PATTERN (NEXT_INSN
3171                                                  (insn_computes_expr)))));
3172               fprintf (gcse_file, "set in insn %d\n",
3173                        INSN_UID (insn_computes_expr));
3174             }
3175         }
3176     }
3177
3178   return changed;
3179 }
3180
3181 /* Perform classic GCSE.  This is called by one_classic_gcse_pass after all
3182    the dataflow analysis has been done.
3183
3184    The result is non-zero if a change was made.  */
3185
3186 static int
3187 classic_gcse ()
3188 {
3189   int bb, changed;
3190   rtx insn;
3191
3192   /* Note we start at block 1.  */
3193
3194   changed = 0;
3195   for (bb = 1; bb < n_basic_blocks; bb++)
3196     {
3197       /* Reset tables used to keep track of what's still valid [since the
3198          start of the block].  */
3199       reset_opr_set_tables ();
3200
3201       for (insn = BLOCK_HEAD (bb);
3202            insn != NULL && insn != NEXT_INSN (BLOCK_END (bb));
3203            insn = NEXT_INSN (insn))
3204         {
3205           /* Is insn of form (set (pseudo-reg) ...)?  */
3206           if (GET_CODE (insn) == INSN
3207               && GET_CODE (PATTERN (insn)) == SET
3208               && GET_CODE (SET_DEST (PATTERN (insn))) == REG
3209               && REGNO (SET_DEST (PATTERN (insn))) >= FIRST_PSEUDO_REGISTER)
3210             {
3211               rtx pat = PATTERN (insn);
3212               rtx src = SET_SRC (pat);
3213               struct expr *expr;
3214
3215               if (want_to_gcse_p (src)
3216                   /* Is the expression recorded?  */
3217                   && ((expr = lookup_expr (src)) != NULL)
3218                   /* Is the expression available [at the start of the
3219                      block]?  */
3220                   && TEST_BIT (ae_in[bb], expr->bitmap_index)
3221                   /* Are the operands unchanged since the start of the
3222                      block?  */
3223                   && oprs_not_set_p (src, insn))
3224                 changed |= handle_avail_expr (insn, expr);
3225             }
3226
3227           /* Keep track of everything modified by this insn.  */
3228           /* ??? Need to be careful w.r.t. mods done to INSN.  */
3229           if (INSN_P (insn))
3230             mark_oprs_set (insn);
3231         }
3232     }
3233
3234   return changed;
3235 }
3236
3237 /* Top level routine to perform one classic GCSE pass.
3238
3239    Return non-zero if a change was made.  */
3240
3241 static int
3242 one_classic_gcse_pass (pass)
3243      int pass;
3244 {
3245   int changed = 0;
3246
3247   gcse_subst_count = 0;
3248   gcse_create_count = 0;
3249
3250   alloc_expr_hash_table (max_cuid);
3251   alloc_rd_mem (n_basic_blocks, max_cuid);
3252   compute_expr_hash_table ();
3253   if (gcse_file)
3254     dump_hash_table (gcse_file, "Expression", expr_hash_table,
3255                      expr_hash_table_size, n_exprs);
3256
3257   if (n_exprs > 0)
3258     {
3259       compute_kill_rd ();
3260       compute_rd ();
3261       alloc_avail_expr_mem (n_basic_blocks, n_exprs);
3262       compute_ae_gen ();
3263       compute_ae_kill (ae_gen, ae_kill);
3264       compute_available (ae_gen, ae_kill, ae_out, ae_in);
3265       changed = classic_gcse ();
3266       free_avail_expr_mem ();
3267     }
3268
3269   free_rd_mem ();
3270   free_expr_hash_table ();
3271
3272   if (gcse_file)
3273     {
3274       fprintf (gcse_file, "\n");
3275       fprintf (gcse_file, "GCSE of %s, pass %d: %d bytes needed, %d substs,",
3276                current_function_name, pass, bytes_used, gcse_subst_count);
3277       fprintf (gcse_file, "%d insns created\n", gcse_create_count);
3278     }
3279
3280   return changed;
3281 }
3282 \f
3283 /* Compute copy/constant propagation working variables.  */
3284
3285 /* Local properties of assignments.  */
3286 static sbitmap *cprop_pavloc;
3287 static sbitmap *cprop_absaltered;
3288
3289 /* Global properties of assignments (computed from the local properties).  */
3290 static sbitmap *cprop_avin;
3291 static sbitmap *cprop_avout;
3292
3293 /* Allocate vars used for copy/const propagation.  N_BLOCKS is the number of
3294    basic blocks.  N_SETS is the number of sets.  */
3295
3296 static void
3297 alloc_cprop_mem (n_blocks, n_sets)
3298      int n_blocks, n_sets;
3299 {
3300   cprop_pavloc = sbitmap_vector_alloc (n_blocks, n_sets);
3301   cprop_absaltered = sbitmap_vector_alloc (n_blocks, n_sets);
3302
3303   cprop_avin = sbitmap_vector_alloc (n_blocks, n_sets);
3304   cprop_avout = sbitmap_vector_alloc (n_blocks, n_sets);
3305 }
3306
3307 /* Free vars used by copy/const propagation.  */
3308
3309 static void
3310 free_cprop_mem ()
3311 {
3312   free (cprop_pavloc);
3313   free (cprop_absaltered);
3314   free (cprop_avin);
3315   free (cprop_avout);
3316 }
3317
3318 /* For each block, compute whether X is transparent.  X is either an
3319    expression or an assignment [though we don't care which, for this context
3320    an assignment is treated as an expression].  For each block where an
3321    element of X is modified, set (SET_P == 1) or reset (SET_P == 0) the INDX
3322    bit in BMAP.  */
3323
3324 static void
3325 compute_transp (x, indx, bmap, set_p)
3326      rtx x;
3327      int indx;
3328      sbitmap *bmap;
3329      int set_p;
3330 {
3331   int bb, i, j;
3332   enum rtx_code code;
3333   reg_set *r;
3334   const char *fmt;
3335
3336   /* repeat is used to turn tail-recursion into iteration since GCC
3337      can't do it when there's no return value.  */
3338  repeat:
3339
3340   if (x == 0)
3341     return;
3342
3343   code = GET_CODE (x);
3344   switch (code)
3345     {
3346     case REG:
3347       if (set_p)
3348         {
3349           if (REGNO (x) < FIRST_PSEUDO_REGISTER)
3350             {
3351               for (bb = 0; bb < n_basic_blocks; bb++)
3352                 if (TEST_BIT (reg_set_in_block[bb], REGNO (x)))
3353                   SET_BIT (bmap[bb], indx);
3354             }
3355           else
3356             {
3357               for (r = reg_set_table[REGNO (x)]; r != NULL; r = r->next)
3358                 SET_BIT (bmap[BLOCK_NUM (r->insn)], indx);
3359             }
3360         }
3361       else
3362         {
3363           if (REGNO (x) < FIRST_PSEUDO_REGISTER)
3364             {
3365               for (bb = 0; bb < n_basic_blocks; bb++)
3366                 if (TEST_BIT (reg_set_in_block[bb], REGNO (x)))
3367                   RESET_BIT (bmap[bb], indx);
3368             }
3369           else
3370             {
3371               for (r = reg_set_table[REGNO (x)]; r != NULL; r = r->next)
3372                 RESET_BIT (bmap[BLOCK_NUM (r->insn)], indx);
3373             }
3374         }
3375
3376       return;
3377
3378     case MEM:
3379       if (set_p)
3380         {
3381           for (bb = 0; bb < n_basic_blocks; bb++)
3382             if (mem_set_in_block[bb])
3383               SET_BIT (bmap[bb], indx);
3384         }
3385       else
3386         {
3387           for (bb = 0; bb < n_basic_blocks; bb++)
3388             if (mem_set_in_block[bb])
3389               RESET_BIT (bmap[bb], indx);
3390         }
3391
3392       x = XEXP (x, 0);
3393       goto repeat;
3394
3395     case PC:
3396     case CC0: /*FIXME*/
3397     case CONST:
3398     case CONST_INT:
3399     case CONST_DOUBLE:
3400     case SYMBOL_REF:
3401     case LABEL_REF:
3402     case ADDR_VEC:
3403     case ADDR_DIFF_VEC:
3404       return;
3405
3406     default:
3407       break;
3408     }
3409
3410   for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
3411     {
3412       if (fmt[i] == 'e')
3413         {
3414           /* If we are about to do the last recursive call
3415              needed at this level, change it into iteration.
3416              This function is called enough to be worth it.  */
3417           if (i == 0)
3418             {
3419               x = XEXP (x, i);
3420               goto repeat;
3421             }
3422
3423           compute_transp (XEXP (x, i), indx, bmap, set_p);
3424         }
3425       else if (fmt[i] == 'E')
3426         for (j = 0; j < XVECLEN (x, i); j++)
3427           compute_transp (XVECEXP (x, i, j), indx, bmap, set_p);
3428     }
3429 }
3430
3431 /* Top level routine to do the dataflow analysis needed by copy/const
3432    propagation.  */
3433
3434 static void
3435 compute_cprop_data ()
3436 {
3437   compute_local_properties (cprop_absaltered, cprop_pavloc, NULL, 1);
3438   compute_available (cprop_pavloc, cprop_absaltered,
3439                      cprop_avout, cprop_avin);
3440 }
3441 \f
3442 /* Copy/constant propagation.  */
3443
3444 /* Maximum number of register uses in an insn that we handle.  */
3445 #define MAX_USES 8
3446
3447 /* Table of uses found in an insn.
3448    Allocated statically to avoid alloc/free complexity and overhead.  */
3449 static struct reg_use reg_use_table[MAX_USES];
3450
3451 /* Index into `reg_use_table' while building it.  */
3452 static int reg_use_count;
3453
3454 /* Set up a list of register numbers used in INSN.  The found uses are stored
3455    in `reg_use_table'.  `reg_use_count' is initialized to zero before entry,
3456    and contains the number of uses in the table upon exit.
3457
3458    ??? If a register appears multiple times we will record it multiple times.
3459    This doesn't hurt anything but it will slow things down.  */
3460
3461 static void
3462 find_used_regs (x)
3463      rtx x;
3464 {
3465   int i, j;
3466   enum rtx_code code;
3467   const char *fmt;
3468
3469   /* repeat is used to turn tail-recursion into iteration since GCC
3470      can't do it when there's no return value.  */
3471  repeat:
3472
3473   if (x == 0)
3474     return;
3475
3476   code = GET_CODE (x);
3477   switch (code)
3478     {
3479     case REG:
3480       if (reg_use_count == MAX_USES)
3481         return;
3482
3483       reg_use_table[reg_use_count].reg_rtx = x;
3484       reg_use_count++;
3485       return;
3486
3487     case MEM:
3488       x = XEXP (x, 0);
3489       goto repeat;
3490
3491     case PC:
3492     case CC0:
3493     case CONST:
3494     case CONST_INT:
3495     case CONST_DOUBLE:
3496     case SYMBOL_REF:
3497     case LABEL_REF:
3498     case CLOBBER:
3499     case ADDR_VEC:
3500     case ADDR_DIFF_VEC:
3501     case ASM_INPUT: /*FIXME*/
3502       return;
3503
3504     case SET:
3505       if (GET_CODE (SET_DEST (x)) == MEM)
3506         find_used_regs (SET_DEST (x));
3507       x = SET_SRC (x);
3508       goto repeat;
3509
3510     default:
3511       break;
3512     }
3513
3514   /* Recursively scan the operands of this expression.  */
3515
3516   for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
3517     {
3518       if (fmt[i] == 'e')
3519         {
3520           /* If we are about to do the last recursive call
3521              needed at this level, change it into iteration.
3522              This function is called enough to be worth it.  */
3523           if (i == 0)
3524             {
3525               x = XEXP (x, 0);
3526               goto repeat;
3527             }
3528
3529           find_used_regs (XEXP (x, i));
3530         }
3531       else if (fmt[i] == 'E')
3532         for (j = 0; j < XVECLEN (x, i); j++)
3533           find_used_regs (XVECEXP (x, i, j));
3534     }
3535 }
3536
3537 /* Try to replace all non-SET_DEST occurrences of FROM in INSN with TO.
3538    Returns non-zero is successful.  */
3539
3540 static int
3541 try_replace_reg (from, to, insn)
3542      rtx from, to, insn;
3543 {
3544   rtx note;
3545   rtx src;
3546   int success;
3547   rtx set;
3548
3549   note = find_reg_note (insn, REG_EQUAL, NULL_RTX);
3550
3551   if (!note)
3552     note = find_reg_note (insn, REG_EQUIV, NULL_RTX);
3553
3554   /* If this fails we could try to simplify the result of the
3555      replacement and attempt to recognize the simplified insn.
3556
3557      But we need a general simplify_rtx that doesn't have pass
3558      specific state variables.  I'm not aware of one at the moment.  */
3559
3560   success = validate_replace_src (from, to, insn);
3561   set = single_set (insn);
3562
3563   /* We've failed to do replacement. Try to add REG_EQUAL note to not loose
3564      information.  */
3565   if (!success && !note)
3566     {
3567       if (!set)
3568         return 0;
3569
3570       note = REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_EQUAL,
3571                                                    copy_rtx (SET_SRC (set)),
3572                                                    REG_NOTES (insn));
3573     }
3574
3575   /* Always do the replacement in REQ_EQUAL and REG_EQUIV notes.  Also
3576      try to simplify them.  */
3577   if (note)
3578     {
3579       rtx simplified;
3580
3581       if (!validate_replace_rtx_subexp (from, to, insn, &XEXP (note, 0)))
3582         abort();
3583
3584       src = XEXP (note, 0);
3585
3586       /* Try to simplify resulting note. */
3587       simplified = simplify_rtx (src);
3588       if (simplified)
3589         {
3590           src = simplified;
3591           XEXP (note, 0) = src;
3592         }
3593
3594       /* REG_EQUAL may get simplified into register.
3595          We don't allow that. Remove that note. This code ought
3596          not to hapen, because previous code ought to syntetize
3597          reg-reg move, but be on the safe side.  */
3598       else if (REG_P (src))
3599         remove_note (insn, note);
3600     }
3601   return success;
3602 }
3603
3604 /* Find a set of REGNO that is available on entry to INSN's block, following
3605    chains of reg-reg copies.  Returns NULL if no usable set is found.  */
3606
3607 static struct expr *
3608 find_avail_set (regno, insn)
3609      int regno;
3610      rtx insn;
3611 {
3612   /* SET1 contains the last set found that can be returned to the caller for
3613      use in a substitution.  */
3614   struct expr *set1 = 0;
3615
3616   /* Loops are not possible here.  To get a loop we would need two sets
3617      available at the start of the block containing INSN, i.e. we would
3618      need two sets like this available at the start of the block:
3619
3620        (set (reg X) (reg Y))
3621        (set (reg Y) (reg X))
3622
3623      This cannot happen since the set of (reg Y) would have killed the
3624      set of (reg X) making it unavailable at the start of this block.  */
3625   while (1)
3626      {
3627       rtx src;
3628       struct expr *set = lookup_set (regno, NULL_RTX);
3629
3630       /* Find a set of REGNO that is available at the start of the block
3631          which contains INSN, according to CPROP_AVIN.  */
3632       while (set)
3633         {
3634           if (TEST_BIT (cprop_avin[BLOCK_NUM (insn)], set->bitmap_index))
3635             break;
3636           set = next_set (regno, set);
3637         }
3638
3639       /* If no available set was found we've reached the end of the
3640          (possibly empty) copy chain.  */
3641       if (set == 0)
3642         break;
3643
3644       if (GET_CODE (set->expr) != SET)
3645         abort ();
3646
3647       src = SET_SRC (set->expr);
3648
3649       /* We know the set is available.
3650          Now check that SRC is ANTLOC (i.e. none of the source operands
3651          have changed since the start of the block).
3652
3653          If the source operand changed, we may still use it for the next
3654          iteration of this loop, but we may not use it for substitutions.  */
3655
3656       if (CONSTANT_P (src) || oprs_not_set_p (src, insn))
3657         set1 = set;
3658
3659       /* If the source of the set is anything except a register, then
3660          we have reached the end of the copy chain.  */
3661       if (GET_CODE (src) != REG)
3662         break;
3663
3664       /* Follow the copy chain, i.e. start another iteration of the loop
3665          and see if we have an available copy into SRC.  */
3666       regno = REGNO (src);
3667      }
3668
3669   /* SET1 holds the last set that was available and anticipatable at
3670      INSN, or 0 if the chain offered none.  */
3671   return set1;
3672 }
3673
3674 /* Subroutine of cprop_insn that tries to propagate constants into
3675    JUMP_INSNS.  INSN must be a conditional jump; COPY is a copy of it
3676    that we can use for substitutions.
3677    REG_USED is the use we will try to replace, SRC is the constant we
3678    will try to substitute for it.  On success the pattern of COPY is
3679    installed as the pattern of INSN.  Returns nonzero if a change was made.  */
3680
3681 static int
3682 cprop_jump (insn, copy, reg_used, src)
3683      rtx insn, copy;
3684      struct reg_use *reg_used;
3685      rtx src;
3686 {
3687   rtx set = PATTERN (copy);
3688   rtx temp;
3689
3690   /* Replace the register with the appropriate constant.  */
3691   replace_rtx (SET_SRC (set), reg_used->reg_rtx, src);
3692
3693   temp = simplify_ternary_operation (GET_CODE (SET_SRC (set)),
3694                                      GET_MODE (SET_SRC (set)),
3695                                      GET_MODE (XEXP (SET_SRC (set), 0)),
3696                                      XEXP (SET_SRC (set), 0),
3697                                      XEXP (SET_SRC (set), 1),
3698                                      XEXP (SET_SRC (set), 2));
3699
3700   /* If no simplification can be made, give up on this use; the caller
3701      goes on to try the next register.  */
3702   if (temp == 0)
3703     return 0;
3704
3705   SET_SRC (set) = temp;
3706
3707   /* That may have changed the structure of TEMP, so
3708      force it to be rerecognized if it has not turned
3709      into a nop or unconditional jump.  */
3710
3711   INSN_CODE (copy) = -1;
3712   if ((SET_DEST (set) == pc_rtx
3713        && (SET_SRC (set) == pc_rtx
3714            || GET_CODE (SET_SRC (set)) == LABEL_REF))
3715       || recog (PATTERN (copy), copy, NULL) >= 0)
3716     {
3717       /* This has become an unconditional jump or a nop-jump, or
3718          is still a recognizable jump.  We'd like to delete nop
3719          jumps here, but doing so confuses gcse.  So we just
3720          make the replacement and let later passes
3721          sort things out.  */
3722       PATTERN (insn) = set;
3723       INSN_CODE (insn) = -1;
3724
3725       /* One less use of the label this insn used to jump to
3726          if we turned this into a NOP jump.  */
3727       if (SET_SRC (set) == pc_rtx && JUMP_LABEL (insn) != 0)
3728         --LABEL_NUSES (JUMP_LABEL (insn));
3729
3730       /* If this has turned into an unconditional jump,
3731          then put a barrier after it so that the unreachable
3732          code will be deleted.  */
3733       if (GET_CODE (SET_SRC (set)) == LABEL_REF)
3734         emit_barrier_after (insn);
3735
3736       run_jump_opt_after_gcse = 1;
3737
3738       const_prop_count++;
3739       if (gcse_file != NULL)
3740         {
3741           fprintf (gcse_file,
3742                    "CONST-PROP: Replacing reg %d in insn %d with constant ",
3743                    REGNO (reg_used->reg_rtx), INSN_UID (insn));
3744           print_rtl (gcse_file, src);
3745           fprintf (gcse_file, "\n");
3746         }
3747
3748       return 1;
3749     }
3750   return 0;
3751 }
3752
3753 #ifdef HAVE_cc0
3754
3755 /* Subroutine of cprop_insn that tries to propagate constants into JUMP_INSNS
3756    for machines that have CC0.  INSN is a single set that stores into CC0;
3757    the insn following it is a conditional jump.  REG_USED is the use we will
3758    try to replace, SRC is the constant we will try to substitute for it.
3759    Returns nonzero if a change was made; INSN is then a deleted NOTE.  */
3760
3761 static int
3762 cprop_cc0_jump (insn, reg_used, src)
3763      rtx insn;
3764      struct reg_use *reg_used;
3765      rtx src;
3766 {
3767   rtx jump = NEXT_INSN (insn);
3768   rtx copy = copy_rtx (jump);
3769   rtx set = PATTERN (copy);
3770
3771   /* Substitute the cc0 setter's source for cc0 in the copied jump.  The source
3772      must be copied, as cprop_jump is going to substitute into it.  */
3773   replace_rtx (SET_SRC (set), cc0_rtx, copy_rtx (SET_SRC (PATTERN (insn))));
3774   if (! cprop_jump (jump, copy, reg_used, src))
3775     return 0;
3776
3777   /* If we succeeded, delete the cc0 setter by making it a NOTE_INSN_DELETED.  */
3778   PUT_CODE (insn, NOTE);
3779   NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
3780   NOTE_SOURCE_FILE (insn) = 0;
3781   return 1;
3782  }
3783 #endif
3784
3785 /* Perform constant and copy propagation on INSN.
3786    The result is non-zero if a change was made.  */
3787
3788 static int
3789 cprop_insn (insn, alter_jumps)
3790      rtx insn;
3791      int alter_jumps;
3792 {
3793   struct reg_use *reg_used;
3794   int changed = 0;
3795   rtx note;
3796
3797   /* Only propagate into SETs.  Note that a conditional jump is a
3798      SET with pc_rtx as the destination.  */
3799   if ((GET_CODE (insn) != INSN
3800        && GET_CODE (insn) != JUMP_INSN)
3801       || GET_CODE (PATTERN (insn)) != SET)
3802     return 0;
3803
3804   reg_use_count = 0;
3805   find_used_regs (PATTERN (insn));
3806
3807   note = find_reg_note (insn, REG_EQUIV, NULL_RTX);
3808   if (!note)
3809     note = find_reg_note (insn, REG_EQUAL, NULL_RTX);
3810
3811   /* We may win even when propagating constants into notes. */
3812   if (note)
3813     find_used_regs (XEXP (note, 0));
3814
3815   for (reg_used = &reg_use_table[0]; reg_use_count > 0;
3816        reg_used++, reg_use_count--)
3817     {
3818       unsigned int regno = REGNO (reg_used->reg_rtx);
3819       rtx pat, src;
3820       struct expr *set;
3821
3822       /* Ignore registers created by GCSE.
3823          We do this because ... */
3824       if (regno >= max_gcse_regno)
3825         continue;
3826
3827       /* If the register has already been set in this block, there's
3828          nothing we can do.  */
3829       if (! oprs_not_set_p (reg_used->reg_rtx, insn))
3830         continue;
3831
3832       /* Find an assignment that sets reg_used and is available
3833          at the start of the block.  */
3834       set = find_avail_set (regno, insn);
3835       if (! set)
3836         continue;
3837
3838       pat = set->expr;
3839       /* ??? We might be able to handle PARALLELs.  Later.  */
3840       if (GET_CODE (pat) != SET)
3841         abort ();
3842
3843       src = SET_SRC (pat);
3844
3845       /* Constant propagation.  */
3846       if (GET_CODE (src) == CONST_INT || GET_CODE (src) == CONST_DOUBLE
3847           || GET_CODE (src) == SYMBOL_REF)
3848         {
3849           /* Handle normal insns first.  */
3850           if (GET_CODE (insn) == INSN
3851               && try_replace_reg (reg_used->reg_rtx, src, insn))
3852             {
3853               changed = 1;
3854               const_prop_count++;
3855               if (gcse_file != NULL)
3856                 {
3857                   fprintf (gcse_file, "CONST-PROP: Replacing reg %d in ",
3858                            regno);
3859                   fprintf (gcse_file, "insn %d with constant ",
3860                            INSN_UID (insn));
3861                   print_rtl (gcse_file, src);
3862                   fprintf (gcse_file, "\n");
3863                 }
3864
3865               /* The original insn setting reg_used may or may not now be
3866                  deletable.  We leave the deletion to flow.  */
3867             }
3868
3869           /* Try to propagate a CONST_INT into a conditional jump.
3870              We're pretty specific about what we will handle in this
3871              code, we can extend this as necessary over time.
3872
3873              Right now the insn in question must look like
3874              (set (pc) (if_then_else ...))  */
3875           else if (alter_jumps
3876                    && GET_CODE (insn) == JUMP_INSN
3877                    && condjump_p (insn)
3878                    && ! simplejump_p (insn))
3879             changed |= cprop_jump (insn, copy_rtx (insn), reg_used, src);
3880 #ifdef HAVE_cc0
3881           /* Similar code for machines that use a pair of CC0 setter and
3882              conditional jump insn.  */
3883           else if (alter_jumps
3884                    && GET_CODE (PATTERN (insn)) == SET
3885                    && SET_DEST (PATTERN (insn)) == cc0_rtx
3886                    && GET_CODE (NEXT_INSN (insn)) == JUMP_INSN
3887                    && condjump_p (NEXT_INSN (insn))
3888                    && ! simplejump_p (NEXT_INSN (insn)))
3889             changed |= cprop_cc0_jump (insn, reg_used, src);
3890 #endif
3891         }
3892       else if (GET_CODE (src) == REG
3893                && REGNO (src) >= FIRST_PSEUDO_REGISTER
3894                && REGNO (src) != regno)
3895         {
3896           if (try_replace_reg (reg_used->reg_rtx, src, insn))
3897             {
3898               changed = 1;
3899               copy_prop_count++;
3900               if (gcse_file != NULL)
3901                 {
3902                   fprintf (gcse_file, "COPY-PROP: Replacing reg %d in insn %d",
3903                            regno, INSN_UID (insn));
3904                   fprintf (gcse_file, " with reg %d\n", REGNO (src));
3905                 }
3906
3907               /* The original insn setting reg_used may or may not now be
3908                  deletable.  We leave the deletion to flow.  */
3909               /* FIXME: If it turns out that the insn isn't deletable,
3910                  then we may have unnecessarily extended register lifetimes
3911                  and made things worse.  */
3912             }
3913         }
3914     }
3915
3916   return changed;
3917 }
3918
3919 /* Forward propagate copies.  This includes copies and constants.  Return
3920    non-zero if a change was made.  ALTER_JUMPS is passed on to cprop_insn.  */
3921
3922 static int
3923 cprop (alter_jumps)
3924      int alter_jumps;
3925 {
3926   int bb, changed;
3927   rtx insn;
3928
3929   /* Note we start at block 1; block 0 is not scanned by this loop.  */
3930
3931   changed = 0;
3932   for (bb = 1; bb < n_basic_blocks; bb++)
3933     {
3934       /* Reset tables used to keep track of what's still valid [since the
3935          start of the block].  */
3936       reset_opr_set_tables ();
3937
3938       for (insn = BLOCK_HEAD (bb);
3939            insn != NULL && insn != NEXT_INSN (BLOCK_END (bb));
3940            insn = NEXT_INSN (insn))
3941         {
3942           if (INSN_P (insn))
3943             {
3944               changed |= cprop_insn (insn, alter_jumps);
3945
3946               /* Keep track of everything modified by this insn.  */
3947               /* ??? Need to be careful w.r.t. mods done to INSN.  Don't
3948                  call mark_oprs_set if we turned the insn into a NOTE.  */
3949               if (GET_CODE (insn) != NOTE)
3950                 mark_oprs_set (insn);
3951             }
3952         }
3953     }
3954
3955   if (gcse_file != NULL)
3956     fprintf (gcse_file, "\n");
3957
3958   return changed;
3959 }
3960
3961 /* Run a single copy/constant propagation pass over the function.
3962    PASS is the pass count; it is only used in the dump output.
3963    ALTER_JUMPS is handed on to cprop.  Returns nonzero if cprop changed anything.  */
3964
3965 static int
3966 one_cprop_pass (pass, alter_jumps)
3967      int pass;
3968      int alter_jumps;
3969 {
3970   int any_change = 0;
3971
3972   /* Start the statistics for this pass from scratch.  */
3973   const_prop_count = copy_prop_count = 0;
3974
3975   alloc_set_hash_table (max_cuid);
3976   compute_set_hash_table ();
3977   if (gcse_file != NULL)
3978     dump_hash_table (gcse_file, "SET", set_hash_table, set_hash_table_size,
3979                      n_sets);
3980
3981   /* Without any sets there is nothing to propagate.  */
3982   if (n_sets > 0)
3983     {
3984       alloc_cprop_mem (n_basic_blocks, n_sets);
3985       compute_cprop_data ();
3986       any_change = cprop (alter_jumps);
3987       free_cprop_mem ();
3988     }
3989   free_set_hash_table ();
3990
3991   if (gcse_file != NULL)
3992     {
3993       fprintf (gcse_file, "CPROP of %s, pass %d: %d bytes needed, ",
3994                current_function_name, pass, bytes_used);
3995       fprintf (gcse_file, "%d const props, %d copy props\n\n",
3996                const_prop_count, copy_prop_count);
3997     }
3998   return any_change;
3999 }
4000 \f
4001 /* Working variables for computing PRE+LCM.  */
4002
4003 /* Local properties of expressions; each vector is indexed by block number.  */
4004 /* Nonzero for expressions that are transparent in the block.  */
4005 static sbitmap *transp;
4006
4007 /* Nonzero for expressions that are transparent at the end of the block.
4008    This is only zero for expressions killed by an abnormal critical edge
4009    created by a call.  */
4010 static sbitmap *transpout;
4011
4012 /* Nonzero for expressions that are computed (available) in the block.  */
4013 static sbitmap *comp;
4014
4015 /* Nonzero for expressions that are locally anticipatable in the block.  */
4016 static sbitmap *antloc;
4017
4018 /* Nonzero for expressions where this block is an optimal computation
4019    point.  */
4020 static sbitmap *pre_optimal;
4021
4022 /* Nonzero for expressions which are redundant in a particular block.  */
4023 static sbitmap *pre_redundant;
4024
4025 /* Nonzero for expressions which should be inserted on a specific edge.  */
4026 static sbitmap *pre_insert_map;
4027
4028 /* Nonzero for expressions which should be deleted in a specific block.  */
4029 static sbitmap *pre_delete_map;
4030
4031 /* Contains the edge_list returned by pre_edge_lcm.  */
4032 static struct edge_list *edge_list;
4033
4034 /* Redundant insns; the bitmap is indexed by INSN_CUID.  */
4035 static sbitmap pre_redundant_insns;
4036
4037 /* Allocate vars used for PRE analysis: the per-block local property
4038    vectors.  The remaining PRE pointers are reset to NULL.  */
4039
4040 static void
4041 alloc_pre_mem (n_blocks, n_exprs)
4042      int n_blocks;
4043      int n_exprs;
4044 {
4045   /* Each vector is sized N_BLOCKS by N_EXPRS.  */
4046   transp = sbitmap_vector_alloc (n_blocks, n_exprs);
4047   comp = sbitmap_vector_alloc (n_blocks, n_exprs);
4048   antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
4049   ae_kill = sbitmap_vector_alloc (n_blocks, n_exprs);
4050
4051   /* Nothing else is allocated here.  In particular pre_insert_map and
4052      pre_delete_map are allocated later.  */
4053   pre_optimal = pre_redundant = NULL;
4054   pre_insert_map = pre_delete_map = NULL;
4055   ae_in = ae_out = NULL;
4056 }
4057
4058 /* Release the vectors used for PRE analysis and clear their pointers.
4059    ANTLOC and AE_KILL are freed elsewhere, just after pre_lcm finishes.  */
4060
4061 static void
4062 free_pre_mem ()
4063 {
4064   /* These two are freed unconditionally.  */
4065   free (transp);
4066   free (comp);
4067   transp = comp = NULL;
4068
4069   /* The rest may still be NULL, so test each one before freeing it.  */
4070   if (pre_optimal != NULL)
4071     free (pre_optimal);
4072   if (pre_redundant != NULL)
4073     free (pre_redundant);
4074   if (pre_insert_map != NULL)
4075     free (pre_insert_map);
4076   if (pre_delete_map != NULL)
4077     free (pre_delete_map);
4078   pre_optimal = pre_redundant = pre_insert_map = pre_delete_map = NULL;
4079
4080   if (ae_in != NULL)
4081     free (ae_in);
4082   if (ae_out != NULL)
4083     free (ae_out);
4084   ae_in = ae_out = NULL;
4085 }
4086
4087 /* Top level routine to do the dataflow analysis needed by PRE: compute the
4088    local properties, derive AE_KILL from them and run pre_edge_lcm.  */
4089
4090 static void
4091 compute_pre_data ()
4092 {
4093   int bb;
4094
4095   compute_local_properties (transp, comp, antloc, 0);
4096   sbitmap_vector_zero (ae_kill, n_basic_blocks);
4097
4098   /* A block's AE_KILL is taken to be ~(TRANSP | COMP).  Deriving it from
4099      the local properties this way is significantly faster than
4100      compute_ae_kill.  */
4101   for (bb = 0; bb < n_basic_blocks; bb++)
4102     {
4103       sbitmap kill = ae_kill[bb];
4104
4105       sbitmap_a_or_b (kill, transp[bb], comp[bb]);
4106       sbitmap_not (kill, kill);
4107     }
4108
4109   edge_list = pre_edge_lcm (gcse_file, n_exprs, transp, comp, antloc,
4110                             ae_kill, &pre_insert_map, &pre_delete_map);
4111   /* ANTLOC and AE_KILL are released as soon as pre_edge_lcm is done.  */
4112   free (antloc);
4113   free (ae_kill);
4114   antloc = ae_kill = NULL;
4115 }
4116 \f
4117 /* PRE utilities */
4118
4119 /* Return non-zero if an occurrence of expression EXPR in OCCR_BB would reach
4120    block BB.
4121
4122    VISITED is a pointer to a working buffer for tracking which BB's have
4123    been visited, indexed by block number.  pre_expr_reaches_here_p passes in a zeroed buffer.
4124
4125    We treat reaching expressions that go through blocks containing the same
4126    reaching expression as "not reaching".  E.g. if EXPR is generated in blocks
4127    2 and 3, INSN is in block 4, and 2->3->4, we treat the expression in block
4128    2 as not reaching.  The intent is to improve the probability of finding
4129    only one reaching expression and to reduce register lifetimes by picking
4130    the closest such expression.  */
4131
4132 static int
4133 pre_expr_reaches_here_p_work (occr_bb, expr, bb, visited)
4134      int occr_bb;
4135      struct expr *expr;
4136      int bb;
4137      char *visited;
4138 {
4139   edge pred;
4140
4141   for (pred = BASIC_BLOCK (bb)->pred; pred != NULL; pred = pred->pred_next)
4142     {
4143       int pred_bb = pred->src->index;
4144
4145       if (pred->src == ENTRY_BLOCK_PTR
4146           /* Has this predecessor already been visited?  */
4147           || visited[pred_bb])
4148         ;/* Nothing to do.  */
4149
4150       /* Does this predecessor generate this expression?  */
4151       else if (TEST_BIT (comp[pred_bb], expr->bitmap_index))
4152         {
4153           /* Is this the occurrence we're looking for?
4154              Note that there's only one generating occurrence per block
4155              so we just need to check the block number.  */
4156           if (occr_bb == pred_bb)
4157             return 1;
4158
4159           visited[pred_bb] = 1;
4160         }
4161       /* Ignore this predecessor if it kills the expression.  */
4162       else if (! TEST_BIT (transp[pred_bb], expr->bitmap_index))
4163         visited[pred_bb] = 1;
4164
4165       /* Neither gen nor kill; keep searching through its predecessors.  */
4166       else
4167         {
4168           visited[pred_bb] = 1;
4169           if (pre_expr_reaches_here_p_work (occr_bb, expr, pred_bb, visited))
4170             return 1;
4171         }
4172     }
4173
4174   /* All paths have been checked.  */
4175   return 0;
4176 }
4177
4178 /* Wrapper around pre_expr_reaches_here_p_work.  It supplies the zeroed
4179    per-block VISITED buffer and makes sure that buffer is freed again.  */
4180
4181 static int
4182 pre_expr_reaches_here_p (occr_bb, expr, bb)
4183      int occr_bb;
4184      struct expr *expr;
4185      int bb;
4186 {
4187   char *seen;
4188   int reaches;
4189
4190   seen = (char *) xcalloc (n_basic_blocks, 1);
4191   reaches = pre_expr_reaches_here_p_work (occr_bb, expr, bb, seen);
4192   free (seen);
4193   return reaches;
4194 }
4195 \f
4196
4197 /* Given an expr, generate RTL which we can insert at the end of a BB,
4198    or on an edge: a move of a copy of EXPR's expression into EXPR's
4199    reaching register.  Returns the result of gen_sequence; block numbers
4200    of the generated insns are left for the caller to set.  */
4201
4202 static rtx
4203 process_insert_insn (expr)
4204      struct expr *expr;
4205 {
4206   rtx reg = expr->reaching_reg;
4207   rtx pat, copied_expr;
4208
4209   /* Collect whatever the move expands to in a sequence of its own.  */
4210   start_sequence ();
4211   copied_expr = copy_rtx (expr->expr);
4212   emit_move_insn (reg, copied_expr);
4213   pat = gen_sequence ();
4214   end_sequence ();
4215
4216   return pat;
4217 }
4218
4219 /* Add EXPR to the end of basic block BB.
4220
4221    This is used by both the PRE and code hoisting.
4222
4223    For PRE, we want to verify that the expr is either transparent
4224    or locally anticipatable in the target block; PRE is nonzero to ask
4225    for that check.  This check makes no sense for code hoisting.  */
4226
4227 static void
4228 insert_insn_end_bb (expr, bb, pre)
4229      struct expr *expr;
4230      int bb;
4231      int pre;
4232 {
4233   rtx insn = BLOCK_END (bb);
4234   rtx new_insn;
4235   rtx reg = expr->reaching_reg;
4236   int regno = REGNO (reg);
4237   rtx pat;
4238   int i;
4239
4240   pat = process_insert_insn (expr);
4241
4242   /* If the last insn is a jump, insert EXPR in front [taking care to
4243      handle cc0, etc. properly].  */
4244
4245   if (GET_CODE (insn) == JUMP_INSN)
4246     {
4247 #ifdef HAVE_cc0
4248       rtx note;
4249 #endif
4250
4251       /* If this is a jump table, then we can't insert stuff here.  Since
4252          we know the previous real insn must be the tablejump, we insert
4253          the new instruction just before the tablejump.  */
4254       if (GET_CODE (PATTERN (insn)) == ADDR_VEC
4255           || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
4256         insn = prev_real_insn (insn);
4257
4258 #ifdef HAVE_cc0
4259       /* FIXME: 'twould be nice to call prev_cc0_setter here but it aborts
4260          if cc0 isn't set.  */
4261       note = find_reg_note (insn, REG_CC_SETTER, NULL_RTX);
4262       if (note)
4263         insn = XEXP (note, 0);
4264       else
4265         {
4266           rtx maybe_cc0_setter = prev_nonnote_insn (insn);
4267           if (maybe_cc0_setter
4268               && INSN_P (maybe_cc0_setter)
4269               && sets_cc0_p (PATTERN (maybe_cc0_setter)))
4270             insn = maybe_cc0_setter;
4271         }
4272 #endif
4273       /* FIXME: What if something in cc0/jump uses value set in new insn?  */
4274       new_insn = emit_block_insn_before (pat, insn, BASIC_BLOCK (bb));
4275     }
4276
4277   /* Likewise if the last insn is a call, as will happen in the presence
4278      of exception handling.  */
4279   else if (GET_CODE (insn) == CALL_INSN)
4280     {
4281       HARD_REG_SET parm_regs;
4282       int nparm_regs;
4283       rtx p;
4284
4285       /* Keeping in mind SMALL_REGISTER_CLASSES and parameters in registers,
4286          we search backward and place the instructions before the first
4287          parameter is loaded.  Do this for everyone for consistency and a
4288          presumption that we'll get better code elsewhere as well.
4289
4290          It should always be the case that we can put these instructions
4291          anywhere in the basic block when performing PRE optimizations.
4292          Check this.  */
4293
4294       if (pre
4295           && !TEST_BIT (antloc[bb], expr->bitmap_index)
4296           && !TEST_BIT (transp[bb], expr->bitmap_index))
4297         abort ();
4298
4299       /* Since different machines initialize their parameter registers
4300          in different orders, assume nothing.  Collect the set of all
4301          parameter registers.  */
4302       CLEAR_HARD_REG_SET (parm_regs);
4303       nparm_regs = 0;
4304       for (p = CALL_INSN_FUNCTION_USAGE (insn); p ; p = XEXP (p, 1))
4305         if (GET_CODE (XEXP (p, 0)) == USE
4306             && GET_CODE (XEXP (XEXP (p, 0), 0)) == REG)
4307           {
4308             if (REGNO (XEXP (XEXP (p, 0), 0)) >= FIRST_PSEUDO_REGISTER)
4309               abort ();
4310
4311             SET_HARD_REG_BIT (parm_regs, REGNO (XEXP (XEXP (p, 0), 0)));
4312             nparm_regs++;
4313           }
4314
4315       /* Search backward for the first set of a register in this set.  */
4316       while (nparm_regs && BLOCK_HEAD (bb) != insn)
4317         {
4318           insn = PREV_INSN (insn);
4319           p = single_set (insn);
4320           if (p && GET_CODE (SET_DEST (p)) == REG
4321               && REGNO (SET_DEST (p)) < FIRST_PSEUDO_REGISTER
4322               && TEST_HARD_REG_BIT (parm_regs, REGNO (SET_DEST (p))))
4323             {
4324               CLEAR_HARD_REG_BIT (parm_regs, REGNO (SET_DEST (p)));
4325               nparm_regs--;
4326             }
4327         }
4328
4329       /* If we found all the parameter loads, then we want to insert
4330          before the first parameter load.
4331
4332          If we did not find all the parameter loads, then we might have
4333          stopped on the head of the block, which could be a CODE_LABEL.
4334          If we inserted before the CODE_LABEL, then we would be putting
4335          the insn in the wrong basic block.  In that case, put the insn
4336          after the CODE_LABEL.  Also, respect NOTE_INSN_BASIC_BLOCK.  */
4337       while (GET_CODE (insn) == CODE_LABEL
4338              || NOTE_INSN_BASIC_BLOCK_P (insn))
4339         insn = NEXT_INSN (insn);
4340
4341       new_insn = emit_block_insn_before (pat, insn, BASIC_BLOCK (bb));
4342     }
4343   else
4344     {
4345       new_insn = emit_insn_after (pat, insn);
4346       BLOCK_END (bb) = new_insn;
4347     }
4348
4349   /* Keep block number table up to date.
4350      Note, PAT could be a multiple insn sequence, we have to make
4351      sure that each insn in the sequence is handled.  */
4352   if (GET_CODE (pat) == SEQUENCE)
4353     {
4354       for (i = 0; i < XVECLEN (pat, 0); i++)
4355         {
4356           rtx insn = XVECEXP (pat, 0, i);
4357
4358           set_block_num (insn, bb);
4359           if (INSN_P (insn))
4360             add_label_notes (PATTERN (insn), new_insn);
4361
4362           note_stores (PATTERN (insn), record_set_info, insn);
4363         }
4364     }
4365   else
4366     {
4367       add_label_notes (SET_SRC (pat), new_insn);
4368       set_block_num (new_insn, bb);
4369
4370       /* Keep register set table up to date.  */
4371       record_one_set (regno, new_insn);
4372     }
4373
4374   gcse_create_count++;
4375
4376   if (gcse_file)
4377     {
4378       fprintf (gcse_file, "PRE/HOIST: end of bb %d, insn %d, ",
4379                bb, INSN_UID (new_insn));
4380       fprintf (gcse_file, "copying expression %d to reg %d\n",
4381                expr->bitmap_index, regno);
4382     }
4383 }
4384
4385 /* Insert partially redundant expressions on edges in the CFG to make
4386    the expressions fully redundant.  Returns nonzero if anything was inserted.  */
4387
4388 static int
4389 pre_edge_insert (edge_list, index_map)
4390      struct edge_list *edge_list;
4391      struct expr **index_map;
4392 {
4393   int e, i, j, num_edges, set_size, did_insert = 0;
4394   sbitmap *inserted;
4395
4396   /* Where PRE_INSERT_MAP is nonzero, we add the expression on that edge
4397      if it reaches any of the deleted expressions.  */
4398
4399   set_size = pre_insert_map[0]->size;
4400   num_edges = NUM_EDGES (edge_list);
4401   inserted = sbitmap_vector_alloc (num_edges, n_exprs);
4402   sbitmap_vector_zero (inserted, num_edges);
4403
4404   for (e = 0; e < num_edges; e++)
4405     {
4406       int indx;
4407       basic_block pred = INDEX_EDGE_PRED_BB (edge_list, e);
4408       int bb = pred->index;
4409
4410       for (i = indx = 0; i < set_size; i++, indx += SBITMAP_ELT_BITS)
4411         {
4412           SBITMAP_ELT_TYPE insert = pre_insert_map[e]->elms[i];
4413
4414           for (j = indx; insert && j < n_exprs; j++, insert >>= 1)
4415             if ((insert & 1) != 0 && index_map[j]->reaching_reg != NULL_RTX)
4416               {
4417                 struct expr *expr = index_map[j];
4418                 struct occr *occr;
4419
4420                 /* Now look at each deleted occurrence of this expression.  */
4421                 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
4422                   {
4423                     if (! occr->deleted_p)
4424                       continue;
4425
4426                     /* Insert this expression on this edge, unless that was already done.
4427                        NOTE(review): whether it reaches the deleted occurrence is not tested.  */
4428                     if (!TEST_BIT (inserted[e], j))
4429                       {
4430                         rtx insn;
4431                         edge eg = INDEX_EDGE (edge_list, e);
4432
4433                         /* We can't insert anything on an abnormal and
4434                            critical edge, so we insert the insn at the end of
4435                            the previous block.  (Only EDGE_ABNORMAL is tested
4436                            here.)  There are several alternatives detailed in
4437                            Morgan's book P277 (sec 10.5) for handling this
4438                            situation.  This one is easiest for now.  */
4439
4440                         if ((eg->flags & EDGE_ABNORMAL) == EDGE_ABNORMAL)
4441                           insert_insn_end_bb (index_map[j], bb, 0);
4442                         else
4443                           {
4444                             insn = process_insert_insn (index_map[j]);
4445                             insert_insn_on_edge (insn, eg);
4446                           }
4447
4448                         if (gcse_file)
4449                           {
4450                             fprintf (gcse_file, "PRE/HOIST: edge (%d,%d), ",
4451                                      bb,
4452                                      INDEX_EDGE_SUCC_BB (edge_list, e)->index);
4453                             fprintf (gcse_file, "copy expression %d\n",
4454                                      expr->bitmap_index);
4455                           }
4456
4457                         SET_BIT (inserted[e], j);
4458                         did_insert = 1;
4459                         gcse_create_count++;
4460                       }
4461                   }
4462               }
4463         }
4464     }
4465
4466   free (inserted);
4467   return did_insert;
4468 }
4469
4470 /* Copy the result of INSN to EXPR's reaching register, using a new insn emitted after INSN.  */
4471
4472 static void
4473 pre_insert_copy_insn (expr, insn)
4474      struct expr *expr;
4475      rtx insn;
4476 {
4477   rtx reg = expr->reaching_reg;
4478   int regno = REGNO (reg);
4479   int indx = expr->bitmap_index;
4480   rtx set = single_set (insn);
4481   rtx new_insn;
4482   int bb = BLOCK_NUM (insn);
4483
4484   if (!set)
4485     abort ();
4486
4487   new_insn = emit_insn_after (gen_rtx_SET (VOIDmode, reg, SET_DEST (set)),
4488                               insn);
4489
4490   /* Keep block number table up to date.  */
4491   set_block_num (new_insn, bb);
4492
4493   /* Keep register set table up to date, and the block end if INSN was last.  */
4494   record_one_set (regno, new_insn);
4495   if (insn == BLOCK_END (bb))
4496     BLOCK_END (bb) = new_insn;
4497
4498   gcse_create_count++;
4499
4500   if (gcse_file)
4501     fprintf (gcse_file,
4502              "PRE: bb %d, insn %d, copy expression %d in insn %d to reg %d\n",
4503               BLOCK_NUM (insn), INSN_UID (new_insn), indx,
4504               INSN_UID (insn), regno);
4505 }
4506
4507 /* Copy available expressions that reach the redundant expression
4508    to `reaching_reg'.  Each available occurrence is copied at most once.  */
4509
4510 static void
4511 pre_insert_copies ()
4512 {
4513   unsigned int i;
4514   struct expr *expr;
4515   struct occr *occr;
4516   struct occr *avail;
4517
4518   /* For each available expression in the table, copy the result to
4519      `reaching_reg' if the expression reaches a deleted one.
4520
4521      ??? The current algorithm is rather brute force.
4522      Need to do some profiling.  */
4523
4524   for (i = 0; i < expr_hash_table_size; i++)
4525     for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
4526       {
4527         /* If the basic block isn't reachable, PPOUT will be TRUE.  However,
4528            we don't want to insert a copy here because the expression may not
4529            really be redundant.  So only insert an insn if the expression was
4530            deleted.  This test also avoids further processing if the
4531            expression wasn't deleted anywhere.  */
4532         if (expr->reaching_reg == NULL)
4533           continue;
4534
4535         for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
4536           {
4537             if (! occr->deleted_p)
4538               continue;
4539
4540             for (avail = expr->avail_occr; avail != NULL; avail = avail->next)
4541               {
4542                 rtx insn = avail->insn;
4543
4544                 /* No need to handle this one if its result was copied already.  */
4545                 if (avail->copied_p)
4546                   continue;
4547
4548                 /* Don't handle this one if it's a redundant one.  */
4549                 if (TEST_BIT (pre_redundant_insns, INSN_CUID (insn)))
4550                   continue;
4551
4552                 /* Or if the expression doesn't reach the deleted one.  */
4553                 if (! pre_expr_reaches_here_p (BLOCK_NUM (avail->insn), expr,
4554                                                BLOCK_NUM (occr->insn)))
4555                   continue;
4556
4557                 /* Copy the result of avail to reaching_reg.  */
4558                 pre_insert_copy_insn (expr, insn);
4559                 avail->copied_p = 1;
4560               }
4561           }
4562       }
4563 }
4564
4565 /* Delete redundant computations.
4566    Deletion is done by changing the insn to copy the `reaching_reg' of
4567    the expression into the result of the SET.  It is left to later passes
4568    (cprop, cse2, flow, combine, regmove) to propagate the copy or eliminate it.
4569
4570    Returns non-zero if a change is made.  */
4571
4572 static int
4573 pre_delete ()
4574 {
4575   unsigned int i;
4576   int changed;
4577   struct expr *expr;
4578   struct occr *occr;
4579
4580   changed = 0;
4581   for (i = 0; i < expr_hash_table_size; i++)
4582     for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
4583       {
4584         int indx = expr->bitmap_index;
4585
4586         /* We only need to search antic_occr since we require
4587            ANTLOC != 0.  */
4588
4589         for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
4590           {
4591             rtx insn = occr->insn;
4592             rtx set;
4593             int bb = BLOCK_NUM (insn);
4594
4595             if (TEST_BIT (pre_delete_map[bb], indx))
4596               {
4597                 set = single_set (insn);
4598                 if (! set)
4599                   abort ();
4600
4601                 /* Create a pseudo-reg to store the result of reaching
4602                    expressions into.  Get the mode for the new pseudo from
4603                    the mode of the original destination pseudo.  */
4604                 if (expr->reaching_reg == NULL)
4605                   expr->reaching_reg
4606                     = gen_reg_rtx (GET_MODE (SET_DEST (set)));
4607
4608                 /* In theory this should never fail since we're creating
4609                    a reg->reg copy.
4610
4611                    However, on the x86 some of the movXX patterns actually
4612                    contain clobbers of scratch regs.  This may cause the
4613                    insn created by validate_change to not match any pattern
4614                    and thus cause validate_change to fail.   */
4615                 if (validate_change (insn, &SET_SRC (set),
4616                                      expr->reaching_reg, 0))
4617                   {
4618                     occr->deleted_p = 1;
4619                     SET_BIT (pre_redundant_insns, INSN_CUID (insn));
4620                     changed = 1;
4621                     gcse_subst_count++;
4622                   }
4623
4624                 if (gcse_file)
4625                   {
4626                     fprintf (gcse_file,
4627                              "PRE: redundant insn %d (expression %d) in ",
4628                                INSN_UID (insn), indx);
4629                     fprintf (gcse_file, "bb %d, reaching reg is %d\n",
4630                              bb, REGNO (expr->reaching_reg));
4631                   }
4632               }
4633           }
4634       }
4635
4636   return changed;
4637 }
4638
4639 /* Perform GCSE optimizations using PRE.
4640    This is called by one_pre_gcse_pass after all the dataflow analysis
4641    has been done.
4642
4643    This is based on the original Morel-Renvoise paper Fred Chow's thesis, and
4644    lazy code motion from Knoop, Ruthing and Steffen as described in Advanced
4645    Compiler Design and Implementation.
4646
4647    ??? A new pseudo reg is created to hold the reaching expression.  The nice
4648    thing about the classical approach is that it would try to use an existing
4649    reg.  If the register can't be adequately optimized [i.e. we introduce
4650    reload problems], one could add a pass here to propagate the new register
4651    through the block.
4652
4653    ??? We don't handle single sets in PARALLELs because we're [currently] not
4654    able to copy the rest of the parallel when we insert copies to create full
4655    redundancies from partial redundancies.  However, there's no reason why we
4656    can't handle PARALLELs in the cases where there are no partial
4657    redundancies.  */
4658
4659 static int
4660 pre_gcse ()
4661 {
4662   unsigned int i;
4663   int did_insert, changed;
4664   struct expr **index_map;
4665   struct expr *expr;
4666
4667   /* Compute a mapping from expression number (`bitmap_index') to
4668      hash table entry.  */
4669
4670   index_map = (struct expr **) xcalloc (n_exprs, sizeof (struct expr *));
4671   for (i = 0; i < expr_hash_table_size; i++)
4672     for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
4673       index_map[expr->bitmap_index] = expr;
4674
4675   /* Reset bitmap used to track which insns are redundant.  */
4676   pre_redundant_insns = sbitmap_alloc (max_cuid);
4677   sbitmap_zero (pre_redundant_insns);
4678
4679   /* Delete the redundant insns first so that
4680      - we know what register to use for the new insns and for the other
4681        ones with reaching expressions
4682      - we know which insns are redundant when we go to create copies  */
4683
4684   changed = pre_delete ();
4685
4686   did_insert = pre_edge_insert (edge_list, index_map);
4687
4688   /* In other places with reaching expressions, copy the expression to the
4689      specially allocated pseudo-reg that reaches the redundant expr.  */
4690   pre_insert_copies ();
4691   if (did_insert)
4692     {
4693       commit_edge_insertions ();
4694       changed = 1;
4695     }
4696
4697   free (index_map);
4698   free (pre_redundant_insns);
4699   return changed;
4700 }
4701
4702 /* Top level routine to perform one PRE GCSE pass.
4703
4704    Return non-zero if a change was made.  */
4705
4706 static int
4707 one_pre_gcse_pass (pass)
4708      int pass;
4709 {
4710   int changed = 0;
4711
4712   gcse_subst_count = 0;
4713   gcse_create_count = 0;
4714
4715   alloc_expr_hash_table (max_cuid);
4716   add_noreturn_fake_exit_edges ();
4717   compute_expr_hash_table ();
4718   if (gcse_file)
4719     dump_hash_table (gcse_file, "Expression", expr_hash_table,
4720                      expr_hash_table_size, n_exprs);
4721
4722   if (n_exprs > 0)
4723     {
4724       alloc_pre_mem (n_basic_blocks, n_exprs);
4725       compute_pre_data ();
4726       changed |= pre_gcse ();
4727       free_edge_list (edge_list);
4728       free_pre_mem ();
4729     }
4730
4731   remove_fake_edges ();
4732   free_expr_hash_table ();
4733
4734   if (gcse_file)
4735     {
4736       fprintf (gcse_file, "\nPRE GCSE of %s, pass %d: %d bytes needed, ",
4737                current_function_name, pass, bytes_used);
4738       fprintf (gcse_file, "%d substs, %d insns created\n",
4739                gcse_subst_count, gcse_create_count);
4740     }
4741
4742   return changed;
4743 }
4744 \f
4745 /* If X contains any LABEL_REF's, add REG_LABEL notes for them to INSN.
4746    We have to add REG_LABEL notes, because the following loop optimization
4747    pass requires them.  */
4748
4749 /* ??? This is very similar to the loop.c add_label_notes function.  We
4750    could probably share code here.  */
4751
4752 /* ??? If there was a jump optimization pass after gcse and before loop,
4753    then we would not need to do this here, because jump would add the
4754    necessary REG_LABEL notes.  */
4755
4756 static void
4757 add_label_notes (x, insn)
4758      rtx x;
4759      rtx insn;
4760 {
4761   enum rtx_code code = GET_CODE (x);
4762   int i, j;
4763   const char *fmt;
4764
4765   if (code == LABEL_REF && !LABEL_REF_NONLOCAL_P (x))
4766     {
4767       /* This code used to ignore labels that referred to dispatch tables to
4768          avoid flow generating (slighly) worse code.
4769
4770          We no longer ignore such label references (see LABEL_REF handling in
4771          mark_jump_label for additional information).  */
4772
4773       REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_LABEL, XEXP (x, 0),
4774                                             REG_NOTES (insn));
4775       return;
4776     }
4777
4778   for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
4779     {
4780       if (fmt[i] == 'e')
4781         add_label_notes (XEXP (x, i), insn);
4782       else if (fmt[i] == 'E')
4783         for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4784           add_label_notes (XVECEXP (x, i, j), insn);
4785     }
4786 }
4787
4788 /* Compute transparent outgoing information for each block.
4789
4790    An expression is transparent to an edge unless it is killed by
4791    the edge itself.  This can only happen with abnormal control flow,
4792    when the edge is traversed through a call.  This happens with
4793    non-local labels and exceptions.
4794
4795    This would not be necessary if we split the edge.  While this is
4796    normally impossible for abnormal critical edges, with some effort
4797    it should be possible with exception handling, since we still have
4798    control over which handler should be invoked.  But due to increased
4799    EH table sizes, this may not be worthwhile.  */
4800
4801 static void
4802 compute_transpout ()
4803 {
4804   int bb;
4805   unsigned int i;
4806   struct expr *expr;
4807
4808   sbitmap_vector_ones (transpout, n_basic_blocks);
4809
4810   for (bb = 0; bb < n_basic_blocks; ++bb)
4811     {
4812       /* Note that flow inserted a nop a the end of basic blocks that
4813          end in call instructions for reasons other than abnormal
4814          control flow.  */
4815       if (GET_CODE (BLOCK_END (bb)) != CALL_INSN)
4816         continue;
4817
4818       for (i = 0; i < expr_hash_table_size; i++)
4819         for (expr = expr_hash_table[i]; expr ; expr = expr->next_same_hash)
4820           if (GET_CODE (expr->expr) == MEM)
4821             {
4822               if (GET_CODE (XEXP (expr->expr, 0)) == SYMBOL_REF
4823                   && CONSTANT_POOL_ADDRESS_P (XEXP (expr->expr, 0)))
4824                 continue;
4825
4826               /* ??? Optimally, we would use interprocedural alias
4827                  analysis to determine if this mem is actually killed
4828                  by this call.  */
4829               RESET_BIT (transpout[bb], expr->bitmap_index);
4830             }
4831     }
4832 }
4833
4834 /* Removal of useless null pointer checks */
4835
4836 /* Called via note_stores.  X is set by SETTER.  If X is a register we must
4837    invalidate nonnull_local and set nonnull_killed.  DATA is really a
4838    `null_pointer_info *'.
4839
4840    We ignore hard registers.  */
4841
4842 static void
4843 invalidate_nonnull_info (x, setter, data)
4844      rtx x;
4845      rtx setter ATTRIBUTE_UNUSED;
4846      void *data;
4847 {
4848   unsigned int regno;
4849   struct null_pointer_info *npi = (struct null_pointer_info *) data;
4850
4851   while (GET_CODE (x) == SUBREG)
4852     x = SUBREG_REG (x);
4853
4854   /* Ignore anything that is not a register or is a hard register.  */
4855   if (GET_CODE (x) != REG
4856       || REGNO (x) < npi->min_reg
4857       || REGNO (x) >= npi->max_reg)
4858     return;
4859
4860   regno = REGNO (x) - npi->min_reg;
4861
4862   RESET_BIT (npi->nonnull_local[npi->current_block], regno);
4863   SET_BIT (npi->nonnull_killed[npi->current_block], regno);
4864 }
4865
4866 /* Do null-pointer check elimination for the registers indicated in
4867    NPI.  NONNULL_AVIN and NONNULL_AVOUT are pre-allocated sbitmaps;
4868    they are not our responsibility to free.  */
4869
4870 static void
4871 delete_null_pointer_checks_1 (block_reg, nonnull_avin, nonnull_avout, npi)
4872      unsigned int *block_reg;
4873      sbitmap *nonnull_avin;
4874      sbitmap *nonnull_avout;
4875      struct null_pointer_info *npi;
4876 {
4877   int bb;
4878   int current_block;
4879   sbitmap *nonnull_local = npi->nonnull_local;
4880   sbitmap *nonnull_killed = npi->nonnull_killed;
4881
4882   /* Compute local properties, nonnull and killed.  A register will have
4883      the nonnull property if at the end of the current block its value is
4884      known to be nonnull.  The killed property indicates that somewhere in
4885      the block any information we had about the register is killed.
4886
4887      Note that a register can have both properties in a single block.  That
4888      indicates that it's killed, then later in the block a new value is
4889      computed.  */
4890   sbitmap_vector_zero (nonnull_local, n_basic_blocks);
4891   sbitmap_vector_zero (nonnull_killed, n_basic_blocks);
4892
4893   for (current_block = 0; current_block < n_basic_blocks; current_block++)
4894     {
4895       rtx insn, stop_insn;
4896
4897       /* Set the current block for invalidate_nonnull_info.  */
4898       npi->current_block = current_block;
4899
4900       /* Scan each insn in the basic block looking for memory references and
4901          register sets.  */
4902       stop_insn = NEXT_INSN (BLOCK_END (current_block));
4903       for (insn = BLOCK_HEAD (current_block);
4904            insn != stop_insn;
4905            insn = NEXT_INSN (insn))
4906         {
4907           rtx set;
4908           rtx reg;
4909
4910           /* Ignore anything that is not a normal insn.  */
4911           if (! INSN_P (insn))
4912             continue;
4913
4914           /* Basically ignore anything that is not a simple SET.  We do have
4915              to make sure to invalidate nonnull_local and set nonnull_killed
4916              for such insns though.  */
4917           set = single_set (insn);
4918           if (!set)
4919             {
4920               note_stores (PATTERN (insn), invalidate_nonnull_info, npi);
4921               continue;
4922             }
4923
4924           /* See if we've got a useable memory load.  We handle it first
4925              in case it uses its address register as a dest (which kills
4926              the nonnull property).  */
4927           if (GET_CODE (SET_SRC (set)) == MEM
4928               && GET_CODE ((reg = XEXP (SET_SRC (set), 0))) == REG
4929               && REGNO (reg) >= npi->min_reg
4930               && REGNO (reg) < npi->max_reg)
4931             SET_BIT (nonnull_local[current_block],
4932                      REGNO (reg) - npi->min_reg);
4933
4934           /* Now invalidate stuff clobbered by this insn.  */
4935           note_stores (PATTERN (insn), invalidate_nonnull_info, npi);
4936
4937           /* And handle stores, we do these last since any sets in INSN can
4938              not kill the nonnull property if it is derived from a MEM
4939              appearing in a SET_DEST.  */
4940           if (GET_CODE (SET_DEST (set)) == MEM
4941               && GET_CODE ((reg = XEXP (SET_DEST (set), 0))) == REG
4942               && REGNO (reg) >= npi->min_reg
4943               && REGNO (reg) < npi->max_reg)
4944             SET_BIT (nonnull_local[current_block],
4945                      REGNO (reg) - npi->min_reg);
4946         }
4947     }
4948
4949   /* Now compute global properties based on the local properties.   This
4950      is a classic global availablity algorithm.  */
4951   compute_available (nonnull_local, nonnull_killed,
4952                      nonnull_avout, nonnull_avin);
4953
4954   /* Now look at each bb and see if it ends with a compare of a value
4955      against zero.  */
4956   for (bb = 0; bb < n_basic_blocks; bb++)
4957     {
4958       rtx last_insn = BLOCK_END (bb);
4959       rtx condition, earliest;
4960       int compare_and_branch;
4961
4962       /* Since MIN_REG is always at least FIRST_PSEUDO_REGISTER, and
4963          since BLOCK_REG[BB] is zero if this block did not end with a
4964          comparison against zero, this condition works.  */
4965       if (block_reg[bb] < npi->min_reg
4966           || block_reg[bb] >= npi->max_reg)
4967         continue;
4968
4969       /* LAST_INSN is a conditional jump.  Get its condition.  */
4970       condition = get_condition (last_insn, &earliest);
4971
4972       /* If we can't determine the condition then skip.  */
4973       if (! condition)
4974         continue;
4975
4976       /* Is the register known to have a nonzero value?  */
4977       if (!TEST_BIT (nonnull_avout[bb], block_reg[bb] - npi->min_reg))
4978         continue;
4979
4980       /* Try to compute whether the compare/branch at the loop end is one or
4981          two instructions.  */
4982       if (earliest == last_insn)
4983         compare_and_branch = 1;
4984       else if (earliest == prev_nonnote_insn (last_insn))
4985         compare_and_branch = 2;
4986       else
4987         continue;
4988
4989       /* We know the register in this comparison is nonnull at exit from
4990          this block.  We can optimize this comparison.  */
4991       if (GET_CODE (condition) == NE)
4992         {
4993           rtx new_jump;
4994
4995           new_jump = emit_jump_insn_before (gen_jump (JUMP_LABEL (last_insn)),
4996                                             last_insn);
4997           JUMP_LABEL (new_jump) = JUMP_LABEL (last_insn);
4998           LABEL_NUSES (JUMP_LABEL (new_jump))++;
4999           emit_barrier_after (new_jump);
5000         }
5001       delete_insn (last_insn);
5002       if (compare_and_branch == 2)
5003         delete_insn (earliest);
5004
5005       /* Don't check this block again.  (Note that BLOCK_END is
5006          invalid here; we deleted the last instruction in the
5007          block.)  */
5008       block_reg[bb] = 0;
5009     }
5010 }
5011
5012 /* Find EQ/NE comparisons against zero which can be (indirectly) evaluated
5013    at compile time.
5014
5015    This is conceptually similar to global constant/copy propagation and
5016    classic global CSE (it even uses the same dataflow equations as cprop).
5017
5018    If a register is used as memory address with the form (mem (reg)), then we
5019    know that REG can not be zero at that point in the program.  Any instruction
5020    which sets REG "kills" this property.
5021
5022    So, if every path leading to a conditional branch has an available memory
5023    reference of that form, then we know the register can not have the value
5024    zero at the conditional branch.
5025
5026    So we merely need to compute the local properies and propagate that data
5027    around the cfg, then optimize where possible.
5028
5029    We run this pass two times.  Once before CSE, then again after CSE.  This
5030    has proven to be the most profitable approach.  It is rare for new
5031    optimization opportunities of this nature to appear after the first CSE
5032    pass.
5033
5034    This could probably be integrated with global cprop with a little work.  */
5035
5036 void
5037 delete_null_pointer_checks (f)
5038      rtx f ATTRIBUTE_UNUSED;
5039 {
5040   sbitmap *nonnull_avin, *nonnull_avout;
5041   unsigned int *block_reg;
5042   int bb;
5043   int reg;
5044   int regs_per_pass;
5045   int max_reg;
5046   struct null_pointer_info npi;
5047
5048   /* If we have only a single block, then there's nothing to do.  */
5049   if (n_basic_blocks <= 1)
5050     return;
5051
5052   /* Trying to perform global optimizations on flow graphs which have
5053      a high connectivity will take a long time and is unlikely to be
5054      particularly useful.
5055
5056      In normal circumstances a cfg should have about twice has many edges
5057      as blocks.  But we do not want to punish small functions which have
5058      a couple switch statements.  So we require a relatively large number
5059      of basic blocks and the ratio of edges to blocks to be high.  */
5060   if (n_basic_blocks > 1000 && n_edges / n_basic_blocks >= 20)
5061     return;
5062
5063   /* We need four bitmaps, each with a bit for each register in each
5064      basic block.  */
5065   max_reg = max_reg_num ();
5066   regs_per_pass = get_bitmap_width (4, n_basic_blocks, max_reg);
5067
5068   /* Allocate bitmaps to hold local and global properties.  */
5069   npi.nonnull_local = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);
5070   npi.nonnull_killed = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);
5071   nonnull_avin = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);
5072   nonnull_avout = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);
5073
5074   /* Go through the basic blocks, seeing whether or not each block
5075      ends with a conditional branch whose condition is a comparison
5076      against zero.  Record the register compared in BLOCK_REG.  */
5077   block_reg = (unsigned int *) xcalloc (n_basic_blocks, sizeof (int));
5078   for (bb = 0; bb < n_basic_blocks; bb++)
5079     {
5080       rtx last_insn = BLOCK_END (bb);
5081       rtx condition, earliest, reg;
5082
5083       /* We only want conditional branches.  */
5084       if (GET_CODE (last_insn) != JUMP_INSN
5085           || !any_condjump_p (last_insn)
5086           || !onlyjump_p (last_insn))
5087         continue;
5088
5089       /* LAST_INSN is a conditional jump.  Get its condition.  */
5090       condition = get_condition (last_insn, &earliest);
5091
5092       /* If we were unable to get the condition, or it is not a equality
5093          comparison against zero then there's nothing we can do.  */
5094       if (!condition
5095           || (GET_CODE (condition) != NE && GET_CODE (condition) != EQ)
5096           || GET_CODE (XEXP (condition, 1)) != CONST_INT
5097           || (XEXP (condition, 1)
5098               != CONST0_RTX (GET_MODE (XEXP (condition, 0)))))
5099         continue;
5100
5101       /* We must be checking a register against zero.  */
5102       reg = XEXP (condition, 0);
5103       if (GET_CODE (reg) != REG)
5104         continue;
5105
5106       block_reg[bb] = REGNO (reg);
5107     }
5108
5109   /* Go through the algorithm for each block of registers.  */
5110   for (reg = FIRST_PSEUDO_REGISTER; reg < max_reg; reg += regs_per_pass)
5111     {
5112       npi.min_reg = reg;
5113       npi.max_reg = MIN (reg + regs_per_pass, max_reg);
5114       delete_null_pointer_checks_1 (block_reg, nonnull_avin,
5115                                     nonnull_avout, &npi);
5116     }
5117
5118   /* Free the table of registers compared at the end of every block.  */
5119   free (block_reg);
5120
5121   /* Free bitmaps.  */
5122   free (npi.nonnull_local);
5123   free (npi.nonnull_killed);
5124   free (nonnull_avin);
5125   free (nonnull_avout);
5126 }
5127
5128 /* Code Hoisting variables and subroutines.  */
5129
5130 /* Very busy expressions.  */
5131 static sbitmap *hoist_vbein;
5132 static sbitmap *hoist_vbeout;
5133
5134 /* Hoistable expressions.  */
5135 static sbitmap *hoist_exprs;
5136
5137 /* Dominator bitmaps.  */
5138 static sbitmap *dominators;
5139
/* ??? We could compute post dominators and run this algorithm in
   reverse to perform tail merging; doing so would probably be
   more effective than the tail merging code in jump.c.

   It's unclear if tail merging could be run in parallel with
   code hoisting.  It would be nice.  */
5146
5147 /* Allocate vars used for code hoisting analysis.  */
5148
5149 static void
5150 alloc_code_hoist_mem (n_blocks, n_exprs)
5151      int n_blocks, n_exprs;
5152 {
5153   antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
5154   transp = sbitmap_vector_alloc (n_blocks, n_exprs);
5155   comp = sbitmap_vector_alloc (n_blocks, n_exprs);
5156
5157   hoist_vbein = sbitmap_vector_alloc (n_blocks, n_exprs);
5158   hoist_vbeout = sbitmap_vector_alloc (n_blocks, n_exprs);
5159   hoist_exprs = sbitmap_vector_alloc (n_blocks, n_exprs);
5160   transpout = sbitmap_vector_alloc (n_blocks, n_exprs);
5161
5162   dominators = sbitmap_vector_alloc (n_blocks, n_blocks);
5163 }
5164
5165 /* Free vars used for code hoisting analysis.  */
5166
5167 static void
5168 free_code_hoist_mem ()
5169 {
5170   free (antloc);
5171   free (transp);
5172   free (comp);
5173
5174   free (hoist_vbein);
5175   free (hoist_vbeout);
5176   free (hoist_exprs);
5177   free (transpout);
5178
5179   free (dominators);
5180 }
5181
5182 /* Compute the very busy expressions at entry/exit from each block.
5183
5184    An expression is very busy if all paths from a given point
5185    compute the expression.  */
5186
5187 static void
5188 compute_code_hoist_vbeinout ()
5189 {
5190   int bb, changed, passes;
5191
5192   sbitmap_vector_zero (hoist_vbeout, n_basic_blocks);
5193   sbitmap_vector_zero (hoist_vbein, n_basic_blocks);
5194
5195   passes = 0;
5196   changed = 1;
5197
5198   while (changed)
5199     {
5200       changed = 0;
5201
5202       /* We scan the blocks in the reverse order to speed up
5203          the convergence.  */
5204       for (bb = n_basic_blocks - 1; bb >= 0; bb--)
5205         {
5206           changed |= sbitmap_a_or_b_and_c (hoist_vbein[bb], antloc[bb],
5207                                            hoist_vbeout[bb], transp[bb]);
5208           if (bb != n_basic_blocks - 1)
5209             sbitmap_intersection_of_succs (hoist_vbeout[bb], hoist_vbein, bb);
5210         }
5211
5212       passes++;
5213     }
5214
5215   if (gcse_file)
5216     fprintf (gcse_file, "hoisting vbeinout computation: %d passes\n", passes);
5217 }
5218
5219 /* Top level routine to do the dataflow analysis needed by code hoisting.  */
5220
5221 static void
5222 compute_code_hoist_data ()
5223 {
5224   compute_local_properties (transp, comp, antloc, 0);
5225   compute_transpout ();
5226   compute_code_hoist_vbeinout ();
5227   compute_flow_dominators (dominators, NULL);
5228   if (gcse_file)
5229     fprintf (gcse_file, "\n");
5230 }
5231
5232 /* Determine if the expression identified by EXPR_INDEX would
5233    reach BB unimpared if it was placed at the end of EXPR_BB.
5234
5235    It's unclear exactly what Muchnick meant by "unimpared".  It seems
5236    to me that the expression must either be computed or transparent in
5237    *every* block in the path(s) from EXPR_BB to BB.  Any other definition
5238    would allow the expression to be hoisted out of loops, even if
5239    the expression wasn't a loop invariant.
5240
5241    Contrast this to reachability for PRE where an expression is
5242    considered reachable if *any* path reaches instead of *all*
5243    paths.  */
5244
5245 static int
5246 hoist_expr_reaches_here_p (expr_bb, expr_index, bb, visited)
5247      int expr_bb;
5248      int expr_index;
5249      int bb;
5250      char *visited;
5251 {
5252   edge pred;
5253   int visited_allocated_locally = 0;
5254
5255
5256   if (visited == NULL)
5257     {
5258        visited_allocated_locally = 1;
5259        visited = xcalloc (n_basic_blocks, 1);
5260     }
5261
5262   visited[expr_bb] = 1;
5263   for (pred = BASIC_BLOCK (bb)->pred; pred != NULL; pred = pred->pred_next)
5264     {
5265       int pred_bb = pred->src->index;
5266
5267       if (pred->src == ENTRY_BLOCK_PTR)
5268         break;
5269       else if (visited[pred_bb])
5270         continue;
5271
5272       /* Does this predecessor generate this expression?  */
5273       else if (TEST_BIT (comp[pred_bb], expr_index))
5274         break;
5275       else if (! TEST_BIT (transp[pred_bb], expr_index))
5276         break;
5277
5278       /* Not killed.  */
5279       else
5280         {
5281           visited[pred_bb] = 1;
5282           if (! hoist_expr_reaches_here_p (expr_bb, expr_index,
5283                                            pred_bb, visited))
5284             break;
5285         }
5286     }
5287   if (visited_allocated_locally)
5288     free (visited);
5289
5290   return (pred == NULL);
5291 }
5292 \f
5293 /* Actually perform code hoisting.  */
5294
5295 static void
5296 hoist_code ()
5297 {
5298   int bb, dominated;
5299   unsigned int i;
5300   struct expr **index_map;
5301   struct expr *expr;
5302
5303   sbitmap_vector_zero (hoist_exprs, n_basic_blocks);
5304
5305   /* Compute a mapping from expression number (`bitmap_index') to
5306      hash table entry.  */
5307
5308   index_map = (struct expr **) xcalloc (n_exprs, sizeof (struct expr *));
5309   for (i = 0; i < expr_hash_table_size; i++)
5310     for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
5311       index_map[expr->bitmap_index] = expr;
5312
5313   /* Walk over each basic block looking for potentially hoistable
5314      expressions, nothing gets hoisted from the entry block.  */
5315   for (bb = 0; bb < n_basic_blocks; bb++)
5316     {
5317       int found = 0;
5318       int insn_inserted_p;
5319
5320       /* Examine each expression that is very busy at the exit of this
5321          block.  These are the potentially hoistable expressions.  */
5322       for (i = 0; i < hoist_vbeout[bb]->n_bits; i++)
5323         {
5324           int hoistable = 0;
5325
5326           if (TEST_BIT (hoist_vbeout[bb], i) && TEST_BIT (transpout[bb], i))
5327             {
5328               /* We've found a potentially hoistable expression, now
5329                  we look at every block BB dominates to see if it
5330                  computes the expression.  */
5331               for (dominated = 0; dominated < n_basic_blocks; dominated++)
5332                 {
5333                   /* Ignore self dominance.  */
5334                   if (bb == dominated
5335                       || ! TEST_BIT (dominators[dominated], bb))
5336                     continue;
5337
5338                   /* We've found a dominated block, now see if it computes
5339                      the busy expression and whether or not moving that
5340                      expression to the "beginning" of that block is safe.  */
5341                   if (!TEST_BIT (antloc[dominated], i))
5342                     continue;
5343
5344                   /* Note if the expression would reach the dominated block
5345                      unimpared if it was placed at the end of BB.
5346
5347                      Keep track of how many times this expression is hoistable
5348                      from a dominated block into BB.  */
5349                   if (hoist_expr_reaches_here_p (bb, i, dominated, NULL))
5350                     hoistable++;
5351                 }
5352
5353               /* If we found more than one hoistable occurence of this
5354                  expression, then note it in the bitmap of expressions to
5355                  hoist.  It makes no sense to hoist things which are computed
5356                  in only one BB, and doing so tends to pessimize register
5357                  allocation.  One could increase this value to try harder
5358                  to avoid any possible code expansion due to register
5359                  allocation issues; however experiments have shown that
5360                  the vast majority of hoistable expressions are only movable
5361                  from two successors, so raising this threshhold is likely
5362                  to nullify any benefit we get from code hoisting.  */
5363               if (hoistable > 1)
5364                 {
5365                   SET_BIT (hoist_exprs[bb], i);
5366                   found = 1;
5367                 }
5368             }
5369         }
5370
5371       /* If we found nothing to hoist, then quit now.  */
5372       if (! found)
5373         continue;
5374
5375       /* Loop over all the hoistable expressions.  */
5376       for (i = 0; i < hoist_exprs[bb]->n_bits; i++)
5377         {
5378           /* We want to insert the expression into BB only once, so
5379              note when we've inserted it.  */
5380           insn_inserted_p = 0;
5381
5382           /* These tests should be the same as the tests above.  */
5383           if (TEST_BIT (hoist_vbeout[bb], i))
5384             {
5385               /* We've found a potentially hoistable expression, now
5386                  we look at every block BB dominates to see if it
5387                  computes the expression.  */
5388               for (dominated = 0; dominated < n_basic_blocks; dominated++)
5389                 {
5390                   /* Ignore self dominance.  */
5391                   if (bb == dominated
5392                       || ! TEST_BIT (dominators[dominated], bb))
5393                     continue;
5394
5395                   /* We've found a dominated block, now see if it computes
5396                      the busy expression and whether or not moving that
5397                      expression to the "beginning" of that block is safe.  */
5398                   if (!TEST_BIT (antloc[dominated], i))
5399                     continue;
5400
5401                   /* The expression is computed in the dominated block and
5402                      it would be safe to compute it at the start of the
5403                      dominated block.  Now we have to determine if the
5404                      expresion would reach the dominated block if it was
5405                      placed at the end of BB.  */
5406                   if (hoist_expr_reaches_here_p (bb, i, dominated, NULL))
5407                     {
5408                       struct expr *expr = index_map[i];
5409                       struct occr *occr = expr->antic_occr;
5410                       rtx insn;
5411                       rtx set;
5412
5413                       /* Find the right occurence of this expression.  */
5414                       while (BLOCK_NUM (occr->insn) != dominated && occr)
5415                         occr = occr->next;
5416
5417                       /* Should never happen.  */
5418                       if (!occr)
5419                         abort ();
5420
5421                       insn = occr->insn;
5422
5423                       set = single_set (insn);
5424                       if (! set)
5425                         abort ();
5426
5427                       /* Create a pseudo-reg to store the result of reaching
5428                          expressions into.  Get the mode for the new pseudo
5429                          from the mode of the original destination pseudo.  */
5430                       if (expr->reaching_reg == NULL)
5431                         expr->reaching_reg
5432                           = gen_reg_rtx (GET_MODE (SET_DEST (set)));
5433
5434                       /* In theory this should never fail since we're creating
5435                          a reg->reg copy.
5436
5437                          However, on the x86 some of the movXX patterns
5438                          actually contain clobbers of scratch regs.  This may
5439                          cause the insn created by validate_change to not
5440                          match any pattern and thus cause validate_change to
5441                          fail.  */
5442                       if (validate_change (insn, &SET_SRC (set),
5443                                            expr->reaching_reg, 0))
5444                         {
5445                           occr->deleted_p = 1;
5446                           if (!insn_inserted_p)
5447                             {
5448                               insert_insn_end_bb (index_map[i], bb, 0);
5449                               insn_inserted_p = 1;
5450                             }
5451                         }
5452                     }
5453                 }
5454             }
5455         }
5456     }
5457
5458     free (index_map);
5459 }
5460
5461 /* Top level routine to perform one code hoisting (aka unification) pass
5462
5463    Return non-zero if a change was made.  */
5464
5465 static int
5466 one_code_hoisting_pass ()
5467 {
5468   int changed = 0;
5469
5470   alloc_expr_hash_table (max_cuid);
5471   compute_expr_hash_table ();
5472   if (gcse_file)
5473     dump_hash_table (gcse_file, "Code Hosting Expressions", expr_hash_table,
5474                      expr_hash_table_size, n_exprs);
5475
5476   if (n_exprs > 0)
5477     {
5478       alloc_code_hoist_mem (n_basic_blocks, n_exprs);
5479       compute_code_hoist_data ();
5480       hoist_code ();
5481       free_code_hoist_mem ();
5482     }
5483
5484   free_expr_hash_table ();
5485
5486   return changed;
5487 }