gcc/gcse.c

   1 /* Partial redundancy elimination / Hoisting for RTL.
   2    Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005,
   3    2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 3, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21 /* TODO
  22    - reordering of memory allocation and freeing to be more space efficient
  23    - do rough calc of how many regs are needed in each block, and a rough
  24      calc of how many regs are available in each class and use that to
  25      throttle back the code in cases where RTX_COST is minimal.
  26    - a store to the same address as a load does not kill the load if the
  27      source of the store is also the destination of the load.  Handling this
  28      allows more load motion, particularly out of loops.
  29
  30 */
  31
  32 /* References searched while implementing this.
  33
  34    Compilers Principles, Techniques and Tools
  35    Aho, Sethi, Ullman
  36    Addison-Wesley, 1988
  37
  38    Global Optimization by Suppression of Partial Redundancies
  39    E. Morel, C. Renvoise
  40    communications of the acm, Vol. 22, Num. 2, Feb. 1979
  41
  42    A Portable Machine-Independent Global Optimizer - Design and Measurements
  43    Frederick Chow
  44    Stanford Ph.D. thesis, Dec. 1983
  45
  46    A Fast Algorithm for Code Movement Optimization
  47    D.M. Dhamdhere
  48    SIGPLAN Notices, Vol. 23, Num. 10, Oct. 1988
  49
  50    A Solution to a Problem with Morel and Renvoise's
  51    Global Optimization by Suppression of Partial Redundancies
  52    K-H Drechsler, M.P. Stadel
  53    ACM TOPLAS, Vol. 10, Num. 4, Oct. 1988
  54
  55    Practical Adaptation of the Global Optimization
  56    Algorithm of Morel and Renvoise
  57    D.M. Dhamdhere
  58    ACM TOPLAS, Vol. 13, Num. 2. Apr. 1991
  59
  60    Efficiently Computing Static Single Assignment Form and the Control
  61    Dependence Graph
  62    R. Cytron, J. Ferrante, B.K. Rosen, M.N. Wegman, and F.K. Zadeck
  63    ACM TOPLAS, Vol. 13, Num. 4, Oct. 1991
  64
  65    Lazy Code Motion
  66    J. Knoop, O. Ruthing, B. Steffen
  67    ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI
  68
  69    What's In a Region?  Or Computing Control Dependence Regions in Near-Linear
  70    Time for Reducible Flow Control
  71    Thomas Ball
  72    ACM Letters on Programming Languages and Systems,
  73    Vol. 2, Num. 1-4, Mar-Dec 1993
  74
  75    An Efficient Representation for Sparse Sets
  76    Preston Briggs, Linda Torczon
  77    ACM Letters on Programming Languages and Systems,
  78    Vol. 2, Num. 1-4, Mar-Dec 1993
  79
  80    A Variation of Knoop, Ruthing, and Steffen's Lazy Code Motion
  81    K-H Drechsler, M.P. Stadel
  82    ACM SIGPLAN Notices, Vol. 28, Num. 5, May 1993
  83
  84    Partial Dead Code Elimination
  85    J. Knoop, O. Ruthing, B. Steffen
  86    ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
  87
  88    Effective Partial Redundancy Elimination
  89    P. Briggs, K.D. Cooper
  90    ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
  91
  92    The Program Structure Tree: Computing Control Regions in Linear Time
  93    R. Johnson, D. Pearson, K. Pingali
  94    ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
  95
  96    Optimal Code Motion: Theory and Practice
  97    J. Knoop, O. Ruthing, B. Steffen
  98    ACM TOPLAS, Vol. 16, Num. 4, Jul. 1994
  99
 100    The power of assignment motion
 101    J. Knoop, O. Ruthing, B. Steffen
 102    ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI
 103
 104    Global code motion / global value numbering
 105    C. Click
 106    ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI
 107
 108    Value Driven Redundancy Elimination
 109    L.T. Simpson
 110    Rice University Ph.D. thesis, Apr. 1996
 111
 112    Value Numbering
 113    L.T. Simpson
 114    Massively Scalar Compiler Project, Rice University, Sep. 1996
 115
 116    High Performance Compilers for Parallel Computing
 117    Michael Wolfe
 118    Addison-Wesley, 1996
 119
 120    Advanced Compiler Design and Implementation
 121    Steven Muchnick
 122    Morgan Kaufmann, 1997
 123
 124    Building an Optimizing Compiler
 125    Robert Morgan
 126    Digital Press, 1998
 127
 128    People wishing to speed up the code here should read:
 129      Elimination Algorithms for Data Flow Analysis
 130      B.G. Ryder, M.C. Paull
 131      ACM Computing Surveys, Vol. 18, Num. 3, Sep. 1986
 132
 133      How to Analyze Large Programs Efficiently and Informatively
 134      D.M. Dhamdhere, B.K. Rosen, F.K. Zadeck
 135      ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI
 136
 137    People wishing to do something different can find various possibilities
 138    in the above papers and elsewhere.
 139 */
 140
 141 #include "config.h"
 142 #include "system.h"
 143 #include "coretypes.h"
 144 #include "tm.h"
 145 #include "diagnostic-core.h"
 146 #include "toplev.h"
 147
 148 #include "rtl.h"
 149 #include "tree.h"
 150 #include "tm_p.h"
 151 #include "regs.h"
 152 #include "hard-reg-set.h"
 153 #include "flags.h"
 154 #include "insn-config.h"
 155 #include "recog.h"
 156 #include "basic-block.h"
 157 #include "output.h"
 158 #include "function.h"
 159 #include "expr.h"
 160 #include "except.h"
 161 #include "ggc.h"
 162 #include "params.h"
 163 #include "cselib.h"
 164 #include "intl.h"
 165 #include "obstack.h"
 166 #include "timevar.h"
 167 #include "tree-pass.h"
 168 #include "hashtab.h"
 169 #include "df.h"
 170 #include "dbgcnt.h"
 171 #include "target.h"
 172 #include "gcse.h"
 173
 174 /* We support GCSE via Partial Redundancy Elimination.  PRE optimizations
 175    are a superset of those done by classic GCSE.
 176
 177    Two passes of copy/constant propagation are done around PRE or hoisting
 178    because the first one enables more GCSE and the second one helps to clean
 179    up the copies that PRE and HOIST create.  This is needed more for PRE than
 180    for HOIST because code hoisting will try to use an existing register
 181    containing the common subexpression rather than create a new one.  This is
 182    harder to do for PRE because of the code motion (which HOIST doesn't do).
 183
 184    Expressions we are interested in GCSE-ing are of the form
 185    (set (pseudo-reg) (expression)).
 186    Function want_to_gcse_p says what these are.
 187
 188    In addition, expressions in REG_EQUAL notes are candidates for GCSE-ing.
 189    This allows PRE to hoist expressions that are expressed in multiple insns,
 190    such as complex address calculations (e.g. for PIC code, or loads with a
 191    high part and a low part).
 192
 193    PRE handles moving invariant expressions out of loops (by treating them as
 194    partially redundant).
 195
 196    **********************
 197
 198    We used to support multiple passes but there are diminishing returns in
 199    doing so.  The first pass usually makes 90% of the changes that are doable.
 200    A second pass can make a few more changes made possible by the first pass.
 201    Experiments show any further passes don't make enough changes to justify
 202    the expense.
 203
 204    A study of spec92 using an unlimited number of passes:
 205    [1 pass] = 1208 substitutions, [2] = 577, [3] = 202, [4] = 192, [5] = 83,
 206    [6] = 34, [7] = 17, [8] = 9, [9] = 4, [10] = 4, [11] = 2,
 207    [12] = 2, [13] = 1, [15] = 1, [16] = 2, [41] = 1
 208
 209    It was found doing copy propagation between each pass enables further
 210    substitutions.
 211
 212    This study was done before expressions in REG_EQUAL notes were added as
 213    candidate expressions for optimization, and before the GIMPLE optimizers
 214    were added.  Probably, multiple passes is even less efficient now than
 215    at the time when the study was conducted.
 216
 217    PRE is quite expensive in complicated functions because the DFA can take
 218    a while to converge.  Hence we only perform one pass.
 219
 220    **********************
 221
 222    The steps for PRE are:
 223
 224    1) Build the hash table of expressions we wish to GCSE (expr_hash_table).
 225
 226    2) Perform the data flow analysis for PRE.
 227
 228    3) Delete the redundant instructions
 229
 230    4) Insert the required copies [if any] that make the partially
 231       redundant instructions fully redundant.
 232
 233    5) For other reaching expressions, insert an instruction to copy the value
 234       to a newly created pseudo that will reach the redundant instruction.
 235
 236    The deletion is done first so that when we do insertions we
 237    know which pseudo reg to use.
 238
 239    Various papers have argued that PRE DFA is expensive (O(n^2)) and others
 240    argue it is not.  The number of iterations for the algorithm to converge
 241    is typically 2-4 so I don't view it as that expensive (relatively speaking).
 242
 243    PRE GCSE depends heavily on the second CPROP pass to clean up the copies
 244    we create.  To make an expression reach the place where it's redundant,
 245    the result of the expression is copied to a new register, and the redundant
 246    expression is deleted by replacing it with this new register.  Classic GCSE
 247    doesn't have this problem as much as it computes the reaching defs of
 248    each register in each block and thus can try to use an existing
 249    register.  */
 250 \f
 251 /* GCSE global vars.  */
 252
 253 struct target_gcse default_target_gcse;
 254 #if SWITCHABLE_TARGET
 255 struct target_gcse *this_target_gcse = &default_target_gcse;
 256 #endif
 257
 258 /* Set to non-zero if CSE should run after all GCSE optimizations are done.  */
 259 int flag_rerun_cse_after_global_opts;
 260
 261 /* An obstack for our working variables.  */
 262 static struct obstack gcse_obstack;
 263
 264 struct reg_use {rtx reg_rtx; };
 265
 266 /* Hash table of expressions.  */
 267
 268 struct expr
 269 {
 270   /* The expression (SET_SRC for expressions, PATTERN for assignments).  */
 271   rtx expr;
 272   /* Index in the available expression bitmaps.  */
 273   int bitmap_index;
 274   /* Next entry with the same hash.  */
 275   struct expr *next_same_hash;
 276   /* List of anticipatable occurrences in basic blocks in the function.
 277      An "anticipatable occurrence" is one that is the first occurrence in the
 278      basic block, the operands are not modified in the basic block prior
 279      to the occurrence and the output is not used between the start of
 280      the block and the occurrence.  */
 281   struct occr *antic_occr;
 282   /* List of available occurrence in basic blocks in the function.
 283      An "available occurrence" is one that is the last occurrence in the
 284      basic block and the operands are not modified by following statements in
 285      the basic block [including this insn].  */
 286   struct occr *avail_occr;
 287   /* Non-null if the computation is PRE redundant.
 288      The value is the newly created pseudo-reg to record a copy of the
 289      expression in all the places that reach the redundant copy.  */
 290   rtx reaching_reg;
 291   /* Maximum distance in instructions this expression can travel.
 292      We avoid moving simple expressions for more than a few instructions
 293      to keep register pressure under control.
 294      A value of "0" removes restrictions on how far the expression can
 295      travel.  */
 296   int max_distance;
 297 };
 298
 299 /* Occurrence of an expression.
 300    There is one per basic block.  If a pattern appears more than once the
 301    last appearance is used [or first for anticipatable expressions].  */
 302
 303 struct occr
 304 {
 305   /* Next occurrence of this expression.  */
 306   struct occr *next;
 307   /* The insn that computes the expression.  */
 308   rtx insn;
 309   /* Nonzero if this [anticipatable] occurrence has been deleted.  */
 310   char deleted_p;
 311   /* Nonzero if this [available] occurrence has been copied to
 312      reaching_reg.  */
 313   /* ??? This is mutually exclusive with deleted_p, so they could share
 314      the same byte.  */
 315   char copied_p;
 316 };
 317
 318 typedef struct occr *occr_t;
 319 DEF_VEC_P (occr_t);
 320 DEF_VEC_ALLOC_P (occr_t, heap);
 321
 322 /* Expression hash tables.
 323    Each hash table is an array of buckets.
 324    ??? It is known that if it were an array of entries, structure elements
 325    `next_same_hash' and `bitmap_index' wouldn't be necessary.  However, it is
 326    not clear whether in the final analysis a sufficient amount of memory would
 327    be saved as the size of the available expression bitmaps would be larger
 328    [one could build a mapping table without holes afterwards though].
 329    Someday I'll perform the computation and figure it out.  */
 330
 331 struct hash_table_d
 332 {
 333   /* The table itself.
 334      This is an array of `expr_hash_table_size' elements.  */
 335   struct expr **table;
 336
 337   /* Size of the hash table, in elements.  */
 338   unsigned int size;
 339
 340   /* Number of hash table elements.  */
 341   unsigned int n_elems;
 342 };
 343
 344 /* Expression hash table.  */
 345 static struct hash_table_d expr_hash_table;
 346
 347 /* This is a list of expressions which are MEMs and will be used by load
 348    or store motion.
 349    Load motion tracks MEMs which aren't killed by
 350    anything except itself. (i.e., loads and stores to a single location).
 351    We can then allow movement of these MEM refs with a little special
 352    allowance. (all stores copy the same value to the reaching reg used
 353    for the loads).  This means all values used to store into memory must have
 354    no side effects so we can re-issue the setter value.
 355    Store Motion uses this structure as an expression table to track stores
 356    which look interesting, and might be moveable towards the exit block.  */
 357
 358 struct ls_expr
 359 {
 360   struct expr * expr;           /* Gcse expression reference for LM.  */
 361   rtx pattern;                  /* Pattern of this mem.  */
 362   rtx pattern_regs;             /* List of registers mentioned by the mem.  */
 363   rtx loads;                    /* INSN list of loads seen.  */
 364   rtx stores;                   /* INSN list of stores seen.  */
 365   struct ls_expr * next;        /* Next in the list.  */
 366   int invalid;                  /* Invalid for some reason.  */
 367   int index;                    /* If it maps to a bitmap index.  */
 368   unsigned int hash_index;      /* Index when in a hash table.  */
 369   rtx reaching_reg;             /* Register to use when re-writing.  */
 370 };
 371
 372 /* Head of the list of load/store memory refs.  */
 373 static struct ls_expr * pre_ldst_mems = NULL;
 374
 375 /* Hashtable for the load/store memory refs.  */
 376 static htab_t pre_ldst_table = NULL;
 377
 378 /* Bitmap containing one bit for each register in the program.
 379    Used when performing GCSE to track which registers have been set since
 380    the start of the basic block.  */
 381 static regset reg_set_bitmap;
 382
 383 /* Array, indexed by basic block number for a list of insns which modify
 384    memory within that block.  */
 385 static VEC (rtx,heap) **modify_mem_list;
 386 static bitmap modify_mem_list_set;
 387
 388 typedef struct modify_pair_s
 389 {
 390   rtx dest;                     /* A MEM.  */
 391   rtx dest_addr;                /* The canonical address of `dest'.  */
 392 } modify_pair;
 393
 394 DEF_VEC_O(modify_pair);
 395 DEF_VEC_ALLOC_O(modify_pair,heap);
 396
 397 /* This array parallels modify_mem_list, except that it stores MEMs
 398    being set and their canonicalized memory addresses.  */
 399 static VEC (modify_pair,heap) **canon_modify_mem_list;
 400
 401 /* Bitmap indexed by block numbers to record which blocks contain
 402    function calls.  */
 403 static bitmap blocks_with_calls;
 404
 405 /* Various variables for statistics gathering.  */
 406
 407 /* Memory used in a pass.
 408    This isn't intended to be absolutely precise.  Its intent is only
 409    to keep an eye on memory usage.  */
 410 static int bytes_used;
 411
 412 /* GCSE substitutions made.  */
 413 static int gcse_subst_count;
 414 /* Number of copy instructions created.  */
 415 static int gcse_create_count;
 416 \f
 417 /* Doing code hoisting.  */
 418 static bool doing_code_hoisting_p = false;
 419 \f
 420 /* For available exprs */
 421 static sbitmap *ae_kill;
 422 \f
 423 static void compute_can_copy (void);
 424 static void *gmalloc (size_t) ATTRIBUTE_MALLOC;
 425 static void *gcalloc (size_t, size_t) ATTRIBUTE_MALLOC;
 426 static void *gcse_alloc (unsigned long);
 427 static void alloc_gcse_mem (void);
 428 static void free_gcse_mem (void);
 429 static void hash_scan_insn (rtx, struct hash_table_d *);
 430 static void hash_scan_set (rtx, rtx, struct hash_table_d *);
 431 static void hash_scan_clobber (rtx, rtx, struct hash_table_d *);
 432 static void hash_scan_call (rtx, rtx, struct hash_table_d *);
 433 static int want_to_gcse_p (rtx, int *);
 434 static int oprs_unchanged_p (const_rtx, const_rtx, int);
 435 static int oprs_anticipatable_p (const_rtx, const_rtx);
 436 static int oprs_available_p (const_rtx, const_rtx);
 437 static void insert_expr_in_table (rtx, enum machine_mode, rtx, int, int, int,
 438                                   struct hash_table_d *);
 439 static unsigned int hash_expr (const_rtx, enum machine_mode, int *, int);
 440 static int expr_equiv_p (const_rtx, const_rtx);
 441 static void record_last_reg_set_info (rtx, int);
 442 static void record_last_mem_set_info (rtx);
 443 static void record_last_set_info (rtx, const_rtx, void *);
 444 static void compute_hash_table (struct hash_table_d *);
 445 static void alloc_hash_table (struct hash_table_d *);
 446 static void free_hash_table (struct hash_table_d *);
 447 static void compute_hash_table_work (struct hash_table_d *);
 448 static void dump_hash_table (FILE *, const char *, struct hash_table_d *);
 449 static void compute_transp (const_rtx, int, sbitmap *);
 450 static void compute_local_properties (sbitmap *, sbitmap *, sbitmap *,
 451                                       struct hash_table_d *);
 452 static void mems_conflict_for_gcse_p (rtx, const_rtx, void *);
 453 static int load_killed_in_block_p (const_basic_block, int, const_rtx, int);
 454 static void canon_list_insert (rtx, const_rtx, void *);
 455 static void alloc_pre_mem (int, int);
 456 static void free_pre_mem (void);
 457 static void compute_pre_data (void);
 458 static int pre_expr_reaches_here_p (basic_block, struct expr *,
 459                                     basic_block);
 460 static void insert_insn_end_basic_block (struct expr *, basic_block);
 461 static void pre_insert_copy_insn (struct expr *, rtx);
 462 static void pre_insert_copies (void);
 463 static int pre_delete (void);
 464 static int pre_gcse (void);
 465 static int one_pre_gcse_pass (void);
 466 static void add_label_notes (rtx, rtx);
 467 static void alloc_code_hoist_mem (int, int);
 468 static void free_code_hoist_mem (void);
 469 static void compute_code_hoist_vbeinout (void);
 470 static void compute_code_hoist_data (void);
 471 static int hoist_expr_reaches_here_p (basic_block, int, basic_block, char *,
 472                                       int, int *);
 473 static int hoist_code (void);
 474 static int one_code_hoisting_pass (void);
 475 static rtx process_insert_insn (struct expr *);
 476 static int pre_edge_insert (struct edge_list *, struct expr **);
 477 static int pre_expr_reaches_here_p_work (basic_block, struct expr *,
 478                                          basic_block, char *);
 479 static struct ls_expr * ldst_entry (rtx);
 480 static void free_ldst_entry (struct ls_expr *);
 481 static void free_ldst_mems (void);
 482 static void print_ldst_list (FILE *);
 483 static struct ls_expr * find_rtx_in_ldst (rtx);
 484 static inline struct ls_expr * first_ls_expr (void);
 485 static inline struct ls_expr * next_ls_expr (struct ls_expr *);
 486 static int simple_mem (const_rtx);
 487 static void invalidate_any_buried_refs (rtx);
 488 static void compute_ld_motion_mems (void);
 489 static void trim_ld_motion_mems (void);
 490 static void update_ld_motion_stores (struct expr *);
 491 static void clear_modify_mem_tables (void);
 492 static void free_modify_mem_tables (void);
 493 static rtx gcse_emit_move_after (rtx, rtx, rtx);
 494 static bool is_too_expensive (const char *);
 495
 496 #define GNEW(T)                 ((T *) gmalloc (sizeof (T)))
 497 #define GCNEW(T)                ((T *) gcalloc (1, sizeof (T)))
 498
 499 #define GNEWVEC(T, N)           ((T *) gmalloc (sizeof (T) * (N)))
 500 #define GCNEWVEC(T, N)          ((T *) gcalloc ((N), sizeof (T)))
 501
 502 #define GNEWVAR(T, S)           ((T *) gmalloc ((S)))
 503 #define GCNEWVAR(T, S)          ((T *) gcalloc (1, (S)))
 504
 505 #define GOBNEW(T)               ((T *) gcse_alloc (sizeof (T)))
 506 #define GOBNEWVAR(T, S)         ((T *) gcse_alloc ((S)))
 507 \f
 508 /* Misc. utilities.  */
 509
 510 #define can_copy \
 511   (this_target_gcse->x_can_copy)
 512 #define can_copy_init_p \
 513   (this_target_gcse->x_can_copy_init_p)
 514
 515 /* Compute which modes support reg/reg copy operations.  */
 516
 517 static void
 518 compute_can_copy (void)
 519 {
 520   int i;
 521 #ifndef AVOID_CCMODE_COPIES
 522   rtx reg, insn;
 523 #endif
 524   memset (can_copy, 0, NUM_MACHINE_MODES);
 525
 526   start_sequence ();
 527   for (i = 0; i < NUM_MACHINE_MODES; i++)
 528     if (GET_MODE_CLASS (i) == MODE_CC)
 529       {
 530 #ifdef AVOID_CCMODE_COPIES
 531         can_copy[i] = 0;
 532 #else
 533         reg = gen_rtx_REG ((enum machine_mode) i, LAST_VIRTUAL_REGISTER + 1);
 534         insn = emit_insn (gen_rtx_SET (VOIDmode, reg, reg));
 535         if (recog (PATTERN (insn), insn, NULL) >= 0)
 536           can_copy[i] = 1;
 537 #endif
 538       }
 539     else
 540       can_copy[i] = 1;
 541
 542   end_sequence ();
 543 }
 544
 545 /* Returns whether the mode supports reg/reg copy operations.  */
 546
 547 bool
 548 can_copy_p (enum machine_mode mode)
 549 {
 550   if (! can_copy_init_p)
 551     {
 552       compute_can_copy ();
 553       can_copy_init_p = true;
 554     }
 555
 556   return can_copy[mode] != 0;
 557 }
 558
 559 \f
 560 /* Cover function to xmalloc to record bytes allocated.  */
 561
 562 static void *
 563 gmalloc (size_t size)
 564 {
 565   bytes_used += size;
 566   return xmalloc (size);
 567 }
 568
 569 /* Cover function to xcalloc to record bytes allocated.  */
 570
 571 static void *
 572 gcalloc (size_t nelem, size_t elsize)
 573 {
 574   bytes_used += nelem * elsize;
 575   return xcalloc (nelem, elsize);
 576 }
 577
 578 /* Cover function to obstack_alloc.  */
 579
 580 static void *
 581 gcse_alloc (unsigned long size)
 582 {
 583   bytes_used += size;
 584   return obstack_alloc (&gcse_obstack, size);
 585 }
 586
 587 /* Allocate memory for the reg/memory set tracking tables.
 588    This is called at the start of each pass.  */
 589
 590 static void
 591 alloc_gcse_mem (void)
 592 {
 593   /* Allocate vars to track sets of regs.  */
 594   reg_set_bitmap = ALLOC_REG_SET (NULL);
 595
 596   /* Allocate array to keep a list of insns which modify memory in each
 597      basic block.  */
 598   modify_mem_list = GCNEWVEC (VEC (rtx,heap) *, last_basic_block);
 599   canon_modify_mem_list = GCNEWVEC (VEC (modify_pair,heap) *,
 600                                     last_basic_block);
 601   modify_mem_list_set = BITMAP_ALLOC (NULL);
 602   blocks_with_calls = BITMAP_ALLOC (NULL);
 603 }
 604
 605 /* Free memory allocated by alloc_gcse_mem.  */
 606
 607 static void
 608 free_gcse_mem (void)
 609 {
 610   FREE_REG_SET (reg_set_bitmap);
 611
 612   free_modify_mem_tables ();
 613   BITMAP_FREE (modify_mem_list_set);
 614   BITMAP_FREE (blocks_with_calls);
 615 }
 616 \f
 617 /* Compute the local properties of each recorded expression.
 618
 619    Local properties are those that are defined by the block, irrespective of
 620    other blocks.
 621
 622    An expression is transparent in a block if its operands are not modified
 623    in the block.
 624
 625    An expression is computed (locally available) in a block if it is computed
 626    at least once and expression would contain the same value if the
 627    computation was moved to the end of the block.
 628
 629    An expression is locally anticipatable in a block if it is computed at
 630    least once and expression would contain the same value if the computation
 631    was moved to the beginning of the block.
 632
 633    We call this routine for pre and code hoisting.  They all compute
 634    basically the same information and thus can easily share this code.
 635
 636    TRANSP, COMP, and ANTLOC are destination sbitmaps for recording local
 637    properties.  If NULL, then it is not necessary to compute or record that
 638    particular property.
 639
 640    TABLE controls which hash table to look at.  */
 641
 642 static void
 643 compute_local_properties (sbitmap *transp, sbitmap *comp, sbitmap *antloc,
 644                           struct hash_table_d *table)
 645 {
 646   unsigned int i;
 647
 648   /* Initialize any bitmaps that were passed in.  */
 649   if (transp)
 650     {
 651       sbitmap_vector_ones (transp, last_basic_block);
 652     }
 653
 654   if (comp)
 655     sbitmap_vector_zero (comp, last_basic_block);
 656   if (antloc)
 657     sbitmap_vector_zero (antloc, last_basic_block);
 658
 659   for (i = 0; i < table->size; i++)
 660     {
 661       struct expr *expr;
 662
 663       for (expr = table->table[i]; expr != NULL; expr = expr->next_same_hash)
 664         {
 665           int indx = expr->bitmap_index;
 666           struct occr *occr;
 667
 668           /* The expression is transparent in this block if it is not killed.
 669              We start by assuming all are transparent [none are killed], and
 670              then reset the bits for those that are.  */
 671           if (transp)
 672             compute_transp (expr->expr, indx, transp);
 673
 674           /* The occurrences recorded in antic_occr are exactly those that
 675              we want to set to nonzero in ANTLOC.  */
 676           if (antloc)
 677             for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
 678               {
 679                 SET_BIT (antloc[BLOCK_FOR_INSN (occr->insn)->index], indx);
 680
 681                 /* While we're scanning the table, this is a good place to
 682                    initialize this.  */
 683                 occr->deleted_p = 0;
 684               }
 685
 686           /* The occurrences recorded in avail_occr are exactly those that
 687              we want to set to nonzero in COMP.  */
 688           if (comp)
 689             for (occr = expr->avail_occr; occr != NULL; occr = occr->next)
 690               {
 691                 SET_BIT (comp[BLOCK_FOR_INSN (occr->insn)->index], indx);
 692
 693                 /* While we're scanning the table, this is a good place to
 694                    initialize this.  */
 695                 occr->copied_p = 0;
 696               }
 697
 698           /* While we're scanning the table, this is a good place to
 699              initialize this.  */
 700           expr->reaching_reg = 0;
 701         }
 702     }
 703 }
 704 \f
 705 /* Hash table support.  */
 706
 707 struct reg_avail_info
 708 {
 709   basic_block last_bb;
 710   int first_set;
 711   int last_set;
 712 };
 713
 714 static struct reg_avail_info *reg_avail_info;
 715 static basic_block current_bb;
 716
 717
 718 /* See whether X, the source of a set, is something we want to consider for
 719    GCSE.  */
 720
 721 static int
 722 want_to_gcse_p (rtx x, int *max_distance_ptr)
 723 {
 724 #ifdef STACK_REGS
 725   /* On register stack architectures, don't GCSE constants from the
 726      constant pool, as the benefits are often swamped by the overhead
 727      of shuffling the register stack between basic blocks.  */
 728   if (IS_STACK_MODE (GET_MODE (x)))
 729     x = avoid_constant_pool_reference (x);
 730 #endif
 731
 732   /* GCSE'ing constants:
 733
 734      We do not specifically distinguish between constant and non-constant
 735      expressions in PRE and Hoist.  We use rtx_cost below to limit
 736      the maximum distance simple expressions can travel.
 737
 738      Nevertheless, constants are much easier to GCSE, and, hence,
 739      it is easy to overdo the optimizations.  Usually, excessive PRE and
 740      Hoisting of constant leads to increased register pressure.
 741
 742      RA can deal with this by rematerialing some of the constants.
 743      Therefore, it is important that the back-end generates sets of constants
 744      in a way that allows reload rematerialize them under high register
 745      pressure, i.e., a pseudo register with REG_EQUAL to constant
 746      is set only once.  Failing to do so will result in IRA/reload
 747      spilling such constants under high register pressure instead of
 748      rematerializing them.  */
 749
 750   switch (GET_CODE (x))
 751     {
 752     case REG:
 753     case SUBREG:
 754     case CALL:
 755       return 0;
 756
 757     case CONST_INT:
 758     case CONST_DOUBLE:
 759     case CONST_FIXED:
 760     case CONST_VECTOR:
 761       if (!doing_code_hoisting_p)
 762         /* Do not PRE constants.  */
 763         return 0;
 764
 765       /* FALLTHRU */
 766
 767     default:
 768       if (doing_code_hoisting_p)
 769         /* PRE doesn't implement max_distance restriction.  */
 770         {
 771           int cost;
 772           int max_distance;
 773
 774           gcc_assert (!optimize_function_for_speed_p (cfun)
 775                       && optimize_function_for_size_p (cfun));
 776           cost = rtx_cost (x, SET, 0);
 777
 778           if (cost < COSTS_N_INSNS (GCSE_UNRESTRICTED_COST))
 779             {
 780               max_distance = (GCSE_COST_DISTANCE_RATIO * cost) / 10;
 781               if (max_distance == 0)
 782                 return 0;
 783
 784               gcc_assert (max_distance > 0);
 785             }
 786           else
 787             max_distance = 0;
 788
 789           if (max_distance_ptr)
 790             *max_distance_ptr = max_distance;
 791         }
 792
 793       return can_assign_to_reg_without_clobbers_p (x);
 794     }
 795 }
 796
 797 /* Used internally by can_assign_to_reg_without_clobbers_p.  */
 798
 799 static GTY(()) rtx test_insn;
 800
 801 /* Return true if we can assign X to a pseudo register such that the
 802    resulting insn does not result in clobbering a hard register as a
 803    side-effect.
 804
 805    Additionally, if the target requires it, check that the resulting insn
 806    can be copied.  If it cannot, this means that X is special and probably
 807    has hidden side-effects we don't want to mess with.
 808
 809    This function is typically used by code motion passes, to verify
 810    that it is safe to insert an insn without worrying about clobbering
 811    maybe live hard regs.  */
 812
 813 bool
 814 can_assign_to_reg_without_clobbers_p (rtx x)
 815 {
 816   int num_clobbers = 0;
 817   int icode;
 818
 819   /* If this is a valid operand, we are OK.  If it's VOIDmode, we aren't.  */
 820   if (general_operand (x, GET_MODE (x)))
 821     return 1;
 822   else if (GET_MODE (x) == VOIDmode)
 823     return 0;
 824
 825   /* Otherwise, check if we can make a valid insn from it.  First initialize
 826      our test insn if we haven't already.  */
 827   if (test_insn == 0)
 828     {
 829       test_insn
 830         = make_insn_raw (gen_rtx_SET (VOIDmode,
 831                                       gen_rtx_REG (word_mode,
 832                                                    FIRST_PSEUDO_REGISTER * 2),
 833                                       const0_rtx));
 834       NEXT_INSN (test_insn) = PREV_INSN (test_insn) = 0;
 835     }
 836
 837   /* Now make an insn like the one we would make when GCSE'ing and see if
 838      valid.  */
 839   PUT_MODE (SET_DEST (PATTERN (test_insn)), GET_MODE (x));
 840   SET_SRC (PATTERN (test_insn)) = x;
 841
 842   icode = recog (PATTERN (test_insn), test_insn, &num_clobbers);
 843   if (icode < 0)
 844     return false;
 845
 846   if (num_clobbers > 0 && added_clobbers_hard_reg_p (icode))
 847     return false;
 848
 849   if (targetm.cannot_copy_insn_p && targetm.cannot_copy_insn_p (test_insn))
 850     return false;
 851
 852   return true;
 853 }
 854
 855 /* Return nonzero if the operands of expression X are unchanged from the
 856    start of INSN's basic block up to but not including INSN (if AVAIL_P == 0),
 857    or from INSN to the end of INSN's basic block (if AVAIL_P != 0).  */
 858
 859 static int
 860 oprs_unchanged_p (const_rtx x, const_rtx insn, int avail_p)
 861 {
 862   int i, j;
 863   enum rtx_code code;
 864   const char *fmt;
 865
 866   if (x == 0)
 867     return 1;
 868
 869   code = GET_CODE (x);
 870   switch (code)
 871     {
 872     case REG:
 873       {
 874         struct reg_avail_info *info = &reg_avail_info[REGNO (x)];
 875
 876         if (info->last_bb != current_bb)
 877           return 1;
 878         if (avail_p)
 879           return info->last_set < DF_INSN_LUID (insn);
 880         else
 881           return info->first_set >= DF_INSN_LUID (insn);
 882       }
 883
 884     case MEM:
 885       if (load_killed_in_block_p (current_bb, DF_INSN_LUID (insn),
 886                                   x, avail_p))
 887         return 0;
 888       else
 889         return oprs_unchanged_p (XEXP (x, 0), insn, avail_p);
 890
 891     case PRE_DEC:
 892     case PRE_INC:
 893     case POST_DEC:
 894     case POST_INC:
 895     case PRE_MODIFY:
 896     case POST_MODIFY:
 897       return 0;
 898
 899     case PC:
 900     case CC0: /*FIXME*/
 901     case CONST:
 902     case CONST_INT:
 903     case CONST_DOUBLE:
 904     case CONST_FIXED:
 905     case CONST_VECTOR:
 906     case SYMBOL_REF:
 907     case LABEL_REF:
 908     case ADDR_VEC:
 909     case ADDR_DIFF_VEC:
 910       return 1;
 911
 912     default:
 913       break;
 914     }
 915
 916   for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
 917     {
 918       if (fmt[i] == 'e')
 919         {
 920           /* If we are about to do the last recursive call needed at this
 921              level, change it into iteration.  This function is called enough
 922              to be worth it.  */
 923           if (i == 0)
 924             return oprs_unchanged_p (XEXP (x, i), insn, avail_p);
 925
 926           else if (! oprs_unchanged_p (XEXP (x, i), insn, avail_p))
 927             return 0;
 928         }
 929       else if (fmt[i] == 'E')
 930         for (j = 0; j < XVECLEN (x, i); j++)
 931           if (! oprs_unchanged_p (XVECEXP (x, i, j), insn, avail_p))
 932             return 0;
 933     }
 934
 935   return 1;
 936 }
 937
 938 /* Used for communication between mems_conflict_for_gcse_p and
 939    load_killed_in_block_p.  Nonzero if mems_conflict_for_gcse_p finds a
 940    conflict between two memory references.  */
 941 static int gcse_mems_conflict_p;
 942
 943 /* Used for communication between mems_conflict_for_gcse_p and
 944    load_killed_in_block_p.  A memory reference for a load instruction,
 945    mems_conflict_for_gcse_p will see if a memory store conflicts with
 946    this memory load.  */
 947 static const_rtx gcse_mem_operand;
 948
 949 /* DEST is the output of an instruction.  If it is a memory reference, and
 950    possibly conflicts with the load found in gcse_mem_operand, then set
 951    gcse_mems_conflict_p to a nonzero value.  */
 952
 953 static void
 954 mems_conflict_for_gcse_p (rtx dest, const_rtx setter ATTRIBUTE_UNUSED,
 955                           void *data ATTRIBUTE_UNUSED)
 956 {
 957   while (GET_CODE (dest) == SUBREG
 958          || GET_CODE (dest) == ZERO_EXTRACT
 959          || GET_CODE (dest) == STRICT_LOW_PART)
 960     dest = XEXP (dest, 0);
 961
 962   /* If DEST is not a MEM, then it will not conflict with the load.  Note
 963      that function calls are assumed to clobber memory, but are handled
 964      elsewhere.  */
 965   if (! MEM_P (dest))
 966     return;
 967
 968   /* If we are setting a MEM in our list of specially recognized MEMs,
 969      don't mark as killed this time.  */
 970
 971   if (expr_equiv_p (dest, gcse_mem_operand) && pre_ldst_mems != NULL)
 972     {
 973       if (!find_rtx_in_ldst (dest))
 974         gcse_mems_conflict_p = 1;
 975       return;
 976     }
 977
 978   if (true_dependence (dest, GET_MODE (dest), gcse_mem_operand,
 979                        rtx_addr_varies_p))
 980     gcse_mems_conflict_p = 1;
 981 }
 982
 983 /* Return nonzero if the expression in X (a memory reference) is killed
 984    in block BB before or after the insn with the LUID in UID_LIMIT.
 985    AVAIL_P is nonzero for kills after UID_LIMIT, and zero for kills
 986    before UID_LIMIT.
 987
 988    To check the entire block, set UID_LIMIT to max_uid + 1 and
 989    AVAIL_P to 0.  */
 990
 991 static int
 992 load_killed_in_block_p (const_basic_block bb, int uid_limit, const_rtx x, int avail_p)
 993 {
 994   VEC (rtx,heap) *list = modify_mem_list[bb->index];
 995   rtx setter;
 996   unsigned ix;
 997
 998   /* If this is a readonly then we aren't going to be changing it.  */
 999   if (MEM_READONLY_P (x))
1000     return 0;
1001
1002   FOR_EACH_VEC_ELT_REVERSE (rtx, list, ix, setter)
1003     {
1004       /* Ignore entries in the list that do not apply.  */
1005       if ((avail_p
1006            && DF_INSN_LUID (setter) < uid_limit)
1007           || (! avail_p
1008               && DF_INSN_LUID (setter) > uid_limit))
1009         continue;
1010
1011       /* If SETTER is a call everything is clobbered.  Note that calls
1012          to pure functions are never put on the list, so we need not
1013          worry about them.  */
1014       if (CALL_P (setter))
1015         return 1;
1016
1017       /* SETTER must be an INSN of some kind that sets memory.  Call
1018          note_stores to examine each hunk of memory that is modified.
1019
1020          The note_stores interface is pretty limited, so we have to
1021          communicate via global variables.  Yuk.  */
1022       gcse_mem_operand = x;
1023       gcse_mems_conflict_p = 0;
1024       note_stores (PATTERN (setter), mems_conflict_for_gcse_p, NULL);
1025       if (gcse_mems_conflict_p)
1026         return 1;
1027     }
1028   return 0;
1029 }
1030
1031 /* Return nonzero if the operands of expression X are unchanged from
1032    the start of INSN's basic block up to but not including INSN.  */
1033
1034 static int
1035 oprs_anticipatable_p (const_rtx x, const_rtx insn)
1036 {
1037   return oprs_unchanged_p (x, insn, 0);
1038 }
1039
1040 /* Return nonzero if the operands of expression X are unchanged from
1041    INSN to the end of INSN's basic block.  */
1042
1043 static int
1044 oprs_available_p (const_rtx x, const_rtx insn)
1045 {
1046   return oprs_unchanged_p (x, insn, 1);
1047 }
1048
1049 /* Hash expression X.
1050
1051    MODE is only used if X is a CONST_INT.  DO_NOT_RECORD_P is a boolean
1052    indicating if a volatile operand is found or if the expression contains
1053    something we don't want to insert in the table.  HASH_TABLE_SIZE is
1054    the current size of the hash table to be probed.  */
1055
1056 static unsigned int
1057 hash_expr (const_rtx x, enum machine_mode mode, int *do_not_record_p,
1058            int hash_table_size)
1059 {
1060   unsigned int hash;
1061
1062   *do_not_record_p = 0;
1063
1064   hash = hash_rtx (x, mode, do_not_record_p,
1065                    NULL,  /*have_reg_qty=*/false);
1066   return hash % hash_table_size;
1067 }
1068
1069 /* Return nonzero if exp1 is equivalent to exp2.  */
1070
1071 static int
1072 expr_equiv_p (const_rtx x, const_rtx y)
1073 {
1074   return exp_equiv_p (x, y, 0, true);
1075 }
1076
1077 /* Insert expression X in INSN in the hash TABLE.
1078    If it is already present, record it as the last occurrence in INSN's
1079    basic block.
1080
1081    MODE is the mode of the value X is being stored into.
1082    It is only used if X is a CONST_INT.
1083
1084    ANTIC_P is nonzero if X is an anticipatable expression.
1085    AVAIL_P is nonzero if X is an available expression.
1086
1087    MAX_DISTANCE is the maximum distance in instructions this expression can
1088    be moved.  */
1089
1090 static void
1091 insert_expr_in_table (rtx x, enum machine_mode mode, rtx insn, int antic_p,
1092                       int avail_p, int max_distance, struct hash_table_d *table)
1093 {
1094   int found, do_not_record_p;
1095   unsigned int hash;
1096   struct expr *cur_expr, *last_expr = NULL;
1097   struct occr *antic_occr, *avail_occr;
1098
1099   hash = hash_expr (x, mode, &do_not_record_p, table->size);
1100
1101   /* Do not insert expression in table if it contains volatile operands,
1102      or if hash_expr determines the expression is something we don't want
1103      to or can't handle.  */
1104   if (do_not_record_p)
1105     return;
1106
1107   cur_expr = table->table[hash];
1108   found = 0;
1109
1110   while (cur_expr && 0 == (found = expr_equiv_p (cur_expr->expr, x)))
1111     {
1112       /* If the expression isn't found, save a pointer to the end of
1113          the list.  */
1114       last_expr = cur_expr;
1115       cur_expr = cur_expr->next_same_hash;
1116     }
1117
1118   if (! found)
1119     {
1120       cur_expr = GOBNEW (struct expr);
1121       bytes_used += sizeof (struct expr);
1122       if (table->table[hash] == NULL)
1123         /* This is the first pattern that hashed to this index.  */
1124         table->table[hash] = cur_expr;
1125       else
1126         /* Add EXPR to end of this hash chain.  */
1127         last_expr->next_same_hash = cur_expr;
1128
1129       /* Set the fields of the expr element.  */
1130       cur_expr->expr = x;
1131       cur_expr->bitmap_index = table->n_elems++;
1132       cur_expr->next_same_hash = NULL;
1133       cur_expr->antic_occr = NULL;
1134       cur_expr->avail_occr = NULL;
1135       gcc_assert (max_distance >= 0);
1136       cur_expr->max_distance = max_distance;
1137     }
1138   else
1139     gcc_assert (cur_expr->max_distance == max_distance);
1140
1141   /* Now record the occurrence(s).  */
1142   if (antic_p)
1143     {
1144       antic_occr = cur_expr->antic_occr;
1145
1146       if (antic_occr
1147           && BLOCK_FOR_INSN (antic_occr->insn) != BLOCK_FOR_INSN (insn))
1148         antic_occr = NULL;
1149
1150       if (antic_occr)
1151         /* Found another instance of the expression in the same basic block.
1152            Prefer the currently recorded one.  We want the first one in the
1153            block and the block is scanned from start to end.  */
1154         ; /* nothing to do */
1155       else
1156         {
1157           /* First occurrence of this expression in this basic block.  */
1158           antic_occr = GOBNEW (struct occr);
1159           bytes_used += sizeof (struct occr);
1160           antic_occr->insn = insn;
1161           antic_occr->next = cur_expr->antic_occr;
1162           antic_occr->deleted_p = 0;
1163           cur_expr->antic_occr = antic_occr;
1164         }
1165     }
1166
1167   if (avail_p)
1168     {
1169       avail_occr = cur_expr->avail_occr;
1170
1171       if (avail_occr
1172           && BLOCK_FOR_INSN (avail_occr->insn) == BLOCK_FOR_INSN (insn))
1173         {
1174           /* Found another instance of the expression in the same basic block.
1175              Prefer this occurrence to the currently recorded one.  We want
1176              the last one in the block and the block is scanned from start
1177              to end.  */
1178           avail_occr->insn = insn;
1179         }
1180       else
1181         {
1182           /* First occurrence of this expression in this basic block.  */
1183           avail_occr = GOBNEW (struct occr);
1184           bytes_used += sizeof (struct occr);
1185           avail_occr->insn = insn;
1186           avail_occr->next = cur_expr->avail_occr;
1187           avail_occr->deleted_p = 0;
1188           cur_expr->avail_occr = avail_occr;
1189         }
1190     }
1191 }
1192
1193 /* Scan pattern PAT of INSN and add an entry to the hash TABLE.  */
1194
1195 static void
1196 hash_scan_set (rtx pat, rtx insn, struct hash_table_d *table)
1197 {
1198   rtx src = SET_SRC (pat);
1199   rtx dest = SET_DEST (pat);
1200   rtx note;
1201
1202   if (GET_CODE (src) == CALL)
1203     hash_scan_call (src, insn, table);
1204
1205   else if (REG_P (dest))
1206     {
1207       unsigned int regno = REGNO (dest);
1208       int max_distance = 0;
1209
1210       /* See if a REG_EQUAL note shows this equivalent to a simpler expression.
1211
1212          This allows us to do a single GCSE pass and still eliminate
1213          redundant constants, addresses or other expressions that are
1214          constructed with multiple instructions.
1215
1216          However, keep the original SRC if INSN is a simple reg-reg move.
1217          In this case, there will almost always be a REG_EQUAL note on the
1218          insn that sets SRC.  By recording the REG_EQUAL value here as SRC
1219          for INSN, we miss copy propagation opportunities and we perform the
1220          same PRE GCSE operation repeatedly on the same REG_EQUAL value if we
1221          do more than one PRE GCSE pass.
1222
1223          Note that this does not impede profitable constant propagations.  We
1224          "look through" reg-reg sets in lookup_avail_set.  */
1225       note = find_reg_equal_equiv_note (insn);
1226       if (note != 0
1227           && REG_NOTE_KIND (note) == REG_EQUAL
1228           && !REG_P (src)
1229           && want_to_gcse_p (XEXP (note, 0), NULL))
1230         src = XEXP (note, 0), pat = gen_rtx_SET (VOIDmode, dest, src);
1231
1232       /* Only record sets of pseudo-regs in the hash table.  */
1233       if (regno >= FIRST_PSEUDO_REGISTER
1234           /* Don't GCSE something if we can't do a reg/reg copy.  */
1235           && can_copy_p (GET_MODE (dest))
1236           /* GCSE commonly inserts instruction after the insn.  We can't
1237              do that easily for EH edges so disable GCSE on these for now.  */
1238           /* ??? We can now easily create new EH landing pads at the
1239              gimple level, for splitting edges; there's no reason we
1240              can't do the same thing at the rtl level.  */
1241           && !can_throw_internal (insn)
1242           /* Is SET_SRC something we want to gcse?  */
1243           && want_to_gcse_p (src, &max_distance)
1244           /* Don't CSE a nop.  */
1245           && ! set_noop_p (pat)
1246           /* Don't GCSE if it has attached REG_EQUIV note.
1247              At this point this only function parameters should have
1248              REG_EQUIV notes and if the argument slot is used somewhere
1249              explicitly, it means address of parameter has been taken,
1250              so we should not extend the lifetime of the pseudo.  */
1251           && (note == NULL_RTX || ! MEM_P (XEXP (note, 0))))
1252         {
1253           /* An expression is not anticipatable if its operands are
1254              modified before this insn or if this is not the only SET in
1255              this insn.  The latter condition does not have to mean that
1256              SRC itself is not anticipatable, but we just will not be
1257              able to handle code motion of insns with multiple sets.  */
1258           int antic_p = oprs_anticipatable_p (src, insn)
1259                         && !multiple_sets (insn);
1260           /* An expression is not available if its operands are
1261              subsequently modified, including this insn.  It's also not
1262              available if this is a branch, because we can't insert
1263              a set after the branch.  */
1264           int avail_p = (oprs_available_p (src, insn)
1265                          && ! JUMP_P (insn));
1266
1267           insert_expr_in_table (src, GET_MODE (dest), insn, antic_p, avail_p,
1268                                 max_distance, table);
1269         }
1270     }
1271   /* In case of store we want to consider the memory value as available in
1272      the REG stored in that memory. This makes it possible to remove
1273      redundant loads from due to stores to the same location.  */
1274   else if (flag_gcse_las && REG_P (src) && MEM_P (dest))
1275       {
1276         unsigned int regno = REGNO (src);
1277         int max_distance = 0;
1278
1279         /* Only record sets of pseudo-regs in the hash table.  */
1280         if (regno >= FIRST_PSEUDO_REGISTER
1281            /* Don't GCSE something if we can't do a reg/reg copy.  */
1282            && can_copy_p (GET_MODE (src))
1283            /* GCSE commonly inserts instruction after the insn.  We can't
1284               do that easily for EH edges so disable GCSE on these for now.  */
1285            && !can_throw_internal (insn)
1286            /* Is SET_DEST something we want to gcse?  */
1287            && want_to_gcse_p (dest, &max_distance)
1288            /* Don't CSE a nop.  */
1289            && ! set_noop_p (pat)
1290            /* Don't GCSE if it has attached REG_EQUIV note.
1291               At this point this only function parameters should have
1292               REG_EQUIV notes and if the argument slot is used somewhere
1293               explicitly, it means address of parameter has been taken,
1294               so we should not extend the lifetime of the pseudo.  */
1295            && ((note = find_reg_note (insn, REG_EQUIV, NULL_RTX)) == 0
1296                || ! MEM_P (XEXP (note, 0))))
1297              {
1298                /* Stores are never anticipatable.  */
1299                int antic_p = 0;
1300                /* An expression is not available if its operands are
1301                   subsequently modified, including this insn.  It's also not
1302                   available if this is a branch, because we can't insert
1303                   a set after the branch.  */
1304                int avail_p = oprs_available_p (dest, insn)
1305                              && ! JUMP_P (insn);
1306
1307                /* Record the memory expression (DEST) in the hash table.  */
1308                insert_expr_in_table (dest, GET_MODE (dest), insn,
1309                                      antic_p, avail_p, max_distance, table);
1310              }
1311       }
1312 }
1313
1314 static void
1315 hash_scan_clobber (rtx x ATTRIBUTE_UNUSED, rtx insn ATTRIBUTE_UNUSED,
1316                    struct hash_table_d *table ATTRIBUTE_UNUSED)
1317 {
1318   /* Currently nothing to do.  */
1319 }
1320
1321 static void
1322 hash_scan_call (rtx x ATTRIBUTE_UNUSED, rtx insn ATTRIBUTE_UNUSED,
1323                 struct hash_table_d *table ATTRIBUTE_UNUSED)
1324 {
1325   /* Currently nothing to do.  */
1326 }
1327
1328 /* Process INSN and add hash table entries as appropriate.
1329
1330    Only available expressions that set a single pseudo-reg are recorded.
1331
1332    Single sets in a PARALLEL could be handled, but it's an extra complication
1333    that isn't dealt with right now.  The trick is handling the CLOBBERs that
1334    are also in the PARALLEL.  Later.
1335
1336    If SET_P is nonzero, this is for the assignment hash table,
1337    otherwise it is for the expression hash table.  */
1338
1339 static void
1340 hash_scan_insn (rtx insn, struct hash_table_d *table)
1341 {
1342   rtx pat = PATTERN (insn);
1343   int i;
1344
1345   /* Pick out the sets of INSN and for other forms of instructions record
1346      what's been modified.  */
1347
1348   if (GET_CODE (pat) == SET)
1349     hash_scan_set (pat, insn, table);
1350   else if (GET_CODE (pat) == PARALLEL)
1351     for (i = 0; i < XVECLEN (pat, 0); i++)
1352       {
1353         rtx x = XVECEXP (pat, 0, i);
1354
1355         if (GET_CODE (x) == SET)
1356           hash_scan_set (x, insn, table);
1357         else if (GET_CODE (x) == CLOBBER)
1358           hash_scan_clobber (x, insn, table);
1359         else if (GET_CODE (x) == CALL)
1360           hash_scan_call (x, insn, table);
1361       }
1362
1363   else if (GET_CODE (pat) == CLOBBER)
1364     hash_scan_clobber (pat, insn, table);
1365   else if (GET_CODE (pat) == CALL)
1366     hash_scan_call (pat, insn, table);
1367 }
1368
1369 static void
1370 dump_hash_table (FILE *file, const char *name, struct hash_table_d *table)
1371 {
1372   int i;
1373   /* Flattened out table, so it's printed in proper order.  */
1374   struct expr **flat_table;
1375   unsigned int *hash_val;
1376   struct expr *expr;
1377
1378   flat_table = XCNEWVEC (struct expr *, table->n_elems);
1379   hash_val = XNEWVEC (unsigned int, table->n_elems);
1380
1381   for (i = 0; i < (int) table->size; i++)
1382     for (expr = table->table[i]; expr != NULL; expr = expr->next_same_hash)
1383       {
1384         flat_table[expr->bitmap_index] = expr;
1385         hash_val[expr->bitmap_index] = i;
1386       }
1387
1388   fprintf (file, "%s hash table (%d buckets, %d entries)\n",
1389            name, table->size, table->n_elems);
1390
1391   for (i = 0; i < (int) table->n_elems; i++)
1392     if (flat_table[i] != 0)
1393       {
1394         expr = flat_table[i];
1395         fprintf (file, "Index %d (hash value %d; max distance %d)\n  ",
1396                  expr->bitmap_index, hash_val[i], expr->max_distance);
1397         print_rtl (file, expr->expr);
1398         fprintf (file, "\n");
1399       }
1400
1401   fprintf (file, "\n");
1402
1403   free (flat_table);
1404   free (hash_val);
1405 }
1406
1407 /* Record register first/last/block set information for REGNO in INSN.
1408
1409    first_set records the first place in the block where the register
1410    is set and is used to compute "anticipatability".
1411
1412    last_set records the last place in the block where the register
1413    is set and is used to compute "availability".
1414
1415    last_bb records the block for which first_set and last_set are
1416    valid, as a quick test to invalidate them.  */
1417
1418 static void
1419 record_last_reg_set_info (rtx insn, int regno)
1420 {
1421   struct reg_avail_info *info = &reg_avail_info[regno];
1422   int luid = DF_INSN_LUID (insn);
1423
1424   info->last_set = luid;
1425   if (info->last_bb != current_bb)
1426     {
1427       info->last_bb = current_bb;
1428       info->first_set = luid;
1429     }
1430 }
1431
1432
1433 /* Record all of the canonicalized MEMs of record_last_mem_set_info's insn.
1434    Note we store a pair of elements in the list, so they have to be
1435    taken off pairwise.  */
1436
1437 static void
1438 canon_list_insert (rtx dest ATTRIBUTE_UNUSED, const_rtx unused1 ATTRIBUTE_UNUSED,
1439                    void * v_insn)
1440 {
1441   rtx dest_addr, insn;
1442   int bb;
1443   modify_pair *pair;
1444
1445   while (GET_CODE (dest) == SUBREG
1446       || GET_CODE (dest) == ZERO_EXTRACT
1447       || GET_CODE (dest) == STRICT_LOW_PART)
1448     dest = XEXP (dest, 0);
1449
1450   /* If DEST is not a MEM, then it will not conflict with a load.  Note
1451      that function calls are assumed to clobber memory, but are handled
1452      elsewhere.  */
1453
1454   if (! MEM_P (dest))
1455     return;
1456
1457   dest_addr = get_addr (XEXP (dest, 0));
1458   dest_addr = canon_rtx (dest_addr);
1459   insn = (rtx) v_insn;
1460   bb = BLOCK_FOR_INSN (insn)->index;
1461
1462   pair = VEC_safe_push (modify_pair, heap, canon_modify_mem_list[bb], NULL);
1463   pair->dest = dest;
1464   pair->dest_addr = dest_addr;
1465 }
1466
1467 /* Record memory modification information for INSN.  We do not actually care
1468    about the memory location(s) that are set, or even how they are set (consider
1469    a CALL_INSN).  We merely need to record which insns modify memory.  */
1470
1471 static void
1472 record_last_mem_set_info (rtx insn)
1473 {
1474   int bb = BLOCK_FOR_INSN (insn)->index;
1475
1476   /* load_killed_in_block_p will handle the case of calls clobbering
1477      everything.  */
1478   VEC_safe_push (rtx, heap, modify_mem_list[bb], insn);
1479   bitmap_set_bit (modify_mem_list_set, bb);
1480
1481   if (CALL_P (insn))
1482     bitmap_set_bit (blocks_with_calls, bb);
1483   else
1484     note_stores (PATTERN (insn), canon_list_insert, (void*) insn);
1485 }
1486
1487 /* Called from compute_hash_table via note_stores to handle one
1488    SET or CLOBBER in an insn.  DATA is really the instruction in which
1489    the SET is taking place.  */
1490
1491 static void
1492 record_last_set_info (rtx dest, const_rtx setter ATTRIBUTE_UNUSED, void *data)
1493 {
1494   rtx last_set_insn = (rtx) data;
1495
1496   if (GET_CODE (dest) == SUBREG)
1497     dest = SUBREG_REG (dest);
1498
1499   if (REG_P (dest))
1500     record_last_reg_set_info (last_set_insn, REGNO (dest));
1501   else if (MEM_P (dest)
1502            /* Ignore pushes, they clobber nothing.  */
1503            && ! push_operand (dest, GET_MODE (dest)))
1504     record_last_mem_set_info (last_set_insn);
1505 }
1506
1507 /* Top level function to create an expression hash table.
1508
1509    Expression entries are placed in the hash table if
1510    - they are of the form (set (pseudo-reg) src),
1511    - src is something we want to perform GCSE on,
1512    - none of the operands are subsequently modified in the block
1513
1514    Currently src must be a pseudo-reg or a const_int.
1515
1516    TABLE is the table computed.  */
1517
1518 static void
1519 compute_hash_table_work (struct hash_table_d *table)
1520 {
1521   int i;
1522
1523   /* re-Cache any INSN_LIST nodes we have allocated.  */
1524   clear_modify_mem_tables ();
1525   /* Some working arrays used to track first and last set in each block.  */
1526   reg_avail_info = GNEWVEC (struct reg_avail_info, max_reg_num ());
1527
1528   for (i = 0; i < max_reg_num (); ++i)
1529     reg_avail_info[i].last_bb = NULL;
1530
1531   FOR_EACH_BB (current_bb)
1532     {
1533       rtx insn;
1534       unsigned int regno;
1535
1536       /* First pass over the instructions records information used to
1537          determine when registers and memory are first and last set.  */
1538       FOR_BB_INSNS (current_bb, insn)
1539         {
1540           if (!NONDEBUG_INSN_P (insn))
1541             continue;
1542
1543           if (CALL_P (insn))
1544             {
1545               for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
1546                 if (TEST_HARD_REG_BIT (regs_invalidated_by_call, regno))
1547                   record_last_reg_set_info (insn, regno);
1548
1549               if (! RTL_CONST_OR_PURE_CALL_P (insn))
1550                 record_last_mem_set_info (insn);
1551             }
1552
1553           note_stores (PATTERN (insn), record_last_set_info, insn);
1554         }
1555
1556       /* The next pass builds the hash table.  */
1557       FOR_BB_INSNS (current_bb, insn)
1558         if (NONDEBUG_INSN_P (insn))
1559           hash_scan_insn (insn, table);
1560     }
1561
1562   free (reg_avail_info);
1563   reg_avail_info = NULL;
1564 }
1565
1566 /* Allocate space for the set/expr hash TABLE.
1567    It is used to determine the number of buckets to use.  */
1568
1569 static void
1570 alloc_hash_table (struct hash_table_d *table)
1571 {
1572   int n;
1573
1574   n = get_max_insn_count ();
1575
1576   table->size = n / 4;
1577   if (table->size < 11)
1578     table->size = 11;
1579
1580   /* Attempt to maintain efficient use of hash table.
1581      Making it an odd number is simplest for now.
1582      ??? Later take some measurements.  */
1583   table->size |= 1;
1584   n = table->size * sizeof (struct expr *);
1585   table->table = GNEWVAR (struct expr *, n);
1586 }
1587
1588 /* Free things allocated by alloc_hash_table.  */
1589
1590 static void
1591 free_hash_table (struct hash_table_d *table)
1592 {
1593   free (table->table);
1594 }
1595
1596 /* Compute the expression hash table TABLE.  */
1597
1598 static void
1599 compute_hash_table (struct hash_table_d *table)
1600 {
1601   /* Initialize count of number of entries in hash table.  */
1602   table->n_elems = 0;
1603   memset (table->table, 0, table->size * sizeof (struct expr *));
1604
1605   compute_hash_table_work (table);
1606 }
1607 \f
1608 /* Expression tracking support.  */
1609
1610 /* Clear canon_modify_mem_list and modify_mem_list tables.  */
1611 static void
1612 clear_modify_mem_tables (void)
1613 {
1614   unsigned i;
1615   bitmap_iterator bi;
1616
1617   EXECUTE_IF_SET_IN_BITMAP (modify_mem_list_set, 0, i, bi)
1618     {
1619       VEC_free (rtx, heap, modify_mem_list[i]);
1620       VEC_free (modify_pair, heap, canon_modify_mem_list[i]);
1621     }
1622   bitmap_clear (modify_mem_list_set);
1623   bitmap_clear (blocks_with_calls);
1624 }
1625
1626 /* Release memory used by modify_mem_list_set.  */
1627
1628 static void
1629 free_modify_mem_tables (void)
1630 {
1631   clear_modify_mem_tables ();
1632   free (modify_mem_list);
1633   free (canon_modify_mem_list);
1634   modify_mem_list = 0;
1635   canon_modify_mem_list = 0;
1636 }
1637
1638 \f
1639 /* For each block, compute whether X is transparent.  X is either an
1640    expression or an assignment [though we don't care which, for this context
1641    an assignment is treated as an expression].  For each block where an
1642    element of X is modified, reset the INDX bit in BMAP.  */
1643
1644 static void
1645 compute_transp (const_rtx x, int indx, sbitmap *bmap)
1646 {
1647   int i, j;
1648   enum rtx_code code;
1649   const char *fmt;
1650
1651   /* repeat is used to turn tail-recursion into iteration since GCC
1652      can't do it when there's no return value.  */
1653  repeat:
1654
1655   if (x == 0)
1656     return;
1657
1658   code = GET_CODE (x);
1659   switch (code)
1660     {
1661     case REG:
1662         {
1663           df_ref def;
1664           for (def = DF_REG_DEF_CHAIN (REGNO (x));
1665                def;
1666                def = DF_REF_NEXT_REG (def))
1667             RESET_BIT (bmap[DF_REF_BB (def)->index], indx);
1668         }
1669
1670       return;
1671
1672     case MEM:
1673       if (! MEM_READONLY_P (x))
1674         {
1675           bitmap_iterator bi;
1676           unsigned bb_index;
1677
1678           /* First handle all the blocks with calls.  We don't need to
1679              do any list walking for them.  */
1680           EXECUTE_IF_SET_IN_BITMAP (blocks_with_calls, 0, bb_index, bi)
1681             {
1682               RESET_BIT (bmap[bb_index], indx);
1683             }
1684
1685             /* Now iterate over the blocks which have memory modifications
1686                but which do not have any calls.  */
1687             EXECUTE_IF_AND_COMPL_IN_BITMAP (modify_mem_list_set,
1688                                             blocks_with_calls,
1689                                             0, bb_index, bi)
1690               {
1691                 VEC (modify_pair,heap) *list
1692                   = canon_modify_mem_list[bb_index];
1693                 modify_pair *pair;
1694                 unsigned ix;
1695
1696                 FOR_EACH_VEC_ELT_REVERSE (modify_pair, list, ix, pair)
1697                   {
1698                     rtx dest = pair->dest;
1699                     rtx dest_addr = pair->dest_addr;
1700
1701                     if (canon_true_dependence (dest, GET_MODE (dest), dest_addr,
1702                                                x, NULL_RTX, rtx_addr_varies_p))
1703                       RESET_BIT (bmap[bb_index], indx);
1704                   }
1705               }
1706         }
1707
1708       x = XEXP (x, 0);
1709       goto repeat;
1710
1711     case PC:
1712     case CC0: /*FIXME*/
1713     case CONST:
1714     case CONST_INT:
1715     case CONST_DOUBLE:
1716     case CONST_FIXED:
1717     case CONST_VECTOR:
1718     case SYMBOL_REF:
1719     case LABEL_REF:
1720     case ADDR_VEC:
1721     case ADDR_DIFF_VEC:
1722       return;
1723
1724     default:
1725       break;
1726     }
1727
1728   for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
1729     {
1730       if (fmt[i] == 'e')
1731         {
1732           /* If we are about to do the last recursive call
1733              needed at this level, change it into iteration.
1734              This function is called enough to be worth it.  */
1735           if (i == 0)
1736             {
1737               x = XEXP (x, i);
1738               goto repeat;
1739             }
1740
1741           compute_transp (XEXP (x, i), indx, bmap);
1742         }
1743       else if (fmt[i] == 'E')
1744         for (j = 0; j < XVECLEN (x, i); j++)
1745           compute_transp (XVECEXP (x, i, j), indx, bmap);
1746     }
1747 }
1748
1749 \f
1750 /* Compute PRE+LCM working variables.  */
1751
1752 /* Local properties of expressions.  */
1753 /* Nonzero for expressions that are transparent in the block.  */
1754 static sbitmap *transp;
1755
1756 /* Nonzero for expressions that are computed (available) in the block.  */
1757 static sbitmap *comp;
1758
1759 /* Nonzero for expressions that are locally anticipatable in the block.  */
1760 static sbitmap *antloc;
1761
1762 /* Nonzero for expressions where this block is an optimal computation
1763    point.  */
1764 static sbitmap *pre_optimal;
1765
1766 /* Nonzero for expressions which are redundant in a particular block.  */
1767 static sbitmap *pre_redundant;
1768
1769 /* Nonzero for expressions which should be inserted on a specific edge.  */
1770 static sbitmap *pre_insert_map;
1771
1772 /* Nonzero for expressions which should be deleted in a specific block.  */
1773 static sbitmap *pre_delete_map;
1774
1775 /* Contains the edge_list returned by pre_edge_lcm.  */
1776 static struct edge_list *edge_list;
1777
1778 /* Allocate vars used for PRE analysis.  */
1779
1780 static void
1781 alloc_pre_mem (int n_blocks, int n_exprs)
1782 {
1783   transp = sbitmap_vector_alloc (n_blocks, n_exprs);
1784   comp = sbitmap_vector_alloc (n_blocks, n_exprs);
1785   antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
1786
1787   pre_optimal = NULL;
1788   pre_redundant = NULL;
1789   pre_insert_map = NULL;
1790   pre_delete_map = NULL;
1791   ae_kill = sbitmap_vector_alloc (n_blocks, n_exprs);
1792
1793   /* pre_insert and pre_delete are allocated later.  */
1794 }
1795
1796 /* Free vars used for PRE analysis.  */
1797
1798 static void
1799 free_pre_mem (void)
1800 {
1801   sbitmap_vector_free (transp);
1802   sbitmap_vector_free (comp);
1803
1804   /* ANTLOC and AE_KILL are freed just after pre_lcm finishes.  */
1805
1806   if (pre_optimal)
1807     sbitmap_vector_free (pre_optimal);
1808   if (pre_redundant)
1809     sbitmap_vector_free (pre_redundant);
1810   if (pre_insert_map)
1811     sbitmap_vector_free (pre_insert_map);
1812   if (pre_delete_map)
1813     sbitmap_vector_free (pre_delete_map);
1814
1815   transp = comp = NULL;
1816   pre_optimal = pre_redundant = pre_insert_map = pre_delete_map = NULL;
1817 }
1818
1819 /* Remove certain expressions from anticipatable and transparent
1820    sets of basic blocks that have incoming abnormal edge.
1821    For PRE remove potentially trapping expressions to avoid placing
1822    them on abnormal edges.  For hoisting remove memory references that
1823    can be clobbered by calls.  */
1824
1825 static void
1826 prune_expressions (bool pre_p)
1827 {
1828   sbitmap prune_exprs;
1829   unsigned int ui;
1830   basic_block bb;
1831
1832   prune_exprs = sbitmap_alloc (expr_hash_table.n_elems);
1833   sbitmap_zero (prune_exprs);
1834   for (ui = 0; ui < expr_hash_table.size; ui++)
1835     {
1836       struct expr *e;
1837       for (e = expr_hash_table.table[ui]; e != NULL; e = e->next_same_hash)
1838         {
1839           /* Note potentially trapping expressions.  */
1840           if (may_trap_p (e->expr))
1841             {
1842               SET_BIT (prune_exprs, e->bitmap_index);
1843               continue;
1844             }
1845
1846           if (!pre_p && MEM_P (e->expr))
1847             /* Note memory references that can be clobbered by a call.
1848                We do not split abnormal edges in hoisting, so would
1849                a memory reference get hoisted along an abnormal edge,
1850                it would be placed /before/ the call.  Therefore, only
1851                constant memory references can be hoisted along abnormal
1852                edges.  */
1853             {
1854               if (GET_CODE (XEXP (e->expr, 0)) == SYMBOL_REF
1855                   && CONSTANT_POOL_ADDRESS_P (XEXP (e->expr, 0)))
1856                 continue;
1857
1858               if (MEM_READONLY_P (e->expr)
1859                   && !MEM_VOLATILE_P (e->expr)
1860                   && MEM_NOTRAP_P (e->expr))
1861                 /* Constant memory reference, e.g., a PIC address.  */
1862                 continue;
1863
1864               /* ??? Optimally, we would use interprocedural alias
1865                  analysis to determine if this mem is actually killed
1866                  by this call.  */
1867
1868               SET_BIT (prune_exprs, e->bitmap_index);
1869             }
1870         }
1871     }
1872
1873   FOR_EACH_BB (bb)
1874     {
1875       edge e;
1876       edge_iterator ei;
1877
1878       /* If the current block is the destination of an abnormal edge, we
1879          kill all trapping (for PRE) and memory (for hoist) expressions
1880          because we won't be able to properly place the instruction on
1881          the edge.  So make them neither anticipatable nor transparent.
1882          This is fairly conservative.
1883
1884          ??? For hoisting it may be necessary to check for set-and-jump
1885          instructions here, not just for abnormal edges.  The general problem
1886          is that when an expression cannot not be placed right at the end of
1887          a basic block we should account for any side-effects of a subsequent
1888          jump instructions that could clobber the expression.  It would
1889          be best to implement this check along the lines of
1890          hoist_expr_reaches_here_p where the target block is already known
1891          and, hence, there's no need to conservatively prune expressions on
1892          "intermediate" set-and-jump instructions.  */
1893       FOR_EACH_EDGE (e, ei, bb->preds)
1894         if ((e->flags & EDGE_ABNORMAL)
1895             && (pre_p || CALL_P (BB_END (e->src))))
1896           {
1897             sbitmap_difference (antloc[bb->index],
1898                                 antloc[bb->index], prune_exprs);
1899             sbitmap_difference (transp[bb->index],
1900                                 transp[bb->index], prune_exprs);
1901             break;
1902           }
1903     }
1904
1905   sbitmap_free (prune_exprs);
1906 }
1907
1908 /* It may be necessary to insert a large number of insns on edges to
1909    make the existing occurrences of expressions fully redundant.  This
1910    routine examines the set of insertions and deletions and if the ratio
1911    of insertions to deletions is too high for a particular expression, then
1912    the expression is removed from the insertion/deletion sets.
1913
1914    N_ELEMS is the number of elements in the hash table.  */
1915
1916 static void
1917 prune_insertions_deletions (int n_elems)
1918 {
1919   sbitmap_iterator sbi;
1920   sbitmap prune_exprs;
1921
1922   /* We always use I to iterate over blocks/edges and J to iterate over
1923      expressions.  */
1924   unsigned int i, j;
1925
1926   /* Counts for the number of times an expression needs to be inserted and
1927      number of times an expression can be removed as a result.  */
1928   int *insertions = GCNEWVEC (int, n_elems);
1929   int *deletions = GCNEWVEC (int, n_elems);
1930
1931   /* Set of expressions which require too many insertions relative to
1932      the number of deletions achieved.  We will prune these out of the
1933      insertion/deletion sets.  */
1934   prune_exprs = sbitmap_alloc (n_elems);
1935   sbitmap_zero (prune_exprs);
1936
1937   /* Iterate over the edges counting the number of times each expression
1938      needs to be inserted.  */
1939   for (i = 0; i < (unsigned) n_edges; i++)
1940     {
1941       EXECUTE_IF_SET_IN_SBITMAP (pre_insert_map[i], 0, j, sbi)
1942         insertions[j]++;
1943     }
1944
1945   /* Similarly for deletions, but those occur in blocks rather than on
1946      edges.  */
1947   for (i = 0; i < (unsigned) last_basic_block; i++)
1948     {
1949       EXECUTE_IF_SET_IN_SBITMAP (pre_delete_map[i], 0, j, sbi)
1950         deletions[j]++;
1951     }
1952
1953   /* Now that we have accurate counts, iterate over the elements in the
1954      hash table and see if any need too many insertions relative to the
1955      number of evaluations that can be removed.  If so, mark them in
1956      PRUNE_EXPRS.  */
1957   for (j = 0; j < (unsigned) n_elems; j++)
1958     if (deletions[j]
1959         && ((unsigned) insertions[j] / deletions[j]) > MAX_GCSE_INSERTION_RATIO)
1960       SET_BIT (prune_exprs, j);
1961
1962   /* Now prune PRE_INSERT_MAP and PRE_DELETE_MAP based on PRUNE_EXPRS.  */
1963   EXECUTE_IF_SET_IN_SBITMAP (prune_exprs, 0, j, sbi)
1964     {
1965       for (i = 0; i < (unsigned) n_edges; i++)
1966         RESET_BIT (pre_insert_map[i], j);
1967
1968       for (i = 0; i < (unsigned) last_basic_block; i++)
1969         RESET_BIT (pre_delete_map[i], j);
1970     }
1971
1972   sbitmap_free (prune_exprs);
1973   free (insertions);
1974   free (deletions);
1975 }
1976
1977 /* Top level routine to do the dataflow analysis needed by PRE.  */
1978
1979 static void
1980 compute_pre_data (void)
1981 {
1982   basic_block bb;
1983
1984   compute_local_properties (transp, comp, antloc, &expr_hash_table);
1985   prune_expressions (true);
1986   sbitmap_vector_zero (ae_kill, last_basic_block);
1987
1988   /* Compute ae_kill for each basic block using:
1989
1990      ~(TRANSP | COMP)
1991   */
1992
1993   FOR_EACH_BB (bb)
1994     {
1995       sbitmap_a_or_b (ae_kill[bb->index], transp[bb->index], comp[bb->index]);
1996       sbitmap_not (ae_kill[bb->index], ae_kill[bb->index]);
1997     }
1998
1999   edge_list = pre_edge_lcm (expr_hash_table.n_elems, transp, comp, antloc,
2000                             ae_kill, &pre_insert_map, &pre_delete_map);
2001   sbitmap_vector_free (antloc);
2002   antloc = NULL;
2003   sbitmap_vector_free (ae_kill);
2004   ae_kill = NULL;
2005
2006   prune_insertions_deletions (expr_hash_table.n_elems);
2007 }
2008 \f
2009 /* PRE utilities */
2010
2011 /* Return nonzero if an occurrence of expression EXPR in OCCR_BB would reach
2012    block BB.
2013
2014    VISITED is a pointer to a working buffer for tracking which BB's have
2015    been visited.  It is NULL for the top-level call.
2016
2017    We treat reaching expressions that go through blocks containing the same
2018    reaching expression as "not reaching".  E.g. if EXPR is generated in blocks
2019    2 and 3, INSN is in block 4, and 2->3->4, we treat the expression in block
2020    2 as not reaching.  The intent is to improve the probability of finding
2021    only one reaching expression and to reduce register lifetimes by picking
2022    the closest such expression.  */
2023
2024 static int
2025 pre_expr_reaches_here_p_work (basic_block occr_bb, struct expr *expr, basic_block bb, char *visited)
2026 {
2027   edge pred;
2028   edge_iterator ei;
2029
2030   FOR_EACH_EDGE (pred, ei, bb->preds)
2031     {
2032       basic_block pred_bb = pred->src;
2033
2034       if (pred->src == ENTRY_BLOCK_PTR
2035           /* Has predecessor has already been visited?  */
2036           || visited[pred_bb->index])
2037         ;/* Nothing to do.  */
2038
2039       /* Does this predecessor generate this expression?  */
2040       else if (TEST_BIT (comp[pred_bb->index], expr->bitmap_index))
2041         {
2042           /* Is this the occurrence we're looking for?
2043              Note that there's only one generating occurrence per block
2044              so we just need to check the block number.  */
2045           if (occr_bb == pred_bb)
2046             return 1;
2047
2048           visited[pred_bb->index] = 1;
2049         }
2050       /* Ignore this predecessor if it kills the expression.  */
2051       else if (! TEST_BIT (transp[pred_bb->index], expr->bitmap_index))
2052         visited[pred_bb->index] = 1;
2053
2054       /* Neither gen nor kill.  */
2055       else
2056         {
2057           visited[pred_bb->index] = 1;
2058           if (pre_expr_reaches_here_p_work (occr_bb, expr, pred_bb, visited))
2059             return 1;
2060         }
2061     }
2062
2063   /* All paths have been checked.  */
2064   return 0;
2065 }
2066
2067 /* The wrapper for pre_expr_reaches_here_work that ensures that any
2068    memory allocated for that function is returned.  */
2069
2070 static int
2071 pre_expr_reaches_here_p (basic_block occr_bb, struct expr *expr, basic_block bb)
2072 {
2073   int rval;
2074   char *visited = XCNEWVEC (char, last_basic_block);
2075
2076   rval = pre_expr_reaches_here_p_work (occr_bb, expr, bb, visited);
2077
2078   free (visited);
2079   return rval;
2080 }
2081 \f
2082
2083 /* Given an expr, generate RTL which we can insert at the end of a BB,
2084    or on an edge.  Set the block number of any insns generated to
2085    the value of BB.  */
2086
2087 static rtx
2088 process_insert_insn (struct expr *expr)
2089 {
2090   rtx reg = expr->reaching_reg;
2091   rtx exp = copy_rtx (expr->expr);
2092   rtx pat;
2093
2094   start_sequence ();
2095
2096   /* If the expression is something that's an operand, like a constant,
2097      just copy it to a register.  */
2098   if (general_operand (exp, GET_MODE (reg)))
2099     emit_move_insn (reg, exp);
2100
2101   /* Otherwise, make a new insn to compute this expression and make sure the
2102      insn will be recognized (this also adds any needed CLOBBERs).  Copy the
2103      expression to make sure we don't have any sharing issues.  */
2104   else
2105     {
2106       rtx insn = emit_insn (gen_rtx_SET (VOIDmode, reg, exp));
2107
2108       if (insn_invalid_p (insn))
2109         gcc_unreachable ();
2110     }
2111
2112
2113   pat = get_insns ();
2114   end_sequence ();
2115
2116   return pat;
2117 }
2118
2119 /* Add EXPR to the end of basic block BB.
2120
2121    This is used by both the PRE and code hoisting.  */
2122
2123 static void
2124 insert_insn_end_basic_block (struct expr *expr, basic_block bb)
2125 {
2126   rtx insn = BB_END (bb);
2127   rtx new_insn;
2128   rtx reg = expr->reaching_reg;
2129   int regno = REGNO (reg);
2130   rtx pat, pat_end;
2131
2132   pat = process_insert_insn (expr);
2133   gcc_assert (pat && INSN_P (pat));
2134
2135   pat_end = pat;
2136   while (NEXT_INSN (pat_end) != NULL_RTX)
2137     pat_end = NEXT_INSN (pat_end);
2138
2139   /* If the last insn is a jump, insert EXPR in front [taking care to
2140      handle cc0, etc. properly].  Similarly we need to care trapping
2141      instructions in presence of non-call exceptions.  */
2142
2143   if (JUMP_P (insn)
2144       || (NONJUMP_INSN_P (insn)
2145           && (!single_succ_p (bb)
2146               || single_succ_edge (bb)->flags & EDGE_ABNORMAL)))
2147     {
2148 #ifdef HAVE_cc0
2149       rtx note;
2150 #endif
2151
2152       /* If this is a jump table, then we can't insert stuff here.  Since
2153          we know the previous real insn must be the tablejump, we insert
2154          the new instruction just before the tablejump.  */
2155       if (GET_CODE (PATTERN (insn)) == ADDR_VEC
2156           || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
2157         insn = prev_active_insn (insn);
2158
2159 #ifdef HAVE_cc0
2160       /* FIXME: 'twould be nice to call prev_cc0_setter here but it aborts
2161          if cc0 isn't set.  */
2162       note = find_reg_note (insn, REG_CC_SETTER, NULL_RTX);
2163       if (note)
2164         insn = XEXP (note, 0);
2165       else
2166         {
2167           rtx maybe_cc0_setter = prev_nonnote_insn (insn);
2168           if (maybe_cc0_setter
2169               && INSN_P (maybe_cc0_setter)
2170               && sets_cc0_p (PATTERN (maybe_cc0_setter)))
2171             insn = maybe_cc0_setter;
2172         }
2173 #endif
2174       /* FIXME: What if something in cc0/jump uses value set in new insn?  */
2175       new_insn = emit_insn_before_noloc (pat, insn, bb);
2176     }
2177
2178   /* Likewise if the last insn is a call, as will happen in the presence
2179      of exception handling.  */
2180   else if (CALL_P (insn)
2181            && (!single_succ_p (bb)
2182                || single_succ_edge (bb)->flags & EDGE_ABNORMAL))
2183     {
2184       /* Keeping in mind targets with small register classes and parameters
2185          in registers, we search backward and place the instructions before
2186          the first parameter is loaded.  Do this for everyone for consistency
2187          and a presumption that we'll get better code elsewhere as well.  */
2188
2189       /* Since different machines initialize their parameter registers
2190          in different orders, assume nothing.  Collect the set of all
2191          parameter registers.  */
2192       insn = find_first_parameter_load (insn, BB_HEAD (bb));
2193
2194       /* If we found all the parameter loads, then we want to insert
2195          before the first parameter load.
2196
2197          If we did not find all the parameter loads, then we might have
2198          stopped on the head of the block, which could be a CODE_LABEL.
2199          If we inserted before the CODE_LABEL, then we would be putting
2200          the insn in the wrong basic block.  In that case, put the insn
2201          after the CODE_LABEL.  Also, respect NOTE_INSN_BASIC_BLOCK.  */
2202       while (LABEL_P (insn)
2203              || NOTE_INSN_BASIC_BLOCK_P (insn))
2204         insn = NEXT_INSN (insn);
2205
2206       new_insn = emit_insn_before_noloc (pat, insn, bb);
2207     }
2208   else
2209     new_insn = emit_insn_after_noloc (pat, insn, bb);
2210
2211   while (1)
2212     {
2213       if (INSN_P (pat))
2214         add_label_notes (PATTERN (pat), new_insn);
2215       if (pat == pat_end)
2216         break;
2217       pat = NEXT_INSN (pat);
2218     }
2219
2220   gcse_create_count++;
2221
2222   if (dump_file)
2223     {
2224       fprintf (dump_file, "PRE/HOIST: end of bb %d, insn %d, ",
2225                bb->index, INSN_UID (new_insn));
2226       fprintf (dump_file, "copying expression %d to reg %d\n",
2227                expr->bitmap_index, regno);
2228     }
2229 }
2230
2231 /* Insert partially redundant expressions on edges in the CFG to make
2232    the expressions fully redundant.  */
2233
2234 static int
2235 pre_edge_insert (struct edge_list *edge_list, struct expr **index_map)
2236 {
2237   int e, i, j, num_edges, set_size, did_insert = 0;
2238   sbitmap *inserted;
2239
2240   /* Where PRE_INSERT_MAP is nonzero, we add the expression on that edge
2241      if it reaches any of the deleted expressions.  */
2242
2243   set_size = pre_insert_map[0]->size;
2244   num_edges = NUM_EDGES (edge_list);
2245   inserted = sbitmap_vector_alloc (num_edges, expr_hash_table.n_elems);
2246   sbitmap_vector_zero (inserted, num_edges);
2247
2248   for (e = 0; e < num_edges; e++)
2249     {
2250       int indx;
2251       basic_block bb = INDEX_EDGE_PRED_BB (edge_list, e);
2252
2253       for (i = indx = 0; i < set_size; i++, indx += SBITMAP_ELT_BITS)
2254         {
2255           SBITMAP_ELT_TYPE insert = pre_insert_map[e]->elms[i];
2256
2257           for (j = indx; insert && j < (int) expr_hash_table.n_elems; j++, insert >>= 1)
2258             if ((insert & 1) != 0 && index_map[j]->reaching_reg != NULL_RTX)
2259               {
2260                 struct expr *expr = index_map[j];
2261                 struct occr *occr;
2262
2263                 /* Now look at each deleted occurrence of this expression.  */
2264                 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
2265                   {
2266                     if (! occr->deleted_p)
2267                       continue;
2268
2269                     /* Insert this expression on this edge if it would
2270                        reach the deleted occurrence in BB.  */
2271                     if (!TEST_BIT (inserted[e], j))
2272                       {
2273                         rtx insn;
2274                         edge eg = INDEX_EDGE (edge_list, e);
2275
2276                         /* We can't insert anything on an abnormal and
2277                            critical edge, so we insert the insn at the end of
2278                            the previous block. There are several alternatives
2279                            detailed in Morgans book P277 (sec 10.5) for
2280                            handling this situation.  This one is easiest for
2281                            now.  */
2282
2283                         if (eg->flags & EDGE_ABNORMAL)
2284                           insert_insn_end_basic_block (index_map[j], bb);
2285                         else
2286                           {
2287                             insn = process_insert_insn (index_map[j]);
2288                             insert_insn_on_edge (insn, eg);
2289                           }
2290
2291                         if (dump_file)
2292                           {
2293                             fprintf (dump_file, "PRE: edge (%d,%d), ",
2294                                      bb->index,
2295                                      INDEX_EDGE_SUCC_BB (edge_list, e)->index);
2296                             fprintf (dump_file, "copy expression %d\n",
2297                                      expr->bitmap_index);
2298                           }
2299
2300                         update_ld_motion_stores (expr);
2301                         SET_BIT (inserted[e], j);
2302                         did_insert = 1;
2303                         gcse_create_count++;
2304                       }
2305                   }
2306               }
2307         }
2308     }
2309
2310   sbitmap_vector_free (inserted);
2311   return did_insert;
2312 }
2313
2314 /* Copy the result of EXPR->EXPR generated by INSN to EXPR->REACHING_REG.
2315    Given "old_reg <- expr" (INSN), instead of adding after it
2316      reaching_reg <- old_reg
2317    it's better to do the following:
2318      reaching_reg <- expr
2319      old_reg      <- reaching_reg
2320    because this way copy propagation can discover additional PRE
2321    opportunities.  But if this fails, we try the old way.
2322    When "expr" is a store, i.e.
2323    given "MEM <- old_reg", instead of adding after it
2324      reaching_reg <- old_reg
2325    it's better to add it before as follows:
2326      reaching_reg <- old_reg
2327      MEM          <- reaching_reg.  */
2328
2329 static void
2330 pre_insert_copy_insn (struct expr *expr, rtx insn)
2331 {
2332   rtx reg = expr->reaching_reg;
2333   int regno = REGNO (reg);
2334   int indx = expr->bitmap_index;
2335   rtx pat = PATTERN (insn);
2336   rtx set, first_set, new_insn;
2337   rtx old_reg;
2338   int i;
2339
2340   /* This block matches the logic in hash_scan_insn.  */
2341   switch (GET_CODE (pat))
2342     {
2343     case SET:
2344       set = pat;
2345       break;
2346
2347     case PARALLEL:
2348       /* Search through the parallel looking for the set whose
2349          source was the expression that we're interested in.  */
2350       first_set = NULL_RTX;
2351       set = NULL_RTX;
2352       for (i = 0; i < XVECLEN (pat, 0); i++)
2353         {
2354           rtx x = XVECEXP (pat, 0, i);
2355           if (GET_CODE (x) == SET)
2356             {
2357               /* If the source was a REG_EQUAL or REG_EQUIV note, we
2358                  may not find an equivalent expression, but in this
2359                  case the PARALLEL will have a single set.  */
2360               if (first_set == NULL_RTX)
2361                 first_set = x;
2362               if (expr_equiv_p (SET_SRC (x), expr->expr))
2363                 {
2364                   set = x;
2365                   break;
2366                 }
2367             }
2368         }
2369
2370       gcc_assert (first_set);
2371       if (set == NULL_RTX)
2372         set = first_set;
2373       break;
2374
2375     default:
2376       gcc_unreachable ();
2377     }
2378
2379   if (REG_P (SET_DEST (set)))
2380     {
2381       old_reg = SET_DEST (set);
2382       /* Check if we can modify the set destination in the original insn.  */
2383       if (validate_change (insn, &SET_DEST (set), reg, 0))
2384         {
2385           new_insn = gen_move_insn (old_reg, reg);
2386           new_insn = emit_insn_after (new_insn, insn);
2387         }
2388       else
2389         {
2390           new_insn = gen_move_insn (reg, old_reg);
2391           new_insn = emit_insn_after (new_insn, insn);
2392         }
2393     }
2394   else /* This is possible only in case of a store to memory.  */
2395     {
2396       old_reg = SET_SRC (set);
2397       new_insn = gen_move_insn (reg, old_reg);
2398
2399       /* Check if we can modify the set source in the original insn.  */
2400       if (validate_change (insn, &SET_SRC (set), reg, 0))
2401         new_insn = emit_insn_before (new_insn, insn);
2402       else
2403         new_insn = emit_insn_after (new_insn, insn);
2404     }
2405
2406   gcse_create_count++;
2407
2408   if (dump_file)
2409     fprintf (dump_file,
2410              "PRE: bb %d, insn %d, copy expression %d in insn %d to reg %d\n",
2411               BLOCK_FOR_INSN (insn)->index, INSN_UID (new_insn), indx,
2412               INSN_UID (insn), regno);
2413 }
2414
2415 /* Copy available expressions that reach the redundant expression
2416    to `reaching_reg'.  */
2417
2418 static void
2419 pre_insert_copies (void)
2420 {
2421   unsigned int i, added_copy;
2422   struct expr *expr;
2423   struct occr *occr;
2424   struct occr *avail;
2425
2426   /* For each available expression in the table, copy the result to
2427      `reaching_reg' if the expression reaches a deleted one.
2428
2429      ??? The current algorithm is rather brute force.
2430      Need to do some profiling.  */
2431
2432   for (i = 0; i < expr_hash_table.size; i++)
2433     for (expr = expr_hash_table.table[i]; expr != NULL; expr = expr->next_same_hash)
2434       {
2435         /* If the basic block isn't reachable, PPOUT will be TRUE.  However,
2436            we don't want to insert a copy here because the expression may not
2437            really be redundant.  So only insert an insn if the expression was
2438            deleted.  This test also avoids further processing if the
2439            expression wasn't deleted anywhere.  */
2440         if (expr->reaching_reg == NULL)
2441           continue;
2442
2443         /* Set when we add a copy for that expression.  */
2444         added_copy = 0;
2445
2446         for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
2447           {
2448             if (! occr->deleted_p)
2449               continue;
2450
2451             for (avail = expr->avail_occr; avail != NULL; avail = avail->next)
2452               {
2453                 rtx insn = avail->insn;
2454
2455                 /* No need to handle this one if handled already.  */
2456                 if (avail->copied_p)
2457                   continue;
2458
2459                 /* Don't handle this one if it's a redundant one.  */
2460                 if (INSN_DELETED_P (insn))
2461                   continue;
2462
2463                 /* Or if the expression doesn't reach the deleted one.  */
2464                 if (! pre_expr_reaches_here_p (BLOCK_FOR_INSN (avail->insn),
2465                                                expr,
2466                                                BLOCK_FOR_INSN (occr->insn)))
2467                   continue;
2468
2469                 added_copy = 1;
2470
2471                 /* Copy the result of avail to reaching_reg.  */
2472                 pre_insert_copy_insn (expr, insn);
2473                 avail->copied_p = 1;
2474               }
2475           }
2476
2477           if (added_copy)
2478             update_ld_motion_stores (expr);
2479       }
2480 }
2481
2482 /* Emit move from SRC to DEST noting the equivalence with expression computed
2483    in INSN.  */
2484 static rtx
2485 gcse_emit_move_after (rtx src, rtx dest, rtx insn)
2486 {
2487   rtx new_rtx;
2488   rtx set = single_set (insn), set2;
2489   rtx note;
2490   rtx eqv;
2491
2492   /* This should never fail since we're creating a reg->reg copy
2493      we've verified to be valid.  */
2494
2495   new_rtx = emit_insn_after (gen_move_insn (dest, src), insn);
2496
2497   /* Note the equivalence for local CSE pass.  */
2498   set2 = single_set (new_rtx);
2499   if (!set2 || !rtx_equal_p (SET_DEST (set2), dest))
2500     return new_rtx;
2501   if ((note = find_reg_equal_equiv_note (insn)))
2502     eqv = XEXP (note, 0);
2503   else
2504     eqv = SET_SRC (set);
2505
2506   set_unique_reg_note (new_rtx, REG_EQUAL, copy_insn_1 (eqv));
2507
2508   return new_rtx;
2509 }
2510
2511 /* Delete redundant computations.
2512    Deletion is done by changing the insn to copy the `reaching_reg' of
2513    the expression into the result of the SET.  It is left to later passes
2514    (cprop, cse2, flow, combine, regmove) to propagate the copy or eliminate it.
2515
2516    Returns nonzero if a change is made.  */
2517
2518 static int
2519 pre_delete (void)
2520 {
2521   unsigned int i;
2522   int changed;
2523   struct expr *expr;
2524   struct occr *occr;
2525
2526   changed = 0;
2527   for (i = 0; i < expr_hash_table.size; i++)
2528     for (expr = expr_hash_table.table[i];
2529          expr != NULL;
2530          expr = expr->next_same_hash)
2531       {
2532         int indx = expr->bitmap_index;
2533
2534         /* We only need to search antic_occr since we require
2535            ANTLOC != 0.  */
2536
2537         for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
2538           {
2539             rtx insn = occr->insn;
2540             rtx set;
2541             basic_block bb = BLOCK_FOR_INSN (insn);
2542
2543             /* We only delete insns that have a single_set.  */
2544             if (TEST_BIT (pre_delete_map[bb->index], indx)
2545                 && (set = single_set (insn)) != 0
2546                 && dbg_cnt (pre_insn))
2547               {
2548                 /* Create a pseudo-reg to store the result of reaching
2549                    expressions into.  Get the mode for the new pseudo from
2550                    the mode of the original destination pseudo.  */
2551                 if (expr->reaching_reg == NULL)
2552                   expr->reaching_reg = gen_reg_rtx_and_attrs (SET_DEST (set));
2553
2554                 gcse_emit_move_after (expr->reaching_reg, SET_DEST (set), insn);
2555                 delete_insn (insn);
2556                 occr->deleted_p = 1;
2557                 changed = 1;
2558                 gcse_subst_count++;
2559
2560                 if (dump_file)
2561                   {
2562                     fprintf (dump_file,
2563                              "PRE: redundant insn %d (expression %d) in ",
2564                                INSN_UID (insn), indx);
2565                     fprintf (dump_file, "bb %d, reaching reg is %d\n",
2566                              bb->index, REGNO (expr->reaching_reg));
2567                   }
2568               }
2569           }
2570       }
2571
2572   return changed;
2573 }
2574
2575 /* Perform GCSE optimizations using PRE.
2576    This is called by one_pre_gcse_pass after all the dataflow analysis
2577    has been done.
2578
2579    This is based on the original Morel-Renvoise paper Fred Chow's thesis, and
2580    lazy code motion from Knoop, Ruthing and Steffen as described in Advanced
2581    Compiler Design and Implementation.
2582
2583    ??? A new pseudo reg is created to hold the reaching expression.  The nice
2584    thing about the classical approach is that it would try to use an existing
2585    reg.  If the register can't be adequately optimized [i.e. we introduce
2586    reload problems], one could add a pass here to propagate the new register
2587    through the block.
2588
2589    ??? We don't handle single sets in PARALLELs because we're [currently] not
2590    able to copy the rest of the parallel when we insert copies to create full
2591    redundancies from partial redundancies.  However, there's no reason why we
2592    can't handle PARALLELs in the cases where there are no partial
2593    redundancies.  */
2594
2595 static int
2596 pre_gcse (void)
2597 {
2598   unsigned int i;
2599   int did_insert, changed;
2600   struct expr **index_map;
2601   struct expr *expr;
2602
2603   /* Compute a mapping from expression number (`bitmap_index') to
2604      hash table entry.  */
2605
2606   index_map = XCNEWVEC (struct expr *, expr_hash_table.n_elems);
2607   for (i = 0; i < expr_hash_table.size; i++)
2608     for (expr = expr_hash_table.table[i]; expr != NULL; expr = expr->next_same_hash)
2609       index_map[expr->bitmap_index] = expr;
2610
2611   /* Delete the redundant insns first so that
2612      - we know what register to use for the new insns and for the other
2613        ones with reaching expressions
2614      - we know which insns are redundant when we go to create copies  */
2615
2616   changed = pre_delete ();
2617   did_insert = pre_edge_insert (edge_list, index_map);
2618
2619   /* In other places with reaching expressions, copy the expression to the
2620      specially allocated pseudo-reg that reaches the redundant expr.  */
2621   pre_insert_copies ();
2622   if (did_insert)
2623     {
2624       commit_edge_insertions ();
2625       changed = 1;
2626     }
2627
2628   free (index_map);
2629   return changed;
2630 }
2631
2632 /* Top level routine to perform one PRE GCSE pass.
2633
2634    Return nonzero if a change was made.  */
2635
2636 static int
2637 one_pre_gcse_pass (void)
2638 {
2639   int changed = 0;
2640
2641   gcse_subst_count = 0;
2642   gcse_create_count = 0;
2643
2644   /* Return if there's nothing to do, or it is too expensive.  */
2645   if (n_basic_blocks <= NUM_FIXED_BLOCKS + 1
2646       || is_too_expensive (_("PRE disabled")))
2647     return 0;
2648
2649   /* We need alias.  */
2650   init_alias_analysis ();
2651
2652   bytes_used = 0;
2653   gcc_obstack_init (&gcse_obstack);
2654   alloc_gcse_mem ();
2655
2656   alloc_hash_table (&expr_hash_table);
2657   add_noreturn_fake_exit_edges ();
2658   if (flag_gcse_lm)
2659     compute_ld_motion_mems ();
2660
2661   compute_hash_table (&expr_hash_table);
2662   trim_ld_motion_mems ();
2663   if (dump_file)
2664     dump_hash_table (dump_file, "Expression", &expr_hash_table);
2665
2666   if (expr_hash_table.n_elems > 0)
2667     {
2668       alloc_pre_mem (last_basic_block, expr_hash_table.n_elems);
2669       compute_pre_data ();
2670       changed |= pre_gcse ();
2671       free_edge_list (edge_list);
2672       free_pre_mem ();
2673     }
2674
2675   free_ldst_mems ();
2676   remove_fake_exit_edges ();
2677   free_hash_table (&expr_hash_table);
2678
2679   free_gcse_mem ();
2680   obstack_free (&gcse_obstack, NULL);
2681
2682   /* We are finished with alias.  */
2683   end_alias_analysis ();
2684
2685   if (dump_file)
2686     {
2687       fprintf (dump_file, "PRE GCSE of %s, %d basic blocks, %d bytes needed, ",
2688                current_function_name (), n_basic_blocks, bytes_used);
2689       fprintf (dump_file, "%d substs, %d insns created\n",
2690                gcse_subst_count, gcse_create_count);
2691     }
2692
2693   return changed;
2694 }
2695 \f
2696 /* If X contains any LABEL_REF's, add REG_LABEL_OPERAND notes for them
2697    to INSN.  If such notes are added to an insn which references a
2698    CODE_LABEL, the LABEL_NUSES count is incremented.  We have to add
2699    that note, because the following loop optimization pass requires
2700    them.  */
2701
2702 /* ??? If there was a jump optimization pass after gcse and before loop,
2703    then we would not need to do this here, because jump would add the
2704    necessary REG_LABEL_OPERAND and REG_LABEL_TARGET notes.  */
2705
2706 static void
2707 add_label_notes (rtx x, rtx insn)
2708 {
2709   enum rtx_code code = GET_CODE (x);
2710   int i, j;
2711   const char *fmt;
2712
2713   if (code == LABEL_REF && !LABEL_REF_NONLOCAL_P (x))
2714     {
2715       /* This code used to ignore labels that referred to dispatch tables to
2716          avoid flow generating (slightly) worse code.
2717
2718          We no longer ignore such label references (see LABEL_REF handling in
2719          mark_jump_label for additional information).  */
2720
2721       /* There's no reason for current users to emit jump-insns with
2722          such a LABEL_REF, so we don't have to handle REG_LABEL_TARGET
2723          notes.  */
2724       gcc_assert (!JUMP_P (insn));
2725       add_reg_note (insn, REG_LABEL_OPERAND, XEXP (x, 0));
2726
2727       if (LABEL_P (XEXP (x, 0)))
2728         LABEL_NUSES (XEXP (x, 0))++;
2729
2730       return;
2731     }
2732
2733   for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
2734     {
2735       if (fmt[i] == 'e')
2736         add_label_notes (XEXP (x, i), insn);
2737       else if (fmt[i] == 'E')
2738         for (j = XVECLEN (x, i) - 1; j >= 0; j--)
2739           add_label_notes (XVECEXP (x, i, j), insn);
2740     }
2741 }
2742
2743 /* Code Hoisting variables and subroutines.  */
2744
2745 /* Very busy expressions.  */
2746 static sbitmap *hoist_vbein;
2747 static sbitmap *hoist_vbeout;
2748
2749 /* ??? We could compute post dominators and run this algorithm in
2750    reverse to perform tail merging, doing so would probably be
2751    more effective than the tail merging code in jump.c.
2752
2753    It's unclear if tail merging could be run in parallel with
2754    code hoisting.  It would be nice.  */
2755
2756 /* Allocate vars used for code hoisting analysis.  */
2757
2758 static void
2759 alloc_code_hoist_mem (int n_blocks, int n_exprs)
2760 {
2761   antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
2762   transp = sbitmap_vector_alloc (n_blocks, n_exprs);
2763   comp = sbitmap_vector_alloc (n_blocks, n_exprs);
2764
2765   hoist_vbein = sbitmap_vector_alloc (n_blocks, n_exprs);
2766   hoist_vbeout = sbitmap_vector_alloc (n_blocks, n_exprs);
2767 }
2768
2769 /* Free vars used for code hoisting analysis.  */
2770
2771 static void
2772 free_code_hoist_mem (void)
2773 {
2774   sbitmap_vector_free (antloc);
2775   sbitmap_vector_free (transp);
2776   sbitmap_vector_free (comp);
2777
2778   sbitmap_vector_free (hoist_vbein);
2779   sbitmap_vector_free (hoist_vbeout);
2780
2781   free_dominance_info (CDI_DOMINATORS);
2782 }
2783
2784 /* Compute the very busy expressions at entry/exit from each block.
2785
2786    An expression is very busy if all paths from a given point
2787    compute the expression.  */
2788
2789 static void
2790 compute_code_hoist_vbeinout (void)
2791 {
2792   int changed, passes;
2793   basic_block bb;
2794
2795   sbitmap_vector_zero (hoist_vbeout, last_basic_block);
2796   sbitmap_vector_zero (hoist_vbein, last_basic_block);
2797
2798   passes = 0;
2799   changed = 1;
2800
2801   while (changed)
2802     {
2803       changed = 0;
2804
2805       /* We scan the blocks in the reverse order to speed up
2806          the convergence.  */
2807       FOR_EACH_BB_REVERSE (bb)
2808         {
2809           if (bb->next_bb != EXIT_BLOCK_PTR)
2810             {
2811               sbitmap_intersection_of_succs (hoist_vbeout[bb->index],
2812                                              hoist_vbein, bb->index);
2813
2814               /* Include expressions in VBEout that are calculated
2815                  in BB and available at its end.  */
2816               sbitmap_a_or_b (hoist_vbeout[bb->index],
2817                               hoist_vbeout[bb->index], comp[bb->index]);
2818             }
2819
2820           changed |= sbitmap_a_or_b_and_c_cg (hoist_vbein[bb->index],
2821                                               antloc[bb->index],
2822                                               hoist_vbeout[bb->index],
2823                                               transp[bb->index]);
2824         }
2825
2826       passes++;
2827     }
2828
2829   if (dump_file)
2830     {
2831       fprintf (dump_file, "hoisting vbeinout computation: %d passes\n", passes);
2832
2833       FOR_EACH_BB (bb)
2834         {
2835           fprintf (dump_file, "vbein (%d): ", bb->index);
2836           dump_sbitmap_file (dump_file, hoist_vbein[bb->index]);
2837           fprintf (dump_file, "vbeout(%d): ", bb->index);
2838           dump_sbitmap_file (dump_file, hoist_vbeout[bb->index]);
2839         }
2840     }
2841 }
2842
2843 /* Top level routine to do the dataflow analysis needed by code hoisting.  */
2844
2845 static void
2846 compute_code_hoist_data (void)
2847 {
2848   compute_local_properties (transp, comp, antloc, &expr_hash_table);
2849   prune_expressions (false);
2850   compute_code_hoist_vbeinout ();
2851   calculate_dominance_info (CDI_DOMINATORS);
2852   if (dump_file)
2853     fprintf (dump_file, "\n");
2854 }
2855
2856 /* Determine if the expression identified by EXPR_INDEX would
2857    reach BB unimpared if it was placed at the end of EXPR_BB.
2858    Stop the search if the expression would need to be moved more
2859    than DISTANCE instructions.
2860
2861    It's unclear exactly what Muchnick meant by "unimpared".  It seems
2862    to me that the expression must either be computed or transparent in
2863    *every* block in the path(s) from EXPR_BB to BB.  Any other definition
2864    would allow the expression to be hoisted out of loops, even if
2865    the expression wasn't a loop invariant.
2866
2867    Contrast this to reachability for PRE where an expression is
2868    considered reachable if *any* path reaches instead of *all*
2869    paths.  */
2870
2871 static int
2872 hoist_expr_reaches_here_p (basic_block expr_bb, int expr_index, basic_block bb,
2873                            char *visited, int distance, int *bb_size)
2874 {
2875   edge pred;
2876   edge_iterator ei;
2877   int visited_allocated_locally = 0;
2878
2879   /* Terminate the search if distance, for which EXPR is allowed to move,
2880      is exhausted.  */
2881   if (distance > 0)
2882     {
2883       distance -= bb_size[bb->index];
2884
2885       if (distance <= 0)
2886         return 0;
2887     }
2888   else
2889     gcc_assert (distance == 0);
2890
2891   if (visited == NULL)
2892     {
2893       visited_allocated_locally = 1;
2894       visited = XCNEWVEC (char, last_basic_block);
2895     }
2896
2897   FOR_EACH_EDGE (pred, ei, bb->preds)
2898     {
2899       basic_block pred_bb = pred->src;
2900
2901       if (pred->src == ENTRY_BLOCK_PTR)
2902         break;
2903       else if (pred_bb == expr_bb)
2904         continue;
2905       else if (visited[pred_bb->index])
2906         continue;
2907
2908       else if (! TEST_BIT (transp[pred_bb->index], expr_index))
2909         break;
2910
2911       /* Not killed.  */
2912       else
2913         {
2914           visited[pred_bb->index] = 1;
2915           if (! hoist_expr_reaches_here_p (expr_bb, expr_index, pred_bb,
2916                                            visited, distance, bb_size))
2917             break;
2918         }
2919     }
2920   if (visited_allocated_locally)
2921     free (visited);
2922
2923   return (pred == NULL);
2924 }
2925 \f
2926 /* Find occurence in BB.  */
2927 static struct occr *
2928 find_occr_in_bb (struct occr *occr, basic_block bb)
2929 {
2930   /* Find the right occurrence of this expression.  */
2931   while (occr && BLOCK_FOR_INSN (occr->insn) != bb)
2932     occr = occr->next;
2933
2934   return occr;
2935 }
2936
2937 /* Actually perform code hoisting.  */
2938
2939 static int
2940 hoist_code (void)
2941 {
2942   basic_block bb, dominated;
2943   VEC (basic_block, heap) *dom_tree_walk;
2944   unsigned int dom_tree_walk_index;
2945   VEC (basic_block, heap) *domby;
2946   unsigned int i,j;
2947   struct expr **index_map;
2948   struct expr *expr;
2949   int *to_bb_head;
2950   int *bb_size;
2951   int changed = 0;
2952
2953   /* Compute a mapping from expression number (`bitmap_index') to
2954      hash table entry.  */
2955
2956   index_map = XCNEWVEC (struct expr *, expr_hash_table.n_elems);
2957   for (i = 0; i < expr_hash_table.size; i++)
2958     for (expr = expr_hash_table.table[i]; expr != NULL; expr = expr->next_same_hash)
2959       index_map[expr->bitmap_index] = expr;
2960
2961   /* Calculate sizes of basic blocks and note how far
2962      each instruction is from the start of its block.  We then use this
2963      data to restrict distance an expression can travel.  */
2964
2965   to_bb_head = XCNEWVEC (int, get_max_uid ());
2966   bb_size = XCNEWVEC (int, last_basic_block);
2967
2968   FOR_EACH_BB (bb)
2969     {
2970       rtx insn;
2971       int to_head;
2972
2973       to_head = 0;
2974       FOR_BB_INSNS (bb, insn)
2975         {
2976           /* Don't count debug instructions to avoid them affecting
2977              decision choices.  */
2978           if (NONDEBUG_INSN_P (insn))
2979             to_bb_head[INSN_UID (insn)] = to_head++;
2980         }
2981
2982       bb_size[bb->index] = to_head;
2983     }
2984
2985   gcc_assert (EDGE_COUNT (ENTRY_BLOCK_PTR->succs) == 1
2986               && (EDGE_SUCC (ENTRY_BLOCK_PTR, 0)->dest
2987                   == ENTRY_BLOCK_PTR->next_bb));
2988
2989   dom_tree_walk = get_all_dominated_blocks (CDI_DOMINATORS,
2990                                             ENTRY_BLOCK_PTR->next_bb);
2991
2992   /* Walk over each basic block looking for potentially hoistable
2993      expressions, nothing gets hoisted from the entry block.  */
2994   FOR_EACH_VEC_ELT (basic_block, dom_tree_walk, dom_tree_walk_index, bb)
2995     {
2996       domby = get_dominated_to_depth (CDI_DOMINATORS, bb, MAX_HOIST_DEPTH);
2997
2998       if (VEC_length (basic_block, domby) == 0)
2999         continue;
3000
3001       /* Examine each expression that is very busy at the exit of this
3002          block.  These are the potentially hoistable expressions.  */
3003       for (i = 0; i < hoist_vbeout[bb->index]->n_bits; i++)
3004         {
3005           if (TEST_BIT (hoist_vbeout[bb->index], i))
3006             {
3007               /* Current expression.  */
3008               struct expr *expr = index_map[i];
3009               /* Number of occurences of EXPR that can be hoisted to BB.  */
3010               int hoistable = 0;
3011               /* Basic blocks that have occurences reachable from BB.  */
3012               bitmap_head _from_bbs, *from_bbs = &_from_bbs;
3013               /* Occurences reachable from BB.  */
3014               VEC (occr_t, heap) *occrs_to_hoist = NULL;
3015               /* We want to insert the expression into BB only once, so
3016                  note when we've inserted it.  */
3017               int insn_inserted_p;
3018               occr_t occr;
3019
3020               bitmap_initialize (from_bbs, 0);
3021
3022               /* If an expression is computed in BB and is available at end of
3023                  BB, hoist all occurences dominated by BB to BB.  */
3024               if (TEST_BIT (comp[bb->index], i))
3025                 {
3026                   occr = find_occr_in_bb (expr->antic_occr, bb);
3027
3028                   if (occr)
3029                     {
3030                       /* An occurence might've been already deleted
3031                          while processing a dominator of BB.  */
3032                       if (!occr->deleted_p)
3033                         {
3034                           gcc_assert (NONDEBUG_INSN_P (occr->insn));
3035                           hoistable++;
3036                         }
3037                     }
3038                   else
3039                     hoistable++;
3040                 }
3041
3042               /* We've found a potentially hoistable expression, now
3043                  we look at every block BB dominates to see if it
3044                  computes the expression.  */
3045               FOR_EACH_VEC_ELT (basic_block, domby, j, dominated)
3046                 {
3047                   int max_distance;
3048
3049                   /* Ignore self dominance.  */
3050                   if (bb == dominated)
3051                     continue;
3052                   /* We've found a dominated block, now see if it computes
3053                      the busy expression and whether or not moving that
3054                      expression to the "beginning" of that block is safe.  */
3055                   if (!TEST_BIT (antloc[dominated->index], i))
3056                     continue;
3057
3058                   occr = find_occr_in_bb (expr->antic_occr, dominated);
3059                   gcc_assert (occr);
3060
3061                   /* An occurence might've been already deleted
3062                      while processing a dominator of BB.  */
3063                   if (occr->deleted_p)
3064                     continue;
3065                   gcc_assert (NONDEBUG_INSN_P (occr->insn));
3066
3067                   max_distance = expr->max_distance;
3068                   if (max_distance > 0)
3069                     /* Adjust MAX_DISTANCE to account for the fact that
3070                        OCCR won't have to travel all of DOMINATED, but
3071                        only part of it.  */
3072                     max_distance += (bb_size[dominated->index]
3073                                      - to_bb_head[INSN_UID (occr->insn)]);
3074
3075                   /* Note if the expression would reach the dominated block
3076                      unimpared if it was placed at the end of BB.
3077
3078                      Keep track of how many times this expression is hoistable
3079                      from a dominated block into BB.  */
3080                   if (hoist_expr_reaches_here_p (bb, i, dominated, NULL,
3081                                                  max_distance, bb_size))
3082                     {
3083                       hoistable++;
3084                       VEC_safe_push (occr_t, heap,
3085                                      occrs_to_hoist, occr);
3086                       bitmap_set_bit (from_bbs, dominated->index);
3087                     }
3088                 }
3089
3090               /* If we found more than one hoistable occurrence of this
3091                  expression, then note it in the vector of expressions to
3092                  hoist.  It makes no sense to hoist things which are computed
3093                  in only one BB, and doing so tends to pessimize register
3094                  allocation.  One could increase this value to try harder
3095                  to avoid any possible code expansion due to register
3096                  allocation issues; however experiments have shown that
3097                  the vast majority of hoistable expressions are only movable
3098                  from two successors, so raising this threshold is likely
3099                  to nullify any benefit we get from code hoisting.  */
3100               if (hoistable > 1 && dbg_cnt (hoist_insn))
3101                 {
3102                   /* If (hoistable != VEC_length), then there is
3103                      an occurence of EXPR in BB itself.  Don't waste
3104                      time looking for LCA in this case.  */
3105                   if ((unsigned) hoistable
3106                       == VEC_length (occr_t, occrs_to_hoist))
3107                     {
3108                       basic_block lca;
3109
3110                       lca = nearest_common_dominator_for_set (CDI_DOMINATORS,
3111                                                               from_bbs);
3112                       if (lca != bb)
3113                         /* Punt, it's better to hoist these occurences to
3114                            LCA.  */
3115                         VEC_free (occr_t, heap, occrs_to_hoist);
3116                     }
3117                 }
3118               else
3119                 /* Punt, no point hoisting a single occurence.  */
3120                 VEC_free (occr_t, heap, occrs_to_hoist);
3121
3122               insn_inserted_p = 0;
3123
3124               /* Walk through occurences of I'th expressions we want
3125                  to hoist to BB and make the transformations.  */
3126               FOR_EACH_VEC_ELT (occr_t, occrs_to_hoist, j, occr)
3127                 {
3128                   rtx insn;
3129                   rtx set;
3130
3131                   gcc_assert (!occr->deleted_p);
3132
3133                   insn = occr->insn;
3134                   set = single_set (insn);
3135                   gcc_assert (set);
3136
3137                   /* Create a pseudo-reg to store the result of reaching
3138                      expressions into.  Get the mode for the new pseudo
3139                      from the mode of the original destination pseudo.
3140
3141                      It is important to use new pseudos whenever we
3142                      emit a set.  This will allow reload to use
3143                      rematerialization for such registers.  */
3144                   if (!insn_inserted_p)
3145                     expr->reaching_reg
3146                       = gen_reg_rtx_and_attrs (SET_DEST (set));
3147
3148                   gcse_emit_move_after (expr->reaching_reg, SET_DEST (set),
3149                                         insn);
3150                   delete_insn (insn);
3151                   occr->deleted_p = 1;
3152                   changed = 1;
3153                   gcse_subst_count++;
3154
3155                   if (!insn_inserted_p)
3156                     {
3157                       insert_insn_end_basic_block (expr, bb);
3158                       insn_inserted_p = 1;
3159                     }
3160                 }
3161
3162               VEC_free (occr_t, heap, occrs_to_hoist);
3163               bitmap_clear (from_bbs);
3164             }
3165         }
3166       VEC_free (basic_block, heap, domby);
3167     }
3168
3169   VEC_free (basic_block, heap, dom_tree_walk);
3170   free (bb_size);
3171   free (to_bb_head);
3172   free (index_map);
3173
3174   return changed;
3175 }
3176
3177 /* Top level routine to perform one code hoisting (aka unification) pass
3178
3179    Return nonzero if a change was made.  */
3180
3181 static int
3182 one_code_hoisting_pass (void)
3183 {
3184   int changed = 0;
3185
3186   gcse_subst_count = 0;
3187   gcse_create_count = 0;
3188
3189   /* Return if there's nothing to do, or it is too expensive.  */
3190   if (n_basic_blocks <= NUM_FIXED_BLOCKS + 1
3191       || is_too_expensive (_("GCSE disabled")))
3192     return 0;
3193
3194   doing_code_hoisting_p = true;
3195
3196   /* We need alias.  */
3197   init_alias_analysis ();
3198
3199   bytes_used = 0;
3200   gcc_obstack_init (&gcse_obstack);
3201   alloc_gcse_mem ();
3202
3203   alloc_hash_table (&expr_hash_table);
3204   compute_hash_table (&expr_hash_table);
3205   if (dump_file)
3206     dump_hash_table (dump_file, "Code Hosting Expressions", &expr_hash_table);
3207
3208   if (expr_hash_table.n_elems > 0)
3209     {
3210       alloc_code_hoist_mem (last_basic_block, expr_hash_table.n_elems);
3211       compute_code_hoist_data ();
3212       changed = hoist_code ();
3213       free_code_hoist_mem ();
3214     }
3215
3216   free_hash_table (&expr_hash_table);
3217   free_gcse_mem ();
3218   obstack_free (&gcse_obstack, NULL);
3219
3220   /* We are finished with alias.  */
3221   end_alias_analysis ();
3222
3223   if (dump_file)
3224     {
3225       fprintf (dump_file, "HOIST of %s, %d basic blocks, %d bytes needed, ",
3226                current_function_name (), n_basic_blocks, bytes_used);
3227       fprintf (dump_file, "%d substs, %d insns created\n",
3228                gcse_subst_count, gcse_create_count);
3229     }
3230
3231   doing_code_hoisting_p = false;
3232
3233   return changed;
3234 }
3235 \f
3236 /*  Here we provide the things required to do store motion towards
3237     the exit. In order for this to be effective, gcse also needed to
3238     be taught how to move a load when it is kill only by a store to itself.
3239
3240             int i;
3241             float a[10];
3242
3243             void foo(float scale)
3244             {
3245               for (i=0; i<10; i++)
3246                 a[i] *= scale;
3247             }
3248
3249     'i' is both loaded and stored to in the loop. Normally, gcse cannot move
3250     the load out since its live around the loop, and stored at the bottom
3251     of the loop.
3252
3253       The 'Load Motion' referred to and implemented in this file is
3254     an enhancement to gcse which when using edge based lcm, recognizes
3255     this situation and allows gcse to move the load out of the loop.
3256
3257       Once gcse has hoisted the load, store motion can then push this
3258     load towards the exit, and we end up with no loads or stores of 'i'
3259     in the loop.  */
3260
3261 static hashval_t
3262 pre_ldst_expr_hash (const void *p)
3263 {
3264   int do_not_record_p = 0;
3265   const struct ls_expr *const x = (const struct ls_expr *) p;
3266   return hash_rtx (x->pattern, GET_MODE (x->pattern), &do_not_record_p, NULL, false);
3267 }
3268
3269 static int
3270 pre_ldst_expr_eq (const void *p1, const void *p2)
3271 {
3272   const struct ls_expr *const ptr1 = (const struct ls_expr *) p1,
3273     *const ptr2 = (const struct ls_expr *) p2;
3274   return expr_equiv_p (ptr1->pattern, ptr2->pattern);
3275 }
3276
3277 /* This will search the ldst list for a matching expression. If it
3278    doesn't find one, we create one and initialize it.  */
3279
3280 static struct ls_expr *
3281 ldst_entry (rtx x)
3282 {
3283   int do_not_record_p = 0;
3284   struct ls_expr * ptr;
3285   unsigned int hash;
3286   void **slot;
3287   struct ls_expr e;
3288
3289   hash = hash_rtx (x, GET_MODE (x), &do_not_record_p,
3290                    NULL,  /*have_reg_qty=*/false);
3291
3292   e.pattern = x;
3293   slot = htab_find_slot_with_hash (pre_ldst_table, &e, hash, INSERT);
3294   if (*slot)
3295     return (struct ls_expr *)*slot;
3296
3297   ptr = XNEW (struct ls_expr);
3298
3299   ptr->next         = pre_ldst_mems;
3300   ptr->expr         = NULL;
3301   ptr->pattern      = x;
3302   ptr->pattern_regs = NULL_RTX;
3303   ptr->loads        = NULL_RTX;
3304   ptr->stores       = NULL_RTX;
3305   ptr->reaching_reg = NULL_RTX;
3306   ptr->invalid      = 0;
3307   ptr->index        = 0;
3308   ptr->hash_index   = hash;
3309   pre_ldst_mems     = ptr;
3310   *slot = ptr;
3311
3312   return ptr;
3313 }
3314
3315 /* Free up an individual ldst entry.  */
3316
3317 static void
3318 free_ldst_entry (struct ls_expr * ptr)
3319 {
3320   free_INSN_LIST_list (& ptr->loads);
3321   free_INSN_LIST_list (& ptr->stores);
3322
3323   free (ptr);
3324 }
3325
3326 /* Free up all memory associated with the ldst list.  */
3327
3328 static void
3329 free_ldst_mems (void)
3330 {
3331   if (pre_ldst_table)
3332     htab_delete (pre_ldst_table);
3333   pre_ldst_table = NULL;
3334
3335   while (pre_ldst_mems)
3336     {
3337       struct ls_expr * tmp = pre_ldst_mems;
3338
3339       pre_ldst_mems = pre_ldst_mems->next;
3340
3341       free_ldst_entry (tmp);
3342     }
3343
3344   pre_ldst_mems = NULL;
3345 }
3346
3347 /* Dump debugging info about the ldst list.  */
3348
3349 static void
3350 print_ldst_list (FILE * file)
3351 {
3352   struct ls_expr * ptr;
3353
3354   fprintf (file, "LDST list: \n");
3355
3356   for (ptr = first_ls_expr (); ptr != NULL; ptr = next_ls_expr (ptr))
3357     {
3358       fprintf (file, "  Pattern (%3d): ", ptr->index);
3359
3360       print_rtl (file, ptr->pattern);
3361
3362       fprintf (file, "\n         Loads : ");
3363
3364       if (ptr->loads)
3365         print_rtl (file, ptr->loads);
3366       else
3367         fprintf (file, "(nil)");
3368
3369       fprintf (file, "\n        Stores : ");
3370
3371       if (ptr->stores)
3372         print_rtl (file, ptr->stores);
3373       else
3374         fprintf (file, "(nil)");
3375
3376       fprintf (file, "\n\n");
3377     }
3378
3379   fprintf (file, "\n");
3380 }
3381
3382 /* Returns 1 if X is in the list of ldst only expressions.  */
3383
3384 static struct ls_expr *
3385 find_rtx_in_ldst (rtx x)
3386 {
3387   struct ls_expr e;
3388   void **slot;
3389   if (!pre_ldst_table)
3390     return NULL;
3391   e.pattern = x;
3392   slot = htab_find_slot (pre_ldst_table, &e, NO_INSERT);
3393   if (!slot || ((struct ls_expr *)*slot)->invalid)
3394     return NULL;
3395   return (struct ls_expr *) *slot;
3396 }
3397
3398 /* Return first item in the list.  */
3399
3400 static inline struct ls_expr *
3401 first_ls_expr (void)
3402 {
3403   return pre_ldst_mems;
3404 }
3405
3406 /* Return the next item in the list after the specified one.  */
3407
3408 static inline struct ls_expr *
3409 next_ls_expr (struct ls_expr * ptr)
3410 {
3411   return ptr->next;
3412 }
3413 \f
3414 /* Load Motion for loads which only kill themselves.  */
3415
3416 /* Return true if x is a simple MEM operation, with no registers or
3417    side effects. These are the types of loads we consider for the
3418    ld_motion list, otherwise we let the usual aliasing take care of it.  */
3419
3420 static int
3421 simple_mem (const_rtx x)
3422 {
3423   if (! MEM_P (x))
3424     return 0;
3425
3426   if (MEM_VOLATILE_P (x))
3427     return 0;
3428
3429   if (GET_MODE (x) == BLKmode)
3430     return 0;
3431
3432   /* If we are handling exceptions, we must be careful with memory references
3433      that may trap.  If we are not, the behavior is undefined, so we may just
3434      continue.  */
3435   if (cfun->can_throw_non_call_exceptions && may_trap_p (x))
3436     return 0;
3437
3438   if (side_effects_p (x))
3439     return 0;
3440
3441   /* Do not consider function arguments passed on stack.  */
3442   if (reg_mentioned_p (stack_pointer_rtx, x))
3443     return 0;
3444
3445   if (flag_float_store && FLOAT_MODE_P (GET_MODE (x)))
3446     return 0;
3447
3448   return 1;
3449 }
3450
3451 /* Make sure there isn't a buried reference in this pattern anywhere.
3452    If there is, invalidate the entry for it since we're not capable
3453    of fixing it up just yet.. We have to be sure we know about ALL
3454    loads since the aliasing code will allow all entries in the
3455    ld_motion list to not-alias itself.  If we miss a load, we will get
3456    the wrong value since gcse might common it and we won't know to
3457    fix it up.  */
3458
3459 static void
3460 invalidate_any_buried_refs (rtx x)
3461 {
3462   const char * fmt;
3463   int i, j;
3464   struct ls_expr * ptr;
3465
3466   /* Invalidate it in the list.  */
3467   if (MEM_P (x) && simple_mem (x))
3468     {
3469       ptr = ldst_entry (x);
3470       ptr->invalid = 1;
3471     }
3472
3473   /* Recursively process the insn.  */
3474   fmt = GET_RTX_FORMAT (GET_CODE (x));
3475
3476   for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
3477     {
3478       if (fmt[i] == 'e')
3479         invalidate_any_buried_refs (XEXP (x, i));
3480       else if (fmt[i] == 'E')
3481         for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3482           invalidate_any_buried_refs (XVECEXP (x, i, j));
3483     }
3484 }
3485
3486 /* Find all the 'simple' MEMs which are used in LOADs and STORES.  Simple
3487    being defined as MEM loads and stores to symbols, with no side effects
3488    and no registers in the expression.  For a MEM destination, we also
3489    check that the insn is still valid if we replace the destination with a
3490    REG, as is done in update_ld_motion_stores.  If there are any uses/defs
3491    which don't match this criteria, they are invalidated and trimmed out
3492    later.  */
3493
3494 static void
3495 compute_ld_motion_mems (void)
3496 {
3497   struct ls_expr * ptr;
3498   basic_block bb;
3499   rtx insn;
3500
3501   pre_ldst_mems = NULL;
3502   pre_ldst_table = htab_create (13, pre_ldst_expr_hash,
3503                                 pre_ldst_expr_eq, NULL);
3504
3505   FOR_EACH_BB (bb)
3506     {
3507       FOR_BB_INSNS (bb, insn)
3508         {
3509           if (NONDEBUG_INSN_P (insn))
3510             {
3511               if (GET_CODE (PATTERN (insn)) == SET)
3512                 {
3513                   rtx src = SET_SRC (PATTERN (insn));
3514                   rtx dest = SET_DEST (PATTERN (insn));
3515
3516                   /* Check for a simple LOAD...  */
3517                   if (MEM_P (src) && simple_mem (src))
3518                     {
3519                       ptr = ldst_entry (src);
3520                       if (REG_P (dest))
3521                         ptr->loads = alloc_INSN_LIST (insn, ptr->loads);
3522                       else
3523                         ptr->invalid = 1;
3524                     }
3525                   else
3526                     {
3527                       /* Make sure there isn't a buried load somewhere.  */
3528                       invalidate_any_buried_refs (src);
3529                     }
3530
3531                   /* Check for stores. Don't worry about aliased ones, they
3532                      will block any movement we might do later. We only care
3533                      about this exact pattern since those are the only
3534                      circumstance that we will ignore the aliasing info.  */
3535                   if (MEM_P (dest) && simple_mem (dest))
3536                     {
3537                       ptr = ldst_entry (dest);
3538
3539                       if (! MEM_P (src)
3540                           && GET_CODE (src) != ASM_OPERANDS
3541                           /* Check for REG manually since want_to_gcse_p
3542                              returns 0 for all REGs.  */
3543                           && can_assign_to_reg_without_clobbers_p (src))
3544                         ptr->stores = alloc_INSN_LIST (insn, ptr->stores);
3545                       else
3546                         ptr->invalid = 1;
3547                     }
3548                 }
3549               else
3550                 invalidate_any_buried_refs (PATTERN (insn));
3551             }
3552         }
3553     }
3554 }
3555
3556 /* Remove any references that have been either invalidated or are not in the
3557    expression list for pre gcse.  */
3558
3559 static void
3560 trim_ld_motion_mems (void)
3561 {
3562   struct ls_expr * * last = & pre_ldst_mems;
3563   struct ls_expr * ptr = pre_ldst_mems;
3564
3565   while (ptr != NULL)
3566     {
3567       struct expr * expr;
3568
3569       /* Delete if entry has been made invalid.  */
3570       if (! ptr->invalid)
3571         {
3572           /* Delete if we cannot find this mem in the expression list.  */
3573           unsigned int hash = ptr->hash_index % expr_hash_table.size;
3574
3575           for (expr = expr_hash_table.table[hash];
3576                expr != NULL;
3577                expr = expr->next_same_hash)
3578             if (expr_equiv_p (expr->expr, ptr->pattern))
3579               break;
3580         }
3581       else
3582         expr = (struct expr *) 0;
3583
3584       if (expr)
3585         {
3586           /* Set the expression field if we are keeping it.  */
3587           ptr->expr = expr;
3588           last = & ptr->next;
3589           ptr = ptr->next;
3590         }
3591       else
3592         {
3593           *last = ptr->next;
3594           htab_remove_elt_with_hash (pre_ldst_table, ptr, ptr->hash_index);
3595           free_ldst_entry (ptr);
3596           ptr = * last;
3597         }
3598     }
3599
3600   /* Show the world what we've found.  */
3601   if (dump_file && pre_ldst_mems != NULL)
3602     print_ldst_list (dump_file);
3603 }
3604
3605 /* This routine will take an expression which we are replacing with
3606    a reaching register, and update any stores that are needed if
3607    that expression is in the ld_motion list.  Stores are updated by
3608    copying their SRC to the reaching register, and then storing
3609    the reaching register into the store location. These keeps the
3610    correct value in the reaching register for the loads.  */
3611
3612 static void
3613 update_ld_motion_stores (struct expr * expr)
3614 {
3615   struct ls_expr * mem_ptr;
3616
3617   if ((mem_ptr = find_rtx_in_ldst (expr->expr)))
3618     {
3619       /* We can try to find just the REACHED stores, but is shouldn't
3620          matter to set the reaching reg everywhere...  some might be
3621          dead and should be eliminated later.  */
3622
3623       /* We replace (set mem expr) with (set reg expr) (set mem reg)
3624          where reg is the reaching reg used in the load.  We checked in
3625          compute_ld_motion_mems that we can replace (set mem expr) with
3626          (set reg expr) in that insn.  */
3627       rtx list = mem_ptr->stores;
3628
3629       for ( ; list != NULL_RTX; list = XEXP (list, 1))
3630         {
3631           rtx insn = XEXP (list, 0);
3632           rtx pat = PATTERN (insn);
3633           rtx src = SET_SRC (pat);
3634           rtx reg = expr->reaching_reg;
3635           rtx copy;
3636
3637           /* If we've already copied it, continue.  */
3638           if (expr->reaching_reg == src)
3639             continue;
3640
3641           if (dump_file)
3642             {
3643               fprintf (dump_file, "PRE:  store updated with reaching reg ");
3644               print_rtl (dump_file, expr->reaching_reg);
3645               fprintf (dump_file, ":\n  ");
3646               print_inline_rtx (dump_file, insn, 8);
3647               fprintf (dump_file, "\n");
3648             }
3649
3650           copy = gen_move_insn (reg, copy_rtx (SET_SRC (pat)));
3651           emit_insn_before (copy, insn);
3652           SET_SRC (pat) = reg;
3653           df_insn_rescan (insn);
3654
3655           /* un-recognize this pattern since it's probably different now.  */
3656           INSN_CODE (insn) = -1;
3657           gcse_create_count++;
3658         }
3659     }
3660 }
3661 \f
3662 /* Return true if the graph is too expensive to optimize. PASS is the
3663    optimization about to be performed.  */
3664
3665 static bool
3666 is_too_expensive (const char *pass)
3667 {
3668   /* Trying to perform global optimizations on flow graphs which have
3669      a high connectivity will take a long time and is unlikely to be
3670      particularly useful.
3671
3672      In normal circumstances a cfg should have about twice as many
3673      edges as blocks.  But we do not want to punish small functions
3674      which have a couple switch statements.  Rather than simply
3675      threshold the number of blocks, uses something with a more
3676      graceful degradation.  */
3677   if (n_edges > 20000 + n_basic_blocks * 4)
3678     {
3679       warning (OPT_Wdisabled_optimization,
3680                "%s: %d basic blocks and %d edges/basic block",
3681                pass, n_basic_blocks, n_edges / n_basic_blocks);
3682
3683       return true;
3684     }
3685
3686   /* If allocating memory for the dataflow bitmaps would take up too much
3687      storage it's better just to disable the optimization.  */
3688   if ((n_basic_blocks
3689        * SBITMAP_SET_SIZE (max_reg_num ())
3690        * sizeof (SBITMAP_ELT_TYPE)) > MAX_GCSE_MEMORY)
3691     {
3692       warning (OPT_Wdisabled_optimization,
3693                "%s: %d basic blocks and %d registers",
3694                pass, n_basic_blocks, max_reg_num ());
3695
3696       return true;
3697     }
3698
3699   return false;
3700 }
3701
3702 \f
3703 /* All the passes implemented in this file.  Each pass has its
3704    own gate and execute function, and at the end of the file a
3705    pass definition for passes.c.
3706
3707    We do not construct an accurate cfg in functions which call
3708    setjmp, so none of these passes runs if the function calls
3709    setjmp.
3710    FIXME: Should just handle setjmp via REG_SETJMP notes.  */
3711
3712 static bool
3713 gate_rtl_pre (void)
3714 {
3715   return optimize > 0 && flag_gcse
3716     && !cfun->calls_setjmp
3717     && optimize_function_for_speed_p (cfun)
3718     && dbg_cnt (pre);
3719 }
3720
3721 static unsigned int
3722 execute_rtl_pre (void)
3723 {
3724   int changed;
3725   delete_unreachable_blocks ();
3726   df_analyze ();
3727   changed = one_pre_gcse_pass ();
3728   flag_rerun_cse_after_global_opts |= changed;
3729   if (changed)
3730     cleanup_cfg (0);
3731   return 0;
3732 }
3733
3734 static bool
3735 gate_rtl_hoist (void)
3736 {
3737   return optimize > 0 && flag_gcse
3738     && !cfun->calls_setjmp
3739     /* It does not make sense to run code hoisting unless we are optimizing
3740        for code size -- it rarely makes programs faster, and can make then
3741        bigger if we did PRE (when optimizing for space, we don't run PRE).  */
3742     && optimize_function_for_size_p (cfun)
3743     && dbg_cnt (hoist);
3744 }
3745
3746 static unsigned int
3747 execute_rtl_hoist (void)
3748 {
3749   int changed;
3750   delete_unreachable_blocks ();
3751   df_analyze ();
3752   changed = one_code_hoisting_pass ();
3753   flag_rerun_cse_after_global_opts |= changed;
3754   if (changed)
3755     cleanup_cfg (0);
3756   return 0;
3757 }
3758
3759 struct rtl_opt_pass pass_rtl_pre =
3760 {
3761  {
3762   RTL_PASS,
3763   "rtl pre",                            /* name */
3764   gate_rtl_pre,                         /* gate */
3765   execute_rtl_pre,                      /* execute */
3766   NULL,                                 /* sub */
3767   NULL,                                 /* next */
3768   0,                                    /* static_pass_number */
3769   TV_PRE,                               /* tv_id */
3770   PROP_cfglayout,                       /* properties_required */
3771   0,                                    /* properties_provided */
3772   0,                                    /* properties_destroyed */
3773   0,                                    /* todo_flags_start */
3774   TODO_df_finish | TODO_verify_rtl_sharing |
3775   TODO_dump_func |
3776   TODO_verify_flow | TODO_ggc_collect   /* todo_flags_finish */
3777  }
3778 };
3779
3780 struct rtl_opt_pass pass_rtl_hoist =
3781 {
3782  {
3783   RTL_PASS,
3784   "hoist",                              /* name */
3785   gate_rtl_hoist,                       /* gate */
3786   execute_rtl_hoist,                    /* execute */
3787   NULL,                                 /* sub */
3788   NULL,                                 /* next */
3789   0,                                    /* static_pass_number */
3790   TV_HOIST,                             /* tv_id */
3791   PROP_cfglayout,                       /* properties_required */
3792   0,                                    /* properties_provided */
3793   0,                                    /* properties_destroyed */
3794   0,                                    /* todo_flags_start */
3795   TODO_df_finish | TODO_verify_rtl_sharing |
3796   TODO_dump_func |
3797   TODO_verify_flow | TODO_ggc_collect   /* todo_flags_finish */
3798  }
3799 };
3800
3801 #include "gt-gcse.h"
3802