gcc/tree-ssa-threadupdate.c

   1 /* Thread edges through blocks and update the control flow and SSA graphs.
   2    Copyright (C) 2004, 2005, 2006, 2007, 2008 Free Software Foundation,
   3    Inc.
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 3, or (at your option)
  10 any later version.
  11
  12 GCC is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21 #include "config.h"
  22 #include "system.h"
  23 #include "coretypes.h"
  24 #include "tm.h"
  25 #include "tree.h"
  26 #include "flags.h"
  27 #include "tm_p.h"
  28 #include "basic-block.h"
  29 #include "output.h"
  30 #include "expr.h"
  31 #include "function.h"
  32 #include "diagnostic.h"
  33 #include "tree-flow.h"
  34 #include "tree-dump.h"
  35 #include "tree-pass.h"
  36 #include "cfgloop.h"
  37
  38 /* Given a block B, update the CFG and SSA graph to reflect redirecting
  39    one or more in-edges to B to instead reach the destination of an
  40    out-edge from B while preserving any side effects in B.
  41
  42    i.e., given A->B and B->C, change A->B to be A->C yet still preserve the
  43    side effects of executing B.
  44
  45      1. Make a copy of B (including its outgoing edges and statements).  Call
  46         the copy B'.  Note B' has no incoming edges or PHIs at this time.
  47
  48      2. Remove the control statement at the end of B' and all outgoing edges
  49         except B'->C.
  50
  51      3. Add a new argument to each PHI in C with the same value as the existing
  52         argument associated with edge B->C.  Associate the new PHI arguments
  53         with the edge B'->C.
  54
  55      4. For each PHI in B, find or create a PHI in B' with an identical
  56         PHI_RESULT.  Add an argument to the PHI in B' which has the same
  57         value as the PHI in B associated with the edge A->B.  Associate
  58         the new argument in the PHI in B' with the edge A->B.
  59
  60      5. Change the edge A->B to A->B'.
  61
  62         5a. This automatically deletes any PHI arguments associated with the
  63             edge A->B in B.
  64
  65         5b. This automatically associates each new argument added in step 4
  66             with the edge A->B'.
  67
  68      6. Repeat for other incoming edges into B.
  69
  70      7. Put the duplicated resources in B and all the B' blocks into SSA form.
  71
  72    Note that block duplication can be minimized by first collecting the
  73    set of unique destination blocks that the incoming edges should
  74    be threaded to.  Block duplication can be further minimized by using
  75    B instead of creating B' for one destination if all edges into B are
  76    going to be threaded to a successor of B.
  77
  78    We further reduce the number of edges and statements we create by
  79    not copying all the outgoing edges and the control statement in
  80    step #1.  We instead create a template block without the outgoing
  81    edges and duplicate the template.  */
  82
  83
  84 /* Steps #5 and #6 of the above algorithm are best implemented by walking
  85    all the incoming edges which thread to the same destination edge at
  86    the same time.  That avoids lots of table lookups to get information
  87    for the destination edge.
  88
  89    To realize that implementation we create a list of incoming edges
  90    which thread to the same outgoing edge.  Thus to implement steps
  91    #5 and #6 we traverse our hash table of outgoing edge information.
  92    For each entry we walk the list of incoming edges which thread to
  93    the current outgoing edge.  */
  94
  95 struct el
  96 {
  97   edge e;
  98   struct el *next;
  99 };
 100
 101 /* Main data structure recording information regarding B's duplicate
 102    blocks.  */
 103
 104 /* We need to efficiently record the unique thread destinations of this
 105    block and specific information associated with those destinations.  We
 106    may have many incoming edges threaded to the same outgoing edge.  This
 107    can be naturally implemented with a hash table.  */
 108
 109 struct redirection_data
 110 {
 111   /* A duplicate of B with the trailing control statement removed and which
 112      targets a single successor of B.  */
 113   basic_block dup_block;
 114
 115   /* An outgoing edge from B.  DUP_BLOCK will have OUTGOING_EDGE->dest as
 116      its single successor.  */
 117   edge outgoing_edge;
 118
 119   /* A list of incoming edges which we want to thread to
 120      OUTGOING_EDGE->dest.  */
 121   struct el *incoming_edges;
 122
 123   /* Flag indicating whether or not we should create a duplicate block
 124      for this thread destination.  This is only true if we are threading
 125      all incoming edges and thus are using BB itself as a duplicate block.  */
 126   bool do_not_duplicate;
 127 };
 128
 129 /* Main data structure to hold information for duplicates of BB.  */
 130 static htab_t redirection_data;
 131
 132 /* Data structure of information to pass to hash table traversal routines.  */
 133 struct local_info
 134 {
 135   /* The current block we are working on.  */
 136   basic_block bb;
 137
 138   /* A template copy of BB with no outgoing edges or control statement that
 139      we use for creating copies.  */
 140   basic_block template_block;
 141
 142   /* TRUE if we thread one or more jumps, FALSE otherwise.  */
 143   bool jumps_threaded;
 144 };
 145
 146 /* Passes which use the jump threading code register jump threading
 147    opportunities as they are discovered.  We keep the registered
 148    jump threading opportunities in this vector as edge pairs
 149    (original_edge, target_edge).  */
 150 static VEC(edge,heap) *threaded_edges;
 151
 152
 153 /* Jump threading statistics.  */
 154
 155 struct thread_stats_d
 156 {
 157   unsigned long num_threaded_edges;
 158 };
 159
 160 struct thread_stats_d thread_stats;
 161
 162
 163 /* Remove the last statement in block BB if it is a control statement
 164    Also remove all outgoing edges except the edge which reaches DEST_BB.
 165    If DEST_BB is NULL, then remove all outgoing edges.  */
 166
 167 static void
 168 remove_ctrl_stmt_and_useless_edges (basic_block bb, basic_block dest_bb)
 169 {
 170   gimple_stmt_iterator gsi;
 171   edge e;
 172   edge_iterator ei;
 173
 174   gsi = gsi_last_bb (bb);
 175
 176   /* If the duplicate ends with a control statement, then remove it.
 177
 178      Note that if we are duplicating the template block rather than the
 179      original basic block, then the duplicate might not have any real
 180      statements in it.  */
 181   if (!gsi_end_p (gsi)
 182       && gsi_stmt (gsi)
 183       && (gimple_code (gsi_stmt (gsi)) == GIMPLE_COND
 184           || gimple_code (gsi_stmt (gsi)) == GIMPLE_GOTO
 185           || gimple_code (gsi_stmt (gsi)) == GIMPLE_SWITCH))
 186     gsi_remove (&gsi, true);
 187
 188   for (ei = ei_start (bb->succs); (e = ei_safe_edge (ei)); )
 189     {
 190       if (e->dest != dest_bb)
 191         remove_edge (e);
 192       else
 193         ei_next (&ei);
 194     }
 195 }
 196
 197 /* Create a duplicate of BB which only reaches the destination of the edge
 198    stored in RD.  Record the duplicate block in RD.  */
 199
 200 static void
 201 create_block_for_threading (basic_block bb, struct redirection_data *rd)
 202 {
 203   /* We can use the generic block duplication code and simply remove
 204      the stuff we do not need.  */
 205   rd->dup_block = duplicate_block (bb, NULL, NULL);
 206
 207   /* Zero out the profile, since the block is unreachable for now.  */
 208   rd->dup_block->frequency = 0;
 209   rd->dup_block->count = 0;
 210
 211   /* The call to duplicate_block will copy everything, including the
 212      useless COND_EXPR or SWITCH_EXPR at the end of BB.  We just remove
 213      the useless COND_EXPR or SWITCH_EXPR here rather than having a
 214      specialized block copier.  We also remove all outgoing edges
 215      from the duplicate block.  The appropriate edge will be created
 216      later.  */
 217   remove_ctrl_stmt_and_useless_edges (rd->dup_block, NULL);
 218 }
 219
 220 /* Hashing and equality routines for our hash table.  */
 221 static hashval_t
 222 redirection_data_hash (const void *p)
 223 {
 224   edge e = ((const struct redirection_data *)p)->outgoing_edge;
 225   return e->dest->index;
 226 }
 227
 228 static int
 229 redirection_data_eq (const void *p1, const void *p2)
 230 {
 231   edge e1 = ((const struct redirection_data *)p1)->outgoing_edge;
 232   edge e2 = ((const struct redirection_data *)p2)->outgoing_edge;
 233
 234   return e1 == e2;
 235 }
 236
 237 /* Given an outgoing edge E lookup and return its entry in our hash table.
 238
 239    If INSERT is true, then we insert the entry into the hash table if
 240    it is not already present.  INCOMING_EDGE is added to the list of incoming
 241    edges associated with E in the hash table.  */
 242
 243 static struct redirection_data *
 244 lookup_redirection_data (edge e, edge incoming_edge, enum insert_option insert)
 245 {
 246   void **slot;
 247   struct redirection_data *elt;
 248
 249  /* Build a hash table element so we can see if E is already
 250      in the table.  */
 251   elt = XNEW (struct redirection_data);
 252   elt->outgoing_edge = e;
 253   elt->dup_block = NULL;
 254   elt->do_not_duplicate = false;
 255   elt->incoming_edges = NULL;
 256
 257   slot = htab_find_slot (redirection_data, elt, insert);
 258
 259   /* This will only happen if INSERT is false and the entry is not
 260      in the hash table.  */
 261   if (slot == NULL)
 262     {
 263       free (elt);
 264       return NULL;
 265     }
 266
 267   /* This will only happen if E was not in the hash table and
 268      INSERT is true.  */
 269   if (*slot == NULL)
 270     {
 271       *slot = (void *)elt;
 272       elt->incoming_edges = XNEW (struct el);
 273       elt->incoming_edges->e = incoming_edge;
 274       elt->incoming_edges->next = NULL;
 275       return elt;
 276     }
 277   /* E was in the hash table.  */
 278   else
 279     {
 280       /* Free ELT as we do not need it anymore, we will extract the
 281          relevant entry from the hash table itself.  */
 282       free (elt);
 283
 284       /* Get the entry stored in the hash table.  */
 285       elt = (struct redirection_data *) *slot;
 286
 287       /* If insertion was requested, then we need to add INCOMING_EDGE
 288          to the list of incoming edges associated with E.  */
 289       if (insert)
 290         {
 291           struct el *el = XNEW (struct el);
 292           el->next = elt->incoming_edges;
 293           el->e = incoming_edge;
 294           elt->incoming_edges = el;
 295         }
 296
 297       return elt;
 298     }
 299 }
 300
 301 /* Given a duplicate block and its single destination (both stored
 302    in RD).  Create an edge between the duplicate and its single
 303    destination.
 304
 305    Add an additional argument to any PHI nodes at the single
 306    destination.  */
 307
 308 static void
 309 create_edge_and_update_destination_phis (struct redirection_data *rd)
 310 {
 311   edge e = make_edge (rd->dup_block, rd->outgoing_edge->dest, EDGE_FALLTHRU);
 312   gimple_stmt_iterator gsi;
 313
 314   rescan_loop_exit (e, true, false);
 315   e->probability = REG_BR_PROB_BASE;
 316   e->count = rd->dup_block->count;
 317   e->aux = rd->outgoing_edge->aux;
 318
 319   /* If there are any PHI nodes at the destination of the outgoing edge
 320      from the duplicate block, then we will need to add a new argument
 321      to them.  The argument should have the same value as the argument
 322      associated with the outgoing edge stored in RD.  */
 323   for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
 324     {
 325       gimple phi = gsi_stmt (gsi);
 326       source_location locus;
 327       int indx = rd->outgoing_edge->dest_idx;
 328
 329       locus = gimple_phi_arg_location (phi, indx);
 330       add_phi_arg (phi, gimple_phi_arg_def (phi, indx), e, locus);
 331     }
 332 }
 333
 334 /* Hash table traversal callback routine to create duplicate blocks.  */
 335
 336 static int
 337 create_duplicates (void **slot, void *data)
 338 {
 339   struct redirection_data *rd = (struct redirection_data *) *slot;
 340   struct local_info *local_info = (struct local_info *)data;
 341
 342   /* If this entry should not have a duplicate created, then there's
 343      nothing to do.  */
 344   if (rd->do_not_duplicate)
 345     return 1;
 346
 347   /* Create a template block if we have not done so already.  Otherwise
 348      use the template to create a new block.  */
 349   if (local_info->template_block == NULL)
 350     {
 351       create_block_for_threading (local_info->bb, rd);
 352       local_info->template_block = rd->dup_block;
 353
 354       /* We do not create any outgoing edges for the template.  We will
 355          take care of that in a later traversal.  That way we do not
 356          create edges that are going to just be deleted.  */
 357     }
 358   else
 359     {
 360       create_block_for_threading (local_info->template_block, rd);
 361
 362       /* Go ahead and wire up outgoing edges and update PHIs for the duplicate
 363          block.  */
 364       create_edge_and_update_destination_phis (rd);
 365     }
 366
 367   /* Keep walking the hash table.  */
 368   return 1;
 369 }
 370
 371 /* We did not create any outgoing edges for the template block during
 372    block creation.  This hash table traversal callback creates the
 373    outgoing edge for the template block.  */
 374
 375 static int
 376 fixup_template_block (void **slot, void *data)
 377 {
 378   struct redirection_data *rd = (struct redirection_data *) *slot;
 379   struct local_info *local_info = (struct local_info *)data;
 380
 381   /* If this is the template block, then create its outgoing edges
 382      and halt the hash table traversal.  */
 383   if (rd->dup_block && rd->dup_block == local_info->template_block)
 384     {
 385       create_edge_and_update_destination_phis (rd);
 386       return 0;
 387     }
 388
 389   return 1;
 390 }
 391
 392 /* Hash table traversal callback to redirect each incoming edge
 393    associated with this hash table element to its new destination.  */
 394
 395 static int
 396 redirect_edges (void **slot, void *data)
 397 {
 398   struct redirection_data *rd = (struct redirection_data *) *slot;
 399   struct local_info *local_info = (struct local_info *)data;
 400   struct el *next, *el;
 401
 402   /* Walk over all the incoming edges associated associated with this
 403      hash table entry.  */
 404   for (el = rd->incoming_edges; el; el = next)
 405     {
 406       edge e = el->e;
 407
 408       /* Go ahead and free this element from the list.  Doing this now
 409          avoids the need for another list walk when we destroy the hash
 410          table.  */
 411       next = el->next;
 412       free (el);
 413
 414       /* Go ahead and clear E->aux.  It's not needed anymore and failure
 415          to clear it will cause all kinds of unpleasant problems later.  */
 416       e->aux = NULL;
 417
 418       thread_stats.num_threaded_edges++;
 419
 420       if (rd->dup_block)
 421         {
 422           edge e2;
 423
 424           if (dump_file && (dump_flags & TDF_DETAILS))
 425             fprintf (dump_file, "  Threaded jump %d --> %d to %d\n",
 426                      e->src->index, e->dest->index, rd->dup_block->index);
 427
 428           rd->dup_block->count += e->count;
 429           rd->dup_block->frequency += EDGE_FREQUENCY (e);
 430           EDGE_SUCC (rd->dup_block, 0)->count += e->count;
 431           /* Redirect the incoming edge to the appropriate duplicate
 432              block.  */
 433           e2 = redirect_edge_and_branch (e, rd->dup_block);
 434           gcc_assert (e == e2);
 435           flush_pending_stmts (e2);
 436         }
 437       else
 438         {
 439           if (dump_file && (dump_flags & TDF_DETAILS))
 440             fprintf (dump_file, "  Threaded jump %d --> %d to %d\n",
 441                      e->src->index, e->dest->index, local_info->bb->index);
 442
 443           /* We are using BB as the duplicate.  Remove the unnecessary
 444              outgoing edges and statements from BB.  */
 445           remove_ctrl_stmt_and_useless_edges (local_info->bb,
 446                                               rd->outgoing_edge->dest);
 447
 448           /* Fixup the flags on the single remaining edge.  */
 449           single_succ_edge (local_info->bb)->flags
 450             &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE | EDGE_ABNORMAL);
 451           single_succ_edge (local_info->bb)->flags |= EDGE_FALLTHRU;
 452
 453           /* And adjust count and frequency on BB.  */
 454           local_info->bb->count = e->count;
 455           local_info->bb->frequency = EDGE_FREQUENCY (e);
 456         }
 457     }
 458
 459   /* Indicate that we actually threaded one or more jumps.  */
 460   if (rd->incoming_edges)
 461     local_info->jumps_threaded = true;
 462
 463   return 1;
 464 }
 465
 466 /* Return true if this block has no executable statements other than
 467    a simple ctrl flow instruction.  When the number of outgoing edges
 468    is one, this is equivalent to a "forwarder" block.  */
 469
 470 static bool
 471 redirection_block_p (basic_block bb)
 472 {
 473   gimple_stmt_iterator gsi;
 474
 475   /* Advance to the first executable statement.  */
 476   gsi = gsi_start_bb (bb);
 477   while (!gsi_end_p (gsi)
 478          && (gimple_code (gsi_stmt (gsi)) == GIMPLE_LABEL
 479              || is_gimple_debug (gsi_stmt (gsi))
 480              || gimple_nop_p (gsi_stmt (gsi))))
 481     gsi_next (&gsi);
 482
 483   /* Check if this is an empty block.  */
 484   if (gsi_end_p (gsi))
 485     return true;
 486
 487   /* Test that we've reached the terminating control statement.  */
 488   return gsi_stmt (gsi)
 489          && (gimple_code (gsi_stmt (gsi)) == GIMPLE_COND
 490              || gimple_code (gsi_stmt (gsi)) == GIMPLE_GOTO
 491              || gimple_code (gsi_stmt (gsi)) == GIMPLE_SWITCH);
 492 }
 493
 494 /* BB is a block which ends with a COND_EXPR or SWITCH_EXPR and when BB
 495    is reached via one or more specific incoming edges, we know which
 496    outgoing edge from BB will be traversed.
 497
 498    We want to redirect those incoming edges to the target of the
 499    appropriate outgoing edge.  Doing so avoids a conditional branch
 500    and may expose new optimization opportunities.  Note that we have
 501    to update dominator tree and SSA graph after such changes.
 502
 503    The key to keeping the SSA graph update manageable is to duplicate
 504    the side effects occurring in BB so that those side effects still
 505    occur on the paths which bypass BB after redirecting edges.
 506
 507    We accomplish this by creating duplicates of BB and arranging for
 508    the duplicates to unconditionally pass control to one specific
 509    successor of BB.  We then revector the incoming edges into BB to
 510    the appropriate duplicate of BB.
 511
 512    If NOLOOP_ONLY is true, we only perform the threading as long as it
 513    does not affect the structure of the loops in a nontrivial way.  */
 514
 515 static bool
 516 thread_block (basic_block bb, bool noloop_only)
 517 {
 518   /* E is an incoming edge into BB that we may or may not want to
 519      redirect to a duplicate of BB.  */
 520   edge e, e2;
 521   edge_iterator ei;
 522   struct local_info local_info;
 523   struct loop *loop = bb->loop_father;
 524
 525   /* ALL indicates whether or not all incoming edges into BB should
 526      be threaded to a duplicate of BB.  */
 527   bool all = true;
 528
 529   /* To avoid scanning a linear array for the element we need we instead
 530      use a hash table.  For normal code there should be no noticeable
 531      difference.  However, if we have a block with a large number of
 532      incoming and outgoing edges such linear searches can get expensive.  */
 533   redirection_data = htab_create (EDGE_COUNT (bb->succs),
 534                                   redirection_data_hash,
 535                                   redirection_data_eq,
 536                                   free);
 537
 538   /* If we thread the latch of the loop to its exit, the loop ceases to
 539      exist.  Make sure we do not restrict ourselves in order to preserve
 540      this loop.  */
 541   if (loop->header == bb)
 542     {
 543       e = loop_latch_edge (loop);
 544       e2 = (edge) e->aux;
 545
 546       if (e2 && loop_exit_edge_p (loop, e2))
 547         {
 548           loop->header = NULL;
 549           loop->latch = NULL;
 550         }
 551     }
 552
 553   /* Record each unique threaded destination into a hash table for
 554      efficient lookups.  */
 555   FOR_EACH_EDGE (e, ei, bb->preds)
 556     {
 557       e2 = (edge) e->aux;
 558
 559       if (!e2
 560           /* If NOLOOP_ONLY is true, we only allow threading through the
 561              header of a loop to exit edges.  */
 562           || (noloop_only
 563               && bb == bb->loop_father->header
 564               && !loop_exit_edge_p (bb->loop_father, e2)))
 565         {
 566           all = false;
 567           continue;
 568         }
 569
 570       update_bb_profile_for_threading (e->dest, EDGE_FREQUENCY (e),
 571                                        e->count, (edge) e->aux);
 572
 573       /* Insert the outgoing edge into the hash table if it is not
 574          already in the hash table.  */
 575       lookup_redirection_data (e2, e, INSERT);
 576     }
 577
 578   /* If we are going to thread all incoming edges to an outgoing edge, then
 579      BB will become unreachable.  Rather than just throwing it away, use
 580      it for one of the duplicates.  Mark the first incoming edge with the
 581      DO_NOT_DUPLICATE attribute.  */
 582   if (all)
 583     {
 584       edge e = (edge) EDGE_PRED (bb, 0)->aux;
 585       lookup_redirection_data (e, NULL, NO_INSERT)->do_not_duplicate = true;
 586     }
 587
 588   /* We do not update dominance info.  */
 589   free_dominance_info (CDI_DOMINATORS);
 590
 591   /* Now create duplicates of BB.
 592
 593      Note that for a block with a high outgoing degree we can waste
 594      a lot of time and memory creating and destroying useless edges.
 595
 596      So we first duplicate BB and remove the control structure at the
 597      tail of the duplicate as well as all outgoing edges from the
 598      duplicate.  We then use that duplicate block as a template for
 599      the rest of the duplicates.  */
 600   local_info.template_block = NULL;
 601   local_info.bb = bb;
 602   local_info.jumps_threaded = false;
 603   htab_traverse (redirection_data, create_duplicates, &local_info);
 604
 605   /* The template does not have an outgoing edge.  Create that outgoing
 606      edge and update PHI nodes as the edge's target as necessary.
 607
 608      We do this after creating all the duplicates to avoid creating
 609      unnecessary edges.  */
 610   htab_traverse (redirection_data, fixup_template_block, &local_info);
 611
 612   /* The hash table traversals above created the duplicate blocks (and the
 613      statements within the duplicate blocks).  This loop creates PHI nodes for
 614      the duplicated blocks and redirects the incoming edges into BB to reach
 615      the duplicates of BB.  */
 616   htab_traverse (redirection_data, redirect_edges, &local_info);
 617
 618   /* Done with this block.  Clear REDIRECTION_DATA.  */
 619   htab_delete (redirection_data);
 620   redirection_data = NULL;
 621
 622   /* Indicate to our caller whether or not any jumps were threaded.  */
 623   return local_info.jumps_threaded;
 624 }
 625
 626 /* Threads edge E through E->dest to the edge E->aux.  Returns the copy
 627    of E->dest created during threading, or E->dest if it was not necessary
 628    to copy it (E is its single predecessor).  */
 629
 630 static basic_block
 631 thread_single_edge (edge e)
 632 {
 633   basic_block bb = e->dest;
 634   edge eto = (edge) e->aux;
 635   struct redirection_data rd;
 636
 637   e->aux = NULL;
 638
 639   thread_stats.num_threaded_edges++;
 640
 641   if (single_pred_p (bb))
 642     {
 643       /* If BB has just a single predecessor, we should only remove the
 644          control statements at its end, and successors except for ETO.  */
 645       remove_ctrl_stmt_and_useless_edges (bb, eto->dest);
 646
 647       /* And fixup the flags on the single remaining edge.  */
 648       eto->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE | EDGE_ABNORMAL);
 649       eto->flags |= EDGE_FALLTHRU;
 650
 651       return bb;
 652     }
 653
 654   /* Otherwise, we need to create a copy.  */
 655   update_bb_profile_for_threading (bb, EDGE_FREQUENCY (e), e->count, eto);
 656
 657   rd.outgoing_edge = eto;
 658
 659   create_block_for_threading (bb, &rd);
 660   create_edge_and_update_destination_phis (&rd);
 661
 662   if (dump_file && (dump_flags & TDF_DETAILS))
 663     fprintf (dump_file, "  Threaded jump %d --> %d to %d\n",
 664              e->src->index, e->dest->index, rd.dup_block->index);
 665
 666   rd.dup_block->count = e->count;
 667   rd.dup_block->frequency = EDGE_FREQUENCY (e);
 668   single_succ_edge (rd.dup_block)->count = e->count;
 669   redirect_edge_and_branch (e, rd.dup_block);
 670   flush_pending_stmts (e);
 671
 672   return rd.dup_block;
 673 }
 674
 675 /* Callback for dfs_enumerate_from.  Returns true if BB is different
 676    from STOP and DBDS_CE_STOP.  */
 677
 678 static basic_block dbds_ce_stop;
 679 static bool
 680 dbds_continue_enumeration_p (const_basic_block bb, const void *stop)
 681 {
 682   return (bb != (const_basic_block) stop
 683           && bb != dbds_ce_stop);
 684 }
 685
 686 /* Evaluates the dominance relationship of latch of the LOOP and BB, and
 687    returns the state.  */
 688
 689 enum bb_dom_status
 690 {
 691   /* BB does not dominate latch of the LOOP.  */
 692   DOMST_NONDOMINATING,
 693   /* The LOOP is broken (there is no path from the header to its latch.  */
 694   DOMST_LOOP_BROKEN,
 695   /* BB dominates the latch of the LOOP.  */
 696   DOMST_DOMINATING
 697 };
 698
 699 static enum bb_dom_status
 700 determine_bb_domination_status (struct loop *loop, basic_block bb)
 701 {
 702   basic_block *bblocks;
 703   unsigned nblocks, i;
 704   bool bb_reachable = false;
 705   edge_iterator ei;
 706   edge e;
 707
 708 #ifdef ENABLE_CHECKING
 709   /* This function assumes BB is a successor of LOOP->header.  */
 710     {
 711       bool ok = false;
 712
 713       FOR_EACH_EDGE (e, ei, bb->preds)
 714         {
 715           if (e->src == loop->header)
 716             {
 717               ok = true;
 718               break;
 719             }
 720         }
 721
 722       gcc_assert (ok);
 723     }
 724 #endif
 725
 726   if (bb == loop->latch)
 727     return DOMST_DOMINATING;
 728
 729   /* Check that BB dominates LOOP->latch, and that it is back-reachable
 730      from it.  */
 731
 732   bblocks = XCNEWVEC (basic_block, loop->num_nodes);
 733   dbds_ce_stop = loop->header;
 734   nblocks = dfs_enumerate_from (loop->latch, 1, dbds_continue_enumeration_p,
 735                                 bblocks, loop->num_nodes, bb);
 736   for (i = 0; i < nblocks; i++)
 737     FOR_EACH_EDGE (e, ei, bblocks[i]->preds)
 738       {
 739         if (e->src == loop->header)
 740           {
 741             free (bblocks);
 742             return DOMST_NONDOMINATING;
 743           }
 744         if (e->src == bb)
 745           bb_reachable = true;
 746       }
 747
 748   free (bblocks);
 749   return (bb_reachable ? DOMST_DOMINATING : DOMST_LOOP_BROKEN);
 750 }
 751
 752 /* Thread jumps through the header of LOOP.  Returns true if cfg changes.
 753    If MAY_PEEL_LOOP_HEADERS is false, we avoid threading from entry edges
 754    to the inside of the loop.  */
 755
 756 static bool
 757 thread_through_loop_header (struct loop *loop, bool may_peel_loop_headers)
 758 {
 759   basic_block header = loop->header;
 760   edge e, tgt_edge, latch = loop_latch_edge (loop);
 761   edge_iterator ei;
 762   basic_block tgt_bb, atgt_bb;
 763   enum bb_dom_status domst;
 764
 765   /* We have already threaded through headers to exits, so all the threading
 766      requests now are to the inside of the loop.  We need to avoid creating
 767      irreducible regions (i.e., loops with more than one entry block), and
 768      also loop with several latch edges, or new subloops of the loop (although
 769      there are cases where it might be appropriate, it is difficult to decide,
 770      and doing it wrongly may confuse other optimizers).
 771
 772      We could handle more general cases here.  However, the intention is to
 773      preserve some information about the loop, which is impossible if its
 774      structure changes significantly, in a way that is not well understood.
 775      Thus we only handle few important special cases, in which also updating
 776      of the loop-carried information should be feasible:
 777
 778      1) Propagation of latch edge to a block that dominates the latch block
 779         of a loop.  This aims to handle the following idiom:
 780
 781         first = 1;
 782         while (1)
 783           {
 784             if (first)
 785               initialize;
 786             first = 0;
 787             body;
 788           }
 789
 790         After threading the latch edge, this becomes
 791
 792         first = 1;
 793         if (first)
 794           initialize;
 795         while (1)
 796           {
 797             first = 0;
 798             body;
 799           }
 800
 801         The original header of the loop is moved out of it, and we may thread
 802         the remaining edges through it without further constraints.
 803
 804      2) All entry edges are propagated to a single basic block that dominates
 805         the latch block of the loop.  This aims to handle the following idiom
 806         (normally created for "for" loops):
 807
 808         i = 0;
 809         while (1)
 810           {
 811             if (i >= 100)
 812               break;
 813             body;
 814             i++;
 815           }
 816
 817         This becomes
 818
 819         i = 0;
 820         while (1)
 821           {
 822             body;
 823             i++;
 824             if (i >= 100)
 825               break;
 826           }
 827      */
 828
 829   /* Threading through the header won't improve the code if the header has just
 830      one successor.  */
 831   if (single_succ_p (header))
 832     goto fail;
 833
 834   if (latch->aux)
 835     {
 836       tgt_edge = (edge) latch->aux;
 837       tgt_bb = tgt_edge->dest;
 838     }
 839   else if (!may_peel_loop_headers
 840            && !redirection_block_p (loop->header))
 841     goto fail;
 842   else
 843     {
 844       tgt_bb = NULL;
 845       tgt_edge = NULL;
 846       FOR_EACH_EDGE (e, ei, header->preds)
 847         {
 848           if (!e->aux)
 849             {
 850               if (e == latch)
 851                 continue;
 852
 853               /* If latch is not threaded, and there is a header
 854                  edge that is not threaded, we would create loop
 855                  with multiple entries.  */
 856               goto fail;
 857             }
 858
 859           tgt_edge = (edge) e->aux;
 860           atgt_bb = tgt_edge->dest;
 861           if (!tgt_bb)
 862             tgt_bb = atgt_bb;
 863           /* Two targets of threading would make us create loop
 864              with multiple entries.  */
 865           else if (tgt_bb != atgt_bb)
 866             goto fail;
 867         }
 868
 869       if (!tgt_bb)
 870         {
 871           /* There are no threading requests.  */
 872           return false;
 873         }
 874
 875       /* Redirecting to empty loop latch is useless.  */
 876       if (tgt_bb == loop->latch
 877           && empty_block_p (loop->latch))
 878         goto fail;
 879     }
 880
 881   /* The target block must dominate the loop latch, otherwise we would be
 882      creating a subloop.  */
 883   domst = determine_bb_domination_status (loop, tgt_bb);
 884   if (domst == DOMST_NONDOMINATING)
 885     goto fail;
 886   if (domst == DOMST_LOOP_BROKEN)
 887     {
 888       /* If the loop ceased to exist, mark it as such, and thread through its
 889          original header.  */
 890       loop->header = NULL;
 891       loop->latch = NULL;
 892       return thread_block (header, false);
 893     }
 894
 895   if (tgt_bb->loop_father->header == tgt_bb)
 896     {
 897       /* If the target of the threading is a header of a subloop, we need
 898          to create a preheader for it, so that the headers of the two loops
 899          do not merge.  */
 900       if (EDGE_COUNT (tgt_bb->preds) > 2)
 901         {
 902           tgt_bb = create_preheader (tgt_bb->loop_father, 0);
 903           gcc_assert (tgt_bb != NULL);
 904         }
 905       else
 906         tgt_bb = split_edge (tgt_edge);
 907     }
 908
 909   if (latch->aux)
 910     {
 911       /* First handle the case latch edge is redirected.  */
 912       loop->latch = thread_single_edge (latch);
 913       gcc_assert (single_succ (loop->latch) == tgt_bb);
 914       loop->header = tgt_bb;
 915
 916       /* Thread the remaining edges through the former header.  */
 917       thread_block (header, false);
 918     }
 919   else
 920     {
 921       basic_block new_preheader;
 922
 923       /* Now consider the case entry edges are redirected to the new entry
 924          block.  Remember one entry edge, so that we can find the new
 925         preheader (its destination after threading).  */
 926       FOR_EACH_EDGE (e, ei, header->preds)
 927         {
 928           if (e->aux)
 929             break;
 930         }
 931
 932       /* The duplicate of the header is the new preheader of the loop.  Ensure
 933          that it is placed correctly in the loop hierarchy.  */
 934       set_loop_copy (loop, loop_outer (loop));
 935
 936       thread_block (header, false);
 937       set_loop_copy (loop, NULL);
 938       new_preheader = e->dest;
 939
 940       /* Create the new latch block.  This is always necessary, as the latch
 941          must have only a single successor, but the original header had at
 942          least two successors.  */
 943       loop->latch = NULL;
 944       mfb_kj_edge = single_succ_edge (new_preheader);
 945       loop->header = mfb_kj_edge->dest;
 946       latch = make_forwarder_block (tgt_bb, mfb_keep_just, NULL);
 947       loop->header = latch->dest;
 948       loop->latch = latch->src;
 949     }
 950
 951   return true;
 952
 953 fail:
 954   /* We failed to thread anything.  Cancel the requests.  */
 955   FOR_EACH_EDGE (e, ei, header->preds)
 956     {
 957       e->aux = NULL;
 958     }
 959   return false;
 960 }
 961
 962 /* Walk through the registered jump threads and convert them into a
 963    form convenient for this pass.
 964
 965    Any block which has incoming edges threaded to outgoing edges
 966    will have its entry in THREADED_BLOCK set.
 967
 968    Any threaded edge will have its new outgoing edge stored in the
 969    original edge's AUX field.
 970
 971    This form avoids the need to walk all the edges in the CFG to
 972    discover blocks which need processing and avoids unnecessary
 973    hash table lookups to map from threaded edge to new target.  */
 974
 975 static void
 976 mark_threaded_blocks (bitmap threaded_blocks)
 977 {
 978   unsigned int i;
 979   bitmap_iterator bi;
 980   bitmap tmp = BITMAP_ALLOC (NULL);
 981   basic_block bb;
 982   edge e;
 983   edge_iterator ei;
 984
 985   for (i = 0; i < VEC_length (edge, threaded_edges); i += 2)
 986     {
 987       edge e = VEC_index (edge, threaded_edges, i);
 988       edge e2 = VEC_index (edge, threaded_edges, i + 1);
 989
 990       e->aux = e2;
 991       bitmap_set_bit (tmp, e->dest->index);
 992     }
 993
 994   /* If optimizing for size, only thread through block if we don't have
 995      to duplicate it or it's an otherwise empty redirection block.  */
 996   if (optimize_function_for_size_p (cfun))
 997     {
 998       EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
 999         {
1000           bb = BASIC_BLOCK (i);
1001           if (EDGE_COUNT (bb->preds) > 1
1002               && !redirection_block_p (bb))
1003             {
1004               FOR_EACH_EDGE (e, ei, bb->preds)
1005                       e->aux = NULL;
1006             }
1007           else
1008             bitmap_set_bit (threaded_blocks, i);
1009         }
1010     }
1011   else
1012     bitmap_copy (threaded_blocks, tmp);
1013
1014   BITMAP_FREE(tmp);
1015 }
1016
1017
1018 /* Walk through all blocks and thread incoming edges to the appropriate
1019    outgoing edge for each edge pair recorded in THREADED_EDGES.
1020
1021    It is the caller's responsibility to fix the dominance information
1022    and rewrite duplicated SSA_NAMEs back into SSA form.
1023
1024    If MAY_PEEL_LOOP_HEADERS is false, we avoid threading edges through
1025    loop headers if it does not simplify the loop.
1026
1027    Returns true if one or more edges were threaded, false otherwise.  */
1028
1029 bool
1030 thread_through_all_blocks (bool may_peel_loop_headers)
1031 {
1032   bool retval = false;
1033   unsigned int i;
1034   bitmap_iterator bi;
1035   bitmap threaded_blocks;
1036   struct loop *loop;
1037   loop_iterator li;
1038
1039   /* We must know about loops in order to preserve them.  */
1040   gcc_assert (current_loops != NULL);
1041
1042   if (threaded_edges == NULL)
1043     return false;
1044
1045   threaded_blocks = BITMAP_ALLOC (NULL);
1046   memset (&thread_stats, 0, sizeof (thread_stats));
1047
1048   mark_threaded_blocks (threaded_blocks);
1049
1050   initialize_original_copy_tables ();
1051
1052   /* First perform the threading requests that do not affect
1053      loop structure.  */
1054   EXECUTE_IF_SET_IN_BITMAP (threaded_blocks, 0, i, bi)
1055     {
1056       basic_block bb = BASIC_BLOCK (i);
1057
1058       if (EDGE_COUNT (bb->preds) > 0)
1059         retval |= thread_block (bb, true);
1060     }
1061
1062   /* Then perform the threading through loop headers.  We start with the
1063      innermost loop, so that the changes in cfg we perform won't affect
1064      further threading.  */
1065   FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
1066     {
1067       if (!loop->header
1068           || !bitmap_bit_p (threaded_blocks, loop->header->index))
1069         continue;
1070
1071       retval |= thread_through_loop_header (loop, may_peel_loop_headers);
1072     }
1073
1074   statistics_counter_event (cfun, "Jumps threaded",
1075                             thread_stats.num_threaded_edges);
1076
1077   free_original_copy_tables ();
1078
1079   BITMAP_FREE (threaded_blocks);
1080   threaded_blocks = NULL;
1081   VEC_free (edge, heap, threaded_edges);
1082   threaded_edges = NULL;
1083
1084   if (retval)
1085     loops_state_set (LOOPS_NEED_FIXUP);
1086
1087   return retval;
1088 }
1089
1090 /* Register a jump threading opportunity.  We queue up all the jump
1091    threading opportunities discovered by a pass and update the CFG
1092    and SSA form all at once.
1093
1094    E is the edge we can thread, E2 is the new target edge, i.e., we
1095    are effectively recording that E->dest can be changed to E2->dest
1096    after fixing the SSA graph.  */
1097
1098 void
1099 register_jump_thread (edge e, edge e2)
1100 {
1101   if (threaded_edges == NULL)
1102     threaded_edges = VEC_alloc (edge, heap, 10);
1103
1104   VEC_safe_push (edge, heap, threaded_edges, e);
1105   VEC_safe_push (edge, heap, threaded_edges, e2);
1106 }