gcc/tree-vect-loop-manip.c

   1 /* Vectorizer Specific Loop Manipulations
   2    Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software
   3    Foundation, Inc.
   4    Contributed by Dorit Naishlos <dorit@il.ibm.com>
   5    and Ira Rosen <irar@il.ibm.com>
   6
   7 This file is part of GCC.
   8
   9 GCC is free software; you can redistribute it and/or modify it under
  10 the terms of the GNU General Public License as published by the Free
  11 Software Foundation; either version 3, or (at your option) any later
  12 version.
  13
  14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  16 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  17 for more details.
  18
  19 You should have received a copy of the GNU General Public License
  20 along with GCC; see the file COPYING3.  If not see
  21 <http://www.gnu.org/licenses/>.  */
  22
  23 #include "config.h"
  24 #include "system.h"
  25 #include "coretypes.h"
  26 #include "tm.h"
  27 #include "ggc.h"
  28 #include "tree.h"
  29 #include "basic-block.h"
  30 #include "diagnostic.h"
  31 #include "tree-flow.h"
  32 #include "tree-dump.h"
  33 #include "cfgloop.h"
  34 #include "cfglayout.h"
  35 #include "expr.h"
  36 #include "toplev.h"
  37 #include "tree-scalar-evolution.h"
  38 #include "tree-vectorizer.h"
  39 #include "langhooks.h"
  40
  41 /*************************************************************************
  42   Simple Loop Peeling Utilities
  43
  44   Utilities to support loop peeling for vectorization purposes.
  45  *************************************************************************/
  46
  47
  48 /* Renames the use *OP_P.  */
  49
  50 static void
  51 rename_use_op (use_operand_p op_p)
  52 {
  53   tree new_name;
  54
  55   if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
  56     return;
  57
  58   new_name = get_current_def (USE_FROM_PTR (op_p));
  59
  60   /* Something defined outside of the loop.  */
  61   if (!new_name)
  62     return;
  63
  64   /* An ordinary ssa name defined in the loop.  */
  65
  66   SET_USE (op_p, new_name);
  67 }
  68
  69
  70 /* Renames the variables in basic block BB.  */
  71
  72 void
  73 rename_variables_in_bb (basic_block bb)
  74 {
  75   gimple_stmt_iterator gsi;
  76   gimple stmt;
  77   use_operand_p use_p;
  78   ssa_op_iter iter;
  79   edge e;
  80   edge_iterator ei;
  81   struct loop *loop = bb->loop_father;
  82
  83   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
  84     {
  85       stmt = gsi_stmt (gsi);
  86       FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
  87         rename_use_op (use_p);
  88     }
  89
  90   FOR_EACH_EDGE (e, ei, bb->succs)
  91     {
  92       if (!flow_bb_inside_loop_p (loop, e->dest))
  93         continue;
  94       for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
  95         rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi_stmt (gsi), e));
  96     }
  97 }
  98
  99
 100 /* Renames variables in new generated LOOP.  */
 101
 102 void
 103 rename_variables_in_loop (struct loop *loop)
 104 {
 105   unsigned i;
 106   basic_block *bbs;
 107
 108   bbs = get_loop_body (loop);
 109
 110   for (i = 0; i < loop->num_nodes; i++)
 111     rename_variables_in_bb (bbs[i]);
 112
 113   free (bbs);
 114 }
 115
 116
 117 /* Update the PHI nodes of NEW_LOOP.
 118
 119    NEW_LOOP is a duplicate of ORIG_LOOP.
 120    AFTER indicates whether NEW_LOOP executes before or after ORIG_LOOP:
 121    AFTER is true if NEW_LOOP executes after ORIG_LOOP, and false if it
 122    executes before it.  */
 123
 124 static void
 125 slpeel_update_phis_for_duplicate_loop (struct loop *orig_loop,
 126                                        struct loop *new_loop, bool after)
 127 {
 128   tree new_ssa_name;
 129   gimple phi_new, phi_orig;
 130   tree def;
 131   edge orig_loop_latch = loop_latch_edge (orig_loop);
 132   edge orig_entry_e = loop_preheader_edge (orig_loop);
 133   edge new_loop_exit_e = single_exit (new_loop);
 134   edge new_loop_entry_e = loop_preheader_edge (new_loop);
 135   edge entry_arg_e = (after ? orig_loop_latch : orig_entry_e);
 136   gimple_stmt_iterator gsi_new, gsi_orig;
 137
 138   /*
 139      step 1. For each loop-header-phi:
 140              Add the first phi argument for the phi in NEW_LOOP
 141             (the one associated with the entry of NEW_LOOP)
 142
 143      step 2. For each loop-header-phi:
 144              Add the second phi argument for the phi in NEW_LOOP
 145             (the one associated with the latch of NEW_LOOP)
 146
 147      step 3. Update the phis in the successor block of NEW_LOOP.
 148
 149         case 1: NEW_LOOP was placed before ORIG_LOOP:
 150                 The successor block of NEW_LOOP is the header of ORIG_LOOP.
 151                 Updating the phis in the successor block can therefore be done
 152                 along with the scanning of the loop header phis, because the
 153                 header blocks of ORIG_LOOP and NEW_LOOP have exactly the same
 154                 phi nodes, organized in the same order.
 155
 156         case 2: NEW_LOOP was placed after ORIG_LOOP:
 157                 The successor block of NEW_LOOP is the original exit block of
 158                 ORIG_LOOP - the phis to be updated are the loop-closed-ssa phis.
 159                 We postpone updating these phis to a later stage (when
 160                 loop guards are added).
 161    */
 162
 163
 164   /* Scan the phis in the headers of the old and new loops
 165      (they are organized in exactly the same order).  */
 166
 167   for (gsi_new = gsi_start_phis (new_loop->header),
 168        gsi_orig = gsi_start_phis (orig_loop->header);
 169        !gsi_end_p (gsi_new) && !gsi_end_p (gsi_orig);
 170        gsi_next (&gsi_new), gsi_next (&gsi_orig))
 171     {
 172       phi_new = gsi_stmt (gsi_new);
 173       phi_orig = gsi_stmt (gsi_orig);
 174
 175       /* step 1.  */
 176       def = PHI_ARG_DEF_FROM_EDGE (phi_orig, entry_arg_e);
 177       add_phi_arg (phi_new, def, new_loop_entry_e);
 178
 179       /* step 2.  */
 180       def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_loop_latch);
 181       if (TREE_CODE (def) != SSA_NAME)
 182         continue;
 183
 184       new_ssa_name = get_current_def (def);
 185       if (!new_ssa_name)
 186         {
 187           /* This only happens if there are no definitions
 188              inside the loop. use the phi_result in this case.  */
 189           new_ssa_name = PHI_RESULT (phi_new);
 190         }
 191
 192       /* An ordinary ssa name defined in the loop.  */
 193       add_phi_arg (phi_new, new_ssa_name, loop_latch_edge (new_loop));
 194
 195       /* step 3 (case 1).  */
 196       if (!after)
 197         {
 198           gcc_assert (new_loop_exit_e == orig_entry_e);
 199           SET_PHI_ARG_DEF (phi_orig,
 200                            new_loop_exit_e->dest_idx,
 201                            new_ssa_name);
 202         }
 203     }
 204 }
 205
 206
 207 /* Update PHI nodes for a guard of the LOOP.
 208
 209    Input:
 210    - LOOP, GUARD_EDGE: LOOP is a loop for which we added guard code that
 211         controls whether LOOP is to be executed.  GUARD_EDGE is the edge that
 212         originates from the guard-bb, skips LOOP and reaches the (unique) exit
 213         bb of LOOP.  This loop-exit-bb is an empty bb with one successor.
 214         We denote this bb NEW_MERGE_BB because before the guard code was added
 215         it had a single predecessor (the LOOP header), and now it became a merge
 216         point of two paths - the path that ends with the LOOP exit-edge, and
 217         the path that ends with GUARD_EDGE.
 218    - NEW_EXIT_BB: New basic block that is added by this function between LOOP
 219         and NEW_MERGE_BB. It is used to place loop-closed-ssa-form exit-phis.
 220
 221    ===> The CFG before the guard-code was added:
 222         LOOP_header_bb:
 223           loop_body
 224           if (exit_loop) goto update_bb
 225           else           goto LOOP_header_bb
 226         update_bb:
 227
 228    ==> The CFG after the guard-code was added:
 229         guard_bb:
 230           if (LOOP_guard_condition) goto new_merge_bb
 231           else                      goto LOOP_header_bb
 232         LOOP_header_bb:
 233           loop_body
 234           if (exit_loop_condition) goto new_merge_bb
 235           else                     goto LOOP_header_bb
 236         new_merge_bb:
 237           goto update_bb
 238         update_bb:
 239
 240    ==> The CFG after this function:
 241         guard_bb:
 242           if (LOOP_guard_condition) goto new_merge_bb
 243           else                      goto LOOP_header_bb
 244         LOOP_header_bb:
 245           loop_body
 246           if (exit_loop_condition) goto new_exit_bb
 247           else                     goto LOOP_header_bb
 248         new_exit_bb:
 249         new_merge_bb:
 250           goto update_bb
 251         update_bb:
 252
 253    This function:
 254    1. creates and updates the relevant phi nodes to account for the new
 255       incoming edge (GUARD_EDGE) into NEW_MERGE_BB. This involves:
 256       1.1. Create phi nodes at NEW_MERGE_BB.
 257       1.2. Update the phi nodes at the successor of NEW_MERGE_BB (denoted
 258            UPDATE_BB).  UPDATE_BB was the exit-bb of LOOP before NEW_MERGE_BB
 259    2. preserves loop-closed-ssa-form by creating the required phi nodes
 260       at the exit of LOOP (i.e, in NEW_EXIT_BB).
 261
 262    There are two flavors to this function:
 263
 264    slpeel_update_phi_nodes_for_guard1:
 265      Here the guard controls whether we enter or skip LOOP, where LOOP is a
 266      prolog_loop (loop1 below), and the new phis created in NEW_MERGE_BB are
 267      for variables that have phis in the loop header.
 268
 269    slpeel_update_phi_nodes_for_guard2:
 270      Here the guard controls whether we enter or skip LOOP, where LOOP is an
 271      epilog_loop (loop2 below), and the new phis created in NEW_MERGE_BB are
 272      for variables that have phis in the loop exit.
 273
 274    I.E., the overall structure is:
 275
 276         loop1_preheader_bb:
 277                 guard1 (goto loop1/merge1_bb)
 278         loop1
 279         loop1_exit_bb:
 280                 guard2 (goto merge1_bb/merge2_bb)
 281         merge1_bb
 282         loop2
 283         loop2_exit_bb
 284         merge2_bb
 285         next_bb
 286
 287    slpeel_update_phi_nodes_for_guard1 takes care of creating phis in
 288    loop1_exit_bb and merge1_bb. These are entry phis (phis for the vars
 289    that have phis in loop1->header).
 290
 291    slpeel_update_phi_nodes_for_guard2 takes care of creating phis in
 292    loop2_exit_bb and merge2_bb. These are exit phis (phis for the vars
 293    that have phis in next_bb). It also adds some of these phis to
 294    loop1_exit_bb.
 295
 296    slpeel_update_phi_nodes_for_guard1 is always called before
 297    slpeel_update_phi_nodes_for_guard2. They are both needed in order
 298    to create correct data-flow and loop-closed-ssa-form.
 299
 300    Generally slpeel_update_phi_nodes_for_guard1 creates phis for variables
 301    that change between iterations of a loop (and therefore have a phi-node
 302    at the loop entry), whereas slpeel_update_phi_nodes_for_guard2 creates
 303    phis for variables that are used out of the loop (and therefore have
 304    loop-closed exit phis). Some variables may be both updated between
 305    iterations and used after the loop. This is why in loop1_exit_bb we
 306    may need both entry_phis (created by slpeel_update_phi_nodes_for_guard1)
 307    and exit phis (created by slpeel_update_phi_nodes_for_guard2).
 308
 309    - IS_NEW_LOOP: if IS_NEW_LOOP is true, then LOOP is a newly created copy of
 310      an original loop. i.e., we have:
 311
 312            orig_loop
 313            guard_bb (goto LOOP/new_merge)
 314            new_loop <-- LOOP
 315            new_exit
 316            new_merge
 317            next_bb
 318
 319      If IS_NEW_LOOP is false, then LOOP is an original loop, in which case we
 320      have:
 321
 322            new_loop
 323            guard_bb (goto LOOP/new_merge)
 324            orig_loop <-- LOOP
 325            new_exit
 326            new_merge
 327            next_bb
 328
 329      The SSA names defined in the original loop have a current
 330      reaching definition that that records the corresponding new
 331      ssa-name used in the new duplicated loop copy.
 332   */
 333
 334 /* Function slpeel_update_phi_nodes_for_guard1
 335
 336    Input:
 337    - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
 338    - DEFS - a bitmap of ssa names to mark new names for which we recorded
 339             information.
 340
 341    In the context of the overall structure, we have:
 342
 343         loop1_preheader_bb:
 344                 guard1 (goto loop1/merge1_bb)
 345 LOOP->  loop1
 346         loop1_exit_bb:
 347                 guard2 (goto merge1_bb/merge2_bb)
 348         merge1_bb
 349         loop2
 350         loop2_exit_bb
 351         merge2_bb
 352         next_bb
 353
 354    For each name updated between loop iterations (i.e - for each name that has
 355    an entry (loop-header) phi in LOOP) we create a new phi in:
 356    1. merge1_bb (to account for the edge from guard1)
 357    2. loop1_exit_bb (an exit-phi to keep LOOP in loop-closed form)
 358 */
 359
 360 static void
 361 slpeel_update_phi_nodes_for_guard1 (edge guard_edge, struct loop *loop,
 362                                     bool is_new_loop, basic_block *new_exit_bb,
 363                                     bitmap *defs)
 364 {
 365   gimple orig_phi, new_phi;
 366   gimple update_phi, update_phi2;
 367   tree guard_arg, loop_arg;
 368   basic_block new_merge_bb = guard_edge->dest;
 369   edge e = EDGE_SUCC (new_merge_bb, 0);
 370   basic_block update_bb = e->dest;
 371   basic_block orig_bb = loop->header;
 372   edge new_exit_e;
 373   tree current_new_name;
 374   tree name;
 375   gimple_stmt_iterator gsi_orig, gsi_update;
 376
 377   /* Create new bb between loop and new_merge_bb.  */
 378   *new_exit_bb = split_edge (single_exit (loop));
 379
 380   new_exit_e = EDGE_SUCC (*new_exit_bb, 0);
 381
 382   for (gsi_orig = gsi_start_phis (orig_bb),
 383        gsi_update = gsi_start_phis (update_bb);
 384        !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
 385        gsi_next (&gsi_orig), gsi_next (&gsi_update))
 386     {
 387       orig_phi = gsi_stmt (gsi_orig);
 388       update_phi = gsi_stmt (gsi_update);
 389
 390       /* Virtual phi; Mark it for renaming. We actually want to call
 391          mar_sym_for_renaming, but since all ssa renaming datastructures
 392          are going to be freed before we get to call ssa_update, we just
 393          record this name for now in a bitmap, and will mark it for
 394          renaming later.  */
 395       name = PHI_RESULT (orig_phi);
 396       if (!is_gimple_reg (SSA_NAME_VAR (name)))
 397         bitmap_set_bit (vect_memsyms_to_rename, DECL_UID (SSA_NAME_VAR (name)));
 398
 399       /** 1. Handle new-merge-point phis  **/
 400
 401       /* 1.1. Generate new phi node in NEW_MERGE_BB:  */
 402       new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
 403                                  new_merge_bb);
 404
 405       /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
 406             of LOOP. Set the two phi args in NEW_PHI for these edges:  */
 407       loop_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, EDGE_SUCC (loop->latch, 0));
 408       guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, loop_preheader_edge (loop));
 409
 410       add_phi_arg (new_phi, loop_arg, new_exit_e);
 411       add_phi_arg (new_phi, guard_arg, guard_edge);
 412
 413       /* 1.3. Update phi in successor block.  */
 414       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == loop_arg
 415                   || PHI_ARG_DEF_FROM_EDGE (update_phi, e) == guard_arg);
 416       SET_PHI_ARG_DEF (update_phi, e->dest_idx, PHI_RESULT (new_phi));
 417       update_phi2 = new_phi;
 418
 419
 420       /** 2. Handle loop-closed-ssa-form phis  **/
 421
 422       if (!is_gimple_reg (PHI_RESULT (orig_phi)))
 423         continue;
 424
 425       /* 2.1. Generate new phi node in NEW_EXIT_BB:  */
 426       new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
 427                                  *new_exit_bb);
 428
 429       /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop.  */
 430       add_phi_arg (new_phi, loop_arg, single_exit (loop));
 431
 432       /* 2.3. Update phi in successor of NEW_EXIT_BB:  */
 433       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
 434       SET_PHI_ARG_DEF (update_phi2, new_exit_e->dest_idx, PHI_RESULT (new_phi));
 435
 436       /* 2.4. Record the newly created name with set_current_def.
 437          We want to find a name such that
 438                 name = get_current_def (orig_loop_name)
 439          and to set its current definition as follows:
 440                 set_current_def (name, new_phi_name)
 441
 442          If LOOP is a new loop then loop_arg is already the name we're
 443          looking for. If LOOP is the original loop, then loop_arg is
 444          the orig_loop_name and the relevant name is recorded in its
 445          current reaching definition.  */
 446       if (is_new_loop)
 447         current_new_name = loop_arg;
 448       else
 449         {
 450           current_new_name = get_current_def (loop_arg);
 451           /* current_def is not available only if the variable does not
 452              change inside the loop, in which case we also don't care
 453              about recording a current_def for it because we won't be
 454              trying to create loop-exit-phis for it.  */
 455           if (!current_new_name)
 456             continue;
 457         }
 458       gcc_assert (get_current_def (current_new_name) == NULL_TREE);
 459
 460       set_current_def (current_new_name, PHI_RESULT (new_phi));
 461       bitmap_set_bit (*defs, SSA_NAME_VERSION (current_new_name));
 462     }
 463 }
 464
 465
 466 /* Function slpeel_update_phi_nodes_for_guard2
 467
 468    Input:
 469    - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
 470
 471    In the context of the overall structure, we have:
 472
 473         loop1_preheader_bb:
 474                 guard1 (goto loop1/merge1_bb)
 475         loop1
 476         loop1_exit_bb:
 477                 guard2 (goto merge1_bb/merge2_bb)
 478         merge1_bb
 479 LOOP->  loop2
 480         loop2_exit_bb
 481         merge2_bb
 482         next_bb
 483
 484    For each name used out side the loop (i.e - for each name that has an exit
 485    phi in next_bb) we create a new phi in:
 486    1. merge2_bb (to account for the edge from guard_bb)
 487    2. loop2_exit_bb (an exit-phi to keep LOOP in loop-closed form)
 488    3. guard2 bb (an exit phi to keep the preceding loop in loop-closed form),
 489       if needed (if it wasn't handled by slpeel_update_phis_nodes_for_phi1).
 490 */
 491
 492 static void
 493 slpeel_update_phi_nodes_for_guard2 (edge guard_edge, struct loop *loop,
 494                                     bool is_new_loop, basic_block *new_exit_bb)
 495 {
 496   gimple orig_phi, new_phi;
 497   gimple update_phi, update_phi2;
 498   tree guard_arg, loop_arg;
 499   basic_block new_merge_bb = guard_edge->dest;
 500   edge e = EDGE_SUCC (new_merge_bb, 0);
 501   basic_block update_bb = e->dest;
 502   edge new_exit_e;
 503   tree orig_def, orig_def_new_name;
 504   tree new_name, new_name2;
 505   tree arg;
 506   gimple_stmt_iterator gsi;
 507
 508   /* Create new bb between loop and new_merge_bb.  */
 509   *new_exit_bb = split_edge (single_exit (loop));
 510
 511   new_exit_e = EDGE_SUCC (*new_exit_bb, 0);
 512
 513   for (gsi = gsi_start_phis (update_bb); !gsi_end_p (gsi); gsi_next (&gsi))
 514     {
 515       update_phi = gsi_stmt (gsi);
 516       orig_phi = update_phi;
 517       orig_def = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
 518       /* This loop-closed-phi actually doesn't represent a use
 519          out of the loop - the phi arg is a constant.  */
 520       if (TREE_CODE (orig_def) != SSA_NAME)
 521         continue;
 522       orig_def_new_name = get_current_def (orig_def);
 523       arg = NULL_TREE;
 524
 525       /** 1. Handle new-merge-point phis  **/
 526
 527       /* 1.1. Generate new phi node in NEW_MERGE_BB:  */
 528       new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
 529                                  new_merge_bb);
 530
 531       /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
 532             of LOOP. Set the two PHI args in NEW_PHI for these edges:  */
 533       new_name = orig_def;
 534       new_name2 = NULL_TREE;
 535       if (orig_def_new_name)
 536         {
 537           new_name = orig_def_new_name;
 538           /* Some variables have both loop-entry-phis and loop-exit-phis.
 539              Such variables were given yet newer names by phis placed in
 540              guard_bb by slpeel_update_phi_nodes_for_guard1. I.e:
 541              new_name2 = get_current_def (get_current_def (orig_name)).  */
 542           new_name2 = get_current_def (new_name);
 543         }
 544
 545       if (is_new_loop)
 546         {
 547           guard_arg = orig_def;
 548           loop_arg = new_name;
 549         }
 550       else
 551         {
 552           guard_arg = new_name;
 553           loop_arg = orig_def;
 554         }
 555       if (new_name2)
 556         guard_arg = new_name2;
 557
 558       add_phi_arg (new_phi, loop_arg, new_exit_e);
 559       add_phi_arg (new_phi, guard_arg, guard_edge);
 560
 561       /* 1.3. Update phi in successor block.  */
 562       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == orig_def);
 563       SET_PHI_ARG_DEF (update_phi, e->dest_idx, PHI_RESULT (new_phi));
 564       update_phi2 = new_phi;
 565
 566
 567       /** 2. Handle loop-closed-ssa-form phis  **/
 568
 569       /* 2.1. Generate new phi node in NEW_EXIT_BB:  */
 570       new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
 571                                  *new_exit_bb);
 572
 573       /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop.  */
 574       add_phi_arg (new_phi, loop_arg, single_exit (loop));
 575
 576       /* 2.3. Update phi in successor of NEW_EXIT_BB:  */
 577       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
 578       SET_PHI_ARG_DEF (update_phi2, new_exit_e->dest_idx, PHI_RESULT (new_phi));
 579
 580
 581       /** 3. Handle loop-closed-ssa-form phis for first loop  **/
 582
 583       /* 3.1. Find the relevant names that need an exit-phi in
 584          GUARD_BB, i.e. names for which
 585          slpeel_update_phi_nodes_for_guard1 had not already created a
 586          phi node. This is the case for names that are used outside
 587          the loop (and therefore need an exit phi) but are not updated
 588          across loop iterations (and therefore don't have a
 589          loop-header-phi).
 590
 591          slpeel_update_phi_nodes_for_guard1 is responsible for
 592          creating loop-exit phis in GUARD_BB for names that have a
 593          loop-header-phi.  When such a phi is created we also record
 594          the new name in its current definition.  If this new name
 595          exists, then guard_arg was set to this new name (see 1.2
 596          above).  Therefore, if guard_arg is not this new name, this
 597          is an indication that an exit-phi in GUARD_BB was not yet
 598          created, so we take care of it here.  */
 599       if (guard_arg == new_name2)
 600         continue;
 601       arg = guard_arg;
 602
 603       /* 3.2. Generate new phi node in GUARD_BB:  */
 604       new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
 605                                  guard_edge->src);
 606
 607       /* 3.3. GUARD_BB has one incoming edge:  */
 608       gcc_assert (EDGE_COUNT (guard_edge->src->preds) == 1);
 609       add_phi_arg (new_phi, arg, EDGE_PRED (guard_edge->src, 0));
 610
 611       /* 3.4. Update phi in successor of GUARD_BB:  */
 612       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, guard_edge)
 613                                                                 == guard_arg);
 614       SET_PHI_ARG_DEF (update_phi2, guard_edge->dest_idx, PHI_RESULT (new_phi));
 615     }
 616 }
 617
 618
 619 /* Make the LOOP iterate NITERS times. This is done by adding a new IV
 620    that starts at zero, increases by one and its limit is NITERS.
 621
 622    Assumption: the exit-condition of LOOP is the last stmt in the loop.  */
 623
 624 void
 625 slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
 626 {
 627   tree indx_before_incr, indx_after_incr;
 628   gimple cond_stmt;
 629   gimple orig_cond;
 630   edge exit_edge = single_exit (loop);
 631   gimple_stmt_iterator loop_cond_gsi;
 632   gimple_stmt_iterator incr_gsi;
 633   bool insert_after;
 634   tree init = build_int_cst (TREE_TYPE (niters), 0);
 635   tree step = build_int_cst (TREE_TYPE (niters), 1);
 636   LOC loop_loc;
 637   enum tree_code code;
 638
 639   orig_cond = get_loop_exit_condition (loop);
 640   gcc_assert (orig_cond);
 641   loop_cond_gsi = gsi_for_stmt (orig_cond);
 642
 643   standard_iv_increment_position (loop, &incr_gsi, &insert_after);
 644   create_iv (init, step, NULL_TREE, loop,
 645              &incr_gsi, insert_after, &indx_before_incr, &indx_after_incr);
 646
 647   indx_after_incr = force_gimple_operand_gsi (&loop_cond_gsi, indx_after_incr,
 648                                               true, NULL_TREE, true,
 649                                               GSI_SAME_STMT);
 650   niters = force_gimple_operand_gsi (&loop_cond_gsi, niters, true, NULL_TREE,
 651                                      true, GSI_SAME_STMT);
 652
 653   code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR;
 654   cond_stmt = gimple_build_cond (code, indx_after_incr, niters, NULL_TREE,
 655                                  NULL_TREE);
 656
 657   gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);
 658
 659   /* Remove old loop exit test:  */
 660   gsi_remove (&loop_cond_gsi, true);
 661
 662   loop_loc = find_loop_location (loop);
 663   if (dump_file && (dump_flags & TDF_DETAILS))
 664     {
 665       if (loop_loc != UNKNOWN_LOC)
 666         fprintf (dump_file, "\nloop at %s:%d: ",
 667                  LOC_FILE (loop_loc), LOC_LINE (loop_loc));
 668       print_gimple_stmt (dump_file, cond_stmt, 0, TDF_SLIM);
 669     }
 670
 671   loop->nb_iterations = niters;
 672 }
 673
 674
 675 /* Given LOOP this function generates a new copy of it and puts it
 676    on E which is either the entry or exit of LOOP.  */
 677
 678 struct loop *
 679 slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, edge e)
 680 {
 681   struct loop *new_loop;
 682   basic_block *new_bbs, *bbs;
 683   bool at_exit;
 684   bool was_imm_dom;
 685   basic_block exit_dest;
 686   gimple phi;
 687   tree phi_arg;
 688   edge exit, new_exit;
 689   gimple_stmt_iterator gsi;
 690
 691   at_exit = (e == single_exit (loop));
 692   if (!at_exit && e != loop_preheader_edge (loop))
 693     return NULL;
 694
 695   bbs = get_loop_body (loop);
 696
 697   /* Check whether duplication is possible.  */
 698   if (!can_copy_bbs_p (bbs, loop->num_nodes))
 699     {
 700       free (bbs);
 701       return NULL;
 702     }
 703
 704   /* Generate new loop structure.  */
 705   new_loop = duplicate_loop (loop, loop_outer (loop));
 706   if (!new_loop)
 707     {
 708       free (bbs);
 709       return NULL;
 710     }
 711
 712   exit_dest = single_exit (loop)->dest;
 713   was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
 714                                           exit_dest) == loop->header ?
 715                  true : false);
 716
 717   new_bbs = XNEWVEC (basic_block, loop->num_nodes);
 718
 719   exit = single_exit (loop);
 720   copy_bbs (bbs, loop->num_nodes, new_bbs,
 721             &exit, 1, &new_exit, NULL,
 722             e->src);
 723
 724   /* Duplicating phi args at exit bbs as coming
 725      also from exit of duplicated loop.  */
 726   for (gsi = gsi_start_phis (exit_dest); !gsi_end_p (gsi); gsi_next (&gsi))
 727     {
 728       phi = gsi_stmt (gsi);
 729       phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, single_exit (loop));
 730       if (phi_arg)
 731         {
 732           edge new_loop_exit_edge;
 733
 734           if (EDGE_SUCC (new_loop->header, 0)->dest == new_loop->latch)
 735             new_loop_exit_edge = EDGE_SUCC (new_loop->header, 1);
 736           else
 737             new_loop_exit_edge = EDGE_SUCC (new_loop->header, 0);
 738
 739           add_phi_arg (phi, phi_arg, new_loop_exit_edge);
 740         }
 741     }
 742
 743   if (at_exit) /* Add the loop copy at exit.  */
 744     {
 745       redirect_edge_and_branch_force (e, new_loop->header);
 746       PENDING_STMT (e) = NULL;
 747       set_immediate_dominator (CDI_DOMINATORS, new_loop->header, e->src);
 748       if (was_imm_dom)
 749         set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_loop->header);
 750     }
 751   else /* Add the copy at entry.  */
 752     {
 753       edge new_exit_e;
 754       edge entry_e = loop_preheader_edge (loop);
 755       basic_block preheader = entry_e->src;
 756
 757       if (!flow_bb_inside_loop_p (new_loop,
 758                                   EDGE_SUCC (new_loop->header, 0)->dest))
 759         new_exit_e = EDGE_SUCC (new_loop->header, 0);
 760       else
 761         new_exit_e = EDGE_SUCC (new_loop->header, 1);
 762
 763       redirect_edge_and_branch_force (new_exit_e, loop->header);
 764       PENDING_STMT (new_exit_e) = NULL;
 765       set_immediate_dominator (CDI_DOMINATORS, loop->header,
 766                                new_exit_e->src);
 767
 768       /* We have to add phi args to the loop->header here as coming
 769          from new_exit_e edge.  */
 770       for (gsi = gsi_start_phis (loop->header);
 771            !gsi_end_p (gsi);
 772            gsi_next (&gsi))
 773         {
 774           phi = gsi_stmt (gsi);
 775           phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, entry_e);
 776           if (phi_arg)
 777             add_phi_arg (phi, phi_arg, new_exit_e);
 778         }
 779
 780       redirect_edge_and_branch_force (entry_e, new_loop->header);
 781       PENDING_STMT (entry_e) = NULL;
 782       set_immediate_dominator (CDI_DOMINATORS, new_loop->header, preheader);
 783     }
 784
 785   free (new_bbs);
 786   free (bbs);
 787
 788   return new_loop;
 789 }
 790
 791
 792 /* Given the condition statement COND, put it as the last statement
 793    of GUARD_BB; EXIT_BB is the basic block to skip the loop;
 794    Assumes that this is the single exit of the guarded loop.
 795    Returns the skip edge.  */
 796
 797 static edge
 798 slpeel_add_loop_guard (basic_block guard_bb, tree cond, basic_block exit_bb,
 799                        basic_block dom_bb)
 800 {
 801   gimple_stmt_iterator gsi;
 802   edge new_e, enter_e;
 803   gimple cond_stmt;
 804   gimple_seq gimplify_stmt_list = NULL;
 805
 806   enter_e = EDGE_SUCC (guard_bb, 0);
 807   enter_e->flags &= ~EDGE_FALLTHRU;
 808   enter_e->flags |= EDGE_FALSE_VALUE;
 809   gsi = gsi_last_bb (guard_bb);
 810
 811   cond = force_gimple_operand (cond, &gimplify_stmt_list, true, NULL_TREE);
 812   cond_stmt = gimple_build_cond (NE_EXPR,
 813                                  cond, build_int_cst (TREE_TYPE (cond), 0),
 814                                  NULL_TREE, NULL_TREE);
 815   if (gimplify_stmt_list)
 816     gsi_insert_seq_after (&gsi, gimplify_stmt_list, GSI_NEW_STMT);
 817
 818   gsi = gsi_last_bb (guard_bb);
 819   gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
 820
 821   /* Add new edge to connect guard block to the merge/loop-exit block.  */
 822   new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);
 823   set_immediate_dominator (CDI_DOMINATORS, exit_bb, dom_bb);
 824   return new_e;
 825 }
 826
 827
 828 /* This function verifies that the following restrictions apply to LOOP:
 829    (1) it is innermost
 830    (2) it consists of exactly 2 basic blocks - header, and an empty latch.
 831    (3) it is single entry, single exit
 832    (4) its exit condition is the last stmt in the header
 833    (5) E is the entry/exit edge of LOOP.
 834  */
 835
 836 bool
 837 slpeel_can_duplicate_loop_p (const struct loop *loop, const_edge e)
 838 {
 839   edge exit_e = single_exit (loop);
 840   edge entry_e = loop_preheader_edge (loop);
 841   gimple orig_cond = get_loop_exit_condition (loop);
 842   gimple_stmt_iterator loop_exit_gsi = gsi_last_bb (exit_e->src);
 843
 844   if (need_ssa_update_p ())
 845     return false;
 846
 847   if (loop->inner
 848       /* All loops have an outer scope; the only case loop->outer is NULL is for
 849          the function itself.  */
 850       || !loop_outer (loop)
 851       || loop->num_nodes != 2
 852       || !empty_block_p (loop->latch)
 853       || !single_exit (loop)
 854       /* Verify that new loop exit condition can be trivially modified.  */
 855       || (!orig_cond || orig_cond != gsi_stmt (loop_exit_gsi))
 856       || (e != exit_e && e != entry_e))
 857     return false;
 858
 859   return true;
 860 }
 861
 862 #ifdef ENABLE_CHECKING
 863 static void
 864 slpeel_verify_cfg_after_peeling (struct loop *first_loop,
 865                                  struct loop *second_loop)
 866 {
 867   basic_block loop1_exit_bb = single_exit (first_loop)->dest;
 868   basic_block loop2_entry_bb = loop_preheader_edge (second_loop)->src;
 869   basic_block loop1_entry_bb = loop_preheader_edge (first_loop)->src;
 870
 871   /* A guard that controls whether the second_loop is to be executed or skipped
 872      is placed in first_loop->exit.  first_loop->exit therefore has two
 873      successors - one is the preheader of second_loop, and the other is a bb
 874      after second_loop.
 875    */
 876   gcc_assert (EDGE_COUNT (loop1_exit_bb->succs) == 2);
 877
 878   /* 1. Verify that one of the successors of first_loop->exit is the preheader
 879         of second_loop.  */
 880
 881   /* The preheader of new_loop is expected to have two predecessors:
 882      first_loop->exit and the block that precedes first_loop.  */
 883
 884   gcc_assert (EDGE_COUNT (loop2_entry_bb->preds) == 2
 885               && ((EDGE_PRED (loop2_entry_bb, 0)->src == loop1_exit_bb
 886                    && EDGE_PRED (loop2_entry_bb, 1)->src == loop1_entry_bb)
 887                || (EDGE_PRED (loop2_entry_bb, 1)->src ==  loop1_exit_bb
 888                    && EDGE_PRED (loop2_entry_bb, 0)->src == loop1_entry_bb)));
 889
 890   /* Verify that the other successor of first_loop->exit is after the
 891      second_loop.  */
 892   /* TODO */
 893 }
 894 #endif
 895
 896 /* If the run time cost model check determines that vectorization is
 897    not profitable and hence scalar loop should be generated then set
 898    FIRST_NITERS to prologue peeled iterations. This will allow all the
 899    iterations to be executed in the prologue peeled scalar loop.  */
 900
 901 static void
 902 set_prologue_iterations (basic_block bb_before_first_loop,
 903                          tree first_niters,
 904                          struct loop *loop,
 905                          unsigned int th)
 906 {
 907   edge e;
 908   basic_block cond_bb, then_bb;
 909   tree var, prologue_after_cost_adjust_name;
 910   gimple_stmt_iterator gsi;
 911   gimple newphi;
 912   edge e_true, e_false, e_fallthru;
 913   gimple cond_stmt;
 914   gimple_seq gimplify_stmt_list = NULL, stmts = NULL;
 915   tree cost_pre_condition = NULL_TREE;
 916   tree scalar_loop_iters =
 917     unshare_expr (LOOP_VINFO_NITERS_UNCHANGED (loop_vec_info_for_loop (loop)));
 918
 919   e = single_pred_edge (bb_before_first_loop);
 920   cond_bb = split_edge(e);
 921
 922   e = single_pred_edge (bb_before_first_loop);
 923   then_bb = split_edge(e);
 924   set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
 925
 926   e_false = make_single_succ_edge (cond_bb, bb_before_first_loop,
 927                                    EDGE_FALSE_VALUE);
 928   set_immediate_dominator (CDI_DOMINATORS, bb_before_first_loop, cond_bb);
 929
 930   e_true = EDGE_PRED (then_bb, 0);
 931   e_true->flags &= ~EDGE_FALLTHRU;
 932   e_true->flags |= EDGE_TRUE_VALUE;
 933
 934   e_fallthru = EDGE_SUCC (then_bb, 0);
 935
 936   cost_pre_condition =
 937     fold_build2 (LE_EXPR, boolean_type_node, scalar_loop_iters,
 938                  build_int_cst (TREE_TYPE (scalar_loop_iters), th));
 939   cost_pre_condition =
 940     force_gimple_operand (cost_pre_condition, &gimplify_stmt_list,
 941                           true, NULL_TREE);
 942   cond_stmt = gimple_build_cond (NE_EXPR, cost_pre_condition,
 943                                  build_int_cst (TREE_TYPE (cost_pre_condition),
 944                                                 0), NULL_TREE, NULL_TREE);
 945
 946   gsi = gsi_last_bb (cond_bb);
 947   if (gimplify_stmt_list)
 948     gsi_insert_seq_after (&gsi, gimplify_stmt_list, GSI_NEW_STMT);
 949
 950   gsi = gsi_last_bb (cond_bb);
 951   gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
 952
 953   var = create_tmp_var (TREE_TYPE (scalar_loop_iters),
 954                         "prologue_after_cost_adjust");
 955   add_referenced_var (var);
 956   prologue_after_cost_adjust_name =
 957     force_gimple_operand (scalar_loop_iters, &stmts, false, var);
 958
 959   gsi = gsi_last_bb (then_bb);
 960   if (stmts)
 961     gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT);
 962
 963   newphi = create_phi_node (var, bb_before_first_loop);
 964   add_phi_arg (newphi, prologue_after_cost_adjust_name, e_fallthru);
 965   add_phi_arg (newphi, first_niters, e_false);
 966
 967   first_niters = PHI_RESULT (newphi);
 968 }
 969
 970
 971 /* Function slpeel_tree_peel_loop_to_edge.
 972
 973    Peel the first (last) iterations of LOOP into a new prolog (epilog) loop
 974    that is placed on the entry (exit) edge E of LOOP. After this transformation
 975    we have two loops one after the other - first-loop iterates FIRST_NITERS
 976    times, and second-loop iterates the remainder NITERS - FIRST_NITERS times.
 977    If the cost model indicates that it is profitable to emit a scalar
 978    loop instead of the vector one, then the prolog (epilog) loop will iterate
 979    for the entire unchanged scalar iterations of the loop.
 980
 981    Input:
 982    - LOOP: the loop to be peeled.
 983    - E: the exit or entry edge of LOOP.
 984         If it is the entry edge, we peel the first iterations of LOOP. In this
 985         case first-loop is LOOP, and second-loop is the newly created loop.
 986         If it is the exit edge, we peel the last iterations of LOOP. In this
 987         case, first-loop is the newly created loop, and second-loop is LOOP.
 988    - NITERS: the number of iterations that LOOP iterates.
 989    - FIRST_NITERS: the number of iterations that the first-loop should iterate.
 990    - UPDATE_FIRST_LOOP_COUNT:  specified whether this function is responsible
 991         for updating the loop bound of the first-loop to FIRST_NITERS.  If it
 992         is false, the caller of this function may want to take care of this
 993         (this can be useful if we don't want new stmts added to first-loop).
 994    - TH: cost model profitability threshold of iterations for vectorization.
 995    - CHECK_PROFITABILITY: specify whether cost model check has not occurred
 996                           during versioning and hence needs to occur during
 997                           prologue generation or whether cost model check
 998                           has not occurred during prologue generation and hence
 999                           needs to occur during epilogue generation.
1000
1001
1002    Output:
1003    The function returns a pointer to the new loop-copy, or NULL if it failed
1004    to perform the transformation.
1005
1006    The function generates two if-then-else guards: one before the first loop,
1007    and the other before the second loop:
1008    The first guard is:
1009      if (FIRST_NITERS == 0) then skip the first loop,
1010      and go directly to the second loop.
1011    The second guard is:
1012      if (FIRST_NITERS == NITERS) then skip the second loop.
1013
1014    FORNOW only simple loops are supported (see slpeel_can_duplicate_loop_p).
1015    FORNOW the resulting code will not be in loop-closed-ssa form.
1016 */
1017
1018 static struct loop*
1019 slpeel_tree_peel_loop_to_edge (struct loop *loop,
1020                                edge e, tree first_niters,
1021                                tree niters, bool update_first_loop_count,
1022                                unsigned int th, bool check_profitability)
1023 {
1024   struct loop *new_loop = NULL, *first_loop, *second_loop;
1025   edge skip_e;
1026   tree pre_condition = NULL_TREE;
1027   bitmap definitions;
1028   basic_block bb_before_second_loop, bb_after_second_loop;
1029   basic_block bb_before_first_loop;
1030   basic_block bb_between_loops;
1031   basic_block new_exit_bb;
1032   edge exit_e = single_exit (loop);
1033   LOC loop_loc;
1034   tree cost_pre_condition = NULL_TREE;
1035
1036   if (!slpeel_can_duplicate_loop_p (loop, e))
1037     return NULL;
1038
1039   /* We have to initialize cfg_hooks. Then, when calling
1040    cfg_hooks->split_edge, the function tree_split_edge
1041    is actually called and, when calling cfg_hooks->duplicate_block,
1042    the function tree_duplicate_bb is called.  */
1043   gimple_register_cfg_hooks ();
1044
1045
1046   /* 1. Generate a copy of LOOP and put it on E (E is the entry/exit of LOOP).
1047         Resulting CFG would be:
1048
1049         first_loop:
1050         do {
1051         } while ...
1052
1053         second_loop:
1054         do {
1055         } while ...
1056
1057         orig_exit_bb:
1058    */
1059
1060   if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, e)))
1061     {
1062       loop_loc = find_loop_location (loop);
1063       if (dump_file && (dump_flags & TDF_DETAILS))
1064         {
1065           if (loop_loc != UNKNOWN_LOC)
1066             fprintf (dump_file, "\n%s:%d: note: ",
1067                      LOC_FILE (loop_loc), LOC_LINE (loop_loc));
1068           fprintf (dump_file, "tree_duplicate_loop_to_edge_cfg failed.\n");
1069         }
1070       return NULL;
1071     }
1072
1073   if (e == exit_e)
1074     {
1075       /* NEW_LOOP was placed after LOOP.  */
1076       first_loop = loop;
1077       second_loop = new_loop;
1078     }
1079   else
1080     {
1081       /* NEW_LOOP was placed before LOOP.  */
1082       first_loop = new_loop;
1083       second_loop = loop;
1084     }
1085
1086   definitions = ssa_names_to_replace ();
1087   slpeel_update_phis_for_duplicate_loop (loop, new_loop, e == exit_e);
1088   rename_variables_in_loop (new_loop);
1089
1090
1091   /* 2.  Add the guard code in one of the following ways:
1092
1093      2.a Add the guard that controls whether the first loop is executed.
1094          This occurs when this function is invoked for prologue or epilogue
1095          generation and when the cost model check can be done at compile time.
1096
1097          Resulting CFG would be:
1098
1099          bb_before_first_loop:
1100          if (FIRST_NITERS == 0) GOTO bb_before_second_loop
1101                                 GOTO first-loop
1102
1103          first_loop:
1104          do {
1105          } while ...
1106
1107          bb_before_second_loop:
1108
1109          second_loop:
1110          do {
1111          } while ...
1112
1113          orig_exit_bb:
1114
1115      2.b Add the cost model check that allows the prologue
1116          to iterate for the entire unchanged scalar
1117          iterations of the loop in the event that the cost
1118          model indicates that the scalar loop is more
1119          profitable than the vector one. This occurs when
1120          this function is invoked for prologue generation
1121          and the cost model check needs to be done at run
1122          time.
1123
1124          Resulting CFG after prologue peeling would be:
1125
1126          if (scalar_loop_iterations <= th)
1127            FIRST_NITERS = scalar_loop_iterations
1128
1129          bb_before_first_loop:
1130          if (FIRST_NITERS == 0) GOTO bb_before_second_loop
1131                                 GOTO first-loop
1132
1133          first_loop:
1134          do {
1135          } while ...
1136
1137          bb_before_second_loop:
1138
1139          second_loop:
1140          do {
1141          } while ...
1142
1143          orig_exit_bb:
1144
1145      2.c Add the cost model check that allows the epilogue
1146          to iterate for the entire unchanged scalar
1147          iterations of the loop in the event that the cost
1148          model indicates that the scalar loop is more
1149          profitable than the vector one. This occurs when
1150          this function is invoked for epilogue generation
1151          and the cost model check needs to be done at run
1152          time.
1153
1154          Resulting CFG after prologue peeling would be:
1155
1156          bb_before_first_loop:
1157          if ((scalar_loop_iterations <= th)
1158              ||
1159              FIRST_NITERS == 0) GOTO bb_before_second_loop
1160                                 GOTO first-loop
1161
1162          first_loop:
1163          do {
1164          } while ...
1165
1166          bb_before_second_loop:
1167
1168          second_loop:
1169          do {
1170          } while ...
1171
1172          orig_exit_bb:
1173   */
1174
1175   bb_before_first_loop = split_edge (loop_preheader_edge (first_loop));
1176   bb_before_second_loop = split_edge (single_exit (first_loop));
1177
1178   /* Epilogue peeling.  */
1179   if (!update_first_loop_count)
1180     {
1181       pre_condition =
1182         fold_build2 (LE_EXPR, boolean_type_node, first_niters,
1183                      build_int_cst (TREE_TYPE (first_niters), 0));
1184       if (check_profitability)
1185         {
1186           tree scalar_loop_iters
1187             = unshare_expr (LOOP_VINFO_NITERS_UNCHANGED
1188                                         (loop_vec_info_for_loop (loop)));
1189           cost_pre_condition =
1190             fold_build2 (LE_EXPR, boolean_type_node, scalar_loop_iters,
1191                          build_int_cst (TREE_TYPE (scalar_loop_iters), th));
1192
1193           pre_condition = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
1194                                        cost_pre_condition, pre_condition);
1195         }
1196     }
1197
1198   /* Prologue peeling.  */
1199   else
1200     {
1201       if (check_profitability)
1202         set_prologue_iterations (bb_before_first_loop, first_niters,
1203                                  loop, th);
1204
1205       pre_condition =
1206         fold_build2 (LE_EXPR, boolean_type_node, first_niters,
1207                      build_int_cst (TREE_TYPE (first_niters), 0));
1208     }
1209
1210   skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
1211                                   bb_before_second_loop, bb_before_first_loop);
1212   slpeel_update_phi_nodes_for_guard1 (skip_e, first_loop,
1213                                       first_loop == new_loop,
1214                                       &new_exit_bb, &definitions);
1215
1216
1217   /* 3. Add the guard that controls whether the second loop is executed.
1218         Resulting CFG would be:
1219
1220         bb_before_first_loop:
1221         if (FIRST_NITERS == 0) GOTO bb_before_second_loop (skip first loop)
1222                                GOTO first-loop
1223
1224         first_loop:
1225         do {
1226         } while ...
1227
1228         bb_between_loops:
1229         if (FIRST_NITERS == NITERS) GOTO bb_after_second_loop (skip second loop)
1230                                     GOTO bb_before_second_loop
1231
1232         bb_before_second_loop:
1233
1234         second_loop:
1235         do {
1236         } while ...
1237
1238         bb_after_second_loop:
1239
1240         orig_exit_bb:
1241    */
1242
1243   bb_between_loops = new_exit_bb;
1244   bb_after_second_loop = split_edge (single_exit (second_loop));
1245
1246   pre_condition =
1247         fold_build2 (EQ_EXPR, boolean_type_node, first_niters, niters);
1248   skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition,
1249                                   bb_after_second_loop, bb_before_first_loop);
1250   slpeel_update_phi_nodes_for_guard2 (skip_e, second_loop,
1251                                      second_loop == new_loop, &new_exit_bb);
1252
1253   /* 4. Make first-loop iterate FIRST_NITERS times, if requested.
1254    */
1255   if (update_first_loop_count)
1256     slpeel_make_loop_iterate_ntimes (first_loop, first_niters);
1257
1258   BITMAP_FREE (definitions);
1259   delete_update_ssa ();
1260
1261   return new_loop;
1262 }
1263
1264 /* Function vect_get_loop_location.
1265
1266    Extract the location of the loop in the source code.
1267    If the loop is not well formed for vectorization, an estimated
1268    location is calculated.
1269    Return the loop location if succeed and NULL if not.  */
1270
1271 LOC
1272 find_loop_location (struct loop *loop)
1273 {
1274   gimple stmt = NULL;
1275   basic_block bb;
1276   gimple_stmt_iterator si;
1277
1278   if (!loop)
1279     return UNKNOWN_LOC;
1280
1281   stmt = get_loop_exit_condition (loop);
1282
1283   if (stmt && gimple_location (stmt) != UNKNOWN_LOC)
1284     return gimple_location (stmt);
1285
1286   /* If we got here the loop is probably not "well formed",
1287      try to estimate the loop location */
1288
1289   if (!loop->header)
1290     return UNKNOWN_LOC;
1291
1292   bb = loop->header;
1293
1294   for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1295     {
1296       stmt = gsi_stmt (si);
1297       if (gimple_location (stmt) != UNKNOWN_LOC)
1298         return gimple_location (stmt);
1299     }
1300
1301   return UNKNOWN_LOC;
1302 }
1303
1304
1305 /* This function builds ni_name = number of iterations loop executes
1306    on the loop preheader.  */
1307
1308 static tree
1309 vect_build_loop_niters (loop_vec_info loop_vinfo)
1310 {
1311   tree ni_name, var;
1312   gimple_seq stmts = NULL;
1313   edge pe;
1314   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1315   tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
1316
1317   var = create_tmp_var (TREE_TYPE (ni), "niters");
1318   add_referenced_var (var);
1319   ni_name = force_gimple_operand (ni, &stmts, false, var);
1320
1321   pe = loop_preheader_edge (loop);
1322   if (stmts)
1323     {
1324       basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
1325       gcc_assert (!new_bb);
1326     }
1327
1328   return ni_name;
1329 }
1330
1331
1332 /* This function generates the following statements:
1333
1334  ni_name = number of iterations loop executes
1335  ratio = ni_name / vf
1336  ratio_mult_vf_name = ratio * vf
1337
1338  and places them at the loop preheader edge.  */
1339
1340 static void
1341 vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
1342                                  tree *ni_name_ptr,
1343                                  tree *ratio_mult_vf_name_ptr,
1344                                  tree *ratio_name_ptr)
1345 {
1346
1347   edge pe;
1348   basic_block new_bb;
1349   gimple_seq stmts;
1350   tree ni_name;
1351   tree var;
1352   tree ratio_name;
1353   tree ratio_mult_vf_name;
1354   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1355   tree ni = LOOP_VINFO_NITERS (loop_vinfo);
1356   int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1357   tree log_vf;
1358
1359   pe = loop_preheader_edge (loop);
1360
1361   /* Generate temporary variable that contains
1362      number of iterations loop executes.  */
1363
1364   ni_name = vect_build_loop_niters (loop_vinfo);
1365   log_vf = build_int_cst (TREE_TYPE (ni), exact_log2 (vf));
1366
1367   /* Create: ratio = ni >> log2(vf) */
1368
1369   ratio_name = fold_build2 (RSHIFT_EXPR, TREE_TYPE (ni_name), ni_name, log_vf);
1370   if (!is_gimple_val (ratio_name))
1371     {
1372       var = create_tmp_var (TREE_TYPE (ni), "bnd");
1373       add_referenced_var (var);
1374
1375       stmts = NULL;
1376       ratio_name = force_gimple_operand (ratio_name, &stmts, true, var);
1377       pe = loop_preheader_edge (loop);
1378       new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
1379       gcc_assert (!new_bb);
1380     }
1381
1382   /* Create: ratio_mult_vf = ratio << log2 (vf).  */
1383
1384   ratio_mult_vf_name = fold_build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name),
1385                                     ratio_name, log_vf);
1386   if (!is_gimple_val (ratio_mult_vf_name))
1387     {
1388       var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
1389       add_referenced_var (var);
1390
1391       stmts = NULL;
1392       ratio_mult_vf_name = force_gimple_operand (ratio_mult_vf_name, &stmts,
1393                                                  true, var);
1394       pe = loop_preheader_edge (loop);
1395       new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
1396       gcc_assert (!new_bb);
1397     }
1398
1399   *ni_name_ptr = ni_name;
1400   *ratio_mult_vf_name_ptr = ratio_mult_vf_name;
1401   *ratio_name_ptr = ratio_name;
1402
1403   return;
1404 }
1405
1406 /* Function vect_can_advance_ivs_p
1407
1408    In case the number of iterations that LOOP iterates is unknown at compile
1409    time, an epilog loop will be generated, and the loop induction variables
1410    (IVs) will be "advanced" to the value they are supposed to take just before
1411    the epilog loop.  Here we check that the access function of the loop IVs
1412    and the expression that represents the loop bound are simple enough.
1413    These restrictions will be relaxed in the future.  */
1414
1415 bool
1416 vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
1417 {
1418   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1419   basic_block bb = loop->header;
1420   gimple phi;
1421   gimple_stmt_iterator gsi;
1422
1423   /* Analyze phi functions of the loop header.  */
1424
1425   if (vect_print_dump_info (REPORT_DETAILS))
1426     fprintf (vect_dump, "vect_can_advance_ivs_p:");
1427
1428   for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1429     {
1430       tree access_fn = NULL;
1431       tree evolution_part;
1432
1433       phi = gsi_stmt (gsi);
1434       if (vect_print_dump_info (REPORT_DETAILS))
1435         {
1436           fprintf (vect_dump, "Analyze phi: ");
1437           print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
1438         }
1439
1440       /* Skip virtual phi's. The data dependences that are associated with
1441          virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */
1442
1443       if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
1444         {
1445           if (vect_print_dump_info (REPORT_DETAILS))
1446             fprintf (vect_dump, "virtual phi. skip.");
1447           continue;
1448         }
1449
1450       /* Skip reduction phis.  */
1451
1452       if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
1453         {
1454           if (vect_print_dump_info (REPORT_DETAILS))
1455             fprintf (vect_dump, "reduc phi. skip.");
1456           continue;
1457         }
1458
1459       /* Analyze the evolution function.  */
1460
1461       access_fn = instantiate_parameters
1462         (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
1463
1464       if (!access_fn)
1465         {
1466           if (vect_print_dump_info (REPORT_DETAILS))
1467             fprintf (vect_dump, "No Access function.");
1468           return false;
1469         }
1470
1471       if (vect_print_dump_info (REPORT_DETAILS))
1472         {
1473           fprintf (vect_dump, "Access function of PHI: ");
1474           print_generic_expr (vect_dump, access_fn, TDF_SLIM);
1475         }
1476
1477       evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
1478
1479       if (evolution_part == NULL_TREE)
1480         {
1481           if (vect_print_dump_info (REPORT_DETAILS))
1482             fprintf (vect_dump, "No evolution.");
1483           return false;
1484         }
1485
1486       /* FORNOW: We do not transform initial conditions of IVs
1487          which evolution functions are a polynomial of degree >= 2.  */
1488
1489       if (tree_is_chrec (evolution_part))
1490         return false;
1491     }
1492
1493   return true;
1494 }
1495
1496
1497 /*   Function vect_update_ivs_after_vectorizer.
1498
1499      "Advance" the induction variables of LOOP to the value they should take
1500      after the execution of LOOP.  This is currently necessary because the
1501      vectorizer does not handle induction variables that are used after the
1502      loop.  Such a situation occurs when the last iterations of LOOP are
1503      peeled, because:
1504      1. We introduced new uses after LOOP for IVs that were not originally used
1505         after LOOP: the IVs of LOOP are now used by an epilog loop.
1506      2. LOOP is going to be vectorized; this means that it will iterate N/VF
1507         times, whereas the loop IVs should be bumped N times.
1508
1509      Input:
1510      - LOOP - a loop that is going to be vectorized. The last few iterations
1511               of LOOP were peeled.
1512      - NITERS - the number of iterations that LOOP executes (before it is
1513                 vectorized). i.e, the number of times the ivs should be bumped.
1514      - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
1515                   coming out from LOOP on which there are uses of the LOOP ivs
1516                   (this is the path from LOOP->exit to epilog_loop->preheader).
1517
1518                   The new definitions of the ivs are placed in LOOP->exit.
1519                   The phi args associated with the edge UPDATE_E in the bb
1520                   UPDATE_E->dest are updated accordingly.
1521
1522      Assumption 1: Like the rest of the vectorizer, this function assumes
1523      a single loop exit that has a single predecessor.
1524
1525      Assumption 2: The phi nodes in the LOOP header and in update_bb are
1526      organized in the same order.
1527
1528      Assumption 3: The access function of the ivs is simple enough (see
1529      vect_can_advance_ivs_p).  This assumption will be relaxed in the future.
1530
1531      Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
1532      coming out of LOOP on which the ivs of LOOP are used (this is the path
1533      that leads to the epilog loop; other paths skip the epilog loop).  This
1534      path starts with the edge UPDATE_E, and its destination (denoted update_bb)
1535      needs to have its phis updated.
1536  */
1537
1538 static void
1539 vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters,
1540                                   edge update_e)
1541 {
1542   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1543   basic_block exit_bb = single_exit (loop)->dest;
1544   gimple phi, phi1;
1545   gimple_stmt_iterator gsi, gsi1;
1546   basic_block update_bb = update_e->dest;
1547
1548   /* gcc_assert (vect_can_advance_ivs_p (loop_vinfo)); */
1549
1550   /* Make sure there exists a single-predecessor exit bb:  */
1551   gcc_assert (single_pred_p (exit_bb));
1552
1553   for (gsi = gsi_start_phis (loop->header), gsi1 = gsi_start_phis (update_bb);
1554        !gsi_end_p (gsi) && !gsi_end_p (gsi1);
1555        gsi_next (&gsi), gsi_next (&gsi1))
1556     {
1557       tree access_fn = NULL;
1558       tree evolution_part;
1559       tree init_expr;
1560       tree step_expr;
1561       tree var, ni, ni_name;
1562       gimple_stmt_iterator last_gsi;
1563
1564       phi = gsi_stmt (gsi);
1565       phi1 = gsi_stmt (gsi1);
1566       if (vect_print_dump_info (REPORT_DETAILS))
1567         {
1568           fprintf (vect_dump, "vect_update_ivs_after_vectorizer: phi: ");
1569           print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
1570         }
1571
1572       /* Skip virtual phi's.  */
1573       if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
1574         {
1575           if (vect_print_dump_info (REPORT_DETAILS))
1576             fprintf (vect_dump, "virtual phi. skip.");
1577           continue;
1578         }
1579
1580       /* Skip reduction phis.  */
1581       if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
1582         {
1583           if (vect_print_dump_info (REPORT_DETAILS))
1584             fprintf (vect_dump, "reduc phi. skip.");
1585           continue;
1586         }
1587
1588       access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi));
1589       gcc_assert (access_fn);
1590       STRIP_NOPS (access_fn);
1591       evolution_part =
1592          unshare_expr (evolution_part_in_loop_num (access_fn, loop->num));
1593       gcc_assert (evolution_part != NULL_TREE);
1594
1595       /* FORNOW: We do not support IVs whose evolution function is a polynomial
1596          of degree >= 2 or exponential.  */
1597       gcc_assert (!tree_is_chrec (evolution_part));
1598
1599       step_expr = evolution_part;
1600       init_expr = unshare_expr (initial_condition_in_loop_num (access_fn,
1601                                                                loop->num));
1602
1603       if (POINTER_TYPE_P (TREE_TYPE (init_expr)))
1604         ni = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (init_expr),
1605                           init_expr,
1606                           fold_convert (sizetype,
1607                                         fold_build2 (MULT_EXPR, TREE_TYPE (niters),
1608                                                      niters, step_expr)));
1609       else
1610         ni = fold_build2 (PLUS_EXPR, TREE_TYPE (init_expr),
1611                           fold_build2 (MULT_EXPR, TREE_TYPE (init_expr),
1612                                        fold_convert (TREE_TYPE (init_expr),
1613                                                      niters),
1614                                        step_expr),
1615                           init_expr);
1616
1617
1618
1619       var = create_tmp_var (TREE_TYPE (init_expr), "tmp");
1620       add_referenced_var (var);
1621
1622       last_gsi = gsi_last_bb (exit_bb);
1623       ni_name = force_gimple_operand_gsi (&last_gsi, ni, false, var,
1624                                           true, GSI_SAME_STMT);
1625
1626       /* Fix phi expressions in the successor bb.  */
1627       SET_PHI_ARG_DEF (phi1, update_e->dest_idx, ni_name);
1628     }
1629 }
1630
1631 /* Return the more conservative threshold between the
1632    min_profitable_iters returned by the cost model and the user
1633    specified threshold, if provided.  */
1634
1635 static unsigned int
1636 conservative_cost_threshold (loop_vec_info loop_vinfo,
1637                              int min_profitable_iters)
1638 {
1639   unsigned int th;
1640   int min_scalar_loop_bound;
1641
1642   min_scalar_loop_bound = ((PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)
1643                             * LOOP_VINFO_VECT_FACTOR (loop_vinfo)) - 1);
1644
1645   /* Use the cost model only if it is more conservative than user specified
1646      threshold.  */
1647   th = (unsigned) min_scalar_loop_bound;
1648   if (min_profitable_iters
1649       && (!min_scalar_loop_bound
1650           || min_profitable_iters > min_scalar_loop_bound))
1651     th = (unsigned) min_profitable_iters;
1652
1653   if (th && vect_print_dump_info (REPORT_COST))
1654     fprintf (vect_dump, "Vectorization may not be profitable.");
1655
1656   return th;
1657 }
1658
1659 /* Function vect_do_peeling_for_loop_bound
1660
1661    Peel the last iterations of the loop represented by LOOP_VINFO.
1662    The peeled iterations form a new epilog loop.  Given that the loop now
1663    iterates NITERS times, the new epilog loop iterates
1664    NITERS % VECTORIZATION_FACTOR times.
1665
1666    The original loop will later be made to iterate
1667    NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO).  */
1668
1669 void
1670 vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio)
1671 {
1672   tree ni_name, ratio_mult_vf_name;
1673   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1674   struct loop *new_loop;
1675   edge update_e;
1676   basic_block preheader;
1677   int loop_num;
1678   bool check_profitability = false;
1679   unsigned int th = 0;
1680   int min_profitable_iters;
1681
1682   if (vect_print_dump_info (REPORT_DETAILS))
1683     fprintf (vect_dump, "=== vect_do_peeling_for_loop_bound ===");
1684
1685   initialize_original_copy_tables ();
1686
1687   /* Generate the following variables on the preheader of original loop:
1688
1689      ni_name = number of iteration the original loop executes
1690      ratio = ni_name / vf
1691      ratio_mult_vf_name = ratio * vf  */
1692   vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
1693                                    &ratio_mult_vf_name, ratio);
1694
1695   loop_num  = loop->num;
1696
1697   /* If cost model check not done during versioning and
1698      peeling for alignment.  */
1699   if (!VEC_length (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
1700       && !VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo))
1701       && !LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
1702     {
1703       check_profitability = true;
1704
1705       /* Get profitability threshold for vectorized loop.  */
1706       min_profitable_iters = LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo);
1707
1708       th = conservative_cost_threshold (loop_vinfo,
1709                                         min_profitable_iters);
1710     }
1711
1712   new_loop = slpeel_tree_peel_loop_to_edge (loop, single_exit (loop),
1713                                             ratio_mult_vf_name, ni_name, false,
1714                                             th, check_profitability);
1715   gcc_assert (new_loop);
1716   gcc_assert (loop_num == loop->num);
1717 #ifdef ENABLE_CHECKING
1718   slpeel_verify_cfg_after_peeling (loop, new_loop);
1719 #endif
1720
1721   /* A guard that controls whether the new_loop is to be executed or skipped
1722      is placed in LOOP->exit.  LOOP->exit therefore has two successors - one
1723      is the preheader of NEW_LOOP, where the IVs from LOOP are used.  The other
1724      is a bb after NEW_LOOP, where these IVs are not used.  Find the edge that
1725      is on the path where the LOOP IVs are used and need to be updated.  */
1726
1727   preheader = loop_preheader_edge (new_loop)->src;
1728   if (EDGE_PRED (preheader, 0)->src == single_exit (loop)->dest)
1729     update_e = EDGE_PRED (preheader, 0);
1730   else
1731     update_e = EDGE_PRED (preheader, 1);
1732
1733   /* Update IVs of original loop as if they were advanced
1734      by ratio_mult_vf_name steps.  */
1735   vect_update_ivs_after_vectorizer (loop_vinfo, ratio_mult_vf_name, update_e);
1736
1737   /* After peeling we have to reset scalar evolution analyzer.  */
1738   scev_reset ();
1739
1740   free_original_copy_tables ();
1741 }
1742
1743
1744 /* Function vect_gen_niters_for_prolog_loop
1745
1746    Set the number of iterations for the loop represented by LOOP_VINFO
1747    to the minimum between LOOP_NITERS (the original iteration count of the loop)
1748    and the misalignment of DR - the data reference recorded in
1749    LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO).  As a result, after the execution of
1750    this loop, the data reference DR will refer to an aligned location.
1751
1752    The following computation is generated:
1753
1754    If the misalignment of DR is known at compile time:
1755      addr_mis = int mis = DR_MISALIGNMENT (dr);
1756    Else, compute address misalignment in bytes:
1757      addr_mis = addr & (vectype_size - 1)
1758
1759    prolog_niters = min (LOOP_NITERS, ((VF - addr_mis/elem_size)&(VF-1))/step)
1760
1761    (elem_size = element type size; an element is the scalar element whose type
1762    is the inner type of the vectype)
1763
1764    When the step of the data-ref in the loop is not 1 (as in interleaved data
1765    and SLP), the number of iterations of the prolog must be divided by the step
1766    (which is equal to the size of interleaved group).
1767
1768    The above formulas assume that VF == number of elements in the vector. This
1769    may not hold when there are multiple-types in the loop.
1770    In this case, for some data-references in the loop the VF does not represent
1771    the number of elements that fit in the vector.  Therefore, instead of VF we
1772    use TYPE_VECTOR_SUBPARTS.  */
1773
1774 static tree
1775 vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
1776 {
1777   struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
1778   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1779   tree var;
1780   gimple_seq stmts;
1781   tree iters, iters_name;
1782   edge pe;
1783   basic_block new_bb;
1784   gimple dr_stmt = DR_STMT (dr);
1785   stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
1786   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1787   int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
1788   tree niters_type = TREE_TYPE (loop_niters);
1789   int step = 1;
1790   int element_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
1791   int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1792
1793   if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1794     step = DR_GROUP_SIZE (vinfo_for_stmt (DR_GROUP_FIRST_DR (stmt_info)));
1795
1796   pe = loop_preheader_edge (loop);
1797
1798   if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
1799     {
1800       int byte_misalign = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
1801       int elem_misalign = byte_misalign / element_size;
1802
1803       if (vect_print_dump_info (REPORT_DETAILS))
1804         fprintf (vect_dump, "known alignment = %d.", byte_misalign);
1805
1806       iters = build_int_cst (niters_type,
1807                      (((nelements - elem_misalign) & (nelements - 1)) / step));
1808     }
1809   else
1810     {
1811       gimple_seq new_stmts = NULL;
1812       tree start_addr = vect_create_addr_base_for_vector_ref (dr_stmt,
1813                                                 &new_stmts, NULL_TREE, loop);
1814       tree ptr_type = TREE_TYPE (start_addr);
1815       tree size = TYPE_SIZE (ptr_type);
1816       tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
1817       tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1);
1818       tree elem_size_log =
1819         build_int_cst (type, exact_log2 (vectype_align/nelements));
1820       tree nelements_minus_1 = build_int_cst (type, nelements - 1);
1821       tree nelements_tree = build_int_cst (type, nelements);
1822       tree byte_misalign;
1823       tree elem_misalign;
1824
1825       new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmts);
1826       gcc_assert (!new_bb);
1827
1828       /* Create:  byte_misalign = addr & (vectype_size - 1)  */
1829       byte_misalign =
1830         fold_build2 (BIT_AND_EXPR, type, fold_convert (type, start_addr), vectype_size_minus_1);
1831
1832       /* Create:  elem_misalign = byte_misalign / element_size  */
1833       elem_misalign =
1834         fold_build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log);
1835
1836       /* Create:  (niters_type) (nelements - elem_misalign)&(nelements - 1)  */
1837       iters = fold_build2 (MINUS_EXPR, type, nelements_tree, elem_misalign);
1838       iters = fold_build2 (BIT_AND_EXPR, type, iters, nelements_minus_1);
1839       iters = fold_convert (niters_type, iters);
1840     }
1841
1842   /* Create:  prolog_loop_niters = min (iters, loop_niters) */
1843   /* If the loop bound is known at compile time we already verified that it is
1844      greater than vf; since the misalignment ('iters') is at most vf, there's
1845      no need to generate the MIN_EXPR in this case.  */
1846   if (TREE_CODE (loop_niters) != INTEGER_CST)
1847     iters = fold_build2 (MIN_EXPR, niters_type, iters, loop_niters);
1848
1849   if (vect_print_dump_info (REPORT_DETAILS))
1850     {
1851       fprintf (vect_dump, "niters for prolog loop: ");
1852       print_generic_expr (vect_dump, iters, TDF_SLIM);
1853     }
1854
1855   var = create_tmp_var (niters_type, "prolog_loop_niters");
1856   add_referenced_var (var);
1857   stmts = NULL;
1858   iters_name = force_gimple_operand (iters, &stmts, false, var);
1859
1860   /* Insert stmt on loop preheader edge.  */
1861   if (stmts)
1862     {
1863       basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
1864       gcc_assert (!new_bb);
1865     }
1866
1867   return iters_name;
1868 }
1869
1870
1871 /* Function vect_update_init_of_dr
1872
1873    NITERS iterations were peeled from LOOP.  DR represents a data reference
1874    in LOOP.  This function updates the information recorded in DR to
1875    account for the fact that the first NITERS iterations had already been
1876    executed.  Specifically, it updates the OFFSET field of DR.  */
1877
1878 static void
1879 vect_update_init_of_dr (struct data_reference *dr, tree niters)
1880 {
1881   tree offset = DR_OFFSET (dr);
1882
1883   niters = fold_build2 (MULT_EXPR, sizetype,
1884                         fold_convert (sizetype, niters),
1885                         fold_convert (sizetype, DR_STEP (dr)));
1886   offset = fold_build2 (PLUS_EXPR, sizetype, offset, niters);
1887   DR_OFFSET (dr) = offset;
1888 }
1889
1890
1891 /* Function vect_update_inits_of_drs
1892
1893    NITERS iterations were peeled from the loop represented by LOOP_VINFO.
1894    This function updates the information recorded for the data references in
1895    the loop to account for the fact that the first NITERS iterations had
1896    already been executed.  Specifically, it updates the initial_condition of
1897    the access_function of all the data_references in the loop.  */
1898
1899 static void
1900 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
1901 {
1902   unsigned int i;
1903   VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1904   struct data_reference *dr;
1905
1906   if (vect_print_dump_info (REPORT_DETAILS))
1907     fprintf (vect_dump, "=== vect_update_inits_of_dr ===");
1908
1909   for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1910     vect_update_init_of_dr (dr, niters);
1911 }
1912
1913
1914 /* Function vect_do_peeling_for_alignment
1915
1916    Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
1917    'niters' is set to the misalignment of one of the data references in the
1918    loop, thereby forcing it to refer to an aligned location at the beginning
1919    of the execution of this loop.  The data reference for which we are
1920    peeling is recorded in LOOP_VINFO_UNALIGNED_DR.  */
1921
1922 void
1923 vect_do_peeling_for_alignment (loop_vec_info loop_vinfo)
1924 {
1925   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1926   tree niters_of_prolog_loop, ni_name;
1927   tree n_iters;
1928   struct loop *new_loop;
1929   bool check_profitability = false;
1930   unsigned int th = 0;
1931   int min_profitable_iters;
1932
1933   if (vect_print_dump_info (REPORT_DETAILS))
1934     fprintf (vect_dump, "=== vect_do_peeling_for_alignment ===");
1935
1936   initialize_original_copy_tables ();
1937
1938   ni_name = vect_build_loop_niters (loop_vinfo);
1939   niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name);
1940
1941
1942   /* If cost model check not done during versioning.  */
1943   if (!VEC_length (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
1944       && !VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)))
1945     {
1946       check_profitability = true;
1947
1948       /* Get profitability threshold for vectorized loop.  */
1949       min_profitable_iters = LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo);
1950
1951       th = conservative_cost_threshold (loop_vinfo,
1952                                         min_profitable_iters);
1953     }
1954
1955   /* Peel the prolog loop and iterate it niters_of_prolog_loop.  */
1956   new_loop =
1957     slpeel_tree_peel_loop_to_edge (loop, loop_preheader_edge (loop),
1958                                    niters_of_prolog_loop, ni_name, true,
1959                                    th, check_profitability);
1960
1961   gcc_assert (new_loop);
1962 #ifdef ENABLE_CHECKING
1963   slpeel_verify_cfg_after_peeling (new_loop, loop);
1964 #endif
1965
1966   /* Update number of times loop executes.  */
1967   n_iters = LOOP_VINFO_NITERS (loop_vinfo);
1968   LOOP_VINFO_NITERS (loop_vinfo) = fold_build2 (MINUS_EXPR,
1969                 TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop);
1970
1971   /* Update the init conditions of the access functions of all data refs.  */
1972   vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop);
1973
1974   /* After peeling we have to reset scalar evolution analyzer.  */
1975   scev_reset ();
1976
1977   free_original_copy_tables ();
1978 }
1979
1980
1981 /* Function vect_create_cond_for_align_checks.
1982
1983    Create a conditional expression that represents the alignment checks for
1984    all of data references (array element references) whose alignment must be
1985    checked at runtime.
1986
1987    Input:
1988    COND_EXPR  - input conditional expression.  New conditions will be chained
1989                 with logical AND operation.
1990    LOOP_VINFO - two fields of the loop information are used.
1991                 LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
1992                 LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
1993
1994    Output:
1995    COND_EXPR_STMT_LIST - statements needed to construct the conditional
1996                          expression.
1997    The returned value is the conditional expression to be used in the if
1998    statement that controls which version of the loop gets executed at runtime.
1999
2000    The algorithm makes two assumptions:
2001      1) The number of bytes "n" in a vector is a power of 2.
2002      2) An address "a" is aligned if a%n is zero and that this
2003         test can be done as a&(n-1) == 0.  For example, for 16
2004         byte vectors the test is a&0xf == 0.  */
2005
2006 static void
2007 vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
2008                                    tree *cond_expr,
2009                                    gimple_seq *cond_expr_stmt_list)
2010 {
2011   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2012   VEC(gimple,heap) *may_misalign_stmts
2013     = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
2014   gimple ref_stmt;
2015   int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
2016   tree mask_cst;
2017   unsigned int i;
2018   tree psize;
2019   tree int_ptrsize_type;
2020   char tmp_name[20];
2021   tree or_tmp_name = NULL_TREE;
2022   tree and_tmp, and_tmp_name;
2023   gimple and_stmt;
2024   tree ptrsize_zero;
2025   tree part_cond_expr;
2026
2027   /* Check that mask is one less than a power of 2, i.e., mask is
2028      all zeros followed by all ones.  */
2029   gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));
2030
2031   /* CHECKME: what is the best integer or unsigned type to use to hold a
2032      cast from a pointer value?  */
2033   psize = TYPE_SIZE (ptr_type_node);
2034   int_ptrsize_type
2035     = lang_hooks.types.type_for_size (tree_low_cst (psize, 1), 0);
2036
2037   /* Create expression (mask & (dr_1 || ... || dr_n)) where dr_i is the address
2038      of the first vector of the i'th data reference. */
2039
2040   for (i = 0; VEC_iterate (gimple, may_misalign_stmts, i, ref_stmt); i++)
2041     {
2042       gimple_seq new_stmt_list = NULL;
2043       tree addr_base;
2044       tree addr_tmp, addr_tmp_name;
2045       tree or_tmp, new_or_tmp_name;
2046       gimple addr_stmt, or_stmt;
2047
2048       /* create: addr_tmp = (int)(address_of_first_vector) */
2049       addr_base =
2050         vect_create_addr_base_for_vector_ref (ref_stmt, &new_stmt_list,
2051                                               NULL_TREE, loop);
2052       if (new_stmt_list != NULL)
2053         gimple_seq_add_seq (cond_expr_stmt_list, new_stmt_list);
2054
2055       sprintf (tmp_name, "%s%d", "addr2int", i);
2056       addr_tmp = create_tmp_var (int_ptrsize_type, tmp_name);
2057       add_referenced_var (addr_tmp);
2058       addr_tmp_name = make_ssa_name (addr_tmp, NULL);
2059       addr_stmt = gimple_build_assign_with_ops (NOP_EXPR, addr_tmp_name,
2060                                                 addr_base, NULL_TREE);
2061       SSA_NAME_DEF_STMT (addr_tmp_name) = addr_stmt;
2062       gimple_seq_add_stmt (cond_expr_stmt_list, addr_stmt);
2063
2064       /* The addresses are OR together.  */
2065
2066       if (or_tmp_name != NULL_TREE)
2067         {
2068           /* create: or_tmp = or_tmp | addr_tmp */
2069           sprintf (tmp_name, "%s%d", "orptrs", i);
2070           or_tmp = create_tmp_var (int_ptrsize_type, tmp_name);
2071           add_referenced_var (or_tmp);
2072           new_or_tmp_name = make_ssa_name (or_tmp, NULL);
2073           or_stmt = gimple_build_assign_with_ops (BIT_IOR_EXPR,
2074                                                   new_or_tmp_name,
2075                                                   or_tmp_name, addr_tmp_name);
2076           SSA_NAME_DEF_STMT (new_or_tmp_name) = or_stmt;
2077           gimple_seq_add_stmt (cond_expr_stmt_list, or_stmt);
2078           or_tmp_name = new_or_tmp_name;
2079         }
2080       else
2081         or_tmp_name = addr_tmp_name;
2082
2083     } /* end for i */
2084
2085   mask_cst = build_int_cst (int_ptrsize_type, mask);
2086
2087   /* create: and_tmp = or_tmp & mask  */
2088   and_tmp = create_tmp_var (int_ptrsize_type, "andmask" );
2089   add_referenced_var (and_tmp);
2090   and_tmp_name = make_ssa_name (and_tmp, NULL);
2091
2092   and_stmt = gimple_build_assign_with_ops (BIT_AND_EXPR, and_tmp_name,
2093                                            or_tmp_name, mask_cst);
2094   SSA_NAME_DEF_STMT (and_tmp_name) = and_stmt;
2095   gimple_seq_add_stmt (cond_expr_stmt_list, and_stmt);
2096
2097   /* Make and_tmp the left operand of the conditional test against zero.
2098      if and_tmp has a nonzero bit then some address is unaligned.  */
2099   ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
2100   part_cond_expr = fold_build2 (EQ_EXPR, boolean_type_node,
2101                                 and_tmp_name, ptrsize_zero);
2102   if (*cond_expr)
2103     *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2104                               *cond_expr, part_cond_expr);
2105   else
2106     *cond_expr = part_cond_expr;
2107 }
2108
2109
2110 /* Function vect_vfa_segment_size.
2111
2112    Create an expression that computes the size of segment
2113    that will be accessed for a data reference.  The functions takes into
2114    account that realignment loads may access one more vector.
2115
2116    Input:
2117      DR: The data reference.
2118      VECT_FACTOR: vectorization factor.
2119
2120    Return an expression whose value is the size of segment which will be
2121    accessed by DR.  */
2122
2123 static tree
2124 vect_vfa_segment_size (struct data_reference *dr, tree vect_factor)
2125 {
2126   tree segment_length = fold_build2 (MULT_EXPR, integer_type_node,
2127                                      DR_STEP (dr), vect_factor);
2128
2129   if (vect_supportable_dr_alignment (dr) == dr_explicit_realign_optimized)
2130     {
2131       tree vector_size = TYPE_SIZE_UNIT
2132                           (STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr))));
2133
2134       segment_length = fold_build2 (PLUS_EXPR, integer_type_node,
2135                                     segment_length, vector_size);
2136     }
2137   return fold_convert (sizetype, segment_length);
2138 }
2139
2140
2141 /* Function vect_create_cond_for_alias_checks.
2142
2143    Create a conditional expression that represents the run-time checks for
2144    overlapping of address ranges represented by a list of data references
2145    relations passed as input.
2146
2147    Input:
2148    COND_EXPR  - input conditional expression.  New conditions will be chained
2149                 with logical AND operation.
2150    LOOP_VINFO - field LOOP_VINFO_MAY_ALIAS_STMTS contains the list of ddrs
2151                 to be checked.
2152
2153    Output:
2154    COND_EXPR - conditional expression.
2155    COND_EXPR_STMT_LIST - statements needed to construct the conditional
2156                          expression.
2157
2158
2159    The returned value is the conditional expression to be used in the if
2160    statement that controls which version of the loop gets executed at runtime.
2161 */
2162
2163 static void
2164 vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo,
2165                                    tree * cond_expr,
2166                                    gimple_seq * cond_expr_stmt_list)
2167 {
2168   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2169   VEC (ddr_p, heap) * may_alias_ddrs =
2170     LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
2171   tree vect_factor =
2172     build_int_cst (integer_type_node, LOOP_VINFO_VECT_FACTOR (loop_vinfo));
2173
2174   ddr_p ddr;
2175   unsigned int i;
2176   tree part_cond_expr;
2177
2178   /* Create expression
2179      ((store_ptr_0 + store_segment_length_0) < load_ptr_0)
2180      || (load_ptr_0 + load_segment_length_0) < store_ptr_0))
2181      &&
2182      ...
2183      &&
2184      ((store_ptr_n + store_segment_length_n) < load_ptr_n)
2185      || (load_ptr_n + load_segment_length_n) < store_ptr_n))  */
2186
2187   if (VEC_empty (ddr_p, may_alias_ddrs))
2188     return;
2189
2190   for (i = 0; VEC_iterate (ddr_p, may_alias_ddrs, i, ddr); i++)
2191     {
2192       struct data_reference *dr_a, *dr_b;
2193       gimple dr_group_first_a, dr_group_first_b;
2194       tree addr_base_a, addr_base_b;
2195       tree segment_length_a, segment_length_b;
2196       gimple stmt_a, stmt_b;
2197
2198       dr_a = DDR_A (ddr);
2199       stmt_a = DR_STMT (DDR_A (ddr));
2200       dr_group_first_a = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_a));
2201       if (dr_group_first_a)
2202         {
2203           stmt_a = dr_group_first_a;
2204           dr_a = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt_a));
2205         }
2206
2207       dr_b = DDR_B (ddr);
2208       stmt_b = DR_STMT (DDR_B (ddr));
2209       dr_group_first_b = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_b));
2210       if (dr_group_first_b)
2211         {
2212           stmt_b = dr_group_first_b;
2213           dr_b = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt_b));
2214         }
2215
2216       addr_base_a =
2217         vect_create_addr_base_for_vector_ref (stmt_a, cond_expr_stmt_list,
2218                                               NULL_TREE, loop);
2219       addr_base_b =
2220         vect_create_addr_base_for_vector_ref (stmt_b, cond_expr_stmt_list,
2221                                               NULL_TREE, loop);
2222
2223       segment_length_a = vect_vfa_segment_size (dr_a, vect_factor);
2224       segment_length_b = vect_vfa_segment_size (dr_b, vect_factor);
2225
2226       if (vect_print_dump_info (REPORT_DR_DETAILS))
2227         {
2228           fprintf (vect_dump,
2229                    "create runtime check for data references ");
2230           print_generic_expr (vect_dump, DR_REF (dr_a), TDF_SLIM);
2231           fprintf (vect_dump, " and ");
2232           print_generic_expr (vect_dump, DR_REF (dr_b), TDF_SLIM);
2233         }
2234
2235
2236       part_cond_expr =
2237         fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
2238           fold_build2 (LT_EXPR, boolean_type_node,
2239             fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (addr_base_a),
2240               addr_base_a,
2241               segment_length_a),
2242             addr_base_b),
2243           fold_build2 (LT_EXPR, boolean_type_node,
2244             fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (addr_base_b),
2245               addr_base_b,
2246               segment_length_b),
2247             addr_base_a));
2248
2249       if (*cond_expr)
2250         *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2251                                   *cond_expr, part_cond_expr);
2252       else
2253         *cond_expr = part_cond_expr;
2254     }
2255     if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS))
2256       fprintf (vect_dump, "created %u versioning for alias checks.\n",
2257                VEC_length (ddr_p, may_alias_ddrs));
2258
2259 }
2260
2261
2262 /* Function vect_loop_versioning.
2263
2264    If the loop has data references that may or may not be aligned or/and
2265    has data reference relations whose independence was not proven then
2266    two versions of the loop need to be generated, one which is vectorized
2267    and one which isn't.  A test is then generated to control which of the
2268    loops is executed.  The test checks for the alignment of all of the
2269    data references that may or may not be aligned.  An additional
2270    sequence of runtime tests is generated for each pairs of DDRs whose
2271    independence was not proven.  The vectorized version of loop is
2272    executed only if both alias and alignment tests are passed.
2273
2274    The test generated to check which version of loop is executed
2275    is modified to also check for profitability as indicated by the
2276    cost model initially.  */
2277
2278 void
2279 vect_loop_versioning (loop_vec_info loop_vinfo)
2280 {
2281   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2282   struct loop *nloop;
2283   tree cond_expr = NULL_TREE;
2284   gimple_seq cond_expr_stmt_list = NULL;
2285   basic_block condition_bb;
2286   gimple_stmt_iterator gsi, cond_exp_gsi;
2287   basic_block merge_bb;
2288   basic_block new_exit_bb;
2289   edge new_exit_e, e;
2290   gimple orig_phi, new_phi;
2291   tree arg;
2292   unsigned prob = 4 * REG_BR_PROB_BASE / 5;
2293   gimple_seq gimplify_stmt_list = NULL;
2294   tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo);
2295   int min_profitable_iters = 0;
2296   unsigned int th;
2297
2298   /* Get profitability threshold for vectorized loop.  */
2299   min_profitable_iters = LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo);
2300
2301   th = conservative_cost_threshold (loop_vinfo,
2302                                     min_profitable_iters);
2303
2304   cond_expr =
2305     fold_build2 (GT_EXPR, boolean_type_node, scalar_loop_iters,
2306                  build_int_cst (TREE_TYPE (scalar_loop_iters), th));
2307
2308   cond_expr = force_gimple_operand (cond_expr, &cond_expr_stmt_list,
2309                                     false, NULL_TREE);
2310
2311   if (VEC_length (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)))
2312       vect_create_cond_for_align_checks (loop_vinfo, &cond_expr,
2313                                          &cond_expr_stmt_list);
2314
2315   if (VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)))
2316     vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr,
2317                                        &cond_expr_stmt_list);
2318
2319   cond_expr =
2320     fold_build2 (NE_EXPR, boolean_type_node, cond_expr, integer_zero_node);
2321   cond_expr =
2322     force_gimple_operand (cond_expr, &gimplify_stmt_list, true, NULL_TREE);
2323   gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
2324
2325   initialize_original_copy_tables ();
2326   nloop = loop_version (loop, cond_expr, &condition_bb,
2327                         prob, prob, REG_BR_PROB_BASE - prob, true);
2328   free_original_copy_tables();
2329
2330   /* Loop versioning violates an assumption we try to maintain during
2331      vectorization - that the loop exit block has a single predecessor.
2332      After versioning, the exit block of both loop versions is the same
2333      basic block (i.e. it has two predecessors). Just in order to simplify
2334      following transformations in the vectorizer, we fix this situation
2335      here by adding a new (empty) block on the exit-edge of the loop,
2336      with the proper loop-exit phis to maintain loop-closed-form.  */
2337
2338   merge_bb = single_exit (loop)->dest;
2339   gcc_assert (EDGE_COUNT (merge_bb->preds) == 2);
2340   new_exit_bb = split_edge (single_exit (loop));
2341   new_exit_e = single_exit (loop);
2342   e = EDGE_SUCC (new_exit_bb, 0);
2343
2344   for (gsi = gsi_start_phis (merge_bb); !gsi_end_p (gsi); gsi_next (&gsi))
2345     {
2346       orig_phi = gsi_stmt (gsi);
2347       new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
2348                                   new_exit_bb);
2349       arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
2350       add_phi_arg (new_phi, arg, new_exit_e);
2351       SET_PHI_ARG_DEF (orig_phi, e->dest_idx, PHI_RESULT (new_phi));
2352     }
2353
2354   /* End loop-exit-fixes after versioning.  */
2355
2356   update_ssa (TODO_update_ssa);
2357   if (cond_expr_stmt_list)
2358     {
2359       cond_exp_gsi = gsi_last_bb (condition_bb);
2360       gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list, GSI_SAME_STMT);
2361     }
2362 }
2363