gcc/tree-vect-loop-manip.c

   1 /* Vectorizer Specific Loop Manipulations
   2    Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software
   3    Foundation, Inc.
   4    Contributed by Dorit Naishlos <dorit@il.ibm.com>
   5    and Ira Rosen <irar@il.ibm.com>
   6
   7 This file is part of GCC.
   8
   9 GCC is free software; you can redistribute it and/or modify it under
  10 the terms of the GNU General Public License as published by the Free
  11 Software Foundation; either version 3, or (at your option) any later
  12 version.
  13
  14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  16 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  17 for more details.
  18
  19 You should have received a copy of the GNU General Public License
  20 along with GCC; see the file COPYING3.  If not see
  21 <http://www.gnu.org/licenses/>.  */
  22
  23 #include "config.h"
  24 #include "system.h"
  25 #include "coretypes.h"
  26 #include "tm.h"
  27 #include "ggc.h"
  28 #include "tree.h"
  29 #include "basic-block.h"
  30 #include "diagnostic.h"
  31 #include "tree-flow.h"
  32 #include "tree-dump.h"
  33 #include "cfgloop.h"
  34 #include "cfglayout.h"
  35 #include "expr.h"
  36 #include "toplev.h"
  37 #include "tree-scalar-evolution.h"
  38 #include "tree-vectorizer.h"
  39 #include "langhooks.h"
  40
  41 /*************************************************************************
  42   Simple Loop Peeling Utilities
  43
  44   Utilities to support loop peeling for vectorization purposes.
  45  *************************************************************************/
  46
  47
  48 /* Renames the use *OP_P.  */
  49
  50 static void
  51 rename_use_op (use_operand_p op_p)
  52 {
  53   tree new_name;
  54
  55   if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
  56     return;
  57
  58   new_name = get_current_def (USE_FROM_PTR (op_p));
  59
  60   /* Something defined outside of the loop.  */
  61   if (!new_name)
  62     return;
  63
  64   /* An ordinary ssa name defined in the loop.  */
  65
  66   SET_USE (op_p, new_name);
  67 }
  68
  69
  70 /* Renames the variables in basic block BB.  */
  71
  72 void
  73 rename_variables_in_bb (basic_block bb)
  74 {
  75   gimple_stmt_iterator gsi;
  76   gimple stmt;
  77   use_operand_p use_p;
  78   ssa_op_iter iter;
  79   edge e;
  80   edge_iterator ei;
  81   struct loop *loop = bb->loop_father;
  82
  83   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
  84     {
  85       stmt = gsi_stmt (gsi);
  86       FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
  87         rename_use_op (use_p);
  88     }
  89
  90   FOR_EACH_EDGE (e, ei, bb->succs)
  91     {
  92       if (!flow_bb_inside_loop_p (loop, e->dest))
  93         continue;
  94       for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
  95         rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi_stmt (gsi), e));
  96     }
  97 }
  98
  99
 100 /* Renames variables in new generated LOOP.  */
 101
 102 void
 103 rename_variables_in_loop (struct loop *loop)
 104 {
 105   unsigned i;
 106   basic_block *bbs;
 107
 108   bbs = get_loop_body (loop);
 109
 110   for (i = 0; i < loop->num_nodes; i++)
 111     rename_variables_in_bb (bbs[i]);
 112
 113   free (bbs);
 114 }
 115
 116
 117 /* Update the PHI nodes of NEW_LOOP.
 118
 119    NEW_LOOP is a duplicate of ORIG_LOOP.
 120    AFTER indicates whether NEW_LOOP executes before or after ORIG_LOOP:
 121    AFTER is true if NEW_LOOP executes after ORIG_LOOP, and false if it
 122    executes before it.  */
 123
 124 static void
 125 slpeel_update_phis_for_duplicate_loop (struct loop *orig_loop,
 126                                        struct loop *new_loop, bool after)
 127 {
 128   tree new_ssa_name;
 129   gimple phi_new, phi_orig;
 130   tree def;
 131   edge orig_loop_latch = loop_latch_edge (orig_loop);
 132   edge orig_entry_e = loop_preheader_edge (orig_loop);
 133   edge new_loop_exit_e = single_exit (new_loop);
 134   edge new_loop_entry_e = loop_preheader_edge (new_loop);
 135   edge entry_arg_e = (after ? orig_loop_latch : orig_entry_e);
 136   gimple_stmt_iterator gsi_new, gsi_orig;
 137
 138   /*
 139      step 1. For each loop-header-phi:
 140              Add the first phi argument for the phi in NEW_LOOP
 141             (the one associated with the entry of NEW_LOOP)
 142
 143      step 2. For each loop-header-phi:
 144              Add the second phi argument for the phi in NEW_LOOP
 145             (the one associated with the latch of NEW_LOOP)
 146
 147      step 3. Update the phis in the successor block of NEW_LOOP.
 148
 149         case 1: NEW_LOOP was placed before ORIG_LOOP:
 150                 The successor block of NEW_LOOP is the header of ORIG_LOOP.
 151                 Updating the phis in the successor block can therefore be done
 152                 along with the scanning of the loop header phis, because the
 153                 header blocks of ORIG_LOOP and NEW_LOOP have exactly the same
 154                 phi nodes, organized in the same order.
 155
 156         case 2: NEW_LOOP was placed after ORIG_LOOP:
 157                 The successor block of NEW_LOOP is the original exit block of
 158                 ORIG_LOOP - the phis to be updated are the loop-closed-ssa phis.
 159                 We postpone updating these phis to a later stage (when
 160                 loop guards are added).
 161    */
 162
 163
 164   /* Scan the phis in the headers of the old and new loops
 165      (they are organized in exactly the same order).  */
 166
 167   for (gsi_new = gsi_start_phis (new_loop->header),
 168        gsi_orig = gsi_start_phis (orig_loop->header);
 169        !gsi_end_p (gsi_new) && !gsi_end_p (gsi_orig);
 170        gsi_next (&gsi_new), gsi_next (&gsi_orig))
 171     {
 172       phi_new = gsi_stmt (gsi_new);
 173       phi_orig = gsi_stmt (gsi_orig);
 174
 175       /* step 1.  */
 176       def = PHI_ARG_DEF_FROM_EDGE (phi_orig, entry_arg_e);
 177       add_phi_arg (phi_new, def, new_loop_entry_e);
 178
 179       /* step 2.  */
 180       def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_loop_latch);
 181       if (TREE_CODE (def) != SSA_NAME)
 182         continue;
 183
 184       new_ssa_name = get_current_def (def);
 185       if (!new_ssa_name)
 186         {
 187           /* This only happens if there are no definitions
 188              inside the loop. use the phi_result in this case.  */
 189           new_ssa_name = PHI_RESULT (phi_new);
 190         }
 191
 192       /* An ordinary ssa name defined in the loop.  */
 193       add_phi_arg (phi_new, new_ssa_name, loop_latch_edge (new_loop));
 194
 195       /* step 3 (case 1).  */
 196       if (!after)
 197         {
 198           gcc_assert (new_loop_exit_e == orig_entry_e);
 199           SET_PHI_ARG_DEF (phi_orig,
 200                            new_loop_exit_e->dest_idx,
 201                            new_ssa_name);
 202         }
 203     }
 204 }
 205
 206
 207 /* Update PHI nodes for a guard of the LOOP.
 208
 209    Input:
 210    - LOOP, GUARD_EDGE: LOOP is a loop for which we added guard code that
 211         controls whether LOOP is to be executed.  GUARD_EDGE is the edge that
 212         originates from the guard-bb, skips LOOP and reaches the (unique) exit
 213         bb of LOOP.  This loop-exit-bb is an empty bb with one successor.
 214         We denote this bb NEW_MERGE_BB because before the guard code was added
 215         it had a single predecessor (the LOOP header), and now it became a merge
 216         point of two paths - the path that ends with the LOOP exit-edge, and
 217         the path that ends with GUARD_EDGE.
 218    - NEW_EXIT_BB: New basic block that is added by this function between LOOP
 219         and NEW_MERGE_BB. It is used to place loop-closed-ssa-form exit-phis.
 220
 221    ===> The CFG before the guard-code was added:
 222         LOOP_header_bb:
 223           loop_body
 224           if (exit_loop) goto update_bb
 225           else           goto LOOP_header_bb
 226         update_bb:
 227
 228    ==> The CFG after the guard-code was added:
 229         guard_bb:
 230           if (LOOP_guard_condition) goto new_merge_bb
 231           else                      goto LOOP_header_bb
 232         LOOP_header_bb:
 233           loop_body
 234           if (exit_loop_condition) goto new_merge_bb
 235           else                     goto LOOP_header_bb
 236         new_merge_bb:
 237           goto update_bb
 238         update_bb:
 239
 240    ==> The CFG after this function:
 241         guard_bb:
 242           if (LOOP_guard_condition) goto new_merge_bb
 243           else                      goto LOOP_header_bb
 244         LOOP_header_bb:
 245           loop_body
 246           if (exit_loop_condition) goto new_exit_bb
 247           else                     goto LOOP_header_bb
 248         new_exit_bb:
 249         new_merge_bb:
 250           goto update_bb
 251         update_bb:
 252
 253    This function:
 254    1. creates and updates the relevant phi nodes to account for the new
 255       incoming edge (GUARD_EDGE) into NEW_MERGE_BB. This involves:
 256       1.1. Create phi nodes at NEW_MERGE_BB.
 257       1.2. Update the phi nodes at the successor of NEW_MERGE_BB (denoted
 258            UPDATE_BB).  UPDATE_BB was the exit-bb of LOOP before NEW_MERGE_BB
 259    2. preserves loop-closed-ssa-form by creating the required phi nodes
 260       at the exit of LOOP (i.e, in NEW_EXIT_BB).
 261
 262    There are two flavors to this function:
 263
 264    slpeel_update_phi_nodes_for_guard1:
 265      Here the guard controls whether we enter or skip LOOP, where LOOP is a
 266      prolog_loop (loop1 below), and the new phis created in NEW_MERGE_BB are
 267      for variables that have phis in the loop header.
 268
 269    slpeel_update_phi_nodes_for_guard2:
 270      Here the guard controls whether we enter or skip LOOP, where LOOP is an
 271      epilog_loop (loop2 below), and the new phis created in NEW_MERGE_BB are
 272      for variables that have phis in the loop exit.
 273
 274    I.E., the overall structure is:
 275
 276         loop1_preheader_bb:
 277                 guard1 (goto loop1/merge1_bb)
 278         loop1
 279         loop1_exit_bb:
 280                 guard2 (goto merge1_bb/merge2_bb)
 281         merge1_bb
 282         loop2
 283         loop2_exit_bb
 284         merge2_bb
 285         next_bb
 286
 287    slpeel_update_phi_nodes_for_guard1 takes care of creating phis in
 288    loop1_exit_bb and merge1_bb. These are entry phis (phis for the vars
 289    that have phis in loop1->header).
 290
 291    slpeel_update_phi_nodes_for_guard2 takes care of creating phis in
 292    loop2_exit_bb and merge2_bb. These are exit phis (phis for the vars
 293    that have phis in next_bb). It also adds some of these phis to
 294    loop1_exit_bb.
 295
 296    slpeel_update_phi_nodes_for_guard1 is always called before
 297    slpeel_update_phi_nodes_for_guard2. They are both needed in order
 298    to create correct data-flow and loop-closed-ssa-form.
 299
 300    Generally slpeel_update_phi_nodes_for_guard1 creates phis for variables
 301    that change between iterations of a loop (and therefore have a phi-node
 302    at the loop entry), whereas slpeel_update_phi_nodes_for_guard2 creates
 303    phis for variables that are used out of the loop (and therefore have
 304    loop-closed exit phis). Some variables may be both updated between
 305    iterations and used after the loop. This is why in loop1_exit_bb we
 306    may need both entry_phis (created by slpeel_update_phi_nodes_for_guard1)
 307    and exit phis (created by slpeel_update_phi_nodes_for_guard2).
 308
 309    - IS_NEW_LOOP: if IS_NEW_LOOP is true, then LOOP is a newly created copy of
 310      an original loop. i.e., we have:
 311
 312            orig_loop
 313            guard_bb (goto LOOP/new_merge)
 314            new_loop <-- LOOP
 315            new_exit
 316            new_merge
 317            next_bb
 318
 319      If IS_NEW_LOOP is false, then LOOP is an original loop, in which case we
 320      have:
 321
 322            new_loop
 323            guard_bb (goto LOOP/new_merge)
 324            orig_loop <-- LOOP
 325            new_exit
 326            new_merge
 327            next_bb
 328
 329      The SSA names defined in the original loop have a current
 330      reaching definition that that records the corresponding new
 331      ssa-name used in the new duplicated loop copy.
 332   */
 333
 334 /* Function slpeel_update_phi_nodes_for_guard1
 335
 336    Input:
 337    - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
 338    - DEFS - a bitmap of ssa names to mark new names for which we recorded
 339             information.
 340
 341    In the context of the overall structure, we have:
 342
 343         loop1_preheader_bb:
 344                 guard1 (goto loop1/merge1_bb)
 345 LOOP->  loop1
 346         loop1_exit_bb:
 347                 guard2 (goto merge1_bb/merge2_bb)
 348         merge1_bb
 349         loop2
 350         loop2_exit_bb
 351         merge2_bb
 352         next_bb
 353
 354    For each name updated between loop iterations (i.e - for each name that has
 355    an entry (loop-header) phi in LOOP) we create a new phi in:
 356    1. merge1_bb (to account for the edge from guard1)
 357    2. loop1_exit_bb (an exit-phi to keep LOOP in loop-closed form)
 358 */
 359
 360 static void
 361 slpeel_update_phi_nodes_for_guard1 (edge guard_edge, struct loop *loop,
 362                                     bool is_new_loop, basic_block *new_exit_bb,
 363                                     bitmap *defs)
 364 {
 365   gimple orig_phi, new_phi;
 366   gimple update_phi, update_phi2;
 367   tree guard_arg, loop_arg;
 368   basic_block new_merge_bb = guard_edge->dest;
 369   edge e = EDGE_SUCC (new_merge_bb, 0);
 370   basic_block update_bb = e->dest;
 371   basic_block orig_bb = loop->header;
 372   edge new_exit_e;
 373   tree current_new_name;
 374   gimple_stmt_iterator gsi_orig, gsi_update;
 375
 376   /* Create new bb between loop and new_merge_bb.  */
 377   *new_exit_bb = split_edge (single_exit (loop));
 378
 379   new_exit_e = EDGE_SUCC (*new_exit_bb, 0);
 380
 381   for (gsi_orig = gsi_start_phis (orig_bb),
 382        gsi_update = gsi_start_phis (update_bb);
 383        !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
 384        gsi_next (&gsi_orig), gsi_next (&gsi_update))
 385     {
 386       orig_phi = gsi_stmt (gsi_orig);
 387       update_phi = gsi_stmt (gsi_update);
 388
 389       /** 1. Handle new-merge-point phis  **/
 390
 391       /* 1.1. Generate new phi node in NEW_MERGE_BB:  */
 392       new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
 393                                  new_merge_bb);
 394
 395       /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
 396             of LOOP. Set the two phi args in NEW_PHI for these edges:  */
 397       loop_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, EDGE_SUCC (loop->latch, 0));
 398       guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, loop_preheader_edge (loop));
 399
 400       add_phi_arg (new_phi, loop_arg, new_exit_e);
 401       add_phi_arg (new_phi, guard_arg, guard_edge);
 402
 403       /* 1.3. Update phi in successor block.  */
 404       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == loop_arg
 405                   || PHI_ARG_DEF_FROM_EDGE (update_phi, e) == guard_arg);
 406       SET_PHI_ARG_DEF (update_phi, e->dest_idx, PHI_RESULT (new_phi));
 407       update_phi2 = new_phi;
 408
 409
 410       /** 2. Handle loop-closed-ssa-form phis  **/
 411
 412       if (!is_gimple_reg (PHI_RESULT (orig_phi)))
 413         continue;
 414
 415       /* 2.1. Generate new phi node in NEW_EXIT_BB:  */
 416       new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
 417                                  *new_exit_bb);
 418
 419       /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop.  */
 420       add_phi_arg (new_phi, loop_arg, single_exit (loop));
 421
 422       /* 2.3. Update phi in successor of NEW_EXIT_BB:  */
 423       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
 424       SET_PHI_ARG_DEF (update_phi2, new_exit_e->dest_idx, PHI_RESULT (new_phi));
 425
 426       /* 2.4. Record the newly created name with set_current_def.
 427          We want to find a name such that
 428                 name = get_current_def (orig_loop_name)
 429          and to set its current definition as follows:
 430                 set_current_def (name, new_phi_name)
 431
 432          If LOOP is a new loop then loop_arg is already the name we're
 433          looking for. If LOOP is the original loop, then loop_arg is
 434          the orig_loop_name and the relevant name is recorded in its
 435          current reaching definition.  */
 436       if (is_new_loop)
 437         current_new_name = loop_arg;
 438       else
 439         {
 440           current_new_name = get_current_def (loop_arg);
 441           /* current_def is not available only if the variable does not
 442              change inside the loop, in which case we also don't care
 443              about recording a current_def for it because we won't be
 444              trying to create loop-exit-phis for it.  */
 445           if (!current_new_name)
 446             continue;
 447         }
 448       gcc_assert (get_current_def (current_new_name) == NULL_TREE);
 449
 450       set_current_def (current_new_name, PHI_RESULT (new_phi));
 451       bitmap_set_bit (*defs, SSA_NAME_VERSION (current_new_name));
 452     }
 453 }
 454
 455
 456 /* Function slpeel_update_phi_nodes_for_guard2
 457
 458    Input:
 459    - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
 460
 461    In the context of the overall structure, we have:
 462
 463         loop1_preheader_bb:
 464                 guard1 (goto loop1/merge1_bb)
 465         loop1
 466         loop1_exit_bb:
 467                 guard2 (goto merge1_bb/merge2_bb)
 468         merge1_bb
 469 LOOP->  loop2
 470         loop2_exit_bb
 471         merge2_bb
 472         next_bb
 473
 474    For each name used out side the loop (i.e - for each name that has an exit
 475    phi in next_bb) we create a new phi in:
 476    1. merge2_bb (to account for the edge from guard_bb)
 477    2. loop2_exit_bb (an exit-phi to keep LOOP in loop-closed form)
 478    3. guard2 bb (an exit phi to keep the preceding loop in loop-closed form),
 479       if needed (if it wasn't handled by slpeel_update_phis_nodes_for_phi1).
 480 */
 481
 482 static void
 483 slpeel_update_phi_nodes_for_guard2 (edge guard_edge, struct loop *loop,
 484                                     bool is_new_loop, basic_block *new_exit_bb)
 485 {
 486   gimple orig_phi, new_phi;
 487   gimple update_phi, update_phi2;
 488   tree guard_arg, loop_arg;
 489   basic_block new_merge_bb = guard_edge->dest;
 490   edge e = EDGE_SUCC (new_merge_bb, 0);
 491   basic_block update_bb = e->dest;
 492   edge new_exit_e;
 493   tree orig_def, orig_def_new_name;
 494   tree new_name, new_name2;
 495   tree arg;
 496   gimple_stmt_iterator gsi;
 497
 498   /* Create new bb between loop and new_merge_bb.  */
 499   *new_exit_bb = split_edge (single_exit (loop));
 500
 501   new_exit_e = EDGE_SUCC (*new_exit_bb, 0);
 502
 503   for (gsi = gsi_start_phis (update_bb); !gsi_end_p (gsi); gsi_next (&gsi))
 504     {
 505       update_phi = gsi_stmt (gsi);
 506       orig_phi = update_phi;
 507       orig_def = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
 508       /* This loop-closed-phi actually doesn't represent a use
 509          out of the loop - the phi arg is a constant.  */
 510       if (TREE_CODE (orig_def) != SSA_NAME)
 511         continue;
 512       orig_def_new_name = get_current_def (orig_def);
 513       arg = NULL_TREE;
 514
 515       /** 1. Handle new-merge-point phis  **/
 516
 517       /* 1.1. Generate new phi node in NEW_MERGE_BB:  */
 518       new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
 519                                  new_merge_bb);
 520
 521       /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
 522             of LOOP. Set the two PHI args in NEW_PHI for these edges:  */
 523       new_name = orig_def;
 524       new_name2 = NULL_TREE;
 525       if (orig_def_new_name)
 526         {
 527           new_name = orig_def_new_name;
 528           /* Some variables have both loop-entry-phis and loop-exit-phis.
 529              Such variables were given yet newer names by phis placed in
 530              guard_bb by slpeel_update_phi_nodes_for_guard1. I.e:
 531              new_name2 = get_current_def (get_current_def (orig_name)).  */
 532           new_name2 = get_current_def (new_name);
 533         }
 534
 535       if (is_new_loop)
 536         {
 537           guard_arg = orig_def;
 538           loop_arg = new_name;
 539         }
 540       else
 541         {
 542           guard_arg = new_name;
 543           loop_arg = orig_def;
 544         }
 545       if (new_name2)
 546         guard_arg = new_name2;
 547
 548       add_phi_arg (new_phi, loop_arg, new_exit_e);
 549       add_phi_arg (new_phi, guard_arg, guard_edge);
 550
 551       /* 1.3. Update phi in successor block.  */
 552       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == orig_def);
 553       SET_PHI_ARG_DEF (update_phi, e->dest_idx, PHI_RESULT (new_phi));
 554       update_phi2 = new_phi;
 555
 556
 557       /** 2. Handle loop-closed-ssa-form phis  **/
 558
 559       /* 2.1. Generate new phi node in NEW_EXIT_BB:  */
 560       new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
 561                                  *new_exit_bb);
 562
 563       /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop.  */
 564       add_phi_arg (new_phi, loop_arg, single_exit (loop));
 565
 566       /* 2.3. Update phi in successor of NEW_EXIT_BB:  */
 567       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
 568       SET_PHI_ARG_DEF (update_phi2, new_exit_e->dest_idx, PHI_RESULT (new_phi));
 569
 570
 571       /** 3. Handle loop-closed-ssa-form phis for first loop  **/
 572
 573       /* 3.1. Find the relevant names that need an exit-phi in
 574          GUARD_BB, i.e. names for which
 575          slpeel_update_phi_nodes_for_guard1 had not already created a
 576          phi node. This is the case for names that are used outside
 577          the loop (and therefore need an exit phi) but are not updated
 578          across loop iterations (and therefore don't have a
 579          loop-header-phi).
 580
 581          slpeel_update_phi_nodes_for_guard1 is responsible for
 582          creating loop-exit phis in GUARD_BB for names that have a
 583          loop-header-phi.  When such a phi is created we also record
 584          the new name in its current definition.  If this new name
 585          exists, then guard_arg was set to this new name (see 1.2
 586          above).  Therefore, if guard_arg is not this new name, this
 587          is an indication that an exit-phi in GUARD_BB was not yet
 588          created, so we take care of it here.  */
 589       if (guard_arg == new_name2)
 590         continue;
 591       arg = guard_arg;
 592
 593       /* 3.2. Generate new phi node in GUARD_BB:  */
 594       new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
 595                                  guard_edge->src);
 596
 597       /* 3.3. GUARD_BB has one incoming edge:  */
 598       gcc_assert (EDGE_COUNT (guard_edge->src->preds) == 1);
 599       add_phi_arg (new_phi, arg, EDGE_PRED (guard_edge->src, 0));
 600
 601       /* 3.4. Update phi in successor of GUARD_BB:  */
 602       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, guard_edge)
 603                                                                 == guard_arg);
 604       SET_PHI_ARG_DEF (update_phi2, guard_edge->dest_idx, PHI_RESULT (new_phi));
 605     }
 606 }
 607
 608
 609 /* Make the LOOP iterate NITERS times. This is done by adding a new IV
 610    that starts at zero, increases by one and its limit is NITERS.
 611
 612    Assumption: the exit-condition of LOOP is the last stmt in the loop.  */
 613
 614 void
 615 slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
 616 {
 617   tree indx_before_incr, indx_after_incr;
 618   gimple cond_stmt;
 619   gimple orig_cond;
 620   edge exit_edge = single_exit (loop);
 621   gimple_stmt_iterator loop_cond_gsi;
 622   gimple_stmt_iterator incr_gsi;
 623   bool insert_after;
 624   tree init = build_int_cst (TREE_TYPE (niters), 0);
 625   tree step = build_int_cst (TREE_TYPE (niters), 1);
 626   LOC loop_loc;
 627   enum tree_code code;
 628
 629   orig_cond = get_loop_exit_condition (loop);
 630   gcc_assert (orig_cond);
 631   loop_cond_gsi = gsi_for_stmt (orig_cond);
 632
 633   standard_iv_increment_position (loop, &incr_gsi, &insert_after);
 634   create_iv (init, step, NULL_TREE, loop,
 635              &incr_gsi, insert_after, &indx_before_incr, &indx_after_incr);
 636
 637   indx_after_incr = force_gimple_operand_gsi (&loop_cond_gsi, indx_after_incr,
 638                                               true, NULL_TREE, true,
 639                                               GSI_SAME_STMT);
 640   niters = force_gimple_operand_gsi (&loop_cond_gsi, niters, true, NULL_TREE,
 641                                      true, GSI_SAME_STMT);
 642
 643   code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR;
 644   cond_stmt = gimple_build_cond (code, indx_after_incr, niters, NULL_TREE,
 645                                  NULL_TREE);
 646
 647   gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);
 648
 649   /* Remove old loop exit test:  */
 650   gsi_remove (&loop_cond_gsi, true);
 651
 652   loop_loc = find_loop_location (loop);
 653   if (dump_file && (dump_flags & TDF_DETAILS))
 654     {
 655       if (loop_loc != UNKNOWN_LOC)
 656         fprintf (dump_file, "\nloop at %s:%d: ",
 657                  LOC_FILE (loop_loc), LOC_LINE (loop_loc));
 658       print_gimple_stmt (dump_file, cond_stmt, 0, TDF_SLIM);
 659     }
 660
 661   loop->nb_iterations = niters;
 662 }
 663
 664
 665 /* Given LOOP this function generates a new copy of it and puts it
 666    on E which is either the entry or exit of LOOP.  */
 667
 668 struct loop *
 669 slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, edge e)
 670 {
 671   struct loop *new_loop;
 672   basic_block *new_bbs, *bbs;
 673   bool at_exit;
 674   bool was_imm_dom;
 675   basic_block exit_dest;
 676   gimple phi;
 677   tree phi_arg;
 678   edge exit, new_exit;
 679   gimple_stmt_iterator gsi;
 680
 681   at_exit = (e == single_exit (loop));
 682   if (!at_exit && e != loop_preheader_edge (loop))
 683     return NULL;
 684
 685   bbs = get_loop_body (loop);
 686
 687   /* Check whether duplication is possible.  */
 688   if (!can_copy_bbs_p (bbs, loop->num_nodes))
 689     {
 690       free (bbs);
 691       return NULL;
 692     }
 693
 694   /* Generate new loop structure.  */
 695   new_loop = duplicate_loop (loop, loop_outer (loop));
 696   if (!new_loop)
 697     {
 698       free (bbs);
 699       return NULL;
 700     }
 701
 702   exit_dest = single_exit (loop)->dest;
 703   was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
 704                                           exit_dest) == loop->header ?
 705                  true : false);
 706
 707   new_bbs = XNEWVEC (basic_block, loop->num_nodes);
 708
 709   exit = single_exit (loop);
 710   copy_bbs (bbs, loop->num_nodes, new_bbs,
 711             &exit, 1, &new_exit, NULL,
 712             e->src);
 713
 714   /* Duplicating phi args at exit bbs as coming
 715      also from exit of duplicated loop.  */
 716   for (gsi = gsi_start_phis (exit_dest); !gsi_end_p (gsi); gsi_next (&gsi))
 717     {
 718       phi = gsi_stmt (gsi);
 719       phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, single_exit (loop));
 720       if (phi_arg)
 721         {
 722           edge new_loop_exit_edge;
 723
 724           if (EDGE_SUCC (new_loop->header, 0)->dest == new_loop->latch)
 725             new_loop_exit_edge = EDGE_SUCC (new_loop->header, 1);
 726           else
 727             new_loop_exit_edge = EDGE_SUCC (new_loop->header, 0);
 728
 729           add_phi_arg (phi, phi_arg, new_loop_exit_edge);
 730         }
 731     }
 732
 733   if (at_exit) /* Add the loop copy at exit.  */
 734     {
 735       redirect_edge_and_branch_force (e, new_loop->header);
 736       PENDING_STMT (e) = NULL;
 737       set_immediate_dominator (CDI_DOMINATORS, new_loop->header, e->src);
 738       if (was_imm_dom)
 739         set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_loop->header);
 740     }
 741   else /* Add the copy at entry.  */
 742     {
 743       edge new_exit_e;
 744       edge entry_e = loop_preheader_edge (loop);
 745       basic_block preheader = entry_e->src;
 746
 747       if (!flow_bb_inside_loop_p (new_loop,
 748                                   EDGE_SUCC (new_loop->header, 0)->dest))
 749         new_exit_e = EDGE_SUCC (new_loop->header, 0);
 750       else
 751         new_exit_e = EDGE_SUCC (new_loop->header, 1);
 752
 753       redirect_edge_and_branch_force (new_exit_e, loop->header);
 754       PENDING_STMT (new_exit_e) = NULL;
 755       set_immediate_dominator (CDI_DOMINATORS, loop->header,
 756                                new_exit_e->src);
 757
 758       /* We have to add phi args to the loop->header here as coming
 759          from new_exit_e edge.  */
 760       for (gsi = gsi_start_phis (loop->header);
 761            !gsi_end_p (gsi);
 762            gsi_next (&gsi))
 763         {
 764           phi = gsi_stmt (gsi);
 765           phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, entry_e);
 766           if (phi_arg)
 767             add_phi_arg (phi, phi_arg, new_exit_e);
 768         }
 769
 770       redirect_edge_and_branch_force (entry_e, new_loop->header);
 771       PENDING_STMT (entry_e) = NULL;
 772       set_immediate_dominator (CDI_DOMINATORS, new_loop->header, preheader);
 773     }
 774
 775   free (new_bbs);
 776   free (bbs);
 777
 778   return new_loop;
 779 }
 780
 781
 782 /* Given the condition statement COND, put it as the last statement
 783    of GUARD_BB; EXIT_BB is the basic block to skip the loop;
 784    Assumes that this is the single exit of the guarded loop.
 785    Returns the skip edge, inserts new stmts on the COND_EXPR_STMT_LIST.  */
 786
 787 static edge
 788 slpeel_add_loop_guard (basic_block guard_bb, tree cond,
 789                        gimple_seq cond_expr_stmt_list,
 790                        basic_block exit_bb, basic_block dom_bb)
 791 {
 792   gimple_stmt_iterator gsi;
 793   edge new_e, enter_e;
 794   gimple cond_stmt;
 795   gimple_seq gimplify_stmt_list = NULL;
 796
 797   enter_e = EDGE_SUCC (guard_bb, 0);
 798   enter_e->flags &= ~EDGE_FALLTHRU;
 799   enter_e->flags |= EDGE_FALSE_VALUE;
 800   gsi = gsi_last_bb (guard_bb);
 801
 802   cond = force_gimple_operand (cond, &gimplify_stmt_list, true, NULL_TREE);
 803   if (gimplify_stmt_list)
 804     gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
 805   cond_stmt = gimple_build_cond (NE_EXPR,
 806                                  cond, build_int_cst (TREE_TYPE (cond), 0),
 807                                  NULL_TREE, NULL_TREE);
 808   if (cond_expr_stmt_list)
 809     gsi_insert_seq_after (&gsi, cond_expr_stmt_list, GSI_NEW_STMT);
 810
 811   gsi = gsi_last_bb (guard_bb);
 812   gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
 813
 814   /* Add new edge to connect guard block to the merge/loop-exit block.  */
 815   new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);
 816   set_immediate_dominator (CDI_DOMINATORS, exit_bb, dom_bb);
 817   return new_e;
 818 }
 819
 820
 821 /* This function verifies that the following restrictions apply to LOOP:
 822    (1) it is innermost
 823    (2) it consists of exactly 2 basic blocks - header, and an empty latch.
 824    (3) it is single entry, single exit
 825    (4) its exit condition is the last stmt in the header
 826    (5) E is the entry/exit edge of LOOP.
 827  */
 828
 829 bool
 830 slpeel_can_duplicate_loop_p (const struct loop *loop, const_edge e)
 831 {
 832   edge exit_e = single_exit (loop);
 833   edge entry_e = loop_preheader_edge (loop);
 834   gimple orig_cond = get_loop_exit_condition (loop);
 835   gimple_stmt_iterator loop_exit_gsi = gsi_last_bb (exit_e->src);
 836
 837   if (need_ssa_update_p (cfun))
 838     return false;
 839
 840   if (loop->inner
 841       /* All loops have an outer scope; the only case loop->outer is NULL is for
 842          the function itself.  */
 843       || !loop_outer (loop)
 844       || loop->num_nodes != 2
 845       || !empty_block_p (loop->latch)
 846       || !single_exit (loop)
 847       /* Verify that new loop exit condition can be trivially modified.  */
 848       || (!orig_cond || orig_cond != gsi_stmt (loop_exit_gsi))
 849       || (e != exit_e && e != entry_e))
 850     return false;
 851
 852   return true;
 853 }
 854
 855 #ifdef ENABLE_CHECKING
 856 static void
 857 slpeel_verify_cfg_after_peeling (struct loop *first_loop,
 858                                  struct loop *second_loop)
 859 {
 860   basic_block loop1_exit_bb = single_exit (first_loop)->dest;
 861   basic_block loop2_entry_bb = loop_preheader_edge (second_loop)->src;
 862   basic_block loop1_entry_bb = loop_preheader_edge (first_loop)->src;
 863
 864   /* A guard that controls whether the second_loop is to be executed or skipped
 865      is placed in first_loop->exit.  first_loop->exit therefore has two
 866      successors - one is the preheader of second_loop, and the other is a bb
 867      after second_loop.
 868    */
 869   gcc_assert (EDGE_COUNT (loop1_exit_bb->succs) == 2);
 870
 871   /* 1. Verify that one of the successors of first_loop->exit is the preheader
 872         of second_loop.  */
 873
 874   /* The preheader of new_loop is expected to have two predecessors:
 875      first_loop->exit and the block that precedes first_loop.  */
 876
 877   gcc_assert (EDGE_COUNT (loop2_entry_bb->preds) == 2
 878               && ((EDGE_PRED (loop2_entry_bb, 0)->src == loop1_exit_bb
 879                    && EDGE_PRED (loop2_entry_bb, 1)->src == loop1_entry_bb)
 880                || (EDGE_PRED (loop2_entry_bb, 1)->src ==  loop1_exit_bb
 881                    && EDGE_PRED (loop2_entry_bb, 0)->src == loop1_entry_bb)));
 882
 883   /* Verify that the other successor of first_loop->exit is after the
 884      second_loop.  */
 885   /* TODO */
 886 }
 887 #endif
 888
 889 /* If the run time cost model check determines that vectorization is
 890    not profitable and hence scalar loop should be generated then set
 891    FIRST_NITERS to prologue peeled iterations. This will allow all the
 892    iterations to be executed in the prologue peeled scalar loop.  */
 893
 894 static void
 895 set_prologue_iterations (basic_block bb_before_first_loop,
 896                          tree first_niters,
 897                          struct loop *loop,
 898                          unsigned int th)
 899 {
 900   edge e;
 901   basic_block cond_bb, then_bb;
 902   tree var, prologue_after_cost_adjust_name;
 903   gimple_stmt_iterator gsi;
 904   gimple newphi;
 905   edge e_true, e_false, e_fallthru;
 906   gimple cond_stmt;
 907   gimple_seq gimplify_stmt_list = NULL, stmts = NULL;
 908   tree cost_pre_condition = NULL_TREE;
 909   tree scalar_loop_iters =
 910     unshare_expr (LOOP_VINFO_NITERS_UNCHANGED (loop_vec_info_for_loop (loop)));
 911
 912   e = single_pred_edge (bb_before_first_loop);
 913   cond_bb = split_edge(e);
 914
 915   e = single_pred_edge (bb_before_first_loop);
 916   then_bb = split_edge(e);
 917   set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
 918
 919   e_false = make_single_succ_edge (cond_bb, bb_before_first_loop,
 920                                    EDGE_FALSE_VALUE);
 921   set_immediate_dominator (CDI_DOMINATORS, bb_before_first_loop, cond_bb);
 922
 923   e_true = EDGE_PRED (then_bb, 0);
 924   e_true->flags &= ~EDGE_FALLTHRU;
 925   e_true->flags |= EDGE_TRUE_VALUE;
 926
 927   e_fallthru = EDGE_SUCC (then_bb, 0);
 928
 929   cost_pre_condition =
 930     fold_build2 (LE_EXPR, boolean_type_node, scalar_loop_iters,
 931                  build_int_cst (TREE_TYPE (scalar_loop_iters), th));
 932   cost_pre_condition =
 933     force_gimple_operand (cost_pre_condition, &gimplify_stmt_list,
 934                           true, NULL_TREE);
 935   cond_stmt = gimple_build_cond (NE_EXPR, cost_pre_condition,
 936                                  build_int_cst (TREE_TYPE (cost_pre_condition),
 937                                                 0), NULL_TREE, NULL_TREE);
 938
 939   gsi = gsi_last_bb (cond_bb);
 940   if (gimplify_stmt_list)
 941     gsi_insert_seq_after (&gsi, gimplify_stmt_list, GSI_NEW_STMT);
 942
 943   gsi = gsi_last_bb (cond_bb);
 944   gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
 945
 946   var = create_tmp_var (TREE_TYPE (scalar_loop_iters),
 947                         "prologue_after_cost_adjust");
 948   add_referenced_var (var);
 949   prologue_after_cost_adjust_name =
 950     force_gimple_operand (scalar_loop_iters, &stmts, false, var);
 951
 952   gsi = gsi_last_bb (then_bb);
 953   if (stmts)
 954     gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT);
 955
 956   newphi = create_phi_node (var, bb_before_first_loop);
 957   add_phi_arg (newphi, prologue_after_cost_adjust_name, e_fallthru);
 958   add_phi_arg (newphi, first_niters, e_false);
 959
 960   first_niters = PHI_RESULT (newphi);
 961 }
 962
 963
 964 /* Function slpeel_tree_peel_loop_to_edge.
 965
 966    Peel the first (last) iterations of LOOP into a new prolog (epilog) loop
 967    that is placed on the entry (exit) edge E of LOOP. After this transformation
 968    we have two loops one after the other - first-loop iterates FIRST_NITERS
 969    times, and second-loop iterates the remainder NITERS - FIRST_NITERS times.
 970    If the cost model indicates that it is profitable to emit a scalar
 971    loop instead of the vector one, then the prolog (epilog) loop will iterate
 972    for the entire unchanged scalar iterations of the loop.
 973
 974    Input:
 975    - LOOP: the loop to be peeled.
 976    - E: the exit or entry edge of LOOP.
 977         If it is the entry edge, we peel the first iterations of LOOP. In this
 978         case first-loop is LOOP, and second-loop is the newly created loop.
 979         If it is the exit edge, we peel the last iterations of LOOP. In this
 980         case, first-loop is the newly created loop, and second-loop is LOOP.
 981    - NITERS: the number of iterations that LOOP iterates.
 982    - FIRST_NITERS: the number of iterations that the first-loop should iterate.
 983    - UPDATE_FIRST_LOOP_COUNT:  specified whether this function is responsible
 984         for updating the loop bound of the first-loop to FIRST_NITERS.  If it
 985         is false, the caller of this function may want to take care of this
 986         (this can be useful if we don't want new stmts added to first-loop).
 987    - TH: cost model profitability threshold of iterations for vectorization.
 988    - CHECK_PROFITABILITY: specify whether cost model check has not occurred
 989                           during versioning and hence needs to occur during
 990                           prologue generation or whether cost model check
 991                           has not occurred during prologue generation and hence
 992                           needs to occur during epilogue generation.
 993
 994
 995    Output:
 996    The function returns a pointer to the new loop-copy, or NULL if it failed
 997    to perform the transformation.
 998
 999    The function generates two if-then-else guards: one before the first loop,
1000    and the other before the second loop:
1001    The first guard is:
1002      if (FIRST_NITERS == 0) then skip the first loop,
1003      and go directly to the second loop.
1004    The second guard is:
1005      if (FIRST_NITERS == NITERS) then skip the second loop.
1006
1007    If the optional COND_EXPR and COND_EXPR_STMT_LIST arguments are given
1008    then the generated condition is combined with COND_EXPR and the
1009    statements in COND_EXPR_STMT_LIST are emitted together with it.
1010
1011    FORNOW only simple loops are supported (see slpeel_can_duplicate_loop_p).
1012    FORNOW the resulting code will not be in loop-closed-ssa form.
1013 */
1014
1015 static struct loop*
1016 slpeel_tree_peel_loop_to_edge (struct loop *loop,
1017                                edge e, tree first_niters,
1018                                tree niters, bool update_first_loop_count,
1019                                unsigned int th, bool check_profitability,
1020                                tree cond_expr, gimple_seq cond_expr_stmt_list)
1021 {
1022   struct loop *new_loop = NULL, *first_loop, *second_loop;
1023   edge skip_e;
1024   tree pre_condition = NULL_TREE;
1025   bitmap definitions;
1026   basic_block bb_before_second_loop, bb_after_second_loop;
1027   basic_block bb_before_first_loop;
1028   basic_block bb_between_loops;
1029   basic_block new_exit_bb;
1030   edge exit_e = single_exit (loop);
1031   LOC loop_loc;
1032   tree cost_pre_condition = NULL_TREE;
1033
1034   if (!slpeel_can_duplicate_loop_p (loop, e))
1035     return NULL;
1036
1037   /* We have to initialize cfg_hooks. Then, when calling
1038    cfg_hooks->split_edge, the function tree_split_edge
1039    is actually called and, when calling cfg_hooks->duplicate_block,
1040    the function tree_duplicate_bb is called.  */
1041   gimple_register_cfg_hooks ();
1042
1043
1044   /* 1. Generate a copy of LOOP and put it on E (E is the entry/exit of LOOP).
1045         Resulting CFG would be:
1046
1047         first_loop:
1048         do {
1049         } while ...
1050
1051         second_loop:
1052         do {
1053         } while ...
1054
1055         orig_exit_bb:
1056    */
1057
1058   if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, e)))
1059     {
1060       loop_loc = find_loop_location (loop);
1061       if (dump_file && (dump_flags & TDF_DETAILS))
1062         {
1063           if (loop_loc != UNKNOWN_LOC)
1064             fprintf (dump_file, "\n%s:%d: note: ",
1065                      LOC_FILE (loop_loc), LOC_LINE (loop_loc));
1066           fprintf (dump_file, "tree_duplicate_loop_to_edge_cfg failed.\n");
1067         }
1068       return NULL;
1069     }
1070
1071   if (e == exit_e)
1072     {
1073       /* NEW_LOOP was placed after LOOP.  */
1074       first_loop = loop;
1075       second_loop = new_loop;
1076     }
1077   else
1078     {
1079       /* NEW_LOOP was placed before LOOP.  */
1080       first_loop = new_loop;
1081       second_loop = loop;
1082     }
1083
1084   definitions = ssa_names_to_replace ();
1085   slpeel_update_phis_for_duplicate_loop (loop, new_loop, e == exit_e);
1086   rename_variables_in_loop (new_loop);
1087
1088
1089   /* 2.  Add the guard code in one of the following ways:
1090
1091      2.a Add the guard that controls whether the first loop is executed.
1092          This occurs when this function is invoked for prologue or epilogue
1093          generation and when the cost model check can be done at compile time.
1094
1095          Resulting CFG would be:
1096
1097          bb_before_first_loop:
1098          if (FIRST_NITERS == 0) GOTO bb_before_second_loop
1099                                 GOTO first-loop
1100
1101          first_loop:
1102          do {
1103          } while ...
1104
1105          bb_before_second_loop:
1106
1107          second_loop:
1108          do {
1109          } while ...
1110
1111          orig_exit_bb:
1112
1113      2.b Add the cost model check that allows the prologue
1114          to iterate for the entire unchanged scalar
1115          iterations of the loop in the event that the cost
1116          model indicates that the scalar loop is more
1117          profitable than the vector one. This occurs when
1118          this function is invoked for prologue generation
1119          and the cost model check needs to be done at run
1120          time.
1121
1122          Resulting CFG after prologue peeling would be:
1123
1124          if (scalar_loop_iterations <= th)
1125            FIRST_NITERS = scalar_loop_iterations
1126
1127          bb_before_first_loop:
1128          if (FIRST_NITERS == 0) GOTO bb_before_second_loop
1129                                 GOTO first-loop
1130
1131          first_loop:
1132          do {
1133          } while ...
1134
1135          bb_before_second_loop:
1136
1137          second_loop:
1138          do {
1139          } while ...
1140
1141          orig_exit_bb:
1142
1143      2.c Add the cost model check that allows the epilogue
1144          to iterate for the entire unchanged scalar
1145          iterations of the loop in the event that the cost
1146          model indicates that the scalar loop is more
1147          profitable than the vector one. This occurs when
1148          this function is invoked for epilogue generation
1149          and the cost model check needs to be done at run
1150          time.  This check is combined with any pre-existing
1151          check in COND_EXPR to avoid versioning.
1152
1153          Resulting CFG after prologue peeling would be:
1154
1155          bb_before_first_loop:
1156          if ((scalar_loop_iterations <= th)
1157              ||
1158              FIRST_NITERS == 0) GOTO bb_before_second_loop
1159                                 GOTO first-loop
1160
1161          first_loop:
1162          do {
1163          } while ...
1164
1165          bb_before_second_loop:
1166
1167          second_loop:
1168          do {
1169          } while ...
1170
1171          orig_exit_bb:
1172   */
1173
1174   bb_before_first_loop = split_edge (loop_preheader_edge (first_loop));
1175   bb_before_second_loop = split_edge (single_exit (first_loop));
1176
1177   /* Epilogue peeling.  */
1178   if (!update_first_loop_count)
1179     {
1180       pre_condition =
1181         fold_build2 (LE_EXPR, boolean_type_node, first_niters,
1182                      build_int_cst (TREE_TYPE (first_niters), 0));
1183       if (check_profitability)
1184         {
1185           tree scalar_loop_iters
1186             = unshare_expr (LOOP_VINFO_NITERS_UNCHANGED
1187                                         (loop_vec_info_for_loop (loop)));
1188           cost_pre_condition =
1189             fold_build2 (LE_EXPR, boolean_type_node, scalar_loop_iters,
1190                          build_int_cst (TREE_TYPE (scalar_loop_iters), th));
1191
1192           pre_condition = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
1193                                        cost_pre_condition, pre_condition);
1194         }
1195       if (cond_expr)
1196         {
1197           pre_condition =
1198             fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
1199                          pre_condition,
1200                          fold_build1 (TRUTH_NOT_EXPR, boolean_type_node,
1201                                       cond_expr));
1202         }
1203     }
1204
1205   /* Prologue peeling.  */
1206   else
1207     {
1208       if (check_profitability)
1209         set_prologue_iterations (bb_before_first_loop, first_niters,
1210                                  loop, th);
1211
1212       pre_condition =
1213         fold_build2 (LE_EXPR, boolean_type_node, first_niters,
1214                      build_int_cst (TREE_TYPE (first_niters), 0));
1215     }
1216
1217   skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
1218                                   cond_expr_stmt_list,
1219                                   bb_before_second_loop, bb_before_first_loop);
1220   slpeel_update_phi_nodes_for_guard1 (skip_e, first_loop,
1221                                       first_loop == new_loop,
1222                                       &new_exit_bb, &definitions);
1223
1224
1225   /* 3. Add the guard that controls whether the second loop is executed.
1226         Resulting CFG would be:
1227
1228         bb_before_first_loop:
1229         if (FIRST_NITERS == 0) GOTO bb_before_second_loop (skip first loop)
1230                                GOTO first-loop
1231
1232         first_loop:
1233         do {
1234         } while ...
1235
1236         bb_between_loops:
1237         if (FIRST_NITERS == NITERS) GOTO bb_after_second_loop (skip second loop)
1238                                     GOTO bb_before_second_loop
1239
1240         bb_before_second_loop:
1241
1242         second_loop:
1243         do {
1244         } while ...
1245
1246         bb_after_second_loop:
1247
1248         orig_exit_bb:
1249    */
1250
1251   bb_between_loops = new_exit_bb;
1252   bb_after_second_loop = split_edge (single_exit (second_loop));
1253
1254   pre_condition =
1255         fold_build2 (EQ_EXPR, boolean_type_node, first_niters, niters);
1256   skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition, NULL,
1257                                   bb_after_second_loop, bb_before_first_loop);
1258   slpeel_update_phi_nodes_for_guard2 (skip_e, second_loop,
1259                                      second_loop == new_loop, &new_exit_bb);
1260
1261   /* 4. Make first-loop iterate FIRST_NITERS times, if requested.
1262    */
1263   if (update_first_loop_count)
1264     slpeel_make_loop_iterate_ntimes (first_loop, first_niters);
1265
1266   BITMAP_FREE (definitions);
1267   delete_update_ssa ();
1268
1269   return new_loop;
1270 }
1271
1272 /* Function vect_get_loop_location.
1273
1274    Extract the location of the loop in the source code.
1275    If the loop is not well formed for vectorization, an estimated
1276    location is calculated.
1277    Return the loop location if succeed and NULL if not.  */
1278
1279 LOC
1280 find_loop_location (struct loop *loop)
1281 {
1282   gimple stmt = NULL;
1283   basic_block bb;
1284   gimple_stmt_iterator si;
1285
1286   if (!loop)
1287     return UNKNOWN_LOC;
1288
1289   stmt = get_loop_exit_condition (loop);
1290
1291   if (stmt && gimple_location (stmt) != UNKNOWN_LOC)
1292     return gimple_location (stmt);
1293
1294   /* If we got here the loop is probably not "well formed",
1295      try to estimate the loop location */
1296
1297   if (!loop->header)
1298     return UNKNOWN_LOC;
1299
1300   bb = loop->header;
1301
1302   for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1303     {
1304       stmt = gsi_stmt (si);
1305       if (gimple_location (stmt) != UNKNOWN_LOC)
1306         return gimple_location (stmt);
1307     }
1308
1309   return UNKNOWN_LOC;
1310 }
1311
1312
1313 /* This function builds ni_name = number of iterations loop executes
1314    on the loop preheader.  If SEQ is given the stmt is instead emitted
1315    there.  */
1316
1317 static tree
1318 vect_build_loop_niters (loop_vec_info loop_vinfo, gimple_seq seq)
1319 {
1320   tree ni_name, var;
1321   gimple_seq stmts = NULL;
1322   edge pe;
1323   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1324   tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
1325
1326   var = create_tmp_var (TREE_TYPE (ni), "niters");
1327   add_referenced_var (var);
1328   ni_name = force_gimple_operand (ni, &stmts, false, var);
1329
1330   pe = loop_preheader_edge (loop);
1331   if (stmts)
1332     {
1333       if (seq)
1334         gimple_seq_add_seq (&seq, stmts);
1335       else
1336         {
1337           basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
1338           gcc_assert (!new_bb);
1339         }
1340     }
1341
1342   return ni_name;
1343 }
1344
1345
1346 /* This function generates the following statements:
1347
1348  ni_name = number of iterations loop executes
1349  ratio = ni_name / vf
1350  ratio_mult_vf_name = ratio * vf
1351
1352  and places them at the loop preheader edge or in COND_EXPR_STMT_LIST
1353  if that is non-NULL.  */
1354
1355 static void
1356 vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
1357                                  tree *ni_name_ptr,
1358                                  tree *ratio_mult_vf_name_ptr,
1359                                  tree *ratio_name_ptr,
1360                                  gimple_seq cond_expr_stmt_list)
1361 {
1362
1363   edge pe;
1364   basic_block new_bb;
1365   gimple_seq stmts;
1366   tree ni_name;
1367   tree var;
1368   tree ratio_name;
1369   tree ratio_mult_vf_name;
1370   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1371   tree ni = LOOP_VINFO_NITERS (loop_vinfo);
1372   int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1373   tree log_vf;
1374
1375   pe = loop_preheader_edge (loop);
1376
1377   /* Generate temporary variable that contains
1378      number of iterations loop executes.  */
1379
1380   ni_name = vect_build_loop_niters (loop_vinfo, cond_expr_stmt_list);
1381   log_vf = build_int_cst (TREE_TYPE (ni), exact_log2 (vf));
1382
1383   /* Create: ratio = ni >> log2(vf) */
1384
1385   ratio_name = fold_build2 (RSHIFT_EXPR, TREE_TYPE (ni_name), ni_name, log_vf);
1386   if (!is_gimple_val (ratio_name))
1387     {
1388       var = create_tmp_var (TREE_TYPE (ni), "bnd");
1389       add_referenced_var (var);
1390
1391       stmts = NULL;
1392       ratio_name = force_gimple_operand (ratio_name, &stmts, true, var);
1393       if (cond_expr_stmt_list)
1394         gimple_seq_add_seq (&cond_expr_stmt_list, stmts);
1395       else
1396         {
1397           pe = loop_preheader_edge (loop);
1398           new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
1399           gcc_assert (!new_bb);
1400         }
1401     }
1402
1403   /* Create: ratio_mult_vf = ratio << log2 (vf).  */
1404
1405   ratio_mult_vf_name = fold_build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name),
1406                                     ratio_name, log_vf);
1407   if (!is_gimple_val (ratio_mult_vf_name))
1408     {
1409       var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
1410       add_referenced_var (var);
1411
1412       stmts = NULL;
1413       ratio_mult_vf_name = force_gimple_operand (ratio_mult_vf_name, &stmts,
1414                                                  true, var);
1415       if (cond_expr_stmt_list)
1416         gimple_seq_add_seq (&cond_expr_stmt_list, stmts);
1417       else
1418         {
1419           pe = loop_preheader_edge (loop);
1420           new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
1421           gcc_assert (!new_bb);
1422         }
1423     }
1424
1425   *ni_name_ptr = ni_name;
1426   *ratio_mult_vf_name_ptr = ratio_mult_vf_name;
1427   *ratio_name_ptr = ratio_name;
1428
1429   return;
1430 }
1431
1432 /* Function vect_can_advance_ivs_p
1433
1434    In case the number of iterations that LOOP iterates is unknown at compile
1435    time, an epilog loop will be generated, and the loop induction variables
1436    (IVs) will be "advanced" to the value they are supposed to take just before
1437    the epilog loop.  Here we check that the access function of the loop IVs
1438    and the expression that represents the loop bound are simple enough.
1439    These restrictions will be relaxed in the future.  */
1440
1441 bool
1442 vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
1443 {
1444   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1445   basic_block bb = loop->header;
1446   gimple phi;
1447   gimple_stmt_iterator gsi;
1448
1449   /* Analyze phi functions of the loop header.  */
1450
1451   if (vect_print_dump_info (REPORT_DETAILS))
1452     fprintf (vect_dump, "vect_can_advance_ivs_p:");
1453
1454   for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1455     {
1456       tree access_fn = NULL;
1457       tree evolution_part;
1458
1459       phi = gsi_stmt (gsi);
1460       if (vect_print_dump_info (REPORT_DETAILS))
1461         {
1462           fprintf (vect_dump, "Analyze phi: ");
1463           print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
1464         }
1465
1466       /* Skip virtual phi's. The data dependences that are associated with
1467          virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */
1468
1469       if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
1470         {
1471           if (vect_print_dump_info (REPORT_DETAILS))
1472             fprintf (vect_dump, "virtual phi. skip.");
1473           continue;
1474         }
1475
1476       /* Skip reduction phis.  */
1477
1478       if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
1479         {
1480           if (vect_print_dump_info (REPORT_DETAILS))
1481             fprintf (vect_dump, "reduc phi. skip.");
1482           continue;
1483         }
1484
1485       /* Analyze the evolution function.  */
1486
1487       access_fn = instantiate_parameters
1488         (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
1489
1490       if (!access_fn)
1491         {
1492           if (vect_print_dump_info (REPORT_DETAILS))
1493             fprintf (vect_dump, "No Access function.");
1494           return false;
1495         }
1496
1497       if (vect_print_dump_info (REPORT_DETAILS))
1498         {
1499           fprintf (vect_dump, "Access function of PHI: ");
1500           print_generic_expr (vect_dump, access_fn, TDF_SLIM);
1501         }
1502
1503       evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
1504
1505       if (evolution_part == NULL_TREE)
1506         {
1507           if (vect_print_dump_info (REPORT_DETAILS))
1508             fprintf (vect_dump, "No evolution.");
1509           return false;
1510         }
1511
1512       /* FORNOW: We do not transform initial conditions of IVs
1513          which evolution functions are a polynomial of degree >= 2.  */
1514
1515       if (tree_is_chrec (evolution_part))
1516         return false;
1517     }
1518
1519   return true;
1520 }
1521
1522
1523 /*   Function vect_update_ivs_after_vectorizer.
1524
1525      "Advance" the induction variables of LOOP to the value they should take
1526      after the execution of LOOP.  This is currently necessary because the
1527      vectorizer does not handle induction variables that are used after the
1528      loop.  Such a situation occurs when the last iterations of LOOP are
1529      peeled, because:
1530      1. We introduced new uses after LOOP for IVs that were not originally used
1531         after LOOP: the IVs of LOOP are now used by an epilog loop.
1532      2. LOOP is going to be vectorized; this means that it will iterate N/VF
1533         times, whereas the loop IVs should be bumped N times.
1534
1535      Input:
1536      - LOOP - a loop that is going to be vectorized. The last few iterations
1537               of LOOP were peeled.
1538      - NITERS - the number of iterations that LOOP executes (before it is
1539                 vectorized). i.e, the number of times the ivs should be bumped.
1540      - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
1541                   coming out from LOOP on which there are uses of the LOOP ivs
1542                   (this is the path from LOOP->exit to epilog_loop->preheader).
1543
1544                   The new definitions of the ivs are placed in LOOP->exit.
1545                   The phi args associated with the edge UPDATE_E in the bb
1546                   UPDATE_E->dest are updated accordingly.
1547
1548      Assumption 1: Like the rest of the vectorizer, this function assumes
1549      a single loop exit that has a single predecessor.
1550
1551      Assumption 2: The phi nodes in the LOOP header and in update_bb are
1552      organized in the same order.
1553
1554      Assumption 3: The access function of the ivs is simple enough (see
1555      vect_can_advance_ivs_p).  This assumption will be relaxed in the future.
1556
1557      Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
1558      coming out of LOOP on which the ivs of LOOP are used (this is the path
1559      that leads to the epilog loop; other paths skip the epilog loop).  This
1560      path starts with the edge UPDATE_E, and its destination (denoted update_bb)
1561      needs to have its phis updated.
1562  */
1563
1564 static void
1565 vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters,
1566                                   edge update_e)
1567 {
1568   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1569   basic_block exit_bb = single_exit (loop)->dest;
1570   gimple phi, phi1;
1571   gimple_stmt_iterator gsi, gsi1;
1572   basic_block update_bb = update_e->dest;
1573
1574   /* gcc_assert (vect_can_advance_ivs_p (loop_vinfo)); */
1575
1576   /* Make sure there exists a single-predecessor exit bb:  */
1577   gcc_assert (single_pred_p (exit_bb));
1578
1579   for (gsi = gsi_start_phis (loop->header), gsi1 = gsi_start_phis (update_bb);
1580        !gsi_end_p (gsi) && !gsi_end_p (gsi1);
1581        gsi_next (&gsi), gsi_next (&gsi1))
1582     {
1583       tree access_fn = NULL;
1584       tree evolution_part;
1585       tree init_expr;
1586       tree step_expr, off;
1587       tree type;
1588       tree var, ni, ni_name;
1589       gimple_stmt_iterator last_gsi;
1590
1591       phi = gsi_stmt (gsi);
1592       phi1 = gsi_stmt (gsi1);
1593       if (vect_print_dump_info (REPORT_DETAILS))
1594         {
1595           fprintf (vect_dump, "vect_update_ivs_after_vectorizer: phi: ");
1596           print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
1597         }
1598
1599       /* Skip virtual phi's.  */
1600       if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
1601         {
1602           if (vect_print_dump_info (REPORT_DETAILS))
1603             fprintf (vect_dump, "virtual phi. skip.");
1604           continue;
1605         }
1606
1607       /* Skip reduction phis.  */
1608       if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
1609         {
1610           if (vect_print_dump_info (REPORT_DETAILS))
1611             fprintf (vect_dump, "reduc phi. skip.");
1612           continue;
1613         }
1614
1615       access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi));
1616       gcc_assert (access_fn);
1617       /* We can end up with an access_fn like
1618            (short int) {(short unsigned int) i_49, +, 1}_1
1619          for further analysis we need to strip the outer cast but we
1620          need to preserve the original type.  */
1621       type = TREE_TYPE (access_fn);
1622       STRIP_NOPS (access_fn);
1623       evolution_part =
1624          unshare_expr (evolution_part_in_loop_num (access_fn, loop->num));
1625       gcc_assert (evolution_part != NULL_TREE);
1626
1627       /* FORNOW: We do not support IVs whose evolution function is a polynomial
1628          of degree >= 2 or exponential.  */
1629       gcc_assert (!tree_is_chrec (evolution_part));
1630
1631       step_expr = evolution_part;
1632       init_expr = unshare_expr (initial_condition_in_loop_num (access_fn,
1633                                                                loop->num));
1634       init_expr = fold_convert (type, init_expr);
1635
1636       off = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
1637                          fold_convert (TREE_TYPE (step_expr), niters),
1638                          step_expr);
1639       if (POINTER_TYPE_P (TREE_TYPE (init_expr)))
1640         ni = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (init_expr),
1641                           init_expr,
1642                           fold_convert (sizetype, off));
1643       else
1644         ni = fold_build2 (PLUS_EXPR, TREE_TYPE (init_expr),
1645                           init_expr,
1646                           fold_convert (TREE_TYPE (init_expr), off));
1647
1648       var = create_tmp_var (TREE_TYPE (init_expr), "tmp");
1649       add_referenced_var (var);
1650
1651       last_gsi = gsi_last_bb (exit_bb);
1652       ni_name = force_gimple_operand_gsi (&last_gsi, ni, false, var,
1653                                           true, GSI_SAME_STMT);
1654
1655       /* Fix phi expressions in the successor bb.  */
1656       SET_PHI_ARG_DEF (phi1, update_e->dest_idx, ni_name);
1657     }
1658 }
1659
1660 /* Return the more conservative threshold between the
1661    min_profitable_iters returned by the cost model and the user
1662    specified threshold, if provided.  */
1663
1664 static unsigned int
1665 conservative_cost_threshold (loop_vec_info loop_vinfo,
1666                              int min_profitable_iters)
1667 {
1668   unsigned int th;
1669   int min_scalar_loop_bound;
1670
1671   min_scalar_loop_bound = ((PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)
1672                             * LOOP_VINFO_VECT_FACTOR (loop_vinfo)) - 1);
1673
1674   /* Use the cost model only if it is more conservative than user specified
1675      threshold.  */
1676   th = (unsigned) min_scalar_loop_bound;
1677   if (min_profitable_iters
1678       && (!min_scalar_loop_bound
1679           || min_profitable_iters > min_scalar_loop_bound))
1680     th = (unsigned) min_profitable_iters;
1681
1682   if (th && vect_print_dump_info (REPORT_COST))
1683     fprintf (vect_dump, "Profitability threshold is %u loop iterations.", th);
1684
1685   return th;
1686 }
1687
1688 /* Function vect_do_peeling_for_loop_bound
1689
1690    Peel the last iterations of the loop represented by LOOP_VINFO.
1691    The peeled iterations form a new epilog loop.  Given that the loop now
1692    iterates NITERS times, the new epilog loop iterates
1693    NITERS % VECTORIZATION_FACTOR times.
1694
1695    The original loop will later be made to iterate
1696    NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO).
1697
1698    COND_EXPR and COND_EXPR_STMT_LIST are combined with a new generated
1699    test.  */
1700
1701 void
1702 vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio,
1703                                 tree cond_expr, gimple_seq cond_expr_stmt_list)
1704 {
1705   tree ni_name, ratio_mult_vf_name;
1706   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1707   struct loop *new_loop;
1708   edge update_e;
1709   basic_block preheader;
1710   int loop_num;
1711   bool check_profitability = false;
1712   unsigned int th = 0;
1713   int min_profitable_iters;
1714
1715   if (vect_print_dump_info (REPORT_DETAILS))
1716     fprintf (vect_dump, "=== vect_do_peeling_for_loop_bound ===");
1717
1718   initialize_original_copy_tables ();
1719
1720   /* Generate the following variables on the preheader of original loop:
1721
1722      ni_name = number of iteration the original loop executes
1723      ratio = ni_name / vf
1724      ratio_mult_vf_name = ratio * vf  */
1725   vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
1726                                    &ratio_mult_vf_name, ratio,
1727                                    cond_expr_stmt_list);
1728
1729   loop_num  = loop->num;
1730
1731   /* If cost model check not done during versioning and
1732      peeling for alignment.  */
1733   if (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
1734       && !LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo)
1735       && !LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo)
1736       && !cond_expr)
1737     {
1738       check_profitability = true;
1739
1740       /* Get profitability threshold for vectorized loop.  */
1741       min_profitable_iters = LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo);
1742
1743       th = conservative_cost_threshold (loop_vinfo,
1744                                         min_profitable_iters);
1745     }
1746
1747   new_loop = slpeel_tree_peel_loop_to_edge (loop, single_exit (loop),
1748                                             ratio_mult_vf_name, ni_name, false,
1749                                             th, check_profitability,
1750                                             cond_expr, cond_expr_stmt_list);
1751   gcc_assert (new_loop);
1752   gcc_assert (loop_num == loop->num);
1753 #ifdef ENABLE_CHECKING
1754   slpeel_verify_cfg_after_peeling (loop, new_loop);
1755 #endif
1756
1757   /* A guard that controls whether the new_loop is to be executed or skipped
1758      is placed in LOOP->exit.  LOOP->exit therefore has two successors - one
1759      is the preheader of NEW_LOOP, where the IVs from LOOP are used.  The other
1760      is a bb after NEW_LOOP, where these IVs are not used.  Find the edge that
1761      is on the path where the LOOP IVs are used and need to be updated.  */
1762
1763   preheader = loop_preheader_edge (new_loop)->src;
1764   if (EDGE_PRED (preheader, 0)->src == single_exit (loop)->dest)
1765     update_e = EDGE_PRED (preheader, 0);
1766   else
1767     update_e = EDGE_PRED (preheader, 1);
1768
1769   /* Update IVs of original loop as if they were advanced
1770      by ratio_mult_vf_name steps.  */
1771   vect_update_ivs_after_vectorizer (loop_vinfo, ratio_mult_vf_name, update_e);
1772
1773   /* After peeling we have to reset scalar evolution analyzer.  */
1774   scev_reset ();
1775
1776   free_original_copy_tables ();
1777 }
1778
1779
1780 /* Function vect_gen_niters_for_prolog_loop
1781
1782    Set the number of iterations for the loop represented by LOOP_VINFO
1783    to the minimum between LOOP_NITERS (the original iteration count of the loop)
1784    and the misalignment of DR - the data reference recorded in
1785    LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO).  As a result, after the execution of
1786    this loop, the data reference DR will refer to an aligned location.
1787
1788    The following computation is generated:
1789
1790    If the misalignment of DR is known at compile time:
1791      addr_mis = int mis = DR_MISALIGNMENT (dr);
1792    Else, compute address misalignment in bytes:
1793      addr_mis = addr & (vectype_size - 1)
1794
1795    prolog_niters = min (LOOP_NITERS, ((VF - addr_mis/elem_size)&(VF-1))/step)
1796
1797    (elem_size = element type size; an element is the scalar element whose type
1798    is the inner type of the vectype)
1799
1800    When the step of the data-ref in the loop is not 1 (as in interleaved data
1801    and SLP), the number of iterations of the prolog must be divided by the step
1802    (which is equal to the size of interleaved group).
1803
1804    The above formulas assume that VF == number of elements in the vector. This
1805    may not hold when there are multiple-types in the loop.
1806    In this case, for some data-references in the loop the VF does not represent
1807    the number of elements that fit in the vector.  Therefore, instead of VF we
1808    use TYPE_VECTOR_SUBPARTS.  */
1809
1810 static tree
1811 vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
1812 {
1813   struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
1814   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1815   tree var;
1816   gimple_seq stmts;
1817   tree iters, iters_name;
1818   edge pe;
1819   basic_block new_bb;
1820   gimple dr_stmt = DR_STMT (dr);
1821   stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
1822   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1823   int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
1824   tree niters_type = TREE_TYPE (loop_niters);
1825   int step = 1;
1826   int element_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
1827   int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1828
1829   if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1830     step = DR_GROUP_SIZE (vinfo_for_stmt (DR_GROUP_FIRST_DR (stmt_info)));
1831
1832   pe = loop_preheader_edge (loop);
1833
1834   if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
1835     {
1836       int byte_misalign = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
1837       int elem_misalign = byte_misalign / element_size;
1838
1839       if (vect_print_dump_info (REPORT_DETAILS))
1840         fprintf (vect_dump, "known alignment = %d.", byte_misalign);
1841
1842       iters = build_int_cst (niters_type,
1843                      (((nelements - elem_misalign) & (nelements - 1)) / step));
1844     }
1845   else
1846     {
1847       gimple_seq new_stmts = NULL;
1848       tree start_addr = vect_create_addr_base_for_vector_ref (dr_stmt,
1849                                                 &new_stmts, NULL_TREE, loop);
1850       tree ptr_type = TREE_TYPE (start_addr);
1851       tree size = TYPE_SIZE (ptr_type);
1852       tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
1853       tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1);
1854       tree elem_size_log =
1855         build_int_cst (type, exact_log2 (vectype_align/nelements));
1856       tree nelements_minus_1 = build_int_cst (type, nelements - 1);
1857       tree nelements_tree = build_int_cst (type, nelements);
1858       tree byte_misalign;
1859       tree elem_misalign;
1860
1861       new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmts);
1862       gcc_assert (!new_bb);
1863
1864       /* Create:  byte_misalign = addr & (vectype_size - 1)  */
1865       byte_misalign =
1866         fold_build2 (BIT_AND_EXPR, type, fold_convert (type, start_addr), vectype_size_minus_1);
1867
1868       /* Create:  elem_misalign = byte_misalign / element_size  */
1869       elem_misalign =
1870         fold_build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log);
1871
1872       /* Create:  (niters_type) (nelements - elem_misalign)&(nelements - 1)  */
1873       iters = fold_build2 (MINUS_EXPR, type, nelements_tree, elem_misalign);
1874       iters = fold_build2 (BIT_AND_EXPR, type, iters, nelements_minus_1);
1875       iters = fold_convert (niters_type, iters);
1876     }
1877
1878   /* Create:  prolog_loop_niters = min (iters, loop_niters) */
1879   /* If the loop bound is known at compile time we already verified that it is
1880      greater than vf; since the misalignment ('iters') is at most vf, there's
1881      no need to generate the MIN_EXPR in this case.  */
1882   if (TREE_CODE (loop_niters) != INTEGER_CST)
1883     iters = fold_build2 (MIN_EXPR, niters_type, iters, loop_niters);
1884
1885   if (vect_print_dump_info (REPORT_DETAILS))
1886     {
1887       fprintf (vect_dump, "niters for prolog loop: ");
1888       print_generic_expr (vect_dump, iters, TDF_SLIM);
1889     }
1890
1891   var = create_tmp_var (niters_type, "prolog_loop_niters");
1892   add_referenced_var (var);
1893   stmts = NULL;
1894   iters_name = force_gimple_operand (iters, &stmts, false, var);
1895
1896   /* Insert stmt on loop preheader edge.  */
1897   if (stmts)
1898     {
1899       basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
1900       gcc_assert (!new_bb);
1901     }
1902
1903   return iters_name;
1904 }
1905
1906
1907 /* Function vect_update_init_of_dr
1908
1909    NITERS iterations were peeled from LOOP.  DR represents a data reference
1910    in LOOP.  This function updates the information recorded in DR to
1911    account for the fact that the first NITERS iterations had already been
1912    executed.  Specifically, it updates the OFFSET field of DR.  */
1913
1914 static void
1915 vect_update_init_of_dr (struct data_reference *dr, tree niters)
1916 {
1917   tree offset = DR_OFFSET (dr);
1918
1919   niters = fold_build2 (MULT_EXPR, sizetype,
1920                         fold_convert (sizetype, niters),
1921                         fold_convert (sizetype, DR_STEP (dr)));
1922   offset = fold_build2 (PLUS_EXPR, sizetype,
1923                         fold_convert (sizetype, offset), niters);
1924   DR_OFFSET (dr) = offset;
1925 }
1926
1927
1928 /* Function vect_update_inits_of_drs
1929
1930    NITERS iterations were peeled from the loop represented by LOOP_VINFO.
1931    This function updates the information recorded for the data references in
1932    the loop to account for the fact that the first NITERS iterations had
1933    already been executed.  Specifically, it updates the initial_condition of
1934    the access_function of all the data_references in the loop.  */
1935
1936 static void
1937 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
1938 {
1939   unsigned int i;
1940   VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1941   struct data_reference *dr;
1942
1943   if (vect_print_dump_info (REPORT_DETAILS))
1944     fprintf (vect_dump, "=== vect_update_inits_of_dr ===");
1945
1946   for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1947     vect_update_init_of_dr (dr, niters);
1948 }
1949
1950
1951 /* Function vect_do_peeling_for_alignment
1952
1953    Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
1954    'niters' is set to the misalignment of one of the data references in the
1955    loop, thereby forcing it to refer to an aligned location at the beginning
1956    of the execution of this loop.  The data reference for which we are
1957    peeling is recorded in LOOP_VINFO_UNALIGNED_DR.  */
1958
1959 void
1960 vect_do_peeling_for_alignment (loop_vec_info loop_vinfo)
1961 {
1962   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1963   tree niters_of_prolog_loop, ni_name;
1964   tree n_iters;
1965   struct loop *new_loop;
1966   unsigned int th = 0;
1967   int min_profitable_iters;
1968
1969   if (vect_print_dump_info (REPORT_DETAILS))
1970     fprintf (vect_dump, "=== vect_do_peeling_for_alignment ===");
1971
1972   initialize_original_copy_tables ();
1973
1974   ni_name = vect_build_loop_niters (loop_vinfo, NULL);
1975   niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name);
1976
1977
1978   /* Get profitability threshold for vectorized loop.  */
1979   min_profitable_iters = LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo);
1980   th = conservative_cost_threshold (loop_vinfo,
1981                                     min_profitable_iters);
1982
1983   /* Peel the prolog loop and iterate it niters_of_prolog_loop.  */
1984   new_loop =
1985     slpeel_tree_peel_loop_to_edge (loop, loop_preheader_edge (loop),
1986                                    niters_of_prolog_loop, ni_name, true,
1987                                    th, true, NULL_TREE, NULL);
1988
1989   gcc_assert (new_loop);
1990 #ifdef ENABLE_CHECKING
1991   slpeel_verify_cfg_after_peeling (new_loop, loop);
1992 #endif
1993
1994   /* Update number of times loop executes.  */
1995   n_iters = LOOP_VINFO_NITERS (loop_vinfo);
1996   LOOP_VINFO_NITERS (loop_vinfo) = fold_build2 (MINUS_EXPR,
1997                 TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop);
1998
1999   /* Update the init conditions of the access functions of all data refs.  */
2000   vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop);
2001
2002   /* After peeling we have to reset scalar evolution analyzer.  */
2003   scev_reset ();
2004
2005   free_original_copy_tables ();
2006 }
2007
2008
2009 /* Function vect_create_cond_for_align_checks.
2010
2011    Create a conditional expression that represents the alignment checks for
2012    all of data references (array element references) whose alignment must be
2013    checked at runtime.
2014
2015    Input:
2016    COND_EXPR  - input conditional expression.  New conditions will be chained
2017                 with logical AND operation.
2018    LOOP_VINFO - two fields of the loop information are used.
2019                 LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
2020                 LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
2021
2022    Output:
2023    COND_EXPR_STMT_LIST - statements needed to construct the conditional
2024                          expression.
2025    The returned value is the conditional expression to be used in the if
2026    statement that controls which version of the loop gets executed at runtime.
2027
2028    The algorithm makes two assumptions:
2029      1) The number of bytes "n" in a vector is a power of 2.
2030      2) An address "a" is aligned if a%n is zero and that this
2031         test can be done as a&(n-1) == 0.  For example, for 16
2032         byte vectors the test is a&0xf == 0.  */
2033
2034 static void
2035 vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
2036                                    tree *cond_expr,
2037                                    gimple_seq *cond_expr_stmt_list)
2038 {
2039   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2040   VEC(gimple,heap) *may_misalign_stmts
2041     = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
2042   gimple ref_stmt;
2043   int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
2044   tree mask_cst;
2045   unsigned int i;
2046   tree psize;
2047   tree int_ptrsize_type;
2048   char tmp_name[20];
2049   tree or_tmp_name = NULL_TREE;
2050   tree and_tmp, and_tmp_name;
2051   gimple and_stmt;
2052   tree ptrsize_zero;
2053   tree part_cond_expr;
2054
2055   /* Check that mask is one less than a power of 2, i.e., mask is
2056      all zeros followed by all ones.  */
2057   gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));
2058
2059   /* CHECKME: what is the best integer or unsigned type to use to hold a
2060      cast from a pointer value?  */
2061   psize = TYPE_SIZE (ptr_type_node);
2062   int_ptrsize_type
2063     = lang_hooks.types.type_for_size (tree_low_cst (psize, 1), 0);
2064
2065   /* Create expression (mask & (dr_1 || ... || dr_n)) where dr_i is the address
2066      of the first vector of the i'th data reference. */
2067
2068   for (i = 0; VEC_iterate (gimple, may_misalign_stmts, i, ref_stmt); i++)
2069     {
2070       gimple_seq new_stmt_list = NULL;
2071       tree addr_base;
2072       tree addr_tmp, addr_tmp_name;
2073       tree or_tmp, new_or_tmp_name;
2074       gimple addr_stmt, or_stmt;
2075
2076       /* create: addr_tmp = (int)(address_of_first_vector) */
2077       addr_base =
2078         vect_create_addr_base_for_vector_ref (ref_stmt, &new_stmt_list,
2079                                               NULL_TREE, loop);
2080       if (new_stmt_list != NULL)
2081         gimple_seq_add_seq (cond_expr_stmt_list, new_stmt_list);
2082
2083       sprintf (tmp_name, "%s%d", "addr2int", i);
2084       addr_tmp = create_tmp_var (int_ptrsize_type, tmp_name);
2085       add_referenced_var (addr_tmp);
2086       addr_tmp_name = make_ssa_name (addr_tmp, NULL);
2087       addr_stmt = gimple_build_assign_with_ops (NOP_EXPR, addr_tmp_name,
2088                                                 addr_base, NULL_TREE);
2089       SSA_NAME_DEF_STMT (addr_tmp_name) = addr_stmt;
2090       gimple_seq_add_stmt (cond_expr_stmt_list, addr_stmt);
2091
2092       /* The addresses are OR together.  */
2093
2094       if (or_tmp_name != NULL_TREE)
2095         {
2096           /* create: or_tmp = or_tmp | addr_tmp */
2097           sprintf (tmp_name, "%s%d", "orptrs", i);
2098           or_tmp = create_tmp_var (int_ptrsize_type, tmp_name);
2099           add_referenced_var (or_tmp);
2100           new_or_tmp_name = make_ssa_name (or_tmp, NULL);
2101           or_stmt = gimple_build_assign_with_ops (BIT_IOR_EXPR,
2102                                                   new_or_tmp_name,
2103                                                   or_tmp_name, addr_tmp_name);
2104           SSA_NAME_DEF_STMT (new_or_tmp_name) = or_stmt;
2105           gimple_seq_add_stmt (cond_expr_stmt_list, or_stmt);
2106           or_tmp_name = new_or_tmp_name;
2107         }
2108       else
2109         or_tmp_name = addr_tmp_name;
2110
2111     } /* end for i */
2112
2113   mask_cst = build_int_cst (int_ptrsize_type, mask);
2114
2115   /* create: and_tmp = or_tmp & mask  */
2116   and_tmp = create_tmp_var (int_ptrsize_type, "andmask" );
2117   add_referenced_var (and_tmp);
2118   and_tmp_name = make_ssa_name (and_tmp, NULL);
2119
2120   and_stmt = gimple_build_assign_with_ops (BIT_AND_EXPR, and_tmp_name,
2121                                            or_tmp_name, mask_cst);
2122   SSA_NAME_DEF_STMT (and_tmp_name) = and_stmt;
2123   gimple_seq_add_stmt (cond_expr_stmt_list, and_stmt);
2124
2125   /* Make and_tmp the left operand of the conditional test against zero.
2126      if and_tmp has a nonzero bit then some address is unaligned.  */
2127   ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
2128   part_cond_expr = fold_build2 (EQ_EXPR, boolean_type_node,
2129                                 and_tmp_name, ptrsize_zero);
2130   if (*cond_expr)
2131     *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2132                               *cond_expr, part_cond_expr);
2133   else
2134     *cond_expr = part_cond_expr;
2135 }
2136
2137
2138 /* Function vect_vfa_segment_size.
2139
2140    Create an expression that computes the size of segment
2141    that will be accessed for a data reference.  The functions takes into
2142    account that realignment loads may access one more vector.
2143
2144    Input:
2145      DR: The data reference.
2146      VECT_FACTOR: vectorization factor.
2147
2148    Return an expression whose value is the size of segment which will be
2149    accessed by DR.  */
2150
2151 static tree
2152 vect_vfa_segment_size (struct data_reference *dr, tree vect_factor)
2153 {
2154   tree segment_length = fold_build2 (MULT_EXPR, integer_type_node,
2155                                      DR_STEP (dr), vect_factor);
2156
2157   if (vect_supportable_dr_alignment (dr) == dr_explicit_realign_optimized)
2158     {
2159       tree vector_size = TYPE_SIZE_UNIT
2160                           (STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr))));
2161
2162       segment_length = fold_build2 (PLUS_EXPR, integer_type_node,
2163                                     segment_length, vector_size);
2164     }
2165   return fold_convert (sizetype, segment_length);
2166 }
2167
2168
2169 /* Function vect_create_cond_for_alias_checks.
2170
2171    Create a conditional expression that represents the run-time checks for
2172    overlapping of address ranges represented by a list of data references
2173    relations passed as input.
2174
2175    Input:
2176    COND_EXPR  - input conditional expression.  New conditions will be chained
2177                 with logical AND operation.
2178    LOOP_VINFO - field LOOP_VINFO_MAY_ALIAS_STMTS contains the list of ddrs
2179                 to be checked.
2180
2181    Output:
2182    COND_EXPR - conditional expression.
2183    COND_EXPR_STMT_LIST - statements needed to construct the conditional
2184                          expression.
2185
2186
2187    The returned value is the conditional expression to be used in the if
2188    statement that controls which version of the loop gets executed at runtime.
2189 */
2190
2191 static void
2192 vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo,
2193                                    tree * cond_expr,
2194                                    gimple_seq * cond_expr_stmt_list)
2195 {
2196   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2197   VEC (ddr_p, heap) * may_alias_ddrs =
2198     LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
2199   tree vect_factor =
2200     build_int_cst (integer_type_node, LOOP_VINFO_VECT_FACTOR (loop_vinfo));
2201
2202   ddr_p ddr;
2203   unsigned int i;
2204   tree part_cond_expr;
2205
2206   /* Create expression
2207      ((store_ptr_0 + store_segment_length_0) < load_ptr_0)
2208      || (load_ptr_0 + load_segment_length_0) < store_ptr_0))
2209      &&
2210      ...
2211      &&
2212      ((store_ptr_n + store_segment_length_n) < load_ptr_n)
2213      || (load_ptr_n + load_segment_length_n) < store_ptr_n))  */
2214
2215   if (VEC_empty (ddr_p, may_alias_ddrs))
2216     return;
2217
2218   for (i = 0; VEC_iterate (ddr_p, may_alias_ddrs, i, ddr); i++)
2219     {
2220       struct data_reference *dr_a, *dr_b;
2221       gimple dr_group_first_a, dr_group_first_b;
2222       tree addr_base_a, addr_base_b;
2223       tree segment_length_a, segment_length_b;
2224       gimple stmt_a, stmt_b;
2225
2226       dr_a = DDR_A (ddr);
2227       stmt_a = DR_STMT (DDR_A (ddr));
2228       dr_group_first_a = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_a));
2229       if (dr_group_first_a)
2230         {
2231           stmt_a = dr_group_first_a;
2232           dr_a = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt_a));
2233         }
2234
2235       dr_b = DDR_B (ddr);
2236       stmt_b = DR_STMT (DDR_B (ddr));
2237       dr_group_first_b = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_b));
2238       if (dr_group_first_b)
2239         {
2240           stmt_b = dr_group_first_b;
2241           dr_b = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt_b));
2242         }
2243
2244       addr_base_a =
2245         vect_create_addr_base_for_vector_ref (stmt_a, cond_expr_stmt_list,
2246                                               NULL_TREE, loop);
2247       addr_base_b =
2248         vect_create_addr_base_for_vector_ref (stmt_b, cond_expr_stmt_list,
2249                                               NULL_TREE, loop);
2250
2251       segment_length_a = vect_vfa_segment_size (dr_a, vect_factor);
2252       segment_length_b = vect_vfa_segment_size (dr_b, vect_factor);
2253
2254       if (vect_print_dump_info (REPORT_DR_DETAILS))
2255         {
2256           fprintf (vect_dump,
2257                    "create runtime check for data references ");
2258           print_generic_expr (vect_dump, DR_REF (dr_a), TDF_SLIM);
2259           fprintf (vect_dump, " and ");
2260           print_generic_expr (vect_dump, DR_REF (dr_b), TDF_SLIM);
2261         }
2262
2263
2264       part_cond_expr =
2265         fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
2266           fold_build2 (LT_EXPR, boolean_type_node,
2267             fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (addr_base_a),
2268               addr_base_a,
2269               segment_length_a),
2270             addr_base_b),
2271           fold_build2 (LT_EXPR, boolean_type_node,
2272             fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (addr_base_b),
2273               addr_base_b,
2274               segment_length_b),
2275             addr_base_a));
2276
2277       if (*cond_expr)
2278         *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2279                                   *cond_expr, part_cond_expr);
2280       else
2281         *cond_expr = part_cond_expr;
2282     }
2283
2284   if (vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS))
2285     fprintf (vect_dump, "created %u versioning for alias checks.\n",
2286              VEC_length (ddr_p, may_alias_ddrs));
2287 }
2288
2289
2290 /* Function vect_loop_versioning.
2291
2292    If the loop has data references that may or may not be aligned or/and
2293    has data reference relations whose independence was not proven then
2294    two versions of the loop need to be generated, one which is vectorized
2295    and one which isn't.  A test is then generated to control which of the
2296    loops is executed.  The test checks for the alignment of all of the
2297    data references that may or may not be aligned.  An additional
2298    sequence of runtime tests is generated for each pairs of DDRs whose
2299    independence was not proven.  The vectorized version of loop is
2300    executed only if both alias and alignment tests are passed.
2301
2302    The test generated to check which version of loop is executed
2303    is modified to also check for profitability as indicated by the
2304    cost model initially.
2305
2306    The versioning precondition(s) are placed in *COND_EXPR and
2307    *COND_EXPR_STMT_LIST.  If DO_VERSIONING is true versioning is
2308    also performed, otherwise only the conditions are generated.  */
2309
2310 void
2311 vect_loop_versioning (loop_vec_info loop_vinfo, bool do_versioning,
2312                       tree *cond_expr, gimple_seq *cond_expr_stmt_list)
2313 {
2314   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2315   struct loop *nloop;
2316   basic_block condition_bb;
2317   gimple_stmt_iterator gsi, cond_exp_gsi;
2318   basic_block merge_bb;
2319   basic_block new_exit_bb;
2320   edge new_exit_e, e;
2321   gimple orig_phi, new_phi;
2322   tree arg;
2323   unsigned prob = 4 * REG_BR_PROB_BASE / 5;
2324   gimple_seq gimplify_stmt_list = NULL;
2325   tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo);
2326   int min_profitable_iters = 0;
2327   unsigned int th;
2328
2329   /* Get profitability threshold for vectorized loop.  */
2330   min_profitable_iters = LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo);
2331
2332   th = conservative_cost_threshold (loop_vinfo,
2333                                     min_profitable_iters);
2334
2335   *cond_expr =
2336     fold_build2 (GT_EXPR, boolean_type_node, scalar_loop_iters,
2337                  build_int_cst (TREE_TYPE (scalar_loop_iters), th));
2338
2339   *cond_expr = force_gimple_operand (*cond_expr, cond_expr_stmt_list,
2340                                      false, NULL_TREE);
2341
2342   if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
2343       vect_create_cond_for_align_checks (loop_vinfo, cond_expr,
2344                                          cond_expr_stmt_list);
2345
2346   if (LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
2347     vect_create_cond_for_alias_checks (loop_vinfo, cond_expr,
2348                                        cond_expr_stmt_list);
2349
2350   *cond_expr =
2351     fold_build2 (NE_EXPR, boolean_type_node, *cond_expr, integer_zero_node);
2352   *cond_expr =
2353     force_gimple_operand (*cond_expr, &gimplify_stmt_list, true, NULL_TREE);
2354   gimple_seq_add_seq (cond_expr_stmt_list, gimplify_stmt_list);
2355
2356   /* If we only needed the extra conditions and a new loop copy
2357      bail out here.  */
2358   if (!do_versioning)
2359     return;
2360
2361   initialize_original_copy_tables ();
2362   nloop = loop_version (loop, *cond_expr, &condition_bb,
2363                         prob, prob, REG_BR_PROB_BASE - prob, true);
2364   free_original_copy_tables();
2365
2366   /* Loop versioning violates an assumption we try to maintain during
2367      vectorization - that the loop exit block has a single predecessor.
2368      After versioning, the exit block of both loop versions is the same
2369      basic block (i.e. it has two predecessors). Just in order to simplify
2370      following transformations in the vectorizer, we fix this situation
2371      here by adding a new (empty) block on the exit-edge of the loop,
2372      with the proper loop-exit phis to maintain loop-closed-form.  */
2373
2374   merge_bb = single_exit (loop)->dest;
2375   gcc_assert (EDGE_COUNT (merge_bb->preds) == 2);
2376   new_exit_bb = split_edge (single_exit (loop));
2377   new_exit_e = single_exit (loop);
2378   e = EDGE_SUCC (new_exit_bb, 0);
2379
2380   for (gsi = gsi_start_phis (merge_bb); !gsi_end_p (gsi); gsi_next (&gsi))
2381     {
2382       orig_phi = gsi_stmt (gsi);
2383       new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
2384                                   new_exit_bb);
2385       arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
2386       add_phi_arg (new_phi, arg, new_exit_e);
2387       SET_PHI_ARG_DEF (orig_phi, e->dest_idx, PHI_RESULT (new_phi));
2388     }
2389
2390   /* End loop-exit-fixes after versioning.  */
2391
2392   update_ssa (TODO_update_ssa);
2393   if (*cond_expr_stmt_list)
2394     {
2395       cond_exp_gsi = gsi_last_bb (condition_bb);
2396       gsi_insert_seq_before (&cond_exp_gsi, *cond_expr_stmt_list,
2397                              GSI_SAME_STMT);
2398       *cond_expr_stmt_list = NULL;
2399     }
2400   *cond_expr = NULL_TREE;
2401 }
2402