gcc/tree-vect-loop-manip.c

   1 /* Vectorizer Specific Loop Manipulations
   2    Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software
   3    Foundation, Inc.
   4    Contributed by Dorit Naishlos <dorit@il.ibm.com>
   5    and Ira Rosen <irar@il.ibm.com>
   6
   7 This file is part of GCC.
   8
   9 GCC is free software; you can redistribute it and/or modify it under
  10 the terms of the GNU General Public License as published by the Free
  11 Software Foundation; either version 3, or (at your option) any later
  12 version.
  13
  14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  16 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  17 for more details.
  18
  19 You should have received a copy of the GNU General Public License
  20 along with GCC; see the file COPYING3.  If not see
  21 <http://www.gnu.org/licenses/>.  */
  22
  23 #include "config.h"
  24 #include "system.h"
  25 #include "coretypes.h"
  26 #include "tm.h"
  27 #include "ggc.h"
  28 #include "tree.h"
  29 #include "basic-block.h"
  30 #include "diagnostic.h"
  31 #include "tree-flow.h"
  32 #include "tree-dump.h"
  33 #include "cfgloop.h"
  34 #include "cfglayout.h"
  35 #include "expr.h"
  36 #include "toplev.h"
  37 #include "tree-scalar-evolution.h"
  38 #include "tree-vectorizer.h"
  39 #include "langhooks.h"
  40
  41 /*************************************************************************
  42   Simple Loop Peeling Utilities
  43
  44   Utilities to support loop peeling for vectorization purposes.
  45  *************************************************************************/
  46
  47
  48 /* Renames the use *OP_P.  */
  49
  50 static void
  51 rename_use_op (use_operand_p op_p)
  52 {
  53   tree new_name;
  54
  55   if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
  56     return;
  57
  58   new_name = get_current_def (USE_FROM_PTR (op_p));
  59
  60   /* Something defined outside of the loop.  */
  61   if (!new_name)
  62     return;
  63
  64   /* An ordinary ssa name defined in the loop.  */
  65
  66   SET_USE (op_p, new_name);
  67 }
  68
  69
  70 /* Renames the variables in basic block BB.  */
  71
  72 void
  73 rename_variables_in_bb (basic_block bb)
  74 {
  75   gimple_stmt_iterator gsi;
  76   gimple stmt;
  77   use_operand_p use_p;
  78   ssa_op_iter iter;
  79   edge e;
  80   edge_iterator ei;
  81   struct loop *loop = bb->loop_father;
  82
  83   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
  84     {
  85       stmt = gsi_stmt (gsi);
  86       FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
  87         rename_use_op (use_p);
  88     }
  89
  90   FOR_EACH_EDGE (e, ei, bb->succs)
  91     {
  92       if (!flow_bb_inside_loop_p (loop, e->dest))
  93         continue;
  94       for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
  95         rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi_stmt (gsi), e));
  96     }
  97 }
  98
  99
 100 /* Renames variables in new generated LOOP.  */
 101
 102 void
 103 rename_variables_in_loop (struct loop *loop)
 104 {
 105   unsigned i;
 106   basic_block *bbs;
 107
 108   bbs = get_loop_body (loop);
 109
 110   for (i = 0; i < loop->num_nodes; i++)
 111     rename_variables_in_bb (bbs[i]);
 112
 113   free (bbs);
 114 }
 115
 116
 117 /* Update the PHI nodes of NEW_LOOP.
 118
 119    NEW_LOOP is a duplicate of ORIG_LOOP.
 120    AFTER indicates whether NEW_LOOP executes before or after ORIG_LOOP:
 121    AFTER is true if NEW_LOOP executes after ORIG_LOOP, and false if it
 122    executes before it.  */
 123
 124 static void
 125 slpeel_update_phis_for_duplicate_loop (struct loop *orig_loop,
 126                                        struct loop *new_loop, bool after)
 127 {
 128   tree new_ssa_name;
 129   gimple phi_new, phi_orig;
 130   tree def;
 131   edge orig_loop_latch = loop_latch_edge (orig_loop);
 132   edge orig_entry_e = loop_preheader_edge (orig_loop);
 133   edge new_loop_exit_e = single_exit (new_loop);
 134   edge new_loop_entry_e = loop_preheader_edge (new_loop);
 135   edge entry_arg_e = (after ? orig_loop_latch : orig_entry_e);
 136   gimple_stmt_iterator gsi_new, gsi_orig;
 137
 138   /*
 139      step 1. For each loop-header-phi:
 140              Add the first phi argument for the phi in NEW_LOOP
 141             (the one associated with the entry of NEW_LOOP)
 142
 143      step 2. For each loop-header-phi:
 144              Add the second phi argument for the phi in NEW_LOOP
 145             (the one associated with the latch of NEW_LOOP)
 146
 147      step 3. Update the phis in the successor block of NEW_LOOP.
 148
 149         case 1: NEW_LOOP was placed before ORIG_LOOP:
 150                 The successor block of NEW_LOOP is the header of ORIG_LOOP.
 151                 Updating the phis in the successor block can therefore be done
 152                 along with the scanning of the loop header phis, because the
 153                 header blocks of ORIG_LOOP and NEW_LOOP have exactly the same
 154                 phi nodes, organized in the same order.
 155
 156         case 2: NEW_LOOP was placed after ORIG_LOOP:
 157                 The successor block of NEW_LOOP is the original exit block of
 158                 ORIG_LOOP - the phis to be updated are the loop-closed-ssa phis.
 159                 We postpone updating these phis to a later stage (when
 160                 loop guards are added).
 161    */
 162
 163
 164   /* Scan the phis in the headers of the old and new loops
 165      (they are organized in exactly the same order).  */
 166
 167   for (gsi_new = gsi_start_phis (new_loop->header),
 168        gsi_orig = gsi_start_phis (orig_loop->header);
 169        !gsi_end_p (gsi_new) && !gsi_end_p (gsi_orig);
 170        gsi_next (&gsi_new), gsi_next (&gsi_orig))
 171     {
 172       source_location locus;
 173       phi_new = gsi_stmt (gsi_new);
 174       phi_orig = gsi_stmt (gsi_orig);
 175
 176       /* step 1.  */
 177       def = PHI_ARG_DEF_FROM_EDGE (phi_orig, entry_arg_e);
 178       locus = gimple_phi_arg_location_from_edge (phi_orig, entry_arg_e);
 179       add_phi_arg (phi_new, def, new_loop_entry_e, locus);
 180
 181       /* step 2.  */
 182       def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_loop_latch);
 183       locus = gimple_phi_arg_location_from_edge (phi_orig, orig_loop_latch);
 184       if (TREE_CODE (def) != SSA_NAME)
 185         continue;
 186
 187       new_ssa_name = get_current_def (def);
 188       if (!new_ssa_name)
 189         {
 190           /* This only happens if there are no definitions
 191              inside the loop. use the phi_result in this case.  */
 192           new_ssa_name = PHI_RESULT (phi_new);
 193         }
 194
 195       /* An ordinary ssa name defined in the loop.  */
 196       add_phi_arg (phi_new, new_ssa_name, loop_latch_edge (new_loop), locus);
 197
 198       /* step 3 (case 1).  */
 199       if (!after)
 200         {
 201           gcc_assert (new_loop_exit_e == orig_entry_e);
 202           SET_PHI_ARG_DEF (phi_orig,
 203                            new_loop_exit_e->dest_idx,
 204                            new_ssa_name);
 205         }
 206     }
 207 }
 208
 209
 210 /* Update PHI nodes for a guard of the LOOP.
 211
 212    Input:
 213    - LOOP, GUARD_EDGE: LOOP is a loop for which we added guard code that
 214         controls whether LOOP is to be executed.  GUARD_EDGE is the edge that
 215         originates from the guard-bb, skips LOOP and reaches the (unique) exit
 216         bb of LOOP.  This loop-exit-bb is an empty bb with one successor.
 217         We denote this bb NEW_MERGE_BB because before the guard code was added
 218         it had a single predecessor (the LOOP header), and now it became a merge
 219         point of two paths - the path that ends with the LOOP exit-edge, and
 220         the path that ends with GUARD_EDGE.
 221    - NEW_EXIT_BB: New basic block that is added by this function between LOOP
 222         and NEW_MERGE_BB. It is used to place loop-closed-ssa-form exit-phis.
 223
 224    ===> The CFG before the guard-code was added:
 225         LOOP_header_bb:
 226           loop_body
 227           if (exit_loop) goto update_bb
 228           else           goto LOOP_header_bb
 229         update_bb:
 230
 231    ==> The CFG after the guard-code was added:
 232         guard_bb:
 233           if (LOOP_guard_condition) goto new_merge_bb
 234           else                      goto LOOP_header_bb
 235         LOOP_header_bb:
 236           loop_body
 237           if (exit_loop_condition) goto new_merge_bb
 238           else                     goto LOOP_header_bb
 239         new_merge_bb:
 240           goto update_bb
 241         update_bb:
 242
 243    ==> The CFG after this function:
 244         guard_bb:
 245           if (LOOP_guard_condition) goto new_merge_bb
 246           else                      goto LOOP_header_bb
 247         LOOP_header_bb:
 248           loop_body
 249           if (exit_loop_condition) goto new_exit_bb
 250           else                     goto LOOP_header_bb
 251         new_exit_bb:
 252         new_merge_bb:
 253           goto update_bb
 254         update_bb:
 255
 256    This function:
 257    1. creates and updates the relevant phi nodes to account for the new
 258       incoming edge (GUARD_EDGE) into NEW_MERGE_BB. This involves:
 259       1.1. Create phi nodes at NEW_MERGE_BB.
 260       1.2. Update the phi nodes at the successor of NEW_MERGE_BB (denoted
 261            UPDATE_BB).  UPDATE_BB was the exit-bb of LOOP before NEW_MERGE_BB
 262    2. preserves loop-closed-ssa-form by creating the required phi nodes
 263       at the exit of LOOP (i.e, in NEW_EXIT_BB).
 264
 265    There are two flavors to this function:
 266
 267    slpeel_update_phi_nodes_for_guard1:
 268      Here the guard controls whether we enter or skip LOOP, where LOOP is a
 269      prolog_loop (loop1 below), and the new phis created in NEW_MERGE_BB are
 270      for variables that have phis in the loop header.
 271
 272    slpeel_update_phi_nodes_for_guard2:
 273      Here the guard controls whether we enter or skip LOOP, where LOOP is an
 274      epilog_loop (loop2 below), and the new phis created in NEW_MERGE_BB are
 275      for variables that have phis in the loop exit.
 276
 277    I.E., the overall structure is:
 278
 279         loop1_preheader_bb:
 280                 guard1 (goto loop1/merge1_bb)
 281         loop1
 282         loop1_exit_bb:
 283                 guard2 (goto merge1_bb/merge2_bb)
 284         merge1_bb
 285         loop2
 286         loop2_exit_bb
 287         merge2_bb
 288         next_bb
 289
 290    slpeel_update_phi_nodes_for_guard1 takes care of creating phis in
 291    loop1_exit_bb and merge1_bb. These are entry phis (phis for the vars
 292    that have phis in loop1->header).
 293
 294    slpeel_update_phi_nodes_for_guard2 takes care of creating phis in
 295    loop2_exit_bb and merge2_bb. These are exit phis (phis for the vars
 296    that have phis in next_bb). It also adds some of these phis to
 297    loop1_exit_bb.
 298
 299    slpeel_update_phi_nodes_for_guard1 is always called before
 300    slpeel_update_phi_nodes_for_guard2. They are both needed in order
 301    to create correct data-flow and loop-closed-ssa-form.
 302
 303    Generally slpeel_update_phi_nodes_for_guard1 creates phis for variables
 304    that change between iterations of a loop (and therefore have a phi-node
 305    at the loop entry), whereas slpeel_update_phi_nodes_for_guard2 creates
 306    phis for variables that are used out of the loop (and therefore have
 307    loop-closed exit phis). Some variables may be both updated between
 308    iterations and used after the loop. This is why in loop1_exit_bb we
 309    may need both entry_phis (created by slpeel_update_phi_nodes_for_guard1)
 310    and exit phis (created by slpeel_update_phi_nodes_for_guard2).
 311
 312    - IS_NEW_LOOP: if IS_NEW_LOOP is true, then LOOP is a newly created copy of
 313      an original loop. i.e., we have:
 314
 315            orig_loop
 316            guard_bb (goto LOOP/new_merge)
 317            new_loop <-- LOOP
 318            new_exit
 319            new_merge
 320            next_bb
 321
 322      If IS_NEW_LOOP is false, then LOOP is an original loop, in which case we
 323      have:
 324
 325            new_loop
 326            guard_bb (goto LOOP/new_merge)
 327            orig_loop <-- LOOP
 328            new_exit
 329            new_merge
 330            next_bb
 331
 332      The SSA names defined in the original loop have a current
 333      reaching definition that that records the corresponding new
 334      ssa-name used in the new duplicated loop copy.
 335   */
 336
 337 /* Function slpeel_update_phi_nodes_for_guard1
 338
 339    Input:
 340    - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
 341    - DEFS - a bitmap of ssa names to mark new names for which we recorded
 342             information.
 343
 344    In the context of the overall structure, we have:
 345
 346         loop1_preheader_bb:
 347                 guard1 (goto loop1/merge1_bb)
 348 LOOP->  loop1
 349         loop1_exit_bb:
 350                 guard2 (goto merge1_bb/merge2_bb)
 351         merge1_bb
 352         loop2
 353         loop2_exit_bb
 354         merge2_bb
 355         next_bb
 356
 357    For each name updated between loop iterations (i.e - for each name that has
 358    an entry (loop-header) phi in LOOP) we create a new phi in:
 359    1. merge1_bb (to account for the edge from guard1)
 360    2. loop1_exit_bb (an exit-phi to keep LOOP in loop-closed form)
 361 */
 362
 363 static void
 364 slpeel_update_phi_nodes_for_guard1 (edge guard_edge, struct loop *loop,
 365                                     bool is_new_loop, basic_block *new_exit_bb,
 366                                     bitmap *defs)
 367 {
 368   gimple orig_phi, new_phi;
 369   gimple update_phi, update_phi2;
 370   tree guard_arg, loop_arg;
 371   basic_block new_merge_bb = guard_edge->dest;
 372   edge e = EDGE_SUCC (new_merge_bb, 0);
 373   basic_block update_bb = e->dest;
 374   basic_block orig_bb = loop->header;
 375   edge new_exit_e;
 376   tree current_new_name;
 377   gimple_stmt_iterator gsi_orig, gsi_update;
 378
 379   /* Create new bb between loop and new_merge_bb.  */
 380   *new_exit_bb = split_edge (single_exit (loop));
 381
 382   new_exit_e = EDGE_SUCC (*new_exit_bb, 0);
 383
 384   for (gsi_orig = gsi_start_phis (orig_bb),
 385        gsi_update = gsi_start_phis (update_bb);
 386        !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
 387        gsi_next (&gsi_orig), gsi_next (&gsi_update))
 388     {
 389       source_location loop_locus, guard_locus;;
 390       orig_phi = gsi_stmt (gsi_orig);
 391       update_phi = gsi_stmt (gsi_update);
 392
 393       /** 1. Handle new-merge-point phis  **/
 394
 395       /* 1.1. Generate new phi node in NEW_MERGE_BB:  */
 396       new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
 397                                  new_merge_bb);
 398
 399       /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
 400             of LOOP. Set the two phi args in NEW_PHI for these edges:  */
 401       loop_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, EDGE_SUCC (loop->latch, 0));
 402       loop_locus = gimple_phi_arg_location_from_edge (orig_phi,
 403                                                       EDGE_SUCC (loop->latch,
 404                                                                  0));
 405       guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, loop_preheader_edge (loop));
 406       guard_locus
 407         = gimple_phi_arg_location_from_edge (orig_phi,
 408                                              loop_preheader_edge (loop));
 409
 410       add_phi_arg (new_phi, loop_arg, new_exit_e, loop_locus);
 411       add_phi_arg (new_phi, guard_arg, guard_edge, guard_locus);
 412
 413       /* 1.3. Update phi in successor block.  */
 414       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == loop_arg
 415                   || PHI_ARG_DEF_FROM_EDGE (update_phi, e) == guard_arg);
 416       SET_PHI_ARG_DEF (update_phi, e->dest_idx, PHI_RESULT (new_phi));
 417       update_phi2 = new_phi;
 418
 419
 420       /** 2. Handle loop-closed-ssa-form phis  **/
 421
 422       if (!is_gimple_reg (PHI_RESULT (orig_phi)))
 423         continue;
 424
 425       /* 2.1. Generate new phi node in NEW_EXIT_BB:  */
 426       new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
 427                                  *new_exit_bb);
 428
 429       /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop.  */
 430       add_phi_arg (new_phi, loop_arg, single_exit (loop), loop_locus);
 431
 432       /* 2.3. Update phi in successor of NEW_EXIT_BB:  */
 433       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
 434       SET_PHI_ARG_DEF (update_phi2, new_exit_e->dest_idx, PHI_RESULT (new_phi));
 435
 436       /* 2.4. Record the newly created name with set_current_def.
 437          We want to find a name such that
 438                 name = get_current_def (orig_loop_name)
 439          and to set its current definition as follows:
 440                 set_current_def (name, new_phi_name)
 441
 442          If LOOP is a new loop then loop_arg is already the name we're
 443          looking for. If LOOP is the original loop, then loop_arg is
 444          the orig_loop_name and the relevant name is recorded in its
 445          current reaching definition.  */
 446       if (is_new_loop)
 447         current_new_name = loop_arg;
 448       else
 449         {
 450           current_new_name = get_current_def (loop_arg);
 451           /* current_def is not available only if the variable does not
 452              change inside the loop, in which case we also don't care
 453              about recording a current_def for it because we won't be
 454              trying to create loop-exit-phis for it.  */
 455           if (!current_new_name)
 456             continue;
 457         }
 458       gcc_assert (get_current_def (current_new_name) == NULL_TREE);
 459
 460       set_current_def (current_new_name, PHI_RESULT (new_phi));
 461       bitmap_set_bit (*defs, SSA_NAME_VERSION (current_new_name));
 462     }
 463 }
 464
 465
 466 /* Function slpeel_update_phi_nodes_for_guard2
 467
 468    Input:
 469    - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
 470
 471    In the context of the overall structure, we have:
 472
 473         loop1_preheader_bb:
 474                 guard1 (goto loop1/merge1_bb)
 475         loop1
 476         loop1_exit_bb:
 477                 guard2 (goto merge1_bb/merge2_bb)
 478         merge1_bb
 479 LOOP->  loop2
 480         loop2_exit_bb
 481         merge2_bb
 482         next_bb
 483
 484    For each name used out side the loop (i.e - for each name that has an exit
 485    phi in next_bb) we create a new phi in:
 486    1. merge2_bb (to account for the edge from guard_bb)
 487    2. loop2_exit_bb (an exit-phi to keep LOOP in loop-closed form)
 488    3. guard2 bb (an exit phi to keep the preceding loop in loop-closed form),
 489       if needed (if it wasn't handled by slpeel_update_phis_nodes_for_phi1).
 490 */
 491
 492 static void
 493 slpeel_update_phi_nodes_for_guard2 (edge guard_edge, struct loop *loop,
 494                                     bool is_new_loop, basic_block *new_exit_bb)
 495 {
 496   gimple orig_phi, new_phi;
 497   gimple update_phi, update_phi2;
 498   tree guard_arg, loop_arg;
 499   basic_block new_merge_bb = guard_edge->dest;
 500   edge e = EDGE_SUCC (new_merge_bb, 0);
 501   basic_block update_bb = e->dest;
 502   edge new_exit_e;
 503   tree orig_def, orig_def_new_name;
 504   tree new_name, new_name2;
 505   tree arg;
 506   gimple_stmt_iterator gsi;
 507
 508   /* Create new bb between loop and new_merge_bb.  */
 509   *new_exit_bb = split_edge (single_exit (loop));
 510
 511   new_exit_e = EDGE_SUCC (*new_exit_bb, 0);
 512
 513   for (gsi = gsi_start_phis (update_bb); !gsi_end_p (gsi); gsi_next (&gsi))
 514     {
 515       update_phi = gsi_stmt (gsi);
 516       orig_phi = update_phi;
 517       orig_def = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
 518       /* This loop-closed-phi actually doesn't represent a use
 519          out of the loop - the phi arg is a constant.  */
 520       if (TREE_CODE (orig_def) != SSA_NAME)
 521         continue;
 522       orig_def_new_name = get_current_def (orig_def);
 523       arg = NULL_TREE;
 524
 525       /** 1. Handle new-merge-point phis  **/
 526
 527       /* 1.1. Generate new phi node in NEW_MERGE_BB:  */
 528       new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
 529                                  new_merge_bb);
 530
 531       /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
 532             of LOOP. Set the two PHI args in NEW_PHI for these edges:  */
 533       new_name = orig_def;
 534       new_name2 = NULL_TREE;
 535       if (orig_def_new_name)
 536         {
 537           new_name = orig_def_new_name;
 538           /* Some variables have both loop-entry-phis and loop-exit-phis.
 539              Such variables were given yet newer names by phis placed in
 540              guard_bb by slpeel_update_phi_nodes_for_guard1. I.e:
 541              new_name2 = get_current_def (get_current_def (orig_name)).  */
 542           new_name2 = get_current_def (new_name);
 543         }
 544
 545       if (is_new_loop)
 546         {
 547           guard_arg = orig_def;
 548           loop_arg = new_name;
 549         }
 550       else
 551         {
 552           guard_arg = new_name;
 553           loop_arg = orig_def;
 554         }
 555       if (new_name2)
 556         guard_arg = new_name2;
 557
 558       add_phi_arg (new_phi, loop_arg, new_exit_e, UNKNOWN_LOCATION);
 559       add_phi_arg (new_phi, guard_arg, guard_edge, UNKNOWN_LOCATION);
 560
 561       /* 1.3. Update phi in successor block.  */
 562       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == orig_def);
 563       SET_PHI_ARG_DEF (update_phi, e->dest_idx, PHI_RESULT (new_phi));
 564       update_phi2 = new_phi;
 565
 566
 567       /** 2. Handle loop-closed-ssa-form phis  **/
 568
 569       /* 2.1. Generate new phi node in NEW_EXIT_BB:  */
 570       new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
 571                                  *new_exit_bb);
 572
 573       /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop.  */
 574       add_phi_arg (new_phi, loop_arg, single_exit (loop), UNKNOWN_LOCATION);
 575
 576       /* 2.3. Update phi in successor of NEW_EXIT_BB:  */
 577       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
 578       SET_PHI_ARG_DEF (update_phi2, new_exit_e->dest_idx, PHI_RESULT (new_phi));
 579
 580
 581       /** 3. Handle loop-closed-ssa-form phis for first loop  **/
 582
 583       /* 3.1. Find the relevant names that need an exit-phi in
 584          GUARD_BB, i.e. names for which
 585          slpeel_update_phi_nodes_for_guard1 had not already created a
 586          phi node. This is the case for names that are used outside
 587          the loop (and therefore need an exit phi) but are not updated
 588          across loop iterations (and therefore don't have a
 589          loop-header-phi).
 590
 591          slpeel_update_phi_nodes_for_guard1 is responsible for
 592          creating loop-exit phis in GUARD_BB for names that have a
 593          loop-header-phi.  When such a phi is created we also record
 594          the new name in its current definition.  If this new name
 595          exists, then guard_arg was set to this new name (see 1.2
 596          above).  Therefore, if guard_arg is not this new name, this
 597          is an indication that an exit-phi in GUARD_BB was not yet
 598          created, so we take care of it here.  */
 599       if (guard_arg == new_name2)
 600         continue;
 601       arg = guard_arg;
 602
 603       /* 3.2. Generate new phi node in GUARD_BB:  */
 604       new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
 605                                  guard_edge->src);
 606
 607       /* 3.3. GUARD_BB has one incoming edge:  */
 608       gcc_assert (EDGE_COUNT (guard_edge->src->preds) == 1);
 609       add_phi_arg (new_phi, arg, EDGE_PRED (guard_edge->src, 0),
 610                    UNKNOWN_LOCATION);
 611
 612       /* 3.4. Update phi in successor of GUARD_BB:  */
 613       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, guard_edge)
 614                                                                 == guard_arg);
 615       SET_PHI_ARG_DEF (update_phi2, guard_edge->dest_idx, PHI_RESULT (new_phi));
 616     }
 617 }
 618
 619
 620 /* Make the LOOP iterate NITERS times. This is done by adding a new IV
 621    that starts at zero, increases by one and its limit is NITERS.
 622
 623    Assumption: the exit-condition of LOOP is the last stmt in the loop.  */
 624
 625 void
 626 slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
 627 {
 628   tree indx_before_incr, indx_after_incr;
 629   gimple cond_stmt;
 630   gimple orig_cond;
 631   edge exit_edge = single_exit (loop);
 632   gimple_stmt_iterator loop_cond_gsi;
 633   gimple_stmt_iterator incr_gsi;
 634   bool insert_after;
 635   tree init = build_int_cst (TREE_TYPE (niters), 0);
 636   tree step = build_int_cst (TREE_TYPE (niters), 1);
 637   LOC loop_loc;
 638   enum tree_code code;
 639
 640   orig_cond = get_loop_exit_condition (loop);
 641   gcc_assert (orig_cond);
 642   loop_cond_gsi = gsi_for_stmt (orig_cond);
 643
 644   standard_iv_increment_position (loop, &incr_gsi, &insert_after);
 645   create_iv (init, step, NULL_TREE, loop,
 646              &incr_gsi, insert_after, &indx_before_incr, &indx_after_incr);
 647
 648   indx_after_incr = force_gimple_operand_gsi (&loop_cond_gsi, indx_after_incr,
 649                                               true, NULL_TREE, true,
 650                                               GSI_SAME_STMT);
 651   niters = force_gimple_operand_gsi (&loop_cond_gsi, niters, true, NULL_TREE,
 652                                      true, GSI_SAME_STMT);
 653
 654   code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR;
 655   cond_stmt = gimple_build_cond (code, indx_after_incr, niters, NULL_TREE,
 656                                  NULL_TREE);
 657
 658   gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);
 659
 660   /* Remove old loop exit test:  */
 661   gsi_remove (&loop_cond_gsi, true);
 662
 663   loop_loc = find_loop_location (loop);
 664   if (dump_file && (dump_flags & TDF_DETAILS))
 665     {
 666       if (loop_loc != UNKNOWN_LOC)
 667         fprintf (dump_file, "\nloop at %s:%d: ",
 668                  LOC_FILE (loop_loc), LOC_LINE (loop_loc));
 669       print_gimple_stmt (dump_file, cond_stmt, 0, TDF_SLIM);
 670     }
 671
 672   loop->nb_iterations = niters;
 673 }
 674
 675
 676 /* Given LOOP this function generates a new copy of it and puts it
 677    on E which is either the entry or exit of LOOP.  */
 678
 679 struct loop *
 680 slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, edge e)
 681 {
 682   struct loop *new_loop;
 683   basic_block *new_bbs, *bbs;
 684   bool at_exit;
 685   bool was_imm_dom;
 686   basic_block exit_dest;
 687   gimple phi;
 688   tree phi_arg;
 689   edge exit, new_exit;
 690   gimple_stmt_iterator gsi;
 691
 692   at_exit = (e == single_exit (loop));
 693   if (!at_exit && e != loop_preheader_edge (loop))
 694     return NULL;
 695
 696   bbs = get_loop_body (loop);
 697
 698   /* Check whether duplication is possible.  */
 699   if (!can_copy_bbs_p (bbs, loop->num_nodes))
 700     {
 701       free (bbs);
 702       return NULL;
 703     }
 704
 705   /* Generate new loop structure.  */
 706   new_loop = duplicate_loop (loop, loop_outer (loop));
 707   if (!new_loop)
 708     {
 709       free (bbs);
 710       return NULL;
 711     }
 712
 713   exit_dest = single_exit (loop)->dest;
 714   was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
 715                                           exit_dest) == loop->header ?
 716                  true : false);
 717
 718   new_bbs = XNEWVEC (basic_block, loop->num_nodes);
 719
 720   exit = single_exit (loop);
 721   copy_bbs (bbs, loop->num_nodes, new_bbs,
 722             &exit, 1, &new_exit, NULL,
 723             e->src);
 724
 725   /* Duplicating phi args at exit bbs as coming
 726      also from exit of duplicated loop.  */
 727   for (gsi = gsi_start_phis (exit_dest); !gsi_end_p (gsi); gsi_next (&gsi))
 728     {
 729       phi = gsi_stmt (gsi);
 730       phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, single_exit (loop));
 731       if (phi_arg)
 732         {
 733           edge new_loop_exit_edge;
 734           source_location locus;
 735
 736           locus = gimple_phi_arg_location_from_edge (phi, single_exit (loop));
 737           if (EDGE_SUCC (new_loop->header, 0)->dest == new_loop->latch)
 738             new_loop_exit_edge = EDGE_SUCC (new_loop->header, 1);
 739           else
 740             new_loop_exit_edge = EDGE_SUCC (new_loop->header, 0);
 741
 742           add_phi_arg (phi, phi_arg, new_loop_exit_edge, locus);
 743         }
 744     }
 745
 746   if (at_exit) /* Add the loop copy at exit.  */
 747     {
 748       redirect_edge_and_branch_force (e, new_loop->header);
 749       PENDING_STMT (e) = NULL;
 750       set_immediate_dominator (CDI_DOMINATORS, new_loop->header, e->src);
 751       if (was_imm_dom)
 752         set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_loop->header);
 753     }
 754   else /* Add the copy at entry.  */
 755     {
 756       edge new_exit_e;
 757       edge entry_e = loop_preheader_edge (loop);
 758       basic_block preheader = entry_e->src;
 759
 760       if (!flow_bb_inside_loop_p (new_loop,
 761                                   EDGE_SUCC (new_loop->header, 0)->dest))
 762         new_exit_e = EDGE_SUCC (new_loop->header, 0);
 763       else
 764         new_exit_e = EDGE_SUCC (new_loop->header, 1);
 765
 766       redirect_edge_and_branch_force (new_exit_e, loop->header);
 767       PENDING_STMT (new_exit_e) = NULL;
 768       set_immediate_dominator (CDI_DOMINATORS, loop->header,
 769                                new_exit_e->src);
 770
 771       /* We have to add phi args to the loop->header here as coming
 772          from new_exit_e edge.  */
 773       for (gsi = gsi_start_phis (loop->header);
 774            !gsi_end_p (gsi);
 775            gsi_next (&gsi))
 776         {
 777           phi = gsi_stmt (gsi);
 778           phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, entry_e);
 779           if (phi_arg)
 780             add_phi_arg (phi, phi_arg, new_exit_e,
 781                          gimple_phi_arg_location_from_edge (phi, entry_e));
 782         }
 783
 784       redirect_edge_and_branch_force (entry_e, new_loop->header);
 785       PENDING_STMT (entry_e) = NULL;
 786       set_immediate_dominator (CDI_DOMINATORS, new_loop->header, preheader);
 787     }
 788
 789   free (new_bbs);
 790   free (bbs);
 791
 792   return new_loop;
 793 }
 794
 795
 796 /* Given the condition statement COND, put it as the last statement
 797    of GUARD_BB; EXIT_BB is the basic block to skip the loop;
 798    Assumes that this is the single exit of the guarded loop.
 799    Returns the skip edge, inserts new stmts on the COND_EXPR_STMT_LIST.  */
 800
 801 static edge
 802 slpeel_add_loop_guard (basic_block guard_bb, tree cond,
 803                        gimple_seq cond_expr_stmt_list,
 804                        basic_block exit_bb, basic_block dom_bb)
 805 {
 806   gimple_stmt_iterator gsi;
 807   edge new_e, enter_e;
 808   gimple cond_stmt;
 809   gimple_seq gimplify_stmt_list = NULL;
 810
 811   enter_e = EDGE_SUCC (guard_bb, 0);
 812   enter_e->flags &= ~EDGE_FALLTHRU;
 813   enter_e->flags |= EDGE_FALSE_VALUE;
 814   gsi = gsi_last_bb (guard_bb);
 815
 816   cond = force_gimple_operand (cond, &gimplify_stmt_list, true, NULL_TREE);
 817   if (gimplify_stmt_list)
 818     gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
 819   cond_stmt = gimple_build_cond (NE_EXPR,
 820                                  cond, build_int_cst (TREE_TYPE (cond), 0),
 821                                  NULL_TREE, NULL_TREE);
 822   if (cond_expr_stmt_list)
 823     gsi_insert_seq_after (&gsi, cond_expr_stmt_list, GSI_NEW_STMT);
 824
 825   gsi = gsi_last_bb (guard_bb);
 826   gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
 827
 828   /* Add new edge to connect guard block to the merge/loop-exit block.  */
 829   new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);
 830   set_immediate_dominator (CDI_DOMINATORS, exit_bb, dom_bb);
 831   return new_e;
 832 }
 833
 834
 835 /* This function verifies that the following restrictions apply to LOOP:
 836    (1) it is innermost
 837    (2) it consists of exactly 2 basic blocks - header, and an empty latch.
 838    (3) it is single entry, single exit
 839    (4) its exit condition is the last stmt in the header
 840    (5) E is the entry/exit edge of LOOP.
 841  */
 842
 843 bool
 844 slpeel_can_duplicate_loop_p (const struct loop *loop, const_edge e)
 845 {
 846   edge exit_e = single_exit (loop);
 847   edge entry_e = loop_preheader_edge (loop);
 848   gimple orig_cond = get_loop_exit_condition (loop);
 849   gimple_stmt_iterator loop_exit_gsi = gsi_last_bb (exit_e->src);
 850
 851   if (need_ssa_update_p (cfun))
 852     return false;
 853
 854   if (loop->inner
 855       /* All loops have an outer scope; the only case loop->outer is NULL is for
 856          the function itself.  */
 857       || !loop_outer (loop)
 858       || loop->num_nodes != 2
 859       || !empty_block_p (loop->latch)
 860       || !single_exit (loop)
 861       /* Verify that new loop exit condition can be trivially modified.  */
 862       || (!orig_cond || orig_cond != gsi_stmt (loop_exit_gsi))
 863       || (e != exit_e && e != entry_e))
 864     return false;
 865
 866   return true;
 867 }
 868
 869 #ifdef ENABLE_CHECKING
 870 static void
 871 slpeel_verify_cfg_after_peeling (struct loop *first_loop,
 872                                  struct loop *second_loop)
 873 {
 874   basic_block loop1_exit_bb = single_exit (first_loop)->dest;
 875   basic_block loop2_entry_bb = loop_preheader_edge (second_loop)->src;
 876   basic_block loop1_entry_bb = loop_preheader_edge (first_loop)->src;
 877
 878   /* A guard that controls whether the second_loop is to be executed or skipped
 879      is placed in first_loop->exit.  first_loop->exit therefore has two
 880      successors - one is the preheader of second_loop, and the other is a bb
 881      after second_loop.
 882    */
 883   gcc_assert (EDGE_COUNT (loop1_exit_bb->succs) == 2);
 884
 885   /* 1. Verify that one of the successors of first_loop->exit is the preheader
 886         of second_loop.  */
 887
 888   /* The preheader of new_loop is expected to have two predecessors:
 889      first_loop->exit and the block that precedes first_loop.  */
 890
 891   gcc_assert (EDGE_COUNT (loop2_entry_bb->preds) == 2
 892               && ((EDGE_PRED (loop2_entry_bb, 0)->src == loop1_exit_bb
 893                    && EDGE_PRED (loop2_entry_bb, 1)->src == loop1_entry_bb)
 894                || (EDGE_PRED (loop2_entry_bb, 1)->src ==  loop1_exit_bb
 895                    && EDGE_PRED (loop2_entry_bb, 0)->src == loop1_entry_bb)));
 896
 897   /* Verify that the other successor of first_loop->exit is after the
 898      second_loop.  */
 899   /* TODO */
 900 }
 901 #endif
 902
 903 /* If the run time cost model check determines that vectorization is
 904    not profitable and hence scalar loop should be generated then set
 905    FIRST_NITERS to prologue peeled iterations. This will allow all the
 906    iterations to be executed in the prologue peeled scalar loop.  */
 907
 908 static void
 909 set_prologue_iterations (basic_block bb_before_first_loop,
 910                          tree first_niters,
 911                          struct loop *loop,
 912                          unsigned int th)
 913 {
 914   edge e;
 915   basic_block cond_bb, then_bb;
 916   tree var, prologue_after_cost_adjust_name;
 917   gimple_stmt_iterator gsi;
 918   gimple newphi;
 919   edge e_true, e_false, e_fallthru;
 920   gimple cond_stmt;
 921   gimple_seq gimplify_stmt_list = NULL, stmts = NULL;
 922   tree cost_pre_condition = NULL_TREE;
 923   tree scalar_loop_iters =
 924     unshare_expr (LOOP_VINFO_NITERS_UNCHANGED (loop_vec_info_for_loop (loop)));
 925
 926   e = single_pred_edge (bb_before_first_loop);
 927   cond_bb = split_edge(e);
 928
 929   e = single_pred_edge (bb_before_first_loop);
 930   then_bb = split_edge(e);
 931   set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
 932
 933   e_false = make_single_succ_edge (cond_bb, bb_before_first_loop,
 934                                    EDGE_FALSE_VALUE);
 935   set_immediate_dominator (CDI_DOMINATORS, bb_before_first_loop, cond_bb);
 936
 937   e_true = EDGE_PRED (then_bb, 0);
 938   e_true->flags &= ~EDGE_FALLTHRU;
 939   e_true->flags |= EDGE_TRUE_VALUE;
 940
 941   e_fallthru = EDGE_SUCC (then_bb, 0);
 942
 943   cost_pre_condition =
 944     fold_build2 (LE_EXPR, boolean_type_node, scalar_loop_iters,
 945                  build_int_cst (TREE_TYPE (scalar_loop_iters), th));
 946   cost_pre_condition =
 947     force_gimple_operand (cost_pre_condition, &gimplify_stmt_list,
 948                           true, NULL_TREE);
 949   cond_stmt = gimple_build_cond (NE_EXPR, cost_pre_condition,
 950                                  build_int_cst (TREE_TYPE (cost_pre_condition),
 951                                                 0), NULL_TREE, NULL_TREE);
 952
 953   gsi = gsi_last_bb (cond_bb);
 954   if (gimplify_stmt_list)
 955     gsi_insert_seq_after (&gsi, gimplify_stmt_list, GSI_NEW_STMT);
 956
 957   gsi = gsi_last_bb (cond_bb);
 958   gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
 959
 960   var = create_tmp_var (TREE_TYPE (scalar_loop_iters),
 961                         "prologue_after_cost_adjust");
 962   add_referenced_var (var);
 963   prologue_after_cost_adjust_name =
 964     force_gimple_operand (scalar_loop_iters, &stmts, false, var);
 965
 966   gsi = gsi_last_bb (then_bb);
 967   if (stmts)
 968     gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT);
 969
 970   newphi = create_phi_node (var, bb_before_first_loop);
 971   add_phi_arg (newphi, prologue_after_cost_adjust_name, e_fallthru,
 972                UNKNOWN_LOCATION);
 973   add_phi_arg (newphi, first_niters, e_false, UNKNOWN_LOCATION);
 974
 975   first_niters = PHI_RESULT (newphi);
 976 }
 977
 978
 979 /* Function slpeel_tree_peel_loop_to_edge.
 980
 981    Peel the first (last) iterations of LOOP into a new prolog (epilog) loop
 982    that is placed on the entry (exit) edge E of LOOP. After this transformation
 983    we have two loops one after the other - first-loop iterates FIRST_NITERS
 984    times, and second-loop iterates the remainder NITERS - FIRST_NITERS times.
 985    If the cost model indicates that it is profitable to emit a scalar
 986    loop instead of the vector one, then the prolog (epilog) loop will iterate
 987    for the entire unchanged scalar iterations of the loop.
 988
 989    Input:
 990    - LOOP: the loop to be peeled.
 991    - E: the exit or entry edge of LOOP.
 992         If it is the entry edge, we peel the first iterations of LOOP. In this
 993         case first-loop is LOOP, and second-loop is the newly created loop.
 994         If it is the exit edge, we peel the last iterations of LOOP. In this
 995         case, first-loop is the newly created loop, and second-loop is LOOP.
 996    - NITERS: the number of iterations that LOOP iterates.
 997    - FIRST_NITERS: the number of iterations that the first-loop should iterate.
 998    - UPDATE_FIRST_LOOP_COUNT:  specified whether this function is responsible
 999         for updating the loop bound of the first-loop to FIRST_NITERS.  If it
1000         is false, the caller of this function may want to take care of this
1001         (this can be useful if we don't want new stmts added to first-loop).
1002    - TH: cost model profitability threshold of iterations for vectorization.
1003    - CHECK_PROFITABILITY: specify whether cost model check has not occurred
1004                           during versioning and hence needs to occur during
1005                           prologue generation or whether cost model check
1006                           has not occurred during prologue generation and hence
1007                           needs to occur during epilogue generation.
1008
1009
1010    Output:
1011    The function returns a pointer to the new loop-copy, or NULL if it failed
1012    to perform the transformation.
1013
1014    The function generates two if-then-else guards: one before the first loop,
1015    and the other before the second loop:
1016    The first guard is:
1017      if (FIRST_NITERS == 0) then skip the first loop,
1018      and go directly to the second loop.
1019    The second guard is:
1020      if (FIRST_NITERS == NITERS) then skip the second loop.
1021
1022    If the optional COND_EXPR and COND_EXPR_STMT_LIST arguments are given
1023    then the generated condition is combined with COND_EXPR and the
1024    statements in COND_EXPR_STMT_LIST are emitted together with it.
1025
1026    FORNOW only simple loops are supported (see slpeel_can_duplicate_loop_p).
1027    FORNOW the resulting code will not be in loop-closed-ssa form.
1028 */
1029
1030 static struct loop*
1031 slpeel_tree_peel_loop_to_edge (struct loop *loop,
1032                                edge e, tree first_niters,
1033                                tree niters, bool update_first_loop_count,
1034                                unsigned int th, bool check_profitability,
1035                                tree cond_expr, gimple_seq cond_expr_stmt_list)
1036 {
1037   struct loop *new_loop = NULL, *first_loop, *second_loop;
1038   edge skip_e;
1039   tree pre_condition = NULL_TREE;
1040   bitmap definitions;
1041   basic_block bb_before_second_loop, bb_after_second_loop;
1042   basic_block bb_before_first_loop;
1043   basic_block bb_between_loops;
1044   basic_block new_exit_bb;
1045   edge exit_e = single_exit (loop);
1046   LOC loop_loc;
1047   tree cost_pre_condition = NULL_TREE;
1048
1049   if (!slpeel_can_duplicate_loop_p (loop, e))
1050     return NULL;
1051
1052   /* We have to initialize cfg_hooks. Then, when calling
1053    cfg_hooks->split_edge, the function tree_split_edge
1054    is actually called and, when calling cfg_hooks->duplicate_block,
1055    the function tree_duplicate_bb is called.  */
1056   gimple_register_cfg_hooks ();
1057
1058
1059   /* 1. Generate a copy of LOOP and put it on E (E is the entry/exit of LOOP).
1060         Resulting CFG would be:
1061
1062         first_loop:
1063         do {
1064         } while ...
1065
1066         second_loop:
1067         do {
1068         } while ...
1069
1070         orig_exit_bb:
1071    */
1072
1073   if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, e)))
1074     {
1075       loop_loc = find_loop_location (loop);
1076       if (dump_file && (dump_flags & TDF_DETAILS))
1077         {
1078           if (loop_loc != UNKNOWN_LOC)
1079             fprintf (dump_file, "\n%s:%d: note: ",
1080                      LOC_FILE (loop_loc), LOC_LINE (loop_loc));
1081           fprintf (dump_file, "tree_duplicate_loop_to_edge_cfg failed.\n");
1082         }
1083       return NULL;
1084     }
1085
1086   if (e == exit_e)
1087     {
1088       /* NEW_LOOP was placed after LOOP.  */
1089       first_loop = loop;
1090       second_loop = new_loop;
1091     }
1092   else
1093     {
1094       /* NEW_LOOP was placed before LOOP.  */
1095       first_loop = new_loop;
1096       second_loop = loop;
1097     }
1098
1099   definitions = ssa_names_to_replace ();
1100   slpeel_update_phis_for_duplicate_loop (loop, new_loop, e == exit_e);
1101   rename_variables_in_loop (new_loop);
1102
1103
1104   /* 2.  Add the guard code in one of the following ways:
1105
1106      2.a Add the guard that controls whether the first loop is executed.
1107          This occurs when this function is invoked for prologue or epilogue
1108          generation and when the cost model check can be done at compile time.
1109
1110          Resulting CFG would be:
1111
1112          bb_before_first_loop:
1113          if (FIRST_NITERS == 0) GOTO bb_before_second_loop
1114                                 GOTO first-loop
1115
1116          first_loop:
1117          do {
1118          } while ...
1119
1120          bb_before_second_loop:
1121
1122          second_loop:
1123          do {
1124          } while ...
1125
1126          orig_exit_bb:
1127
1128      2.b Add the cost model check that allows the prologue
1129          to iterate for the entire unchanged scalar
1130          iterations of the loop in the event that the cost
1131          model indicates that the scalar loop is more
1132          profitable than the vector one. This occurs when
1133          this function is invoked for prologue generation
1134          and the cost model check needs to be done at run
1135          time.
1136
1137          Resulting CFG after prologue peeling would be:
1138
1139          if (scalar_loop_iterations <= th)
1140            FIRST_NITERS = scalar_loop_iterations
1141
1142          bb_before_first_loop:
1143          if (FIRST_NITERS == 0) GOTO bb_before_second_loop
1144                                 GOTO first-loop
1145
1146          first_loop:
1147          do {
1148          } while ...
1149
1150          bb_before_second_loop:
1151
1152          second_loop:
1153          do {
1154          } while ...
1155
1156          orig_exit_bb:
1157
1158      2.c Add the cost model check that allows the epilogue
1159          to iterate for the entire unchanged scalar
1160          iterations of the loop in the event that the cost
1161          model indicates that the scalar loop is more
1162          profitable than the vector one. This occurs when
1163          this function is invoked for epilogue generation
1164          and the cost model check needs to be done at run
1165          time.  This check is combined with any pre-existing
1166          check in COND_EXPR to avoid versioning.
1167
1168          Resulting CFG after prologue peeling would be:
1169
1170          bb_before_first_loop:
1171          if ((scalar_loop_iterations <= th)
1172              ||
1173              FIRST_NITERS == 0) GOTO bb_before_second_loop
1174                                 GOTO first-loop
1175
1176          first_loop:
1177          do {
1178          } while ...
1179
1180          bb_before_second_loop:
1181
1182          second_loop:
1183          do {
1184          } while ...
1185
1186          orig_exit_bb:
1187   */
1188
1189   bb_before_first_loop = split_edge (loop_preheader_edge (first_loop));
1190   bb_before_second_loop = split_edge (single_exit (first_loop));
1191
1192   /* Epilogue peeling.  */
1193   if (!update_first_loop_count)
1194     {
1195       pre_condition =
1196         fold_build2 (LE_EXPR, boolean_type_node, first_niters,
1197                      build_int_cst (TREE_TYPE (first_niters), 0));
1198       if (check_profitability)
1199         {
1200           tree scalar_loop_iters
1201             = unshare_expr (LOOP_VINFO_NITERS_UNCHANGED
1202                                         (loop_vec_info_for_loop (loop)));
1203           cost_pre_condition =
1204             fold_build2 (LE_EXPR, boolean_type_node, scalar_loop_iters,
1205                          build_int_cst (TREE_TYPE (scalar_loop_iters), th));
1206
1207           pre_condition = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
1208                                        cost_pre_condition, pre_condition);
1209         }
1210       if (cond_expr)
1211         {
1212           pre_condition =
1213             fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
1214                          pre_condition,
1215                          fold_build1 (TRUTH_NOT_EXPR, boolean_type_node,
1216                                       cond_expr));
1217         }
1218     }
1219
1220   /* Prologue peeling.  */
1221   else
1222     {
1223       if (check_profitability)
1224         set_prologue_iterations (bb_before_first_loop, first_niters,
1225                                  loop, th);
1226
1227       pre_condition =
1228         fold_build2 (LE_EXPR, boolean_type_node, first_niters,
1229                      build_int_cst (TREE_TYPE (first_niters), 0));
1230     }
1231
1232   skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
1233                                   cond_expr_stmt_list,
1234                                   bb_before_second_loop, bb_before_first_loop);
1235   slpeel_update_phi_nodes_for_guard1 (skip_e, first_loop,
1236                                       first_loop == new_loop,
1237                                       &new_exit_bb, &definitions);
1238
1239
1240   /* 3. Add the guard that controls whether the second loop is executed.
1241         Resulting CFG would be:
1242
1243         bb_before_first_loop:
1244         if (FIRST_NITERS == 0) GOTO bb_before_second_loop (skip first loop)
1245                                GOTO first-loop
1246
1247         first_loop:
1248         do {
1249         } while ...
1250
1251         bb_between_loops:
1252         if (FIRST_NITERS == NITERS) GOTO bb_after_second_loop (skip second loop)
1253                                     GOTO bb_before_second_loop
1254
1255         bb_before_second_loop:
1256
1257         second_loop:
1258         do {
1259         } while ...
1260
1261         bb_after_second_loop:
1262
1263         orig_exit_bb:
1264    */
1265
1266   bb_between_loops = new_exit_bb;
1267   bb_after_second_loop = split_edge (single_exit (second_loop));
1268
1269   pre_condition =
1270         fold_build2 (EQ_EXPR, boolean_type_node, first_niters, niters);
1271   skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition, NULL,
1272                                   bb_after_second_loop, bb_before_first_loop);
1273   slpeel_update_phi_nodes_for_guard2 (skip_e, second_loop,
1274                                      second_loop == new_loop, &new_exit_bb);
1275
1276   /* 4. Make first-loop iterate FIRST_NITERS times, if requested.
1277    */
1278   if (update_first_loop_count)
1279     slpeel_make_loop_iterate_ntimes (first_loop, first_niters);
1280
1281   BITMAP_FREE (definitions);
1282   delete_update_ssa ();
1283
1284   return new_loop;
1285 }
1286
1287 /* Function vect_get_loop_location.
1288
1289    Extract the location of the loop in the source code.
1290    If the loop is not well formed for vectorization, an estimated
1291    location is calculated.
1292    Return the loop location if succeed and NULL if not.  */
1293
1294 LOC
1295 find_loop_location (struct loop *loop)
1296 {
1297   gimple stmt = NULL;
1298   basic_block bb;
1299   gimple_stmt_iterator si;
1300
1301   if (!loop)
1302     return UNKNOWN_LOC;
1303
1304   stmt = get_loop_exit_condition (loop);
1305
1306   if (stmt && gimple_location (stmt) != UNKNOWN_LOC)
1307     return gimple_location (stmt);
1308
1309   /* If we got here the loop is probably not "well formed",
1310      try to estimate the loop location */
1311
1312   if (!loop->header)
1313     return UNKNOWN_LOC;
1314
1315   bb = loop->header;
1316
1317   for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1318     {
1319       stmt = gsi_stmt (si);
1320       if (gimple_location (stmt) != UNKNOWN_LOC)
1321         return gimple_location (stmt);
1322     }
1323
1324   return UNKNOWN_LOC;
1325 }
1326
1327
1328 /* This function builds ni_name = number of iterations loop executes
1329    on the loop preheader.  If SEQ is given the stmt is instead emitted
1330    there.  */
1331
1332 static tree
1333 vect_build_loop_niters (loop_vec_info loop_vinfo, gimple_seq seq)
1334 {
1335   tree ni_name, var;
1336   gimple_seq stmts = NULL;
1337   edge pe;
1338   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1339   tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
1340
1341   var = create_tmp_var (TREE_TYPE (ni), "niters");
1342   add_referenced_var (var);
1343   ni_name = force_gimple_operand (ni, &stmts, false, var);
1344
1345   pe = loop_preheader_edge (loop);
1346   if (stmts)
1347     {
1348       if (seq)
1349         gimple_seq_add_seq (&seq, stmts);
1350       else
1351         {
1352           basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
1353           gcc_assert (!new_bb);
1354         }
1355     }
1356
1357   return ni_name;
1358 }
1359
1360
1361 /* This function generates the following statements:
1362
1363  ni_name = number of iterations loop executes
1364  ratio = ni_name / vf
1365  ratio_mult_vf_name = ratio * vf
1366
1367  and places them at the loop preheader edge or in COND_EXPR_STMT_LIST
1368  if that is non-NULL.  */
1369
1370 static void
1371 vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
1372                                  tree *ni_name_ptr,
1373                                  tree *ratio_mult_vf_name_ptr,
1374                                  tree *ratio_name_ptr,
1375                                  gimple_seq cond_expr_stmt_list)
1376 {
1377
1378   edge pe;
1379   basic_block new_bb;
1380   gimple_seq stmts;
1381   tree ni_name;
1382   tree var;
1383   tree ratio_name;
1384   tree ratio_mult_vf_name;
1385   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1386   tree ni = LOOP_VINFO_NITERS (loop_vinfo);
1387   int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1388   tree log_vf;
1389
1390   pe = loop_preheader_edge (loop);
1391
1392   /* Generate temporary variable that contains
1393      number of iterations loop executes.  */
1394
1395   ni_name = vect_build_loop_niters (loop_vinfo, cond_expr_stmt_list);
1396   log_vf = build_int_cst (TREE_TYPE (ni), exact_log2 (vf));
1397
1398   /* Create: ratio = ni >> log2(vf) */
1399
1400   ratio_name = fold_build2 (RSHIFT_EXPR, TREE_TYPE (ni_name), ni_name, log_vf);
1401   if (!is_gimple_val (ratio_name))
1402     {
1403       var = create_tmp_var (TREE_TYPE (ni), "bnd");
1404       add_referenced_var (var);
1405
1406       stmts = NULL;
1407       ratio_name = force_gimple_operand (ratio_name, &stmts, true, var);
1408       if (cond_expr_stmt_list)
1409         gimple_seq_add_seq (&cond_expr_stmt_list, stmts);
1410       else
1411         {
1412           pe = loop_preheader_edge (loop);
1413           new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
1414           gcc_assert (!new_bb);
1415         }
1416     }
1417
1418   /* Create: ratio_mult_vf = ratio << log2 (vf).  */
1419
1420   ratio_mult_vf_name = fold_build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name),
1421                                     ratio_name, log_vf);
1422   if (!is_gimple_val (ratio_mult_vf_name))
1423     {
1424       var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
1425       add_referenced_var (var);
1426
1427       stmts = NULL;
1428       ratio_mult_vf_name = force_gimple_operand (ratio_mult_vf_name, &stmts,
1429                                                  true, var);
1430       if (cond_expr_stmt_list)
1431         gimple_seq_add_seq (&cond_expr_stmt_list, stmts);
1432       else
1433         {
1434           pe = loop_preheader_edge (loop);
1435           new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
1436           gcc_assert (!new_bb);
1437         }
1438     }
1439
1440   *ni_name_ptr = ni_name;
1441   *ratio_mult_vf_name_ptr = ratio_mult_vf_name;
1442   *ratio_name_ptr = ratio_name;
1443
1444   return;
1445 }
1446
1447 /* Function vect_can_advance_ivs_p
1448
1449    In case the number of iterations that LOOP iterates is unknown at compile
1450    time, an epilog loop will be generated, and the loop induction variables
1451    (IVs) will be "advanced" to the value they are supposed to take just before
1452    the epilog loop.  Here we check that the access function of the loop IVs
1453    and the expression that represents the loop bound are simple enough.
1454    These restrictions will be relaxed in the future.  */
1455
1456 bool
1457 vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
1458 {
1459   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1460   basic_block bb = loop->header;
1461   gimple phi;
1462   gimple_stmt_iterator gsi;
1463
1464   /* Analyze phi functions of the loop header.  */
1465
1466   if (vect_print_dump_info (REPORT_DETAILS))
1467     fprintf (vect_dump, "vect_can_advance_ivs_p:");
1468
1469   for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1470     {
1471       tree access_fn = NULL;
1472       tree evolution_part;
1473
1474       phi = gsi_stmt (gsi);
1475       if (vect_print_dump_info (REPORT_DETAILS))
1476         {
1477           fprintf (vect_dump, "Analyze phi: ");
1478           print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
1479         }
1480
1481       /* Skip virtual phi's. The data dependences that are associated with
1482          virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */
1483
1484       if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
1485         {
1486           if (vect_print_dump_info (REPORT_DETAILS))
1487             fprintf (vect_dump, "virtual phi. skip.");
1488           continue;
1489         }
1490
1491       /* Skip reduction phis.  */
1492
1493       if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
1494         {
1495           if (vect_print_dump_info (REPORT_DETAILS))
1496             fprintf (vect_dump, "reduc phi. skip.");
1497           continue;
1498         }
1499
1500       /* Analyze the evolution function.  */
1501
1502       access_fn = instantiate_parameters
1503         (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
1504
1505       if (!access_fn)
1506         {
1507           if (vect_print_dump_info (REPORT_DETAILS))
1508             fprintf (vect_dump, "No Access function.");
1509           return false;
1510         }
1511
1512       if (vect_print_dump_info (REPORT_DETAILS))
1513         {
1514           fprintf (vect_dump, "Access function of PHI: ");
1515           print_generic_expr (vect_dump, access_fn, TDF_SLIM);
1516         }
1517
1518       evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
1519
1520       if (evolution_part == NULL_TREE)
1521         {
1522           if (vect_print_dump_info (REPORT_DETAILS))
1523             fprintf (vect_dump, "No evolution.");
1524           return false;
1525         }
1526
1527       /* FORNOW: We do not transform initial conditions of IVs
1528          which evolution functions are a polynomial of degree >= 2.  */
1529
1530       if (tree_is_chrec (evolution_part))
1531         return false;
1532     }
1533
1534   return true;
1535 }
1536
1537
1538 /*   Function vect_update_ivs_after_vectorizer.
1539
1540      "Advance" the induction variables of LOOP to the value they should take
1541      after the execution of LOOP.  This is currently necessary because the
1542      vectorizer does not handle induction variables that are used after the
1543      loop.  Such a situation occurs when the last iterations of LOOP are
1544      peeled, because:
1545      1. We introduced new uses after LOOP for IVs that were not originally used
1546         after LOOP: the IVs of LOOP are now used by an epilog loop.
1547      2. LOOP is going to be vectorized; this means that it will iterate N/VF
1548         times, whereas the loop IVs should be bumped N times.
1549
1550      Input:
1551      - LOOP - a loop that is going to be vectorized. The last few iterations
1552               of LOOP were peeled.
1553      - NITERS - the number of iterations that LOOP executes (before it is
1554                 vectorized). i.e, the number of times the ivs should be bumped.
1555      - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
1556                   coming out from LOOP on which there are uses of the LOOP ivs
1557                   (this is the path from LOOP->exit to epilog_loop->preheader).
1558
1559                   The new definitions of the ivs are placed in LOOP->exit.
1560                   The phi args associated with the edge UPDATE_E in the bb
1561                   UPDATE_E->dest are updated accordingly.
1562
1563      Assumption 1: Like the rest of the vectorizer, this function assumes
1564      a single loop exit that has a single predecessor.
1565
1566      Assumption 2: The phi nodes in the LOOP header and in update_bb are
1567      organized in the same order.
1568
1569      Assumption 3: The access function of the ivs is simple enough (see
1570      vect_can_advance_ivs_p).  This assumption will be relaxed in the future.
1571
1572      Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
1573      coming out of LOOP on which the ivs of LOOP are used (this is the path
1574      that leads to the epilog loop; other paths skip the epilog loop).  This
1575      path starts with the edge UPDATE_E, and its destination (denoted update_bb)
1576      needs to have its phis updated.
1577  */
1578
1579 static void
1580 vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters,
1581                                   edge update_e)
1582 {
1583   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1584   basic_block exit_bb = single_exit (loop)->dest;
1585   gimple phi, phi1;
1586   gimple_stmt_iterator gsi, gsi1;
1587   basic_block update_bb = update_e->dest;
1588
1589   /* gcc_assert (vect_can_advance_ivs_p (loop_vinfo)); */
1590
1591   /* Make sure there exists a single-predecessor exit bb:  */
1592   gcc_assert (single_pred_p (exit_bb));
1593
1594   for (gsi = gsi_start_phis (loop->header), gsi1 = gsi_start_phis (update_bb);
1595        !gsi_end_p (gsi) && !gsi_end_p (gsi1);
1596        gsi_next (&gsi), gsi_next (&gsi1))
1597     {
1598       tree access_fn = NULL;
1599       tree evolution_part;
1600       tree init_expr;
1601       tree step_expr, off;
1602       tree type;
1603       tree var, ni, ni_name;
1604       gimple_stmt_iterator last_gsi;
1605
1606       phi = gsi_stmt (gsi);
1607       phi1 = gsi_stmt (gsi1);
1608       if (vect_print_dump_info (REPORT_DETAILS))
1609         {
1610           fprintf (vect_dump, "vect_update_ivs_after_vectorizer: phi: ");
1611           print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
1612         }
1613
1614       /* Skip virtual phi's.  */
1615       if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
1616         {
1617           if (vect_print_dump_info (REPORT_DETAILS))
1618             fprintf (vect_dump, "virtual phi. skip.");
1619           continue;
1620         }
1621
1622       /* Skip reduction phis.  */
1623       if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
1624         {
1625           if (vect_print_dump_info (REPORT_DETAILS))
1626             fprintf (vect_dump, "reduc phi. skip.");
1627           continue;
1628         }
1629
1630       access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi));
1631       gcc_assert (access_fn);
1632       /* We can end up with an access_fn like
1633            (short int) {(short unsigned int) i_49, +, 1}_1
1634          for further analysis we need to strip the outer cast but we
1635          need to preserve the original type.  */
1636       type = TREE_TYPE (access_fn);
1637       STRIP_NOPS (access_fn);
1638       evolution_part =
1639          unshare_expr (evolution_part_in_loop_num (access_fn, loop->num));
1640       gcc_assert (evolution_part != NULL_TREE);
1641
1642       /* FORNOW: We do not support IVs whose evolution function is a polynomial
1643          of degree >= 2 or exponential.  */
1644       gcc_assert (!tree_is_chrec (evolution_part));
1645
1646       step_expr = evolution_part;
1647       init_expr = unshare_expr (initial_condition_in_loop_num (access_fn,
1648                                                                loop->num));
1649       init_expr = fold_convert (type, init_expr);
1650
1651       off = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
1652                          fold_convert (TREE_TYPE (step_expr), niters),
1653                          step_expr);
1654       if (POINTER_TYPE_P (TREE_TYPE (init_expr)))
1655         ni = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (init_expr),
1656                           init_expr,
1657                           fold_convert (sizetype, off));
1658       else
1659         ni = fold_build2 (PLUS_EXPR, TREE_TYPE (init_expr),
1660                           init_expr,
1661                           fold_convert (TREE_TYPE (init_expr), off));
1662
1663       var = create_tmp_var (TREE_TYPE (init_expr), "tmp");
1664       add_referenced_var (var);
1665
1666       last_gsi = gsi_last_bb (exit_bb);
1667       ni_name = force_gimple_operand_gsi (&last_gsi, ni, false, var,
1668                                           true, GSI_SAME_STMT);
1669
1670       /* Fix phi expressions in the successor bb.  */
1671       SET_PHI_ARG_DEF (phi1, update_e->dest_idx, ni_name);
1672     }
1673 }
1674
1675 /* Return the more conservative threshold between the
1676    min_profitable_iters returned by the cost model and the user
1677    specified threshold, if provided.  */
1678
1679 static unsigned int
1680 conservative_cost_threshold (loop_vec_info loop_vinfo,
1681                              int min_profitable_iters)
1682 {
1683   unsigned int th;
1684   int min_scalar_loop_bound;
1685
1686   min_scalar_loop_bound = ((PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)
1687                             * LOOP_VINFO_VECT_FACTOR (loop_vinfo)) - 1);
1688
1689   /* Use the cost model only if it is more conservative than user specified
1690      threshold.  */
1691   th = (unsigned) min_scalar_loop_bound;
1692   if (min_profitable_iters
1693       && (!min_scalar_loop_bound
1694           || min_profitable_iters > min_scalar_loop_bound))
1695     th = (unsigned) min_profitable_iters;
1696
1697   if (th && vect_print_dump_info (REPORT_COST))
1698     fprintf (vect_dump, "Profitability threshold is %u loop iterations.", th);
1699
1700   return th;
1701 }
1702
1703 /* Function vect_do_peeling_for_loop_bound
1704
1705    Peel the last iterations of the loop represented by LOOP_VINFO.
1706    The peeled iterations form a new epilog loop.  Given that the loop now
1707    iterates NITERS times, the new epilog loop iterates
1708    NITERS % VECTORIZATION_FACTOR times.
1709
1710    The original loop will later be made to iterate
1711    NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO).
1712
1713    COND_EXPR and COND_EXPR_STMT_LIST are combined with a new generated
1714    test.  */
1715
1716 void
1717 vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio,
1718                                 tree cond_expr, gimple_seq cond_expr_stmt_list)
1719 {
1720   tree ni_name, ratio_mult_vf_name;
1721   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1722   struct loop *new_loop;
1723   edge update_e;
1724   basic_block preheader;
1725   int loop_num;
1726   bool check_profitability = false;
1727   unsigned int th = 0;
1728   int min_profitable_iters;
1729
1730   if (vect_print_dump_info (REPORT_DETAILS))
1731     fprintf (vect_dump, "=== vect_do_peeling_for_loop_bound ===");
1732
1733   initialize_original_copy_tables ();
1734
1735   /* Generate the following variables on the preheader of original loop:
1736
1737      ni_name = number of iteration the original loop executes
1738      ratio = ni_name / vf
1739      ratio_mult_vf_name = ratio * vf  */
1740   vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
1741                                    &ratio_mult_vf_name, ratio,
1742                                    cond_expr_stmt_list);
1743
1744   loop_num  = loop->num;
1745
1746   /* If cost model check not done during versioning and
1747      peeling for alignment.  */
1748   if (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
1749       && !LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo)
1750       && !LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo)
1751       && !cond_expr)
1752     {
1753       check_profitability = true;
1754
1755       /* Get profitability threshold for vectorized loop.  */
1756       min_profitable_iters = LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo);
1757
1758       th = conservative_cost_threshold (loop_vinfo,
1759                                         min_profitable_iters);
1760     }
1761
1762   new_loop = slpeel_tree_peel_loop_to_edge (loop, single_exit (loop),
1763                                             ratio_mult_vf_name, ni_name, false,
1764                                             th, check_profitability,
1765                                             cond_expr, cond_expr_stmt_list);
1766   gcc_assert (new_loop);
1767   gcc_assert (loop_num == loop->num);
1768 #ifdef ENABLE_CHECKING
1769   slpeel_verify_cfg_after_peeling (loop, new_loop);
1770 #endif
1771
1772   /* A guard that controls whether the new_loop is to be executed or skipped
1773      is placed in LOOP->exit.  LOOP->exit therefore has two successors - one
1774      is the preheader of NEW_LOOP, where the IVs from LOOP are used.  The other
1775      is a bb after NEW_LOOP, where these IVs are not used.  Find the edge that
1776      is on the path where the LOOP IVs are used and need to be updated.  */
1777
1778   preheader = loop_preheader_edge (new_loop)->src;
1779   if (EDGE_PRED (preheader, 0)->src == single_exit (loop)->dest)
1780     update_e = EDGE_PRED (preheader, 0);
1781   else
1782     update_e = EDGE_PRED (preheader, 1);
1783
1784   /* Update IVs of original loop as if they were advanced
1785      by ratio_mult_vf_name steps.  */
1786   vect_update_ivs_after_vectorizer (loop_vinfo, ratio_mult_vf_name, update_e);
1787
1788   /* After peeling we have to reset scalar evolution analyzer.  */
1789   scev_reset ();
1790
1791   free_original_copy_tables ();
1792 }
1793
1794
1795 /* Function vect_gen_niters_for_prolog_loop
1796
1797    Set the number of iterations for the loop represented by LOOP_VINFO
1798    to the minimum between LOOP_NITERS (the original iteration count of the loop)
1799    and the misalignment of DR - the data reference recorded in
1800    LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO).  As a result, after the execution of
1801    this loop, the data reference DR will refer to an aligned location.
1802
1803    The following computation is generated:
1804
1805    If the misalignment of DR is known at compile time:
1806      addr_mis = int mis = DR_MISALIGNMENT (dr);
1807    Else, compute address misalignment in bytes:
1808      addr_mis = addr & (vectype_size - 1)
1809
1810    prolog_niters = min (LOOP_NITERS, ((VF - addr_mis/elem_size)&(VF-1))/step)
1811
1812    (elem_size = element type size; an element is the scalar element whose type
1813    is the inner type of the vectype)
1814
1815    When the step of the data-ref in the loop is not 1 (as in interleaved data
1816    and SLP), the number of iterations of the prolog must be divided by the step
1817    (which is equal to the size of interleaved group).
1818
1819    The above formulas assume that VF == number of elements in the vector. This
1820    may not hold when there are multiple-types in the loop.
1821    In this case, for some data-references in the loop the VF does not represent
1822    the number of elements that fit in the vector.  Therefore, instead of VF we
1823    use TYPE_VECTOR_SUBPARTS.  */
1824
1825 static tree
1826 vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
1827 {
1828   struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
1829   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1830   tree var;
1831   gimple_seq stmts;
1832   tree iters, iters_name;
1833   edge pe;
1834   basic_block new_bb;
1835   gimple dr_stmt = DR_STMT (dr);
1836   stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
1837   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1838   int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
1839   tree niters_type = TREE_TYPE (loop_niters);
1840   int step = 1;
1841   int element_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
1842   int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1843
1844   if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1845     step = DR_GROUP_SIZE (vinfo_for_stmt (DR_GROUP_FIRST_DR (stmt_info)));
1846
1847   pe = loop_preheader_edge (loop);
1848
1849   if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
1850     {
1851       int byte_misalign = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
1852       int elem_misalign = byte_misalign / element_size;
1853
1854       if (vect_print_dump_info (REPORT_DETAILS))
1855         fprintf (vect_dump, "known alignment = %d.", byte_misalign);
1856
1857       iters = build_int_cst (niters_type,
1858                      (((nelements - elem_misalign) & (nelements - 1)) / step));
1859     }
1860   else
1861     {
1862       gimple_seq new_stmts = NULL;
1863       tree start_addr = vect_create_addr_base_for_vector_ref (dr_stmt,
1864                                                 &new_stmts, NULL_TREE, loop);
1865       tree ptr_type = TREE_TYPE (start_addr);
1866       tree size = TYPE_SIZE (ptr_type);
1867       tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
1868       tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1);
1869       tree elem_size_log =
1870         build_int_cst (type, exact_log2 (vectype_align/nelements));
1871       tree nelements_minus_1 = build_int_cst (type, nelements - 1);
1872       tree nelements_tree = build_int_cst (type, nelements);
1873       tree byte_misalign;
1874       tree elem_misalign;
1875
1876       new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmts);
1877       gcc_assert (!new_bb);
1878
1879       /* Create:  byte_misalign = addr & (vectype_size - 1)  */
1880       byte_misalign =
1881         fold_build2 (BIT_AND_EXPR, type, fold_convert (type, start_addr), vectype_size_minus_1);
1882
1883       /* Create:  elem_misalign = byte_misalign / element_size  */
1884       elem_misalign =
1885         fold_build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log);
1886
1887       /* Create:  (niters_type) (nelements - elem_misalign)&(nelements - 1)  */
1888       iters = fold_build2 (MINUS_EXPR, type, nelements_tree, elem_misalign);
1889       iters = fold_build2 (BIT_AND_EXPR, type, iters, nelements_minus_1);
1890       iters = fold_convert (niters_type, iters);
1891     }
1892
1893   /* Create:  prolog_loop_niters = min (iters, loop_niters) */
1894   /* If the loop bound is known at compile time we already verified that it is
1895      greater than vf; since the misalignment ('iters') is at most vf, there's
1896      no need to generate the MIN_EXPR in this case.  */
1897   if (TREE_CODE (loop_niters) != INTEGER_CST)
1898     iters = fold_build2 (MIN_EXPR, niters_type, iters, loop_niters);
1899
1900   if (vect_print_dump_info (REPORT_DETAILS))
1901     {
1902       fprintf (vect_dump, "niters for prolog loop: ");
1903       print_generic_expr (vect_dump, iters, TDF_SLIM);
1904     }
1905
1906   var = create_tmp_var (niters_type, "prolog_loop_niters");
1907   add_referenced_var (var);
1908   stmts = NULL;
1909   iters_name = force_gimple_operand (iters, &stmts, false, var);
1910
1911   /* Insert stmt on loop preheader edge.  */
1912   if (stmts)
1913     {
1914       basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
1915       gcc_assert (!new_bb);
1916     }
1917
1918   return iters_name;
1919 }
1920
1921
1922 /* Function vect_update_init_of_dr
1923
1924    NITERS iterations were peeled from LOOP.  DR represents a data reference
1925    in LOOP.  This function updates the information recorded in DR to
1926    account for the fact that the first NITERS iterations had already been
1927    executed.  Specifically, it updates the OFFSET field of DR.  */
1928
1929 static void
1930 vect_update_init_of_dr (struct data_reference *dr, tree niters)
1931 {
1932   tree offset = DR_OFFSET (dr);
1933
1934   niters = fold_build2 (MULT_EXPR, sizetype,
1935                         fold_convert (sizetype, niters),
1936                         fold_convert (sizetype, DR_STEP (dr)));
1937   offset = fold_build2 (PLUS_EXPR, sizetype,
1938                         fold_convert (sizetype, offset), niters);
1939   DR_OFFSET (dr) = offset;
1940 }
1941
1942
1943 /* Function vect_update_inits_of_drs
1944
1945    NITERS iterations were peeled from the loop represented by LOOP_VINFO.
1946    This function updates the information recorded for the data references in
1947    the loop to account for the fact that the first NITERS iterations had
1948    already been executed.  Specifically, it updates the initial_condition of
1949    the access_function of all the data_references in the loop.  */
1950
1951 static void
1952 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
1953 {
1954   unsigned int i;
1955   VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1956   struct data_reference *dr;
1957
1958   if (vect_print_dump_info (REPORT_DETAILS))
1959     fprintf (vect_dump, "=== vect_update_inits_of_dr ===");
1960
1961   for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1962     vect_update_init_of_dr (dr, niters);
1963 }
1964
1965
1966 /* Function vect_do_peeling_for_alignment
1967
1968    Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
1969    'niters' is set to the misalignment of one of the data references in the
1970    loop, thereby forcing it to refer to an aligned location at the beginning
1971    of the execution of this loop.  The data reference for which we are
1972    peeling is recorded in LOOP_VINFO_UNALIGNED_DR.  */
1973
1974 void
1975 vect_do_peeling_for_alignment (loop_vec_info loop_vinfo)
1976 {
1977   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1978   tree niters_of_prolog_loop, ni_name;
1979   tree n_iters;
1980   struct loop *new_loop;
1981   unsigned int th = 0;
1982   int min_profitable_iters;
1983
1984   if (vect_print_dump_info (REPORT_DETAILS))
1985     fprintf (vect_dump, "=== vect_do_peeling_for_alignment ===");
1986
1987   initialize_original_copy_tables ();
1988
1989   ni_name = vect_build_loop_niters (loop_vinfo, NULL);
1990   niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name);
1991
1992
1993   /* Get profitability threshold for vectorized loop.  */
1994   min_profitable_iters = LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo);
1995   th = conservative_cost_threshold (loop_vinfo,
1996                                     min_profitable_iters);
1997
1998   /* Peel the prolog loop and iterate it niters_of_prolog_loop.  */
1999   new_loop =
2000     slpeel_tree_peel_loop_to_edge (loop, loop_preheader_edge (loop),
2001                                    niters_of_prolog_loop, ni_name, true,
2002                                    th, true, NULL_TREE, NULL);
2003
2004   gcc_assert (new_loop);
2005 #ifdef ENABLE_CHECKING
2006   slpeel_verify_cfg_after_peeling (new_loop, loop);
2007 #endif
2008
2009   /* Update number of times loop executes.  */
2010   n_iters = LOOP_VINFO_NITERS (loop_vinfo);
2011   LOOP_VINFO_NITERS (loop_vinfo) = fold_build2 (MINUS_EXPR,
2012                 TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop);
2013
2014   /* Update the init conditions of the access functions of all data refs.  */
2015   vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop);
2016
2017   /* After peeling we have to reset scalar evolution analyzer.  */
2018   scev_reset ();
2019
2020   free_original_copy_tables ();
2021 }
2022
2023
2024 /* Function vect_create_cond_for_align_checks.
2025
2026    Create a conditional expression that represents the alignment checks for
2027    all of data references (array element references) whose alignment must be
2028    checked at runtime.
2029
2030    Input:
2031    COND_EXPR  - input conditional expression.  New conditions will be chained
2032                 with logical AND operation.
2033    LOOP_VINFO - two fields of the loop information are used.
2034                 LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
2035                 LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
2036
2037    Output:
2038    COND_EXPR_STMT_LIST - statements needed to construct the conditional
2039                          expression.
2040    The returned value is the conditional expression to be used in the if
2041    statement that controls which version of the loop gets executed at runtime.
2042
2043    The algorithm makes two assumptions:
2044      1) The number of bytes "n" in a vector is a power of 2.
2045      2) An address "a" is aligned if a%n is zero and that this
2046         test can be done as a&(n-1) == 0.  For example, for 16
2047         byte vectors the test is a&0xf == 0.  */
2048
2049 static void
2050 vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
2051                                    tree *cond_expr,
2052                                    gimple_seq *cond_expr_stmt_list)
2053 {
2054   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2055   VEC(gimple,heap) *may_misalign_stmts
2056     = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
2057   gimple ref_stmt;
2058   int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
2059   tree mask_cst;
2060   unsigned int i;
2061   tree psize;
2062   tree int_ptrsize_type;
2063   char tmp_name[20];
2064   tree or_tmp_name = NULL_TREE;
2065   tree and_tmp, and_tmp_name;
2066   gimple and_stmt;
2067   tree ptrsize_zero;
2068   tree part_cond_expr;
2069
2070   /* Check that mask is one less than a power of 2, i.e., mask is
2071      all zeros followed by all ones.  */
2072   gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));
2073
2074   /* CHECKME: what is the best integer or unsigned type to use to hold a
2075      cast from a pointer value?  */
2076   psize = TYPE_SIZE (ptr_type_node);
2077   int_ptrsize_type
2078     = lang_hooks.types.type_for_size (tree_low_cst (psize, 1), 0);
2079
2080   /* Create expression (mask & (dr_1 || ... || dr_n)) where dr_i is the address
2081      of the first vector of the i'th data reference. */
2082
2083   for (i = 0; VEC_iterate (gimple, may_misalign_stmts, i, ref_stmt); i++)
2084     {
2085       gimple_seq new_stmt_list = NULL;
2086       tree addr_base;
2087       tree addr_tmp, addr_tmp_name;
2088       tree or_tmp, new_or_tmp_name;
2089       gimple addr_stmt, or_stmt;
2090
2091       /* create: addr_tmp = (int)(address_of_first_vector) */
2092       addr_base =
2093         vect_create_addr_base_for_vector_ref (ref_stmt, &new_stmt_list,
2094                                               NULL_TREE, loop);
2095       if (new_stmt_list != NULL)
2096         gimple_seq_add_seq (cond_expr_stmt_list, new_stmt_list);
2097
2098       sprintf (tmp_name, "%s%d", "addr2int", i);
2099       addr_tmp = create_tmp_var (int_ptrsize_type, tmp_name);
2100       add_referenced_var (addr_tmp);
2101       addr_tmp_name = make_ssa_name (addr_tmp, NULL);
2102       addr_stmt = gimple_build_assign_with_ops (NOP_EXPR, addr_tmp_name,
2103                                                 addr_base, NULL_TREE);
2104       SSA_NAME_DEF_STMT (addr_tmp_name) = addr_stmt;
2105       gimple_seq_add_stmt (cond_expr_stmt_list, addr_stmt);
2106
2107       /* The addresses are OR together.  */
2108
2109       if (or_tmp_name != NULL_TREE)
2110         {
2111           /* create: or_tmp = or_tmp | addr_tmp */
2112           sprintf (tmp_name, "%s%d", "orptrs", i);
2113           or_tmp = create_tmp_var (int_ptrsize_type, tmp_name);
2114           add_referenced_var (or_tmp);
2115           new_or_tmp_name = make_ssa_name (or_tmp, NULL);
2116           or_stmt = gimple_build_assign_with_ops (BIT_IOR_EXPR,
2117                                                   new_or_tmp_name,
2118                                                   or_tmp_name, addr_tmp_name);
2119           SSA_NAME_DEF_STMT (new_or_tmp_name) = or_stmt;
2120           gimple_seq_add_stmt (cond_expr_stmt_list, or_stmt);
2121           or_tmp_name = new_or_tmp_name;
2122         }
2123       else
2124         or_tmp_name = addr_tmp_name;
2125
2126     } /* end for i */
2127
2128   mask_cst = build_int_cst (int_ptrsize_type, mask);
2129
2130   /* create: and_tmp = or_tmp & mask  */
2131   and_tmp = create_tmp_var (int_ptrsize_type, "andmask" );
2132   add_referenced_var (and_tmp);
2133   and_tmp_name = make_ssa_name (and_tmp, NULL);
2134
2135   and_stmt = gimple_build_assign_with_ops (BIT_AND_EXPR, and_tmp_name,
2136                                            or_tmp_name, mask_cst);
2137   SSA_NAME_DEF_STMT (and_tmp_name) = and_stmt;
2138   gimple_seq_add_stmt (cond_expr_stmt_list, and_stmt);
2139
2140   /* Make and_tmp the left operand of the conditional test against zero.
2141      if and_tmp has a nonzero bit then some address is unaligned.  */
2142   ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
2143   part_cond_expr = fold_build2 (EQ_EXPR, boolean_type_node,
2144                                 and_tmp_name, ptrsize_zero);
2145   if (*cond_expr)
2146     *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2147                               *cond_expr, part_cond_expr);
2148   else
2149     *cond_expr = part_cond_expr;
2150 }
2151
2152
2153 /* Function vect_vfa_segment_size.
2154
2155    Create an expression that computes the size of segment
2156    that will be accessed for a data reference.  The functions takes into
2157    account that realignment loads may access one more vector.
2158
2159    Input:
2160      DR: The data reference.
2161      VECT_FACTOR: vectorization factor.
2162
2163    Return an expression whose value is the size of segment which will be
2164    accessed by DR.  */
2165
2166 static tree
2167 vect_vfa_segment_size (struct data_reference *dr, tree vect_factor)
2168 {
2169   tree segment_length = fold_build2 (MULT_EXPR, integer_type_node,
2170                                      DR_STEP (dr), vect_factor);
2171
2172   if (vect_supportable_dr_alignment (dr) == dr_explicit_realign_optimized)
2173     {
2174       tree vector_size = TYPE_SIZE_UNIT
2175                           (STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr))));
2176
2177       segment_length = fold_build2 (PLUS_EXPR, integer_type_node,
2178                                     segment_length, vector_size);
2179     }
2180   return fold_convert (sizetype, segment_length);
2181 }
2182
2183
2184 /* Function vect_create_cond_for_alias_checks.
2185
2186    Create a conditional expression that represents the run-time checks for
2187    overlapping of address ranges represented by a list of data references
2188    relations passed as input.
2189
2190    Input:
2191    COND_EXPR  - input conditional expression.  New conditions will be chained
2192                 with logical AND operation.
2193    LOOP_VINFO - field LOOP_VINFO_MAY_ALIAS_STMTS contains the list of ddrs
2194                 to be checked.
2195
2196    Output:
2197    COND_EXPR - conditional expression.
2198    COND_EXPR_STMT_LIST - statements needed to construct the conditional
2199                          expression.
2200
2201
2202    The returned value is the conditional expression to be used in the if
2203    statement that controls which version of the loop gets executed at runtime.
2204 */
2205
2206 static void
2207 vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo,
2208                                    tree * cond_expr,
2209                                    gimple_seq * cond_expr_stmt_list)
2210 {
2211   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2212   VEC (ddr_p, heap) * may_alias_ddrs =
2213     LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
2214   tree vect_factor =
2215     build_int_cst (integer_type_node, LOOP_VINFO_VECT_FACTOR (loop_vinfo));
2216
2217   ddr_p ddr;
2218   unsigned int i;
2219   tree part_cond_expr;
2220
2221   /* Create expression
2222      ((store_ptr_0 + store_segment_length_0) < load_ptr_0)
2223      || (load_ptr_0 + load_segment_length_0) < store_ptr_0))
2224      &&
2225      ...
2226      &&
2227      ((store_ptr_n + store_segment_length_n) < load_ptr_n)
2228      || (load_ptr_n + load_segment_length_n) < store_ptr_n))  */
2229
2230   if (VEC_empty (ddr_p, may_alias_ddrs))
2231     return;
2232
2233   for (i = 0; VEC_iterate (ddr_p, may_alias_ddrs, i, ddr); i++)
2234     {
2235       struct data_reference *dr_a, *dr_b;
2236       gimple dr_group_first_a, dr_group_first_b;
2237       tree addr_base_a, addr_base_b;
2238       tree segment_length_a, segment_length_b;
2239       gimple stmt_a, stmt_b;
2240
2241       dr_a = DDR_A (ddr);
2242       stmt_a = DR_STMT (DDR_A (ddr));
2243       dr_group_first_a = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_a));
2244       if (dr_group_first_a)
2245         {
2246           stmt_a = dr_group_first_a;
2247           dr_a = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt_a));
2248         }
2249
2250       dr_b = DDR_B (ddr);
2251       stmt_b = DR_STMT (DDR_B (ddr));
2252       dr_group_first_b = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_b));
2253       if (dr_group_first_b)
2254         {
2255           stmt_b = dr_group_first_b;
2256           dr_b = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt_b));
2257         }
2258
2259       addr_base_a =
2260         vect_create_addr_base_for_vector_ref (stmt_a, cond_expr_stmt_list,
2261                                               NULL_TREE, loop);
2262       addr_base_b =
2263         vect_create_addr_base_for_vector_ref (stmt_b, cond_expr_stmt_list,
2264                                               NULL_TREE, loop);
2265
2266       segment_length_a = vect_vfa_segment_size (dr_a, vect_factor);
2267       segment_length_b = vect_vfa_segment_size (dr_b, vect_factor);
2268
2269       if (vect_print_dump_info (REPORT_DR_DETAILS))
2270         {
2271           fprintf (vect_dump,
2272                    "create runtime check for data references ");
2273           print_generic_expr (vect_dump, DR_REF (dr_a), TDF_SLIM);
2274           fprintf (vect_dump, " and ");
2275           print_generic_expr (vect_dump, DR_REF (dr_b), TDF_SLIM);
2276         }
2277
2278
2279       part_cond_expr =
2280         fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
2281           fold_build2 (LT_EXPR, boolean_type_node,
2282             fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (addr_base_a),
2283               addr_base_a,
2284               segment_length_a),
2285             addr_base_b),
2286           fold_build2 (LT_EXPR, boolean_type_node,
2287             fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (addr_base_b),
2288               addr_base_b,
2289               segment_length_b),
2290             addr_base_a));
2291
2292       if (*cond_expr)
2293         *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2294                                   *cond_expr, part_cond_expr);
2295       else
2296         *cond_expr = part_cond_expr;
2297     }
2298
2299   if (vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS))
2300     fprintf (vect_dump, "created %u versioning for alias checks.\n",
2301              VEC_length (ddr_p, may_alias_ddrs));
2302 }
2303
2304
2305 /* Function vect_loop_versioning.
2306
2307    If the loop has data references that may or may not be aligned or/and
2308    has data reference relations whose independence was not proven then
2309    two versions of the loop need to be generated, one which is vectorized
2310    and one which isn't.  A test is then generated to control which of the
2311    loops is executed.  The test checks for the alignment of all of the
2312    data references that may or may not be aligned.  An additional
2313    sequence of runtime tests is generated for each pairs of DDRs whose
2314    independence was not proven.  The vectorized version of loop is
2315    executed only if both alias and alignment tests are passed.
2316
2317    The test generated to check which version of loop is executed
2318    is modified to also check for profitability as indicated by the
2319    cost model initially.
2320
2321    The versioning precondition(s) are placed in *COND_EXPR and
2322    *COND_EXPR_STMT_LIST.  If DO_VERSIONING is true versioning is
2323    also performed, otherwise only the conditions are generated.  */
2324
2325 void
2326 vect_loop_versioning (loop_vec_info loop_vinfo, bool do_versioning,
2327                       tree *cond_expr, gimple_seq *cond_expr_stmt_list)
2328 {
2329   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2330   struct loop *nloop;
2331   basic_block condition_bb;
2332   gimple_stmt_iterator gsi, cond_exp_gsi;
2333   basic_block merge_bb;
2334   basic_block new_exit_bb;
2335   edge new_exit_e, e;
2336   gimple orig_phi, new_phi;
2337   tree arg;
2338   unsigned prob = 4 * REG_BR_PROB_BASE / 5;
2339   gimple_seq gimplify_stmt_list = NULL;
2340   tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo);
2341   int min_profitable_iters = 0;
2342   unsigned int th;
2343
2344   /* Get profitability threshold for vectorized loop.  */
2345   min_profitable_iters = LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo);
2346
2347   th = conservative_cost_threshold (loop_vinfo,
2348                                     min_profitable_iters);
2349
2350   *cond_expr =
2351     fold_build2 (GT_EXPR, boolean_type_node, scalar_loop_iters,
2352                  build_int_cst (TREE_TYPE (scalar_loop_iters), th));
2353
2354   *cond_expr = force_gimple_operand (*cond_expr, cond_expr_stmt_list,
2355                                      false, NULL_TREE);
2356
2357   if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
2358       vect_create_cond_for_align_checks (loop_vinfo, cond_expr,
2359                                          cond_expr_stmt_list);
2360
2361   if (LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
2362     vect_create_cond_for_alias_checks (loop_vinfo, cond_expr,
2363                                        cond_expr_stmt_list);
2364
2365   *cond_expr =
2366     fold_build2 (NE_EXPR, boolean_type_node, *cond_expr, integer_zero_node);
2367   *cond_expr =
2368     force_gimple_operand (*cond_expr, &gimplify_stmt_list, true, NULL_TREE);
2369   gimple_seq_add_seq (cond_expr_stmt_list, gimplify_stmt_list);
2370
2371   /* If we only needed the extra conditions and a new loop copy
2372      bail out here.  */
2373   if (!do_versioning)
2374     return;
2375
2376   initialize_original_copy_tables ();
2377   nloop = loop_version (loop, *cond_expr, &condition_bb,
2378                         prob, prob, REG_BR_PROB_BASE - prob, true);
2379   free_original_copy_tables();
2380
2381   /* Loop versioning violates an assumption we try to maintain during
2382      vectorization - that the loop exit block has a single predecessor.
2383      After versioning, the exit block of both loop versions is the same
2384      basic block (i.e. it has two predecessors). Just in order to simplify
2385      following transformations in the vectorizer, we fix this situation
2386      here by adding a new (empty) block on the exit-edge of the loop,
2387      with the proper loop-exit phis to maintain loop-closed-form.  */
2388
2389   merge_bb = single_exit (loop)->dest;
2390   gcc_assert (EDGE_COUNT (merge_bb->preds) == 2);
2391   new_exit_bb = split_edge (single_exit (loop));
2392   new_exit_e = single_exit (loop);
2393   e = EDGE_SUCC (new_exit_bb, 0);
2394
2395   for (gsi = gsi_start_phis (merge_bb); !gsi_end_p (gsi); gsi_next (&gsi))
2396     {
2397       orig_phi = gsi_stmt (gsi);
2398       new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
2399                                   new_exit_bb);
2400       arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
2401       add_phi_arg (new_phi, arg, new_exit_e,
2402                    gimple_phi_arg_location_from_edge (orig_phi, e));
2403       SET_PHI_ARG_DEF (orig_phi, e->dest_idx, PHI_RESULT (new_phi));
2404     }
2405
2406   /* End loop-exit-fixes after versioning.  */
2407
2408   update_ssa (TODO_update_ssa);
2409   if (*cond_expr_stmt_list)
2410     {
2411       cond_exp_gsi = gsi_last_bb (condition_bb);
2412       gsi_insert_seq_before (&cond_exp_gsi, *cond_expr_stmt_list,
2413                              GSI_SAME_STMT);
2414       *cond_expr_stmt_list = NULL;
2415     }
2416   *cond_expr = NULL_TREE;
2417 }
2418