gcc/tree-vect-loop-manip.c

   1 /* Vectorizer Specific Loop Manipulations
   2    Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software
   3    Foundation, Inc.
   4    Contributed by Dorit Naishlos <dorit@il.ibm.com>
   5    and Ira Rosen <irar@il.ibm.com>
   6
   7 This file is part of GCC.
   8
   9 GCC is free software; you can redistribute it and/or modify it under
  10 the terms of the GNU General Public License as published by the Free
  11 Software Foundation; either version 3, or (at your option) any later
  12 version.
  13
  14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  16 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  17 for more details.
  18
  19 You should have received a copy of the GNU General Public License
  20 along with GCC; see the file COPYING3.  If not see
  21 <http://www.gnu.org/licenses/>.  */
  22
  23 #include "config.h"
  24 #include "system.h"
  25 #include "coretypes.h"
  26 #include "tm.h"
  27 #include "ggc.h"
  28 #include "tree.h"
  29 #include "basic-block.h"
  30 #include "diagnostic.h"
  31 #include "tree-flow.h"
  32 #include "tree-dump.h"
  33 #include "cfgloop.h"
  34 #include "cfglayout.h"
  35 #include "expr.h"
  36 #include "toplev.h"
  37 #include "tree-scalar-evolution.h"
  38 #include "tree-vectorizer.h"
  39 #include "langhooks.h"
  40
  41 /*************************************************************************
  42   Simple Loop Peeling Utilities
  43
  44   Utilities to support loop peeling for vectorization purposes.
  45  *************************************************************************/
  46
  47
  48 /* Renames the use *OP_P.  */
  49
  50 static void
  51 rename_use_op (use_operand_p op_p)
  52 {
  53   tree new_name;
  54
  55   if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
  56     return;
  57
  58   new_name = get_current_def (USE_FROM_PTR (op_p));
  59
  60   /* Something defined outside of the loop.  */
  61   if (!new_name)
  62     return;
  63
  64   /* An ordinary ssa name defined in the loop.  */
  65
  66   SET_USE (op_p, new_name);
  67 }
  68
  69
  70 /* Renames the variables in basic block BB.  */
  71
  72 void
  73 rename_variables_in_bb (basic_block bb)
  74 {
  75   gimple_stmt_iterator gsi;
  76   gimple stmt;
  77   use_operand_p use_p;
  78   ssa_op_iter iter;
  79   edge e;
  80   edge_iterator ei;
  81   struct loop *loop = bb->loop_father;
  82
  83   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
  84     {
  85       stmt = gsi_stmt (gsi);
  86       FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
  87         rename_use_op (use_p);
  88     }
  89
  90   FOR_EACH_EDGE (e, ei, bb->succs)
  91     {
  92       if (!flow_bb_inside_loop_p (loop, e->dest))
  93         continue;
  94       for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
  95         rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi_stmt (gsi), e));
  96     }
  97 }
  98
  99
 100 /* Renames variables in new generated LOOP.  */
 101
 102 void
 103 rename_variables_in_loop (struct loop *loop)
 104 {
 105   unsigned i;
 106   basic_block *bbs;
 107
 108   bbs = get_loop_body (loop);
 109
 110   for (i = 0; i < loop->num_nodes; i++)
 111     rename_variables_in_bb (bbs[i]);
 112
 113   free (bbs);
 114 }
 115
 116
 117 /* Update the PHI nodes of NEW_LOOP.
 118
 119    NEW_LOOP is a duplicate of ORIG_LOOP.
 120    AFTER indicates whether NEW_LOOP executes before or after ORIG_LOOP:
 121    AFTER is true if NEW_LOOP executes after ORIG_LOOP, and false if it
 122    executes before it.  */
 123
 124 static void
 125 slpeel_update_phis_for_duplicate_loop (struct loop *orig_loop,
 126                                        struct loop *new_loop, bool after)
 127 {
 128   tree new_ssa_name;
 129   gimple phi_new, phi_orig;
 130   tree def;
 131   edge orig_loop_latch = loop_latch_edge (orig_loop);
 132   edge orig_entry_e = loop_preheader_edge (orig_loop);
 133   edge new_loop_exit_e = single_exit (new_loop);
 134   edge new_loop_entry_e = loop_preheader_edge (new_loop);
 135   edge entry_arg_e = (after ? orig_loop_latch : orig_entry_e);
 136   gimple_stmt_iterator gsi_new, gsi_orig;
 137
 138   /*
 139      step 1. For each loop-header-phi:
 140              Add the first phi argument for the phi in NEW_LOOP
 141             (the one associated with the entry of NEW_LOOP)
 142
 143      step 2. For each loop-header-phi:
 144              Add the second phi argument for the phi in NEW_LOOP
 145             (the one associated with the latch of NEW_LOOP)
 146
 147      step 3. Update the phis in the successor block of NEW_LOOP.
 148
 149         case 1: NEW_LOOP was placed before ORIG_LOOP:
 150                 The successor block of NEW_LOOP is the header of ORIG_LOOP.
 151                 Updating the phis in the successor block can therefore be done
 152                 along with the scanning of the loop header phis, because the
 153                 header blocks of ORIG_LOOP and NEW_LOOP have exactly the same
 154                 phi nodes, organized in the same order.
 155
 156         case 2: NEW_LOOP was placed after ORIG_LOOP:
 157                 The successor block of NEW_LOOP is the original exit block of
 158                 ORIG_LOOP - the phis to be updated are the loop-closed-ssa phis.
 159                 We postpone updating these phis to a later stage (when
 160                 loop guards are added).
 161    */
 162
 163
 164   /* Scan the phis in the headers of the old and new loops
 165      (they are organized in exactly the same order).  */
 166
 167   for (gsi_new = gsi_start_phis (new_loop->header),
 168        gsi_orig = gsi_start_phis (orig_loop->header);
 169        !gsi_end_p (gsi_new) && !gsi_end_p (gsi_orig);
 170        gsi_next (&gsi_new), gsi_next (&gsi_orig))
 171     {
 172       phi_new = gsi_stmt (gsi_new);
 173       phi_orig = gsi_stmt (gsi_orig);
 174
 175       /* step 1.  */
 176       def = PHI_ARG_DEF_FROM_EDGE (phi_orig, entry_arg_e);
 177       add_phi_arg (phi_new, def, new_loop_entry_e);
 178
 179       /* step 2.  */
 180       def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_loop_latch);
 181       if (TREE_CODE (def) != SSA_NAME)
 182         continue;
 183
 184       new_ssa_name = get_current_def (def);
 185       if (!new_ssa_name)
 186         {
 187           /* This only happens if there are no definitions
 188              inside the loop. use the phi_result in this case.  */
 189           new_ssa_name = PHI_RESULT (phi_new);
 190         }
 191
 192       /* An ordinary ssa name defined in the loop.  */
 193       add_phi_arg (phi_new, new_ssa_name, loop_latch_edge (new_loop));
 194
 195       /* step 3 (case 1).  */
 196       if (!after)
 197         {
 198           gcc_assert (new_loop_exit_e == orig_entry_e);
 199           SET_PHI_ARG_DEF (phi_orig,
 200                            new_loop_exit_e->dest_idx,
 201                            new_ssa_name);
 202         }
 203     }
 204 }
 205
 206
 207 /* Update PHI nodes for a guard of the LOOP.
 208
 209    Input:
 210    - LOOP, GUARD_EDGE: LOOP is a loop for which we added guard code that
 211         controls whether LOOP is to be executed.  GUARD_EDGE is the edge that
 212         originates from the guard-bb, skips LOOP and reaches the (unique) exit
 213         bb of LOOP.  This loop-exit-bb is an empty bb with one successor.
 214         We denote this bb NEW_MERGE_BB because before the guard code was added
 215         it had a single predecessor (the LOOP header), and now it became a merge
 216         point of two paths - the path that ends with the LOOP exit-edge, and
 217         the path that ends with GUARD_EDGE.
 218    - NEW_EXIT_BB: New basic block that is added by this function between LOOP
 219         and NEW_MERGE_BB. It is used to place loop-closed-ssa-form exit-phis.
 220
 221    ===> The CFG before the guard-code was added:
 222         LOOP_header_bb:
 223           loop_body
 224           if (exit_loop) goto update_bb
 225           else           goto LOOP_header_bb
 226         update_bb:
 227
 228    ==> The CFG after the guard-code was added:
 229         guard_bb:
 230           if (LOOP_guard_condition) goto new_merge_bb
 231           else                      goto LOOP_header_bb
 232         LOOP_header_bb:
 233           loop_body
 234           if (exit_loop_condition) goto new_merge_bb
 235           else                     goto LOOP_header_bb
 236         new_merge_bb:
 237           goto update_bb
 238         update_bb:
 239
 240    ==> The CFG after this function:
 241         guard_bb:
 242           if (LOOP_guard_condition) goto new_merge_bb
 243           else                      goto LOOP_header_bb
 244         LOOP_header_bb:
 245           loop_body
 246           if (exit_loop_condition) goto new_exit_bb
 247           else                     goto LOOP_header_bb
 248         new_exit_bb:
 249         new_merge_bb:
 250           goto update_bb
 251         update_bb:
 252
 253    This function:
 254    1. creates and updates the relevant phi nodes to account for the new
 255       incoming edge (GUARD_EDGE) into NEW_MERGE_BB. This involves:
 256       1.1. Create phi nodes at NEW_MERGE_BB.
 257       1.2. Update the phi nodes at the successor of NEW_MERGE_BB (denoted
 258            UPDATE_BB).  UPDATE_BB was the exit-bb of LOOP before NEW_MERGE_BB
 259    2. preserves loop-closed-ssa-form by creating the required phi nodes
 260       at the exit of LOOP (i.e, in NEW_EXIT_BB).
 261
 262    There are two flavors to this function:
 263
 264    slpeel_update_phi_nodes_for_guard1:
 265      Here the guard controls whether we enter or skip LOOP, where LOOP is a
 266      prolog_loop (loop1 below), and the new phis created in NEW_MERGE_BB are
 267      for variables that have phis in the loop header.
 268
 269    slpeel_update_phi_nodes_for_guard2:
 270      Here the guard controls whether we enter or skip LOOP, where LOOP is an
 271      epilog_loop (loop2 below), and the new phis created in NEW_MERGE_BB are
 272      for variables that have phis in the loop exit.
 273
 274    I.E., the overall structure is:
 275
 276         loop1_preheader_bb:
 277                 guard1 (goto loop1/merge1_bb)
 278         loop1
 279         loop1_exit_bb:
 280                 guard2 (goto merge1_bb/merge2_bb)
 281         merge1_bb
 282         loop2
 283         loop2_exit_bb
 284         merge2_bb
 285         next_bb
 286
 287    slpeel_update_phi_nodes_for_guard1 takes care of creating phis in
 288    loop1_exit_bb and merge1_bb. These are entry phis (phis for the vars
 289    that have phis in loop1->header).
 290
 291    slpeel_update_phi_nodes_for_guard2 takes care of creating phis in
 292    loop2_exit_bb and merge2_bb. These are exit phis (phis for the vars
 293    that have phis in next_bb). It also adds some of these phis to
 294    loop1_exit_bb.
 295
 296    slpeel_update_phi_nodes_for_guard1 is always called before
 297    slpeel_update_phi_nodes_for_guard2. They are both needed in order
 298    to create correct data-flow and loop-closed-ssa-form.
 299
 300    Generally slpeel_update_phi_nodes_for_guard1 creates phis for variables
 301    that change between iterations of a loop (and therefore have a phi-node
 302    at the loop entry), whereas slpeel_update_phi_nodes_for_guard2 creates
 303    phis for variables that are used out of the loop (and therefore have
 304    loop-closed exit phis). Some variables may be both updated between
 305    iterations and used after the loop. This is why in loop1_exit_bb we
 306    may need both entry_phis (created by slpeel_update_phi_nodes_for_guard1)
 307    and exit phis (created by slpeel_update_phi_nodes_for_guard2).
 308
 309    - IS_NEW_LOOP: if IS_NEW_LOOP is true, then LOOP is a newly created copy of
 310      an original loop. i.e., we have:
 311
 312            orig_loop
 313            guard_bb (goto LOOP/new_merge)
 314            new_loop <-- LOOP
 315            new_exit
 316            new_merge
 317            next_bb
 318
 319      If IS_NEW_LOOP is false, then LOOP is an original loop, in which case we
 320      have:
 321
 322            new_loop
 323            guard_bb (goto LOOP/new_merge)
 324            orig_loop <-- LOOP
 325            new_exit
 326            new_merge
 327            next_bb
 328
 329      The SSA names defined in the original loop have a current
 330      reaching definition that that records the corresponding new
 331      ssa-name used in the new duplicated loop copy.
 332   */
 333
 334 /* Function slpeel_update_phi_nodes_for_guard1
 335
 336    Input:
 337    - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
 338    - DEFS - a bitmap of ssa names to mark new names for which we recorded
 339             information.
 340
 341    In the context of the overall structure, we have:
 342
 343         loop1_preheader_bb:
 344                 guard1 (goto loop1/merge1_bb)
 345 LOOP->  loop1
 346         loop1_exit_bb:
 347                 guard2 (goto merge1_bb/merge2_bb)
 348         merge1_bb
 349         loop2
 350         loop2_exit_bb
 351         merge2_bb
 352         next_bb
 353
 354    For each name updated between loop iterations (i.e - for each name that has
 355    an entry (loop-header) phi in LOOP) we create a new phi in:
 356    1. merge1_bb (to account for the edge from guard1)
 357    2. loop1_exit_bb (an exit-phi to keep LOOP in loop-closed form)
 358 */
 359
 360 static void
 361 slpeel_update_phi_nodes_for_guard1 (edge guard_edge, struct loop *loop,
 362                                     bool is_new_loop, basic_block *new_exit_bb,
 363                                     bitmap *defs)
 364 {
 365   gimple orig_phi, new_phi;
 366   gimple update_phi, update_phi2;
 367   tree guard_arg, loop_arg;
 368   basic_block new_merge_bb = guard_edge->dest;
 369   edge e = EDGE_SUCC (new_merge_bb, 0);
 370   basic_block update_bb = e->dest;
 371   basic_block orig_bb = loop->header;
 372   edge new_exit_e;
 373   tree current_new_name;
 374   tree name;
 375   gimple_stmt_iterator gsi_orig, gsi_update;
 376
 377   /* Create new bb between loop and new_merge_bb.  */
 378   *new_exit_bb = split_edge (single_exit (loop));
 379
 380   new_exit_e = EDGE_SUCC (*new_exit_bb, 0);
 381
 382   for (gsi_orig = gsi_start_phis (orig_bb),
 383        gsi_update = gsi_start_phis (update_bb);
 384        !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
 385        gsi_next (&gsi_orig), gsi_next (&gsi_update))
 386     {
 387       orig_phi = gsi_stmt (gsi_orig);
 388       update_phi = gsi_stmt (gsi_update);
 389
 390       /* Virtual phi; Mark it for renaming. We actually want to call
 391          mar_sym_for_renaming, but since all ssa renaming datastructures
 392          are going to be freed before we get to call ssa_update, we just
 393          record this name for now in a bitmap, and will mark it for
 394          renaming later.  */
 395       name = PHI_RESULT (orig_phi);
 396       if (!is_gimple_reg (SSA_NAME_VAR (name)))
 397         bitmap_set_bit (vect_memsyms_to_rename, DECL_UID (SSA_NAME_VAR (name)));
 398
 399       /** 1. Handle new-merge-point phis  **/
 400
 401       /* 1.1. Generate new phi node in NEW_MERGE_BB:  */
 402       new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
 403                                  new_merge_bb);
 404
 405       /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
 406             of LOOP. Set the two phi args in NEW_PHI for these edges:  */
 407       loop_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, EDGE_SUCC (loop->latch, 0));
 408       guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, loop_preheader_edge (loop));
 409
 410       add_phi_arg (new_phi, loop_arg, new_exit_e);
 411       add_phi_arg (new_phi, guard_arg, guard_edge);
 412
 413       /* 1.3. Update phi in successor block.  */
 414       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == loop_arg
 415                   || PHI_ARG_DEF_FROM_EDGE (update_phi, e) == guard_arg);
 416       SET_PHI_ARG_DEF (update_phi, e->dest_idx, PHI_RESULT (new_phi));
 417       update_phi2 = new_phi;
 418
 419
 420       /** 2. Handle loop-closed-ssa-form phis  **/
 421
 422       if (!is_gimple_reg (PHI_RESULT (orig_phi)))
 423         continue;
 424
 425       /* 2.1. Generate new phi node in NEW_EXIT_BB:  */
 426       new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
 427                                  *new_exit_bb);
 428
 429       /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop.  */
 430       add_phi_arg (new_phi, loop_arg, single_exit (loop));
 431
 432       /* 2.3. Update phi in successor of NEW_EXIT_BB:  */
 433       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
 434       SET_PHI_ARG_DEF (update_phi2, new_exit_e->dest_idx, PHI_RESULT (new_phi));
 435
 436       /* 2.4. Record the newly created name with set_current_def.
 437          We want to find a name such that
 438                 name = get_current_def (orig_loop_name)
 439          and to set its current definition as follows:
 440                 set_current_def (name, new_phi_name)
 441
 442          If LOOP is a new loop then loop_arg is already the name we're
 443          looking for. If LOOP is the original loop, then loop_arg is
 444          the orig_loop_name and the relevant name is recorded in its
 445          current reaching definition.  */
 446       if (is_new_loop)
 447         current_new_name = loop_arg;
 448       else
 449         {
 450           current_new_name = get_current_def (loop_arg);
 451           /* current_def is not available only if the variable does not
 452              change inside the loop, in which case we also don't care
 453              about recording a current_def for it because we won't be
 454              trying to create loop-exit-phis for it.  */
 455           if (!current_new_name)
 456             continue;
 457         }
 458       gcc_assert (get_current_def (current_new_name) == NULL_TREE);
 459
 460       set_current_def (current_new_name, PHI_RESULT (new_phi));
 461       bitmap_set_bit (*defs, SSA_NAME_VERSION (current_new_name));
 462     }
 463 }
 464
 465
 466 /* Function slpeel_update_phi_nodes_for_guard2
 467
 468    Input:
 469    - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
 470
 471    In the context of the overall structure, we have:
 472
 473         loop1_preheader_bb:
 474                 guard1 (goto loop1/merge1_bb)
 475         loop1
 476         loop1_exit_bb:
 477                 guard2 (goto merge1_bb/merge2_bb)
 478         merge1_bb
 479 LOOP->  loop2
 480         loop2_exit_bb
 481         merge2_bb
 482         next_bb
 483
 484    For each name used out side the loop (i.e - for each name that has an exit
 485    phi in next_bb) we create a new phi in:
 486    1. merge2_bb (to account for the edge from guard_bb)
 487    2. loop2_exit_bb (an exit-phi to keep LOOP in loop-closed form)
 488    3. guard2 bb (an exit phi to keep the preceding loop in loop-closed form),
 489       if needed (if it wasn't handled by slpeel_update_phis_nodes_for_phi1).
 490 */
 491
 492 static void
 493 slpeel_update_phi_nodes_for_guard2 (edge guard_edge, struct loop *loop,
 494                                     bool is_new_loop, basic_block *new_exit_bb)
 495 {
 496   gimple orig_phi, new_phi;
 497   gimple update_phi, update_phi2;
 498   tree guard_arg, loop_arg;
 499   basic_block new_merge_bb = guard_edge->dest;
 500   edge e = EDGE_SUCC (new_merge_bb, 0);
 501   basic_block update_bb = e->dest;
 502   edge new_exit_e;
 503   tree orig_def, orig_def_new_name;
 504   tree new_name, new_name2;
 505   tree arg;
 506   gimple_stmt_iterator gsi;
 507
 508   /* Create new bb between loop and new_merge_bb.  */
 509   *new_exit_bb = split_edge (single_exit (loop));
 510
 511   new_exit_e = EDGE_SUCC (*new_exit_bb, 0);
 512
 513   for (gsi = gsi_start_phis (update_bb); !gsi_end_p (gsi); gsi_next (&gsi))
 514     {
 515       update_phi = gsi_stmt (gsi);
 516       orig_phi = update_phi;
 517       orig_def = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
 518       /* This loop-closed-phi actually doesn't represent a use
 519          out of the loop - the phi arg is a constant.  */
 520       if (TREE_CODE (orig_def) != SSA_NAME)
 521         continue;
 522       orig_def_new_name = get_current_def (orig_def);
 523       arg = NULL_TREE;
 524
 525       /** 1. Handle new-merge-point phis  **/
 526
 527       /* 1.1. Generate new phi node in NEW_MERGE_BB:  */
 528       new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
 529                                  new_merge_bb);
 530
 531       /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
 532             of LOOP. Set the two PHI args in NEW_PHI for these edges:  */
 533       new_name = orig_def;
 534       new_name2 = NULL_TREE;
 535       if (orig_def_new_name)
 536         {
 537           new_name = orig_def_new_name;
 538           /* Some variables have both loop-entry-phis and loop-exit-phis.
 539              Such variables were given yet newer names by phis placed in
 540              guard_bb by slpeel_update_phi_nodes_for_guard1. I.e:
 541              new_name2 = get_current_def (get_current_def (orig_name)).  */
 542           new_name2 = get_current_def (new_name);
 543         }
 544
 545       if (is_new_loop)
 546         {
 547           guard_arg = orig_def;
 548           loop_arg = new_name;
 549         }
 550       else
 551         {
 552           guard_arg = new_name;
 553           loop_arg = orig_def;
 554         }
 555       if (new_name2)
 556         guard_arg = new_name2;
 557
 558       add_phi_arg (new_phi, loop_arg, new_exit_e);
 559       add_phi_arg (new_phi, guard_arg, guard_edge);
 560
 561       /* 1.3. Update phi in successor block.  */
 562       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == orig_def);
 563       SET_PHI_ARG_DEF (update_phi, e->dest_idx, PHI_RESULT (new_phi));
 564       update_phi2 = new_phi;
 565
 566
 567       /** 2. Handle loop-closed-ssa-form phis  **/
 568
 569       /* 2.1. Generate new phi node in NEW_EXIT_BB:  */
 570       new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
 571                                  *new_exit_bb);
 572
 573       /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop.  */
 574       add_phi_arg (new_phi, loop_arg, single_exit (loop));
 575
 576       /* 2.3. Update phi in successor of NEW_EXIT_BB:  */
 577       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
 578       SET_PHI_ARG_DEF (update_phi2, new_exit_e->dest_idx, PHI_RESULT (new_phi));
 579
 580
 581       /** 3. Handle loop-closed-ssa-form phis for first loop  **/
 582
 583       /* 3.1. Find the relevant names that need an exit-phi in
 584          GUARD_BB, i.e. names for which
 585          slpeel_update_phi_nodes_for_guard1 had not already created a
 586          phi node. This is the case for names that are used outside
 587          the loop (and therefore need an exit phi) but are not updated
 588          across loop iterations (and therefore don't have a
 589          loop-header-phi).
 590
 591          slpeel_update_phi_nodes_for_guard1 is responsible for
 592          creating loop-exit phis in GUARD_BB for names that have a
 593          loop-header-phi.  When such a phi is created we also record
 594          the new name in its current definition.  If this new name
 595          exists, then guard_arg was set to this new name (see 1.2
 596          above).  Therefore, if guard_arg is not this new name, this
 597          is an indication that an exit-phi in GUARD_BB was not yet
 598          created, so we take care of it here.  */
 599       if (guard_arg == new_name2)
 600         continue;
 601       arg = guard_arg;
 602
 603       /* 3.2. Generate new phi node in GUARD_BB:  */
 604       new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
 605                                  guard_edge->src);
 606
 607       /* 3.3. GUARD_BB has one incoming edge:  */
 608       gcc_assert (EDGE_COUNT (guard_edge->src->preds) == 1);
 609       add_phi_arg (new_phi, arg, EDGE_PRED (guard_edge->src, 0));
 610
 611       /* 3.4. Update phi in successor of GUARD_BB:  */
 612       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, guard_edge)
 613                                                                 == guard_arg);
 614       SET_PHI_ARG_DEF (update_phi2, guard_edge->dest_idx, PHI_RESULT (new_phi));
 615     }
 616 }
 617
 618
 619 /* Make the LOOP iterate NITERS times. This is done by adding a new IV
 620    that starts at zero, increases by one and its limit is NITERS.
 621
 622    Assumption: the exit-condition of LOOP is the last stmt in the loop.  */
 623
 624 void
 625 slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
 626 {
 627   tree indx_before_incr, indx_after_incr;
 628   gimple cond_stmt;
 629   gimple orig_cond;
 630   edge exit_edge = single_exit (loop);
 631   gimple_stmt_iterator loop_cond_gsi;
 632   gimple_stmt_iterator incr_gsi;
 633   bool insert_after;
 634   tree init = build_int_cst (TREE_TYPE (niters), 0);
 635   tree step = build_int_cst (TREE_TYPE (niters), 1);
 636   LOC loop_loc;
 637   enum tree_code code;
 638
 639   orig_cond = get_loop_exit_condition (loop);
 640   gcc_assert (orig_cond);
 641   loop_cond_gsi = gsi_for_stmt (orig_cond);
 642
 643   standard_iv_increment_position (loop, &incr_gsi, &insert_after);
 644   create_iv (init, step, NULL_TREE, loop,
 645              &incr_gsi, insert_after, &indx_before_incr, &indx_after_incr);
 646
 647   indx_after_incr = force_gimple_operand_gsi (&loop_cond_gsi, indx_after_incr,
 648                                               true, NULL_TREE, true,
 649                                               GSI_SAME_STMT);
 650   niters = force_gimple_operand_gsi (&loop_cond_gsi, niters, true, NULL_TREE,
 651                                      true, GSI_SAME_STMT);
 652
 653   code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR;
 654   cond_stmt = gimple_build_cond (code, indx_after_incr, niters, NULL_TREE,
 655                                  NULL_TREE);
 656
 657   gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);
 658
 659   /* Remove old loop exit test:  */
 660   gsi_remove (&loop_cond_gsi, true);
 661
 662   loop_loc = find_loop_location (loop);
 663   if (dump_file && (dump_flags & TDF_DETAILS))
 664     {
 665       if (loop_loc != UNKNOWN_LOC)
 666         fprintf (dump_file, "\nloop at %s:%d: ",
 667                  LOC_FILE (loop_loc), LOC_LINE (loop_loc));
 668       print_gimple_stmt (dump_file, cond_stmt, 0, TDF_SLIM);
 669     }
 670
 671   loop->nb_iterations = niters;
 672 }
 673
 674
 675 /* Given LOOP this function generates a new copy of it and puts it
 676    on E which is either the entry or exit of LOOP.  */
 677
 678 struct loop *
 679 slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, edge e)
 680 {
 681   struct loop *new_loop;
 682   basic_block *new_bbs, *bbs;
 683   bool at_exit;
 684   bool was_imm_dom;
 685   basic_block exit_dest;
 686   gimple phi;
 687   tree phi_arg;
 688   edge exit, new_exit;
 689   gimple_stmt_iterator gsi;
 690
 691   at_exit = (e == single_exit (loop));
 692   if (!at_exit && e != loop_preheader_edge (loop))
 693     return NULL;
 694
 695   bbs = get_loop_body (loop);
 696
 697   /* Check whether duplication is possible.  */
 698   if (!can_copy_bbs_p (bbs, loop->num_nodes))
 699     {
 700       free (bbs);
 701       return NULL;
 702     }
 703
 704   /* Generate new loop structure.  */
 705   new_loop = duplicate_loop (loop, loop_outer (loop));
 706   if (!new_loop)
 707     {
 708       free (bbs);
 709       return NULL;
 710     }
 711
 712   exit_dest = single_exit (loop)->dest;
 713   was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
 714                                           exit_dest) == loop->header ?
 715                  true : false);
 716
 717   new_bbs = XNEWVEC (basic_block, loop->num_nodes);
 718
 719   exit = single_exit (loop);
 720   copy_bbs (bbs, loop->num_nodes, new_bbs,
 721             &exit, 1, &new_exit, NULL,
 722             e->src);
 723
 724   /* Duplicating phi args at exit bbs as coming
 725      also from exit of duplicated loop.  */
 726   for (gsi = gsi_start_phis (exit_dest); !gsi_end_p (gsi); gsi_next (&gsi))
 727     {
 728       phi = gsi_stmt (gsi);
 729       phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, single_exit (loop));
 730       if (phi_arg)
 731         {
 732           edge new_loop_exit_edge;
 733
 734           if (EDGE_SUCC (new_loop->header, 0)->dest == new_loop->latch)
 735             new_loop_exit_edge = EDGE_SUCC (new_loop->header, 1);
 736           else
 737             new_loop_exit_edge = EDGE_SUCC (new_loop->header, 0);
 738
 739           add_phi_arg (phi, phi_arg, new_loop_exit_edge);
 740         }
 741     }
 742
 743   if (at_exit) /* Add the loop copy at exit.  */
 744     {
 745       redirect_edge_and_branch_force (e, new_loop->header);
 746       PENDING_STMT (e) = NULL;
 747       set_immediate_dominator (CDI_DOMINATORS, new_loop->header, e->src);
 748       if (was_imm_dom)
 749         set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_loop->header);
 750     }
 751   else /* Add the copy at entry.  */
 752     {
 753       edge new_exit_e;
 754       edge entry_e = loop_preheader_edge (loop);
 755       basic_block preheader = entry_e->src;
 756
 757       if (!flow_bb_inside_loop_p (new_loop,
 758                                   EDGE_SUCC (new_loop->header, 0)->dest))
 759         new_exit_e = EDGE_SUCC (new_loop->header, 0);
 760       else
 761         new_exit_e = EDGE_SUCC (new_loop->header, 1);
 762
 763       redirect_edge_and_branch_force (new_exit_e, loop->header);
 764       PENDING_STMT (new_exit_e) = NULL;
 765       set_immediate_dominator (CDI_DOMINATORS, loop->header,
 766                                new_exit_e->src);
 767
 768       /* We have to add phi args to the loop->header here as coming
 769          from new_exit_e edge.  */
 770       for (gsi = gsi_start_phis (loop->header);
 771            !gsi_end_p (gsi);
 772            gsi_next (&gsi))
 773         {
 774           phi = gsi_stmt (gsi);
 775           phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, entry_e);
 776           if (phi_arg)
 777             add_phi_arg (phi, phi_arg, new_exit_e);
 778         }
 779
 780       redirect_edge_and_branch_force (entry_e, new_loop->header);
 781       PENDING_STMT (entry_e) = NULL;
 782       set_immediate_dominator (CDI_DOMINATORS, new_loop->header, preheader);
 783     }
 784
 785   free (new_bbs);
 786   free (bbs);
 787
 788   return new_loop;
 789 }
 790
 791
 792 /* Given the condition statement COND, put it as the last statement
 793    of GUARD_BB; EXIT_BB is the basic block to skip the loop;
 794    Assumes that this is the single exit of the guarded loop.
 795    Returns the skip edge, inserts new stmts on the COND_EXPR_STMT_LIST.  */
 796
 797 static edge
 798 slpeel_add_loop_guard (basic_block guard_bb, tree cond,
 799                        gimple_seq cond_expr_stmt_list,
 800                        basic_block exit_bb, basic_block dom_bb)
 801 {
 802   gimple_stmt_iterator gsi;
 803   edge new_e, enter_e;
 804   gimple cond_stmt;
 805   gimple_seq gimplify_stmt_list = NULL;
 806
 807   enter_e = EDGE_SUCC (guard_bb, 0);
 808   enter_e->flags &= ~EDGE_FALLTHRU;
 809   enter_e->flags |= EDGE_FALSE_VALUE;
 810   gsi = gsi_last_bb (guard_bb);
 811
 812   cond = force_gimple_operand (cond, &gimplify_stmt_list, true, NULL_TREE);
 813   if (gimplify_stmt_list)
 814     gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
 815   cond_stmt = gimple_build_cond (NE_EXPR,
 816                                  cond, build_int_cst (TREE_TYPE (cond), 0),
 817                                  NULL_TREE, NULL_TREE);
 818   if (cond_expr_stmt_list)
 819     gsi_insert_seq_after (&gsi, cond_expr_stmt_list, GSI_NEW_STMT);
 820
 821   gsi = gsi_last_bb (guard_bb);
 822   gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
 823
 824   /* Add new edge to connect guard block to the merge/loop-exit block.  */
 825   new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);
 826   set_immediate_dominator (CDI_DOMINATORS, exit_bb, dom_bb);
 827   return new_e;
 828 }
 829
 830
 831 /* This function verifies that the following restrictions apply to LOOP:
 832    (1) it is innermost
 833    (2) it consists of exactly 2 basic blocks - header, and an empty latch.
 834    (3) it is single entry, single exit
 835    (4) its exit condition is the last stmt in the header
 836    (5) E is the entry/exit edge of LOOP.
 837  */
 838
 839 bool
 840 slpeel_can_duplicate_loop_p (const struct loop *loop, const_edge e)
 841 {
 842   edge exit_e = single_exit (loop);
 843   edge entry_e = loop_preheader_edge (loop);
 844   gimple orig_cond = get_loop_exit_condition (loop);
 845   gimple_stmt_iterator loop_exit_gsi = gsi_last_bb (exit_e->src);
 846
 847   if (need_ssa_update_p (cfun))
 848     return false;
 849
 850   if (loop->inner
 851       /* All loops have an outer scope; the only case loop->outer is NULL is for
 852          the function itself.  */
 853       || !loop_outer (loop)
 854       || loop->num_nodes != 2
 855       || !empty_block_p (loop->latch)
 856       || !single_exit (loop)
 857       /* Verify that new loop exit condition can be trivially modified.  */
 858       || (!orig_cond || orig_cond != gsi_stmt (loop_exit_gsi))
 859       || (e != exit_e && e != entry_e))
 860     return false;
 861
 862   return true;
 863 }
 864
 865 #ifdef ENABLE_CHECKING
 866 static void
 867 slpeel_verify_cfg_after_peeling (struct loop *first_loop,
 868                                  struct loop *second_loop)
 869 {
 870   basic_block loop1_exit_bb = single_exit (first_loop)->dest;
 871   basic_block loop2_entry_bb = loop_preheader_edge (second_loop)->src;
 872   basic_block loop1_entry_bb = loop_preheader_edge (first_loop)->src;
 873
 874   /* A guard that controls whether the second_loop is to be executed or skipped
 875      is placed in first_loop->exit.  first_loop->exit therefore has two
 876      successors - one is the preheader of second_loop, and the other is a bb
 877      after second_loop.
 878    */
 879   gcc_assert (EDGE_COUNT (loop1_exit_bb->succs) == 2);
 880
 881   /* 1. Verify that one of the successors of first_loop->exit is the preheader
 882         of second_loop.  */
 883
 884   /* The preheader of new_loop is expected to have two predecessors:
 885      first_loop->exit and the block that precedes first_loop.  */
 886
 887   gcc_assert (EDGE_COUNT (loop2_entry_bb->preds) == 2
 888               && ((EDGE_PRED (loop2_entry_bb, 0)->src == loop1_exit_bb
 889                    && EDGE_PRED (loop2_entry_bb, 1)->src == loop1_entry_bb)
 890                || (EDGE_PRED (loop2_entry_bb, 1)->src ==  loop1_exit_bb
 891                    && EDGE_PRED (loop2_entry_bb, 0)->src == loop1_entry_bb)));
 892
 893   /* Verify that the other successor of first_loop->exit is after the
 894      second_loop.  */
 895   /* TODO */
 896 }
 897 #endif
 898
 899 /* If the run time cost model check determines that vectorization is
 900    not profitable and hence scalar loop should be generated then set
 901    FIRST_NITERS to prologue peeled iterations. This will allow all the
 902    iterations to be executed in the prologue peeled scalar loop.  */
 903
 904 static void
 905 set_prologue_iterations (basic_block bb_before_first_loop,
 906                          tree first_niters,
 907                          struct loop *loop,
 908                          unsigned int th)
 909 {
 910   edge e;
 911   basic_block cond_bb, then_bb;
 912   tree var, prologue_after_cost_adjust_name;
 913   gimple_stmt_iterator gsi;
 914   gimple newphi;
 915   edge e_true, e_false, e_fallthru;
 916   gimple cond_stmt;
 917   gimple_seq gimplify_stmt_list = NULL, stmts = NULL;
 918   tree cost_pre_condition = NULL_TREE;
 919   tree scalar_loop_iters =
 920     unshare_expr (LOOP_VINFO_NITERS_UNCHANGED (loop_vec_info_for_loop (loop)));
 921
 922   e = single_pred_edge (bb_before_first_loop);
 923   cond_bb = split_edge(e);
 924
 925   e = single_pred_edge (bb_before_first_loop);
 926   then_bb = split_edge(e);
 927   set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
 928
 929   e_false = make_single_succ_edge (cond_bb, bb_before_first_loop,
 930                                    EDGE_FALSE_VALUE);
 931   set_immediate_dominator (CDI_DOMINATORS, bb_before_first_loop, cond_bb);
 932
 933   e_true = EDGE_PRED (then_bb, 0);
 934   e_true->flags &= ~EDGE_FALLTHRU;
 935   e_true->flags |= EDGE_TRUE_VALUE;
 936
 937   e_fallthru = EDGE_SUCC (then_bb, 0);
 938
 939   cost_pre_condition =
 940     fold_build2 (LE_EXPR, boolean_type_node, scalar_loop_iters,
 941                  build_int_cst (TREE_TYPE (scalar_loop_iters), th));
 942   cost_pre_condition =
 943     force_gimple_operand (cost_pre_condition, &gimplify_stmt_list,
 944                           true, NULL_TREE);
 945   cond_stmt = gimple_build_cond (NE_EXPR, cost_pre_condition,
 946                                  build_int_cst (TREE_TYPE (cost_pre_condition),
 947                                                 0), NULL_TREE, NULL_TREE);
 948
 949   gsi = gsi_last_bb (cond_bb);
 950   if (gimplify_stmt_list)
 951     gsi_insert_seq_after (&gsi, gimplify_stmt_list, GSI_NEW_STMT);
 952
 953   gsi = gsi_last_bb (cond_bb);
 954   gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
 955
 956   var = create_tmp_var (TREE_TYPE (scalar_loop_iters),
 957                         "prologue_after_cost_adjust");
 958   add_referenced_var (var);
 959   prologue_after_cost_adjust_name =
 960     force_gimple_operand (scalar_loop_iters, &stmts, false, var);
 961
 962   gsi = gsi_last_bb (then_bb);
 963   if (stmts)
 964     gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT);
 965
 966   newphi = create_phi_node (var, bb_before_first_loop);
 967   add_phi_arg (newphi, prologue_after_cost_adjust_name, e_fallthru);
 968   add_phi_arg (newphi, first_niters, e_false);
 969
 970   first_niters = PHI_RESULT (newphi);
 971 }
 972
 973
 974 /* Function slpeel_tree_peel_loop_to_edge.
 975
 976    Peel the first (last) iterations of LOOP into a new prolog (epilog) loop
 977    that is placed on the entry (exit) edge E of LOOP. After this transformation
 978    we have two loops one after the other - first-loop iterates FIRST_NITERS
 979    times, and second-loop iterates the remainder NITERS - FIRST_NITERS times.
 980    If the cost model indicates that it is profitable to emit a scalar
 981    loop instead of the vector one, then the prolog (epilog) loop will iterate
 982    for the entire unchanged scalar iterations of the loop.
 983
 984    Input:
 985    - LOOP: the loop to be peeled.
 986    - E: the exit or entry edge of LOOP.
 987         If it is the entry edge, we peel the first iterations of LOOP. In this
 988         case first-loop is LOOP, and second-loop is the newly created loop.
 989         If it is the exit edge, we peel the last iterations of LOOP. In this
 990         case, first-loop is the newly created loop, and second-loop is LOOP.
 991    - NITERS: the number of iterations that LOOP iterates.
 992    - FIRST_NITERS: the number of iterations that the first-loop should iterate.
 993    - UPDATE_FIRST_LOOP_COUNT:  specified whether this function is responsible
 994         for updating the loop bound of the first-loop to FIRST_NITERS.  If it
 995         is false, the caller of this function may want to take care of this
 996         (this can be useful if we don't want new stmts added to first-loop).
 997    - TH: cost model profitability threshold of iterations for vectorization.
 998    - CHECK_PROFITABILITY: specify whether cost model check has not occurred
 999                           during versioning and hence needs to occur during
1000                           prologue generation or whether cost model check
1001                           has not occurred during prologue generation and hence
1002                           needs to occur during epilogue generation.
1003
1004
1005    Output:
1006    The function returns a pointer to the new loop-copy, or NULL if it failed
1007    to perform the transformation.
1008
1009    The function generates two if-then-else guards: one before the first loop,
1010    and the other before the second loop:
1011    The first guard is:
1012      if (FIRST_NITERS == 0) then skip the first loop,
1013      and go directly to the second loop.
1014    The second guard is:
1015      if (FIRST_NITERS == NITERS) then skip the second loop.
1016
1017    If the optional COND_EXPR and COND_EXPR_STMT_LIST arguments are given
1018    then the generated condition is combined with COND_EXPR and the
1019    statements in COND_EXPR_STMT_LIST are emitted together with it.
1020
1021    FORNOW only simple loops are supported (see slpeel_can_duplicate_loop_p).
1022    FORNOW the resulting code will not be in loop-closed-ssa form.
1023 */
1024
1025 static struct loop*
1026 slpeel_tree_peel_loop_to_edge (struct loop *loop,
1027                                edge e, tree first_niters,
1028                                tree niters, bool update_first_loop_count,
1029                                unsigned int th, bool check_profitability,
1030                                tree cond_expr, gimple_seq cond_expr_stmt_list)
1031 {
1032   struct loop *new_loop = NULL, *first_loop, *second_loop;
1033   edge skip_e;
1034   tree pre_condition = NULL_TREE;
1035   bitmap definitions;
1036   basic_block bb_before_second_loop, bb_after_second_loop;
1037   basic_block bb_before_first_loop;
1038   basic_block bb_between_loops;
1039   basic_block new_exit_bb;
1040   edge exit_e = single_exit (loop);
1041   LOC loop_loc;
1042   tree cost_pre_condition = NULL_TREE;
1043
1044   if (!slpeel_can_duplicate_loop_p (loop, e))
1045     return NULL;
1046
1047   /* We have to initialize cfg_hooks. Then, when calling
1048    cfg_hooks->split_edge, the function tree_split_edge
1049    is actually called and, when calling cfg_hooks->duplicate_block,
1050    the function tree_duplicate_bb is called.  */
1051   gimple_register_cfg_hooks ();
1052
1053
1054   /* 1. Generate a copy of LOOP and put it on E (E is the entry/exit of LOOP).
1055         Resulting CFG would be:
1056
1057         first_loop:
1058         do {
1059         } while ...
1060
1061         second_loop:
1062         do {
1063         } while ...
1064
1065         orig_exit_bb:
1066    */
1067
1068   if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, e)))
1069     {
1070       loop_loc = find_loop_location (loop);
1071       if (dump_file && (dump_flags & TDF_DETAILS))
1072         {
1073           if (loop_loc != UNKNOWN_LOC)
1074             fprintf (dump_file, "\n%s:%d: note: ",
1075                      LOC_FILE (loop_loc), LOC_LINE (loop_loc));
1076           fprintf (dump_file, "tree_duplicate_loop_to_edge_cfg failed.\n");
1077         }
1078       return NULL;
1079     }
1080
1081   if (e == exit_e)
1082     {
1083       /* NEW_LOOP was placed after LOOP.  */
1084       first_loop = loop;
1085       second_loop = new_loop;
1086     }
1087   else
1088     {
1089       /* NEW_LOOP was placed before LOOP.  */
1090       first_loop = new_loop;
1091       second_loop = loop;
1092     }
1093
1094   definitions = ssa_names_to_replace ();
1095   slpeel_update_phis_for_duplicate_loop (loop, new_loop, e == exit_e);
1096   rename_variables_in_loop (new_loop);
1097
1098
1099   /* 2.  Add the guard code in one of the following ways:
1100
1101      2.a Add the guard that controls whether the first loop is executed.
1102          This occurs when this function is invoked for prologue or epilogue
1103          generation and when the cost model check can be done at compile time.
1104
1105          Resulting CFG would be:
1106
1107          bb_before_first_loop:
1108          if (FIRST_NITERS == 0) GOTO bb_before_second_loop
1109                                 GOTO first-loop
1110
1111          first_loop:
1112          do {
1113          } while ...
1114
1115          bb_before_second_loop:
1116
1117          second_loop:
1118          do {
1119          } while ...
1120
1121          orig_exit_bb:
1122
1123      2.b Add the cost model check that allows the prologue
1124          to iterate for the entire unchanged scalar
1125          iterations of the loop in the event that the cost
1126          model indicates that the scalar loop is more
1127          profitable than the vector one. This occurs when
1128          this function is invoked for prologue generation
1129          and the cost model check needs to be done at run
1130          time.
1131
1132          Resulting CFG after prologue peeling would be:
1133
1134          if (scalar_loop_iterations <= th)
1135            FIRST_NITERS = scalar_loop_iterations
1136
1137          bb_before_first_loop:
1138          if (FIRST_NITERS == 0) GOTO bb_before_second_loop
1139                                 GOTO first-loop
1140
1141          first_loop:
1142          do {
1143          } while ...
1144
1145          bb_before_second_loop:
1146
1147          second_loop:
1148          do {
1149          } while ...
1150
1151          orig_exit_bb:
1152
1153      2.c Add the cost model check that allows the epilogue
1154          to iterate for the entire unchanged scalar
1155          iterations of the loop in the event that the cost
1156          model indicates that the scalar loop is more
1157          profitable than the vector one. This occurs when
1158          this function is invoked for epilogue generation
1159          and the cost model check needs to be done at run
1160          time.  This check is combined with any pre-existing
1161          check in COND_EXPR to avoid versioning.
1162
1163          Resulting CFG after prologue peeling would be:
1164
1165          bb_before_first_loop:
1166          if ((scalar_loop_iterations <= th)
1167              ||
1168              FIRST_NITERS == 0) GOTO bb_before_second_loop
1169                                 GOTO first-loop
1170
1171          first_loop:
1172          do {
1173          } while ...
1174
1175          bb_before_second_loop:
1176
1177          second_loop:
1178          do {
1179          } while ...
1180
1181          orig_exit_bb:
1182   */
1183
1184   bb_before_first_loop = split_edge (loop_preheader_edge (first_loop));
1185   bb_before_second_loop = split_edge (single_exit (first_loop));
1186
1187   /* Epilogue peeling.  */
1188   if (!update_first_loop_count)
1189     {
1190       pre_condition =
1191         fold_build2 (LE_EXPR, boolean_type_node, first_niters,
1192                      build_int_cst (TREE_TYPE (first_niters), 0));
1193       if (check_profitability)
1194         {
1195           tree scalar_loop_iters
1196             = unshare_expr (LOOP_VINFO_NITERS_UNCHANGED
1197                                         (loop_vec_info_for_loop (loop)));
1198           cost_pre_condition =
1199             fold_build2 (LE_EXPR, boolean_type_node, scalar_loop_iters,
1200                          build_int_cst (TREE_TYPE (scalar_loop_iters), th));
1201
1202           pre_condition = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
1203                                        cost_pre_condition, pre_condition);
1204         }
1205       if (cond_expr)
1206         {
1207           pre_condition =
1208             fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
1209                          pre_condition,
1210                          fold_build1 (TRUTH_NOT_EXPR, boolean_type_node,
1211                                       cond_expr));
1212         }
1213     }
1214
1215   /* Prologue peeling.  */
1216   else
1217     {
1218       if (check_profitability)
1219         set_prologue_iterations (bb_before_first_loop, first_niters,
1220                                  loop, th);
1221
1222       pre_condition =
1223         fold_build2 (LE_EXPR, boolean_type_node, first_niters,
1224                      build_int_cst (TREE_TYPE (first_niters), 0));
1225     }
1226
1227   skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
1228                                   cond_expr_stmt_list,
1229                                   bb_before_second_loop, bb_before_first_loop);
1230   slpeel_update_phi_nodes_for_guard1 (skip_e, first_loop,
1231                                       first_loop == new_loop,
1232                                       &new_exit_bb, &definitions);
1233
1234
1235   /* 3. Add the guard that controls whether the second loop is executed.
1236         Resulting CFG would be:
1237
1238         bb_before_first_loop:
1239         if (FIRST_NITERS == 0) GOTO bb_before_second_loop (skip first loop)
1240                                GOTO first-loop
1241
1242         first_loop:
1243         do {
1244         } while ...
1245
1246         bb_between_loops:
1247         if (FIRST_NITERS == NITERS) GOTO bb_after_second_loop (skip second loop)
1248                                     GOTO bb_before_second_loop
1249
1250         bb_before_second_loop:
1251
1252         second_loop:
1253         do {
1254         } while ...
1255
1256         bb_after_second_loop:
1257
1258         orig_exit_bb:
1259    */
1260
1261   bb_between_loops = new_exit_bb;
1262   bb_after_second_loop = split_edge (single_exit (second_loop));
1263
1264   pre_condition =
1265         fold_build2 (EQ_EXPR, boolean_type_node, first_niters, niters);
1266   skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition, NULL,
1267                                   bb_after_second_loop, bb_before_first_loop);
1268   slpeel_update_phi_nodes_for_guard2 (skip_e, second_loop,
1269                                      second_loop == new_loop, &new_exit_bb);
1270
1271   /* 4. Make first-loop iterate FIRST_NITERS times, if requested.
1272    */
1273   if (update_first_loop_count)
1274     slpeel_make_loop_iterate_ntimes (first_loop, first_niters);
1275
1276   BITMAP_FREE (definitions);
1277   delete_update_ssa ();
1278
1279   return new_loop;
1280 }
1281
1282 /* Function vect_get_loop_location.
1283
1284    Extract the location of the loop in the source code.
1285    If the loop is not well formed for vectorization, an estimated
1286    location is calculated.
1287    Return the loop location if succeed and NULL if not.  */
1288
1289 LOC
1290 find_loop_location (struct loop *loop)
1291 {
1292   gimple stmt = NULL;
1293   basic_block bb;
1294   gimple_stmt_iterator si;
1295
1296   if (!loop)
1297     return UNKNOWN_LOC;
1298
1299   stmt = get_loop_exit_condition (loop);
1300
1301   if (stmt && gimple_location (stmt) != UNKNOWN_LOC)
1302     return gimple_location (stmt);
1303
1304   /* If we got here the loop is probably not "well formed",
1305      try to estimate the loop location */
1306
1307   if (!loop->header)
1308     return UNKNOWN_LOC;
1309
1310   bb = loop->header;
1311
1312   for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1313     {
1314       stmt = gsi_stmt (si);
1315       if (gimple_location (stmt) != UNKNOWN_LOC)
1316         return gimple_location (stmt);
1317     }
1318
1319   return UNKNOWN_LOC;
1320 }
1321
1322
1323 /* This function builds ni_name = number of iterations loop executes
1324    on the loop preheader.  If SEQ is given the stmt is instead emitted
1325    there.  */
1326
1327 static tree
1328 vect_build_loop_niters (loop_vec_info loop_vinfo, gimple_seq seq)
1329 {
1330   tree ni_name, var;
1331   gimple_seq stmts = NULL;
1332   edge pe;
1333   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1334   tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
1335
1336   var = create_tmp_var (TREE_TYPE (ni), "niters");
1337   add_referenced_var (var);
1338   ni_name = force_gimple_operand (ni, &stmts, false, var);
1339
1340   pe = loop_preheader_edge (loop);
1341   if (stmts)
1342     {
1343       if (seq)
1344         gimple_seq_add_seq (&seq, stmts);
1345       else
1346         {
1347           basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
1348           gcc_assert (!new_bb);
1349         }
1350     }
1351
1352   return ni_name;
1353 }
1354
1355
1356 /* This function generates the following statements:
1357
1358  ni_name = number of iterations loop executes
1359  ratio = ni_name / vf
1360  ratio_mult_vf_name = ratio * vf
1361
1362  and places them at the loop preheader edge or in COND_EXPR_STMT_LIST
1363  if that is non-NULL.  */
1364
1365 static void
1366 vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
1367                                  tree *ni_name_ptr,
1368                                  tree *ratio_mult_vf_name_ptr,
1369                                  tree *ratio_name_ptr,
1370                                  gimple_seq cond_expr_stmt_list)
1371 {
1372
1373   edge pe;
1374   basic_block new_bb;
1375   gimple_seq stmts;
1376   tree ni_name;
1377   tree var;
1378   tree ratio_name;
1379   tree ratio_mult_vf_name;
1380   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1381   tree ni = LOOP_VINFO_NITERS (loop_vinfo);
1382   int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1383   tree log_vf;
1384
1385   pe = loop_preheader_edge (loop);
1386
1387   /* Generate temporary variable that contains
1388      number of iterations loop executes.  */
1389
1390   ni_name = vect_build_loop_niters (loop_vinfo, cond_expr_stmt_list);
1391   log_vf = build_int_cst (TREE_TYPE (ni), exact_log2 (vf));
1392
1393   /* Create: ratio = ni >> log2(vf) */
1394
1395   ratio_name = fold_build2 (RSHIFT_EXPR, TREE_TYPE (ni_name), ni_name, log_vf);
1396   if (!is_gimple_val (ratio_name))
1397     {
1398       var = create_tmp_var (TREE_TYPE (ni), "bnd");
1399       add_referenced_var (var);
1400
1401       stmts = NULL;
1402       ratio_name = force_gimple_operand (ratio_name, &stmts, true, var);
1403       if (cond_expr_stmt_list)
1404         gimple_seq_add_seq (&cond_expr_stmt_list, stmts);
1405       else
1406         {
1407           pe = loop_preheader_edge (loop);
1408           new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
1409           gcc_assert (!new_bb);
1410         }
1411     }
1412
1413   /* Create: ratio_mult_vf = ratio << log2 (vf).  */
1414
1415   ratio_mult_vf_name = fold_build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name),
1416                                     ratio_name, log_vf);
1417   if (!is_gimple_val (ratio_mult_vf_name))
1418     {
1419       var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
1420       add_referenced_var (var);
1421
1422       stmts = NULL;
1423       ratio_mult_vf_name = force_gimple_operand (ratio_mult_vf_name, &stmts,
1424                                                  true, var);
1425       if (cond_expr_stmt_list)
1426         gimple_seq_add_seq (&cond_expr_stmt_list, stmts);
1427       else
1428         {
1429           pe = loop_preheader_edge (loop);
1430           new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
1431           gcc_assert (!new_bb);
1432         }
1433     }
1434
1435   *ni_name_ptr = ni_name;
1436   *ratio_mult_vf_name_ptr = ratio_mult_vf_name;
1437   *ratio_name_ptr = ratio_name;
1438
1439   return;
1440 }
1441
1442 /* Function vect_can_advance_ivs_p
1443
1444    In case the number of iterations that LOOP iterates is unknown at compile
1445    time, an epilog loop will be generated, and the loop induction variables
1446    (IVs) will be "advanced" to the value they are supposed to take just before
1447    the epilog loop.  Here we check that the access function of the loop IVs
1448    and the expression that represents the loop bound are simple enough.
1449    These restrictions will be relaxed in the future.  */
1450
1451 bool
1452 vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
1453 {
1454   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1455   basic_block bb = loop->header;
1456   gimple phi;
1457   gimple_stmt_iterator gsi;
1458
1459   /* Analyze phi functions of the loop header.  */
1460
1461   if (vect_print_dump_info (REPORT_DETAILS))
1462     fprintf (vect_dump, "vect_can_advance_ivs_p:");
1463
1464   for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1465     {
1466       tree access_fn = NULL;
1467       tree evolution_part;
1468
1469       phi = gsi_stmt (gsi);
1470       if (vect_print_dump_info (REPORT_DETAILS))
1471         {
1472           fprintf (vect_dump, "Analyze phi: ");
1473           print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
1474         }
1475
1476       /* Skip virtual phi's. The data dependences that are associated with
1477          virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */
1478
1479       if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
1480         {
1481           if (vect_print_dump_info (REPORT_DETAILS))
1482             fprintf (vect_dump, "virtual phi. skip.");
1483           continue;
1484         }
1485
1486       /* Skip reduction phis.  */
1487
1488       if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
1489         {
1490           if (vect_print_dump_info (REPORT_DETAILS))
1491             fprintf (vect_dump, "reduc phi. skip.");
1492           continue;
1493         }
1494
1495       /* Analyze the evolution function.  */
1496
1497       access_fn = instantiate_parameters
1498         (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
1499
1500       if (!access_fn)
1501         {
1502           if (vect_print_dump_info (REPORT_DETAILS))
1503             fprintf (vect_dump, "No Access function.");
1504           return false;
1505         }
1506
1507       if (vect_print_dump_info (REPORT_DETAILS))
1508         {
1509           fprintf (vect_dump, "Access function of PHI: ");
1510           print_generic_expr (vect_dump, access_fn, TDF_SLIM);
1511         }
1512
1513       evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
1514
1515       if (evolution_part == NULL_TREE)
1516         {
1517           if (vect_print_dump_info (REPORT_DETAILS))
1518             fprintf (vect_dump, "No evolution.");
1519           return false;
1520         }
1521
1522       /* FORNOW: We do not transform initial conditions of IVs
1523          which evolution functions are a polynomial of degree >= 2.  */
1524
1525       if (tree_is_chrec (evolution_part))
1526         return false;
1527     }
1528
1529   return true;
1530 }
1531
1532
1533 /*   Function vect_update_ivs_after_vectorizer.
1534
1535      "Advance" the induction variables of LOOP to the value they should take
1536      after the execution of LOOP.  This is currently necessary because the
1537      vectorizer does not handle induction variables that are used after the
1538      loop.  Such a situation occurs when the last iterations of LOOP are
1539      peeled, because:
1540      1. We introduced new uses after LOOP for IVs that were not originally used
1541         after LOOP: the IVs of LOOP are now used by an epilog loop.
1542      2. LOOP is going to be vectorized; this means that it will iterate N/VF
1543         times, whereas the loop IVs should be bumped N times.
1544
1545      Input:
1546      - LOOP - a loop that is going to be vectorized. The last few iterations
1547               of LOOP were peeled.
1548      - NITERS - the number of iterations that LOOP executes (before it is
1549                 vectorized). i.e, the number of times the ivs should be bumped.
1550      - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
1551                   coming out from LOOP on which there are uses of the LOOP ivs
1552                   (this is the path from LOOP->exit to epilog_loop->preheader).
1553
1554                   The new definitions of the ivs are placed in LOOP->exit.
1555                   The phi args associated with the edge UPDATE_E in the bb
1556                   UPDATE_E->dest are updated accordingly.
1557
1558      Assumption 1: Like the rest of the vectorizer, this function assumes
1559      a single loop exit that has a single predecessor.
1560
1561      Assumption 2: The phi nodes in the LOOP header and in update_bb are
1562      organized in the same order.
1563
1564      Assumption 3: The access function of the ivs is simple enough (see
1565      vect_can_advance_ivs_p).  This assumption will be relaxed in the future.
1566
1567      Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
1568      coming out of LOOP on which the ivs of LOOP are used (this is the path
1569      that leads to the epilog loop; other paths skip the epilog loop).  This
1570      path starts with the edge UPDATE_E, and its destination (denoted update_bb)
1571      needs to have its phis updated.
1572  */
1573
1574 static void
1575 vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters,
1576                                   edge update_e)
1577 {
1578   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1579   basic_block exit_bb = single_exit (loop)->dest;
1580   gimple phi, phi1;
1581   gimple_stmt_iterator gsi, gsi1;
1582   basic_block update_bb = update_e->dest;
1583
1584   /* gcc_assert (vect_can_advance_ivs_p (loop_vinfo)); */
1585
1586   /* Make sure there exists a single-predecessor exit bb:  */
1587   gcc_assert (single_pred_p (exit_bb));
1588
1589   for (gsi = gsi_start_phis (loop->header), gsi1 = gsi_start_phis (update_bb);
1590        !gsi_end_p (gsi) && !gsi_end_p (gsi1);
1591        gsi_next (&gsi), gsi_next (&gsi1))
1592     {
1593       tree access_fn = NULL;
1594       tree evolution_part;
1595       tree init_expr;
1596       tree step_expr;
1597       tree var, ni, ni_name;
1598       gimple_stmt_iterator last_gsi;
1599
1600       phi = gsi_stmt (gsi);
1601       phi1 = gsi_stmt (gsi1);
1602       if (vect_print_dump_info (REPORT_DETAILS))
1603         {
1604           fprintf (vect_dump, "vect_update_ivs_after_vectorizer: phi: ");
1605           print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
1606         }
1607
1608       /* Skip virtual phi's.  */
1609       if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
1610         {
1611           if (vect_print_dump_info (REPORT_DETAILS))
1612             fprintf (vect_dump, "virtual phi. skip.");
1613           continue;
1614         }
1615
1616       /* Skip reduction phis.  */
1617       if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
1618         {
1619           if (vect_print_dump_info (REPORT_DETAILS))
1620             fprintf (vect_dump, "reduc phi. skip.");
1621           continue;
1622         }
1623
1624       access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi));
1625       gcc_assert (access_fn);
1626       STRIP_NOPS (access_fn);
1627       evolution_part =
1628          unshare_expr (evolution_part_in_loop_num (access_fn, loop->num));
1629       gcc_assert (evolution_part != NULL_TREE);
1630
1631       /* FORNOW: We do not support IVs whose evolution function is a polynomial
1632          of degree >= 2 or exponential.  */
1633       gcc_assert (!tree_is_chrec (evolution_part));
1634
1635       step_expr = evolution_part;
1636       init_expr = unshare_expr (initial_condition_in_loop_num (access_fn,
1637                                                                loop->num));
1638
1639       if (POINTER_TYPE_P (TREE_TYPE (init_expr)))
1640         ni = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (init_expr),
1641                           init_expr,
1642                           fold_build2 (MULT_EXPR, sizetype,
1643                                        fold_convert (sizetype, niters),
1644                                        step_expr));
1645       else
1646         ni = fold_build2 (PLUS_EXPR, TREE_TYPE (init_expr),
1647                           fold_build2 (MULT_EXPR, TREE_TYPE (init_expr),
1648                                        fold_convert (TREE_TYPE (init_expr),
1649                                                      niters),
1650                                        step_expr),
1651                           init_expr);
1652
1653
1654
1655       var = create_tmp_var (TREE_TYPE (init_expr), "tmp");
1656       add_referenced_var (var);
1657
1658       last_gsi = gsi_last_bb (exit_bb);
1659       ni_name = force_gimple_operand_gsi (&last_gsi, ni, false, var,
1660                                           true, GSI_SAME_STMT);
1661
1662       /* Fix phi expressions in the successor bb.  */
1663       SET_PHI_ARG_DEF (phi1, update_e->dest_idx, ni_name);
1664     }
1665 }
1666
1667 /* Return the more conservative threshold between the
1668    min_profitable_iters returned by the cost model and the user
1669    specified threshold, if provided.  */
1670
1671 static unsigned int
1672 conservative_cost_threshold (loop_vec_info loop_vinfo,
1673                              int min_profitable_iters)
1674 {
1675   unsigned int th;
1676   int min_scalar_loop_bound;
1677
1678   min_scalar_loop_bound = ((PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)
1679                             * LOOP_VINFO_VECT_FACTOR (loop_vinfo)) - 1);
1680
1681   /* Use the cost model only if it is more conservative than user specified
1682      threshold.  */
1683   th = (unsigned) min_scalar_loop_bound;
1684   if (min_profitable_iters
1685       && (!min_scalar_loop_bound
1686           || min_profitable_iters > min_scalar_loop_bound))
1687     th = (unsigned) min_profitable_iters;
1688
1689   if (th && vect_print_dump_info (REPORT_COST))
1690     fprintf (vect_dump, "Vectorization may not be profitable.");
1691
1692   return th;
1693 }
1694
1695 /* Function vect_do_peeling_for_loop_bound
1696
1697    Peel the last iterations of the loop represented by LOOP_VINFO.
1698    The peeled iterations form a new epilog loop.  Given that the loop now
1699    iterates NITERS times, the new epilog loop iterates
1700    NITERS % VECTORIZATION_FACTOR times.
1701
1702    The original loop will later be made to iterate
1703    NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO).
1704
1705    COND_EXPR and COND_EXPR_STMT_LIST are combined with a new generated
1706    test.  */
1707
1708 void
1709 vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio,
1710                                 tree cond_expr, gimple_seq cond_expr_stmt_list)
1711 {
1712   tree ni_name, ratio_mult_vf_name;
1713   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1714   struct loop *new_loop;
1715   edge update_e;
1716   basic_block preheader;
1717   int loop_num;
1718   bool check_profitability = false;
1719   unsigned int th = 0;
1720   int min_profitable_iters;
1721
1722   if (vect_print_dump_info (REPORT_DETAILS))
1723     fprintf (vect_dump, "=== vect_do_peeling_for_loop_bound ===");
1724
1725   initialize_original_copy_tables ();
1726
1727   /* Generate the following variables on the preheader of original loop:
1728
1729      ni_name = number of iteration the original loop executes
1730      ratio = ni_name / vf
1731      ratio_mult_vf_name = ratio * vf  */
1732   vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
1733                                    &ratio_mult_vf_name, ratio,
1734                                    cond_expr_stmt_list);
1735
1736   loop_num  = loop->num;
1737
1738   /* If cost model check not done during versioning and
1739      peeling for alignment.  */
1740   if (!VEC_length (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
1741       && !VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo))
1742       && !LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo)
1743       && !cond_expr)
1744     {
1745       check_profitability = true;
1746
1747       /* Get profitability threshold for vectorized loop.  */
1748       min_profitable_iters = LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo);
1749
1750       th = conservative_cost_threshold (loop_vinfo,
1751                                         min_profitable_iters);
1752     }
1753
1754   new_loop = slpeel_tree_peel_loop_to_edge (loop, single_exit (loop),
1755                                             ratio_mult_vf_name, ni_name, false,
1756                                             th, check_profitability,
1757                                             cond_expr, cond_expr_stmt_list);
1758   gcc_assert (new_loop);
1759   gcc_assert (loop_num == loop->num);
1760 #ifdef ENABLE_CHECKING
1761   slpeel_verify_cfg_after_peeling (loop, new_loop);
1762 #endif
1763
1764   /* A guard that controls whether the new_loop is to be executed or skipped
1765      is placed in LOOP->exit.  LOOP->exit therefore has two successors - one
1766      is the preheader of NEW_LOOP, where the IVs from LOOP are used.  The other
1767      is a bb after NEW_LOOP, where these IVs are not used.  Find the edge that
1768      is on the path where the LOOP IVs are used and need to be updated.  */
1769
1770   preheader = loop_preheader_edge (new_loop)->src;
1771   if (EDGE_PRED (preheader, 0)->src == single_exit (loop)->dest)
1772     update_e = EDGE_PRED (preheader, 0);
1773   else
1774     update_e = EDGE_PRED (preheader, 1);
1775
1776   /* Update IVs of original loop as if they were advanced
1777      by ratio_mult_vf_name steps.  */
1778   vect_update_ivs_after_vectorizer (loop_vinfo, ratio_mult_vf_name, update_e);
1779
1780   /* After peeling we have to reset scalar evolution analyzer.  */
1781   scev_reset ();
1782
1783   free_original_copy_tables ();
1784 }
1785
1786
1787 /* Function vect_gen_niters_for_prolog_loop
1788
1789    Set the number of iterations for the loop represented by LOOP_VINFO
1790    to the minimum between LOOP_NITERS (the original iteration count of the loop)
1791    and the misalignment of DR - the data reference recorded in
1792    LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO).  As a result, after the execution of
1793    this loop, the data reference DR will refer to an aligned location.
1794
1795    The following computation is generated:
1796
1797    If the misalignment of DR is known at compile time:
1798      addr_mis = int mis = DR_MISALIGNMENT (dr);
1799    Else, compute address misalignment in bytes:
1800      addr_mis = addr & (vectype_size - 1)
1801
1802    prolog_niters = min (LOOP_NITERS, ((VF - addr_mis/elem_size)&(VF-1))/step)
1803
1804    (elem_size = element type size; an element is the scalar element whose type
1805    is the inner type of the vectype)
1806
1807    When the step of the data-ref in the loop is not 1 (as in interleaved data
1808    and SLP), the number of iterations of the prolog must be divided by the step
1809    (which is equal to the size of interleaved group).
1810
1811    The above formulas assume that VF == number of elements in the vector. This
1812    may not hold when there are multiple-types in the loop.
1813    In this case, for some data-references in the loop the VF does not represent
1814    the number of elements that fit in the vector.  Therefore, instead of VF we
1815    use TYPE_VECTOR_SUBPARTS.  */
1816
1817 static tree
1818 vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
1819 {
1820   struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
1821   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1822   tree var;
1823   gimple_seq stmts;
1824   tree iters, iters_name;
1825   edge pe;
1826   basic_block new_bb;
1827   gimple dr_stmt = DR_STMT (dr);
1828   stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
1829   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1830   int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
1831   tree niters_type = TREE_TYPE (loop_niters);
1832   int step = 1;
1833   int element_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
1834   int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1835
1836   if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1837     step = DR_GROUP_SIZE (vinfo_for_stmt (DR_GROUP_FIRST_DR (stmt_info)));
1838
1839   pe = loop_preheader_edge (loop);
1840
1841   if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
1842     {
1843       int byte_misalign = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
1844       int elem_misalign = byte_misalign / element_size;
1845
1846       if (vect_print_dump_info (REPORT_DETAILS))
1847         fprintf (vect_dump, "known alignment = %d.", byte_misalign);
1848
1849       iters = build_int_cst (niters_type,
1850                      (((nelements - elem_misalign) & (nelements - 1)) / step));
1851     }
1852   else
1853     {
1854       gimple_seq new_stmts = NULL;
1855       tree start_addr = vect_create_addr_base_for_vector_ref (dr_stmt,
1856                                                 &new_stmts, NULL_TREE, loop);
1857       tree ptr_type = TREE_TYPE (start_addr);
1858       tree size = TYPE_SIZE (ptr_type);
1859       tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
1860       tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1);
1861       tree elem_size_log =
1862         build_int_cst (type, exact_log2 (vectype_align/nelements));
1863       tree nelements_minus_1 = build_int_cst (type, nelements - 1);
1864       tree nelements_tree = build_int_cst (type, nelements);
1865       tree byte_misalign;
1866       tree elem_misalign;
1867
1868       new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmts);
1869       gcc_assert (!new_bb);
1870
1871       /* Create:  byte_misalign = addr & (vectype_size - 1)  */
1872       byte_misalign =
1873         fold_build2 (BIT_AND_EXPR, type, fold_convert (type, start_addr), vectype_size_minus_1);
1874
1875       /* Create:  elem_misalign = byte_misalign / element_size  */
1876       elem_misalign =
1877         fold_build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log);
1878
1879       /* Create:  (niters_type) (nelements - elem_misalign)&(nelements - 1)  */
1880       iters = fold_build2 (MINUS_EXPR, type, nelements_tree, elem_misalign);
1881       iters = fold_build2 (BIT_AND_EXPR, type, iters, nelements_minus_1);
1882       iters = fold_convert (niters_type, iters);
1883     }
1884
1885   /* Create:  prolog_loop_niters = min (iters, loop_niters) */
1886   /* If the loop bound is known at compile time we already verified that it is
1887      greater than vf; since the misalignment ('iters') is at most vf, there's
1888      no need to generate the MIN_EXPR in this case.  */
1889   if (TREE_CODE (loop_niters) != INTEGER_CST)
1890     iters = fold_build2 (MIN_EXPR, niters_type, iters, loop_niters);
1891
1892   if (vect_print_dump_info (REPORT_DETAILS))
1893     {
1894       fprintf (vect_dump, "niters for prolog loop: ");
1895       print_generic_expr (vect_dump, iters, TDF_SLIM);
1896     }
1897
1898   var = create_tmp_var (niters_type, "prolog_loop_niters");
1899   add_referenced_var (var);
1900   stmts = NULL;
1901   iters_name = force_gimple_operand (iters, &stmts, false, var);
1902
1903   /* Insert stmt on loop preheader edge.  */
1904   if (stmts)
1905     {
1906       basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
1907       gcc_assert (!new_bb);
1908     }
1909
1910   return iters_name;
1911 }
1912
1913
1914 /* Function vect_update_init_of_dr
1915
1916    NITERS iterations were peeled from LOOP.  DR represents a data reference
1917    in LOOP.  This function updates the information recorded in DR to
1918    account for the fact that the first NITERS iterations had already been
1919    executed.  Specifically, it updates the OFFSET field of DR.  */
1920
1921 static void
1922 vect_update_init_of_dr (struct data_reference *dr, tree niters)
1923 {
1924   tree offset = DR_OFFSET (dr);
1925
1926   niters = fold_build2 (MULT_EXPR, sizetype,
1927                         fold_convert (sizetype, niters),
1928                         fold_convert (sizetype, DR_STEP (dr)));
1929   offset = fold_build2 (PLUS_EXPR, sizetype,
1930                         fold_convert (sizetype, offset), niters);
1931   DR_OFFSET (dr) = offset;
1932 }
1933
1934
1935 /* Function vect_update_inits_of_drs
1936
1937    NITERS iterations were peeled from the loop represented by LOOP_VINFO.
1938    This function updates the information recorded for the data references in
1939    the loop to account for the fact that the first NITERS iterations had
1940    already been executed.  Specifically, it updates the initial_condition of
1941    the access_function of all the data_references in the loop.  */
1942
1943 static void
1944 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
1945 {
1946   unsigned int i;
1947   VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1948   struct data_reference *dr;
1949
1950   if (vect_print_dump_info (REPORT_DETAILS))
1951     fprintf (vect_dump, "=== vect_update_inits_of_dr ===");
1952
1953   for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1954     vect_update_init_of_dr (dr, niters);
1955 }
1956
1957
1958 /* Function vect_do_peeling_for_alignment
1959
1960    Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
1961    'niters' is set to the misalignment of one of the data references in the
1962    loop, thereby forcing it to refer to an aligned location at the beginning
1963    of the execution of this loop.  The data reference for which we are
1964    peeling is recorded in LOOP_VINFO_UNALIGNED_DR.  */
1965
1966 void
1967 vect_do_peeling_for_alignment (loop_vec_info loop_vinfo)
1968 {
1969   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1970   tree niters_of_prolog_loop, ni_name;
1971   tree n_iters;
1972   struct loop *new_loop;
1973   unsigned int th = 0;
1974   int min_profitable_iters;
1975
1976   if (vect_print_dump_info (REPORT_DETAILS))
1977     fprintf (vect_dump, "=== vect_do_peeling_for_alignment ===");
1978
1979   initialize_original_copy_tables ();
1980
1981   ni_name = vect_build_loop_niters (loop_vinfo, NULL);
1982   niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name);
1983
1984
1985   /* Get profitability threshold for vectorized loop.  */
1986   min_profitable_iters = LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo);
1987   th = conservative_cost_threshold (loop_vinfo,
1988                                     min_profitable_iters);
1989
1990   /* Peel the prolog loop and iterate it niters_of_prolog_loop.  */
1991   new_loop =
1992     slpeel_tree_peel_loop_to_edge (loop, loop_preheader_edge (loop),
1993                                    niters_of_prolog_loop, ni_name, true,
1994                                    th, true, NULL_TREE, NULL);
1995
1996   gcc_assert (new_loop);
1997 #ifdef ENABLE_CHECKING
1998   slpeel_verify_cfg_after_peeling (new_loop, loop);
1999 #endif
2000
2001   /* Update number of times loop executes.  */
2002   n_iters = LOOP_VINFO_NITERS (loop_vinfo);
2003   LOOP_VINFO_NITERS (loop_vinfo) = fold_build2 (MINUS_EXPR,
2004                 TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop);
2005
2006   /* Update the init conditions of the access functions of all data refs.  */
2007   vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop);
2008
2009   /* After peeling we have to reset scalar evolution analyzer.  */
2010   scev_reset ();
2011
2012   free_original_copy_tables ();
2013 }
2014
2015
2016 /* Function vect_create_cond_for_align_checks.
2017
2018    Create a conditional expression that represents the alignment checks for
2019    all of data references (array element references) whose alignment must be
2020    checked at runtime.
2021
2022    Input:
2023    COND_EXPR  - input conditional expression.  New conditions will be chained
2024                 with logical AND operation.
2025    LOOP_VINFO - two fields of the loop information are used.
2026                 LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
2027                 LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
2028
2029    Output:
2030    COND_EXPR_STMT_LIST - statements needed to construct the conditional
2031                          expression.
2032    The returned value is the conditional expression to be used in the if
2033    statement that controls which version of the loop gets executed at runtime.
2034
2035    The algorithm makes two assumptions:
2036      1) The number of bytes "n" in a vector is a power of 2.
2037      2) An address "a" is aligned if a%n is zero and that this
2038         test can be done as a&(n-1) == 0.  For example, for 16
2039         byte vectors the test is a&0xf == 0.  */
2040
2041 static void
2042 vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
2043                                    tree *cond_expr,
2044                                    gimple_seq *cond_expr_stmt_list)
2045 {
2046   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2047   VEC(gimple,heap) *may_misalign_stmts
2048     = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
2049   gimple ref_stmt;
2050   int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
2051   tree mask_cst;
2052   unsigned int i;
2053   tree psize;
2054   tree int_ptrsize_type;
2055   char tmp_name[20];
2056   tree or_tmp_name = NULL_TREE;
2057   tree and_tmp, and_tmp_name;
2058   gimple and_stmt;
2059   tree ptrsize_zero;
2060   tree part_cond_expr;
2061
2062   /* Check that mask is one less than a power of 2, i.e., mask is
2063      all zeros followed by all ones.  */
2064   gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));
2065
2066   /* CHECKME: what is the best integer or unsigned type to use to hold a
2067      cast from a pointer value?  */
2068   psize = TYPE_SIZE (ptr_type_node);
2069   int_ptrsize_type
2070     = lang_hooks.types.type_for_size (tree_low_cst (psize, 1), 0);
2071
2072   /* Create expression (mask & (dr_1 || ... || dr_n)) where dr_i is the address
2073      of the first vector of the i'th data reference. */
2074
2075   for (i = 0; VEC_iterate (gimple, may_misalign_stmts, i, ref_stmt); i++)
2076     {
2077       gimple_seq new_stmt_list = NULL;
2078       tree addr_base;
2079       tree addr_tmp, addr_tmp_name;
2080       tree or_tmp, new_or_tmp_name;
2081       gimple addr_stmt, or_stmt;
2082
2083       /* create: addr_tmp = (int)(address_of_first_vector) */
2084       addr_base =
2085         vect_create_addr_base_for_vector_ref (ref_stmt, &new_stmt_list,
2086                                               NULL_TREE, loop);
2087       if (new_stmt_list != NULL)
2088         gimple_seq_add_seq (cond_expr_stmt_list, new_stmt_list);
2089
2090       sprintf (tmp_name, "%s%d", "addr2int", i);
2091       addr_tmp = create_tmp_var (int_ptrsize_type, tmp_name);
2092       add_referenced_var (addr_tmp);
2093       addr_tmp_name = make_ssa_name (addr_tmp, NULL);
2094       addr_stmt = gimple_build_assign_with_ops (NOP_EXPR, addr_tmp_name,
2095                                                 addr_base, NULL_TREE);
2096       SSA_NAME_DEF_STMT (addr_tmp_name) = addr_stmt;
2097       gimple_seq_add_stmt (cond_expr_stmt_list, addr_stmt);
2098
2099       /* The addresses are OR together.  */
2100
2101       if (or_tmp_name != NULL_TREE)
2102         {
2103           /* create: or_tmp = or_tmp | addr_tmp */
2104           sprintf (tmp_name, "%s%d", "orptrs", i);
2105           or_tmp = create_tmp_var (int_ptrsize_type, tmp_name);
2106           add_referenced_var (or_tmp);
2107           new_or_tmp_name = make_ssa_name (or_tmp, NULL);
2108           or_stmt = gimple_build_assign_with_ops (BIT_IOR_EXPR,
2109                                                   new_or_tmp_name,
2110                                                   or_tmp_name, addr_tmp_name);
2111           SSA_NAME_DEF_STMT (new_or_tmp_name) = or_stmt;
2112           gimple_seq_add_stmt (cond_expr_stmt_list, or_stmt);
2113           or_tmp_name = new_or_tmp_name;
2114         }
2115       else
2116         or_tmp_name = addr_tmp_name;
2117
2118     } /* end for i */
2119
2120   mask_cst = build_int_cst (int_ptrsize_type, mask);
2121
2122   /* create: and_tmp = or_tmp & mask  */
2123   and_tmp = create_tmp_var (int_ptrsize_type, "andmask" );
2124   add_referenced_var (and_tmp);
2125   and_tmp_name = make_ssa_name (and_tmp, NULL);
2126
2127   and_stmt = gimple_build_assign_with_ops (BIT_AND_EXPR, and_tmp_name,
2128                                            or_tmp_name, mask_cst);
2129   SSA_NAME_DEF_STMT (and_tmp_name) = and_stmt;
2130   gimple_seq_add_stmt (cond_expr_stmt_list, and_stmt);
2131
2132   /* Make and_tmp the left operand of the conditional test against zero.
2133      if and_tmp has a nonzero bit then some address is unaligned.  */
2134   ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
2135   part_cond_expr = fold_build2 (EQ_EXPR, boolean_type_node,
2136                                 and_tmp_name, ptrsize_zero);
2137   if (*cond_expr)
2138     *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2139                               *cond_expr, part_cond_expr);
2140   else
2141     *cond_expr = part_cond_expr;
2142 }
2143
2144
2145 /* Function vect_vfa_segment_size.
2146
2147    Create an expression that computes the size of segment
2148    that will be accessed for a data reference.  The functions takes into
2149    account that realignment loads may access one more vector.
2150
2151    Input:
2152      DR: The data reference.
2153      VECT_FACTOR: vectorization factor.
2154
2155    Return an expression whose value is the size of segment which will be
2156    accessed by DR.  */
2157
2158 static tree
2159 vect_vfa_segment_size (struct data_reference *dr, tree vect_factor)
2160 {
2161   tree segment_length = fold_build2 (MULT_EXPR, integer_type_node,
2162                                      DR_STEP (dr), vect_factor);
2163
2164   if (vect_supportable_dr_alignment (dr) == dr_explicit_realign_optimized)
2165     {
2166       tree vector_size = TYPE_SIZE_UNIT
2167                           (STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr))));
2168
2169       segment_length = fold_build2 (PLUS_EXPR, integer_type_node,
2170                                     segment_length, vector_size);
2171     }
2172   return fold_convert (sizetype, segment_length);
2173 }
2174
2175
2176 /* Function vect_create_cond_for_alias_checks.
2177
2178    Create a conditional expression that represents the run-time checks for
2179    overlapping of address ranges represented by a list of data references
2180    relations passed as input.
2181
2182    Input:
2183    COND_EXPR  - input conditional expression.  New conditions will be chained
2184                 with logical AND operation.
2185    LOOP_VINFO - field LOOP_VINFO_MAY_ALIAS_STMTS contains the list of ddrs
2186                 to be checked.
2187
2188    Output:
2189    COND_EXPR - conditional expression.
2190    COND_EXPR_STMT_LIST - statements needed to construct the conditional
2191                          expression.
2192
2193
2194    The returned value is the conditional expression to be used in the if
2195    statement that controls which version of the loop gets executed at runtime.
2196 */
2197
2198 static void
2199 vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo,
2200                                    tree * cond_expr,
2201                                    gimple_seq * cond_expr_stmt_list)
2202 {
2203   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2204   VEC (ddr_p, heap) * may_alias_ddrs =
2205     LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
2206   tree vect_factor =
2207     build_int_cst (integer_type_node, LOOP_VINFO_VECT_FACTOR (loop_vinfo));
2208
2209   ddr_p ddr;
2210   unsigned int i;
2211   tree part_cond_expr;
2212
2213   /* Create expression
2214      ((store_ptr_0 + store_segment_length_0) < load_ptr_0)
2215      || (load_ptr_0 + load_segment_length_0) < store_ptr_0))
2216      &&
2217      ...
2218      &&
2219      ((store_ptr_n + store_segment_length_n) < load_ptr_n)
2220      || (load_ptr_n + load_segment_length_n) < store_ptr_n))  */
2221
2222   if (VEC_empty (ddr_p, may_alias_ddrs))
2223     return;
2224
2225   for (i = 0; VEC_iterate (ddr_p, may_alias_ddrs, i, ddr); i++)
2226     {
2227       struct data_reference *dr_a, *dr_b;
2228       gimple dr_group_first_a, dr_group_first_b;
2229       tree addr_base_a, addr_base_b;
2230       tree segment_length_a, segment_length_b;
2231       gimple stmt_a, stmt_b;
2232
2233       dr_a = DDR_A (ddr);
2234       stmt_a = DR_STMT (DDR_A (ddr));
2235       dr_group_first_a = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_a));
2236       if (dr_group_first_a)
2237         {
2238           stmt_a = dr_group_first_a;
2239           dr_a = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt_a));
2240         }
2241
2242       dr_b = DDR_B (ddr);
2243       stmt_b = DR_STMT (DDR_B (ddr));
2244       dr_group_first_b = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_b));
2245       if (dr_group_first_b)
2246         {
2247           stmt_b = dr_group_first_b;
2248           dr_b = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt_b));
2249         }
2250
2251       addr_base_a =
2252         vect_create_addr_base_for_vector_ref (stmt_a, cond_expr_stmt_list,
2253                                               NULL_TREE, loop);
2254       addr_base_b =
2255         vect_create_addr_base_for_vector_ref (stmt_b, cond_expr_stmt_list,
2256                                               NULL_TREE, loop);
2257
2258       segment_length_a = vect_vfa_segment_size (dr_a, vect_factor);
2259       segment_length_b = vect_vfa_segment_size (dr_b, vect_factor);
2260
2261       if (vect_print_dump_info (REPORT_DR_DETAILS))
2262         {
2263           fprintf (vect_dump,
2264                    "create runtime check for data references ");
2265           print_generic_expr (vect_dump, DR_REF (dr_a), TDF_SLIM);
2266           fprintf (vect_dump, " and ");
2267           print_generic_expr (vect_dump, DR_REF (dr_b), TDF_SLIM);
2268         }
2269
2270
2271       part_cond_expr =
2272         fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
2273           fold_build2 (LT_EXPR, boolean_type_node,
2274             fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (addr_base_a),
2275               addr_base_a,
2276               segment_length_a),
2277             addr_base_b),
2278           fold_build2 (LT_EXPR, boolean_type_node,
2279             fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (addr_base_b),
2280               addr_base_b,
2281               segment_length_b),
2282             addr_base_a));
2283
2284       if (*cond_expr)
2285         *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2286                                   *cond_expr, part_cond_expr);
2287       else
2288         *cond_expr = part_cond_expr;
2289     }
2290     if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS))
2291       fprintf (vect_dump, "created %u versioning for alias checks.\n",
2292                VEC_length (ddr_p, may_alias_ddrs));
2293
2294 }
2295
2296
2297 /* Function vect_loop_versioning.
2298
2299    If the loop has data references that may or may not be aligned or/and
2300    has data reference relations whose independence was not proven then
2301    two versions of the loop need to be generated, one which is vectorized
2302    and one which isn't.  A test is then generated to control which of the
2303    loops is executed.  The test checks for the alignment of all of the
2304    data references that may or may not be aligned.  An additional
2305    sequence of runtime tests is generated for each pairs of DDRs whose
2306    independence was not proven.  The vectorized version of loop is
2307    executed only if both alias and alignment tests are passed.
2308
2309    The test generated to check which version of loop is executed
2310    is modified to also check for profitability as indicated by the
2311    cost model initially.
2312
2313    The versioning precondition(s) are placed in *COND_EXPR and
2314    *COND_EXPR_STMT_LIST.  If DO_VERSIONING is true versioning is
2315    also performed, otherwise only the conditions are generated.  */
2316
2317 void
2318 vect_loop_versioning (loop_vec_info loop_vinfo, bool do_versioning,
2319                       tree *cond_expr, gimple_seq *cond_expr_stmt_list)
2320 {
2321   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2322   struct loop *nloop;
2323   basic_block condition_bb;
2324   gimple_stmt_iterator gsi, cond_exp_gsi;
2325   basic_block merge_bb;
2326   basic_block new_exit_bb;
2327   edge new_exit_e, e;
2328   gimple orig_phi, new_phi;
2329   tree arg;
2330   unsigned prob = 4 * REG_BR_PROB_BASE / 5;
2331   gimple_seq gimplify_stmt_list = NULL;
2332   tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo);
2333   int min_profitable_iters = 0;
2334   unsigned int th;
2335
2336   /* Get profitability threshold for vectorized loop.  */
2337   min_profitable_iters = LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo);
2338
2339   th = conservative_cost_threshold (loop_vinfo,
2340                                     min_profitable_iters);
2341
2342   *cond_expr =
2343     fold_build2 (GT_EXPR, boolean_type_node, scalar_loop_iters,
2344                  build_int_cst (TREE_TYPE (scalar_loop_iters), th));
2345
2346   *cond_expr = force_gimple_operand (*cond_expr, cond_expr_stmt_list,
2347                                      false, NULL_TREE);
2348
2349   if (VEC_length (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)))
2350       vect_create_cond_for_align_checks (loop_vinfo, cond_expr,
2351                                          cond_expr_stmt_list);
2352
2353   if (VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)))
2354     vect_create_cond_for_alias_checks (loop_vinfo, cond_expr,
2355                                        cond_expr_stmt_list);
2356
2357   *cond_expr =
2358     fold_build2 (NE_EXPR, boolean_type_node, *cond_expr, integer_zero_node);
2359   *cond_expr =
2360     force_gimple_operand (*cond_expr, &gimplify_stmt_list, true, NULL_TREE);
2361   gimple_seq_add_seq (cond_expr_stmt_list, gimplify_stmt_list);
2362
2363   /* If we only needed the extra conditions and a new loop copy
2364      bail out here.  */
2365   if (!do_versioning)
2366     return;
2367
2368   initialize_original_copy_tables ();
2369   nloop = loop_version (loop, *cond_expr, &condition_bb,
2370                         prob, prob, REG_BR_PROB_BASE - prob, true);
2371   free_original_copy_tables();
2372
2373   /* Loop versioning violates an assumption we try to maintain during
2374      vectorization - that the loop exit block has a single predecessor.
2375      After versioning, the exit block of both loop versions is the same
2376      basic block (i.e. it has two predecessors). Just in order to simplify
2377      following transformations in the vectorizer, we fix this situation
2378      here by adding a new (empty) block on the exit-edge of the loop,
2379      with the proper loop-exit phis to maintain loop-closed-form.  */
2380
2381   merge_bb = single_exit (loop)->dest;
2382   gcc_assert (EDGE_COUNT (merge_bb->preds) == 2);
2383   new_exit_bb = split_edge (single_exit (loop));
2384   new_exit_e = single_exit (loop);
2385   e = EDGE_SUCC (new_exit_bb, 0);
2386
2387   for (gsi = gsi_start_phis (merge_bb); !gsi_end_p (gsi); gsi_next (&gsi))
2388     {
2389       orig_phi = gsi_stmt (gsi);
2390       new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
2391                                   new_exit_bb);
2392       arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
2393       add_phi_arg (new_phi, arg, new_exit_e);
2394       SET_PHI_ARG_DEF (orig_phi, e->dest_idx, PHI_RESULT (new_phi));
2395     }
2396
2397   /* End loop-exit-fixes after versioning.  */
2398
2399   update_ssa (TODO_update_ssa);
2400   if (*cond_expr_stmt_list)
2401     {
2402       cond_exp_gsi = gsi_last_bb (condition_bb);
2403       gsi_insert_seq_before (&cond_exp_gsi, *cond_expr_stmt_list,
2404                              GSI_SAME_STMT);
2405       *cond_expr_stmt_list = NULL;
2406     }
2407   *cond_expr = NULL_TREE;
2408 }
2409