gcc/tree-vect-loop-manip.c

   1 /* Vectorizer Specific Loop Manipulations
   2    Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   3    Free Software Foundation, Inc.
   4    Contributed by Dorit Naishlos <dorit@il.ibm.com>
   5    and Ira Rosen <irar@il.ibm.com>
   6
   7 This file is part of GCC.
   8
   9 GCC is free software; you can redistribute it and/or modify it under
  10 the terms of the GNU General Public License as published by the Free
  11 Software Foundation; either version 3, or (at your option) any later
  12 version.
  13
  14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  16 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  17 for more details.
  18
  19 You should have received a copy of the GNU General Public License
  20 along with GCC; see the file COPYING3.  If not see
  21 <http://www.gnu.org/licenses/>.  */
  22
  23 #include "config.h"
  24 #include "system.h"
  25 #include "coretypes.h"
  26 #include "tm.h"
  27 #include "ggc.h"
  28 #include "tree.h"
  29 #include "basic-block.h"
  30 #include "tree-pretty-print.h"
  31 #include "gimple-pretty-print.h"
  32 #include "tree-flow.h"
  33 #include "tree-dump.h"
  34 #include "cfgloop.h"
  35 #include "cfglayout.h"
  36 #include "diagnostic-core.h"
  37 #include "toplev.h"
  38 #include "tree-scalar-evolution.h"
  39 #include "tree-vectorizer.h"
  40 #include "langhooks.h"
  41
  42 /*************************************************************************
  43   Simple Loop Peeling Utilities
  44
  45   Utilities to support loop peeling for vectorization purposes.
  46  *************************************************************************/
  47
  48
  49 /* Renames the use *OP_P.  */
  50
  51 static void
  52 rename_use_op (use_operand_p op_p)
  53 {
  54   tree new_name;
  55
  56   if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
  57     return;
  58
  59   new_name = get_current_def (USE_FROM_PTR (op_p));
  60
  61   /* Something defined outside of the loop.  */
  62   if (!new_name)
  63     return;
  64
  65   /* An ordinary ssa name defined in the loop.  */
  66
  67   SET_USE (op_p, new_name);
  68 }
  69
  70
  71 /* Renames the variables in basic block BB.  */
  72
  73 void
  74 rename_variables_in_bb (basic_block bb)
  75 {
  76   gimple_stmt_iterator gsi;
  77   gimple stmt;
  78   use_operand_p use_p;
  79   ssa_op_iter iter;
  80   edge e;
  81   edge_iterator ei;
  82   struct loop *loop = bb->loop_father;
  83
  84   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
  85     {
  86       stmt = gsi_stmt (gsi);
  87       FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
  88         rename_use_op (use_p);
  89     }
  90
  91   FOR_EACH_EDGE (e, ei, bb->succs)
  92     {
  93       if (!flow_bb_inside_loop_p (loop, e->dest))
  94         continue;
  95       for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
  96         rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi_stmt (gsi), e));
  97     }
  98 }
  99
 100
 101 /* Renames variables in new generated LOOP.  */
 102
 103 void
 104 rename_variables_in_loop (struct loop *loop)
 105 {
 106   unsigned i;
 107   basic_block *bbs;
 108
 109   bbs = get_loop_body (loop);
 110
 111   for (i = 0; i < loop->num_nodes; i++)
 112     rename_variables_in_bb (bbs[i]);
 113
 114   free (bbs);
 115 }
 116
 117 typedef struct
 118 {
 119   tree from, to;
 120   basic_block bb;
 121 } adjust_info;
 122
 123 DEF_VEC_O(adjust_info);
 124 DEF_VEC_ALLOC_O_STACK(adjust_info);
 125 #define VEC_adjust_info_stack_alloc(alloc) VEC_stack_alloc (adjust_info, alloc)
 126
 127 /* A stack of values to be adjusted in debug stmts.  We have to
 128    process them LIFO, so that the closest substitution applies.  If we
 129    processed them FIFO, without the stack, we might substitute uses
 130    with a PHI DEF that would soon become non-dominant, and when we got
 131    to the suitable one, it wouldn't have anything to substitute any
 132    more.  */
 133 static VEC(adjust_info, stack) *adjust_vec;
 134
 135 /* Adjust any debug stmts that referenced AI->from values to use the
 136    loop-closed AI->to, if the references are dominated by AI->bb and
 137    not by the definition of AI->from.  */
 138
 139 static void
 140 adjust_debug_stmts_now (adjust_info *ai)
 141 {
 142   basic_block bbphi = ai->bb;
 143   tree orig_def = ai->from;
 144   tree new_def = ai->to;
 145   imm_use_iterator imm_iter;
 146   gimple stmt;
 147   basic_block bbdef = gimple_bb (SSA_NAME_DEF_STMT (orig_def));
 148
 149   gcc_assert (dom_info_available_p (CDI_DOMINATORS));
 150
 151   /* Adjust any debug stmts that held onto non-loop-closed
 152      references.  */
 153   FOR_EACH_IMM_USE_STMT (stmt, imm_iter, orig_def)
 154     {
 155       use_operand_p use_p;
 156       basic_block bbuse;
 157
 158       if (!is_gimple_debug (stmt))
 159         continue;
 160
 161       gcc_assert (gimple_debug_bind_p (stmt));
 162
 163       bbuse = gimple_bb (stmt);
 164
 165       if ((bbuse == bbphi
 166            || dominated_by_p (CDI_DOMINATORS, bbuse, bbphi))
 167           && !(bbuse == bbdef
 168                || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef)))
 169         {
 170           if (new_def)
 171             FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
 172               SET_USE (use_p, new_def);
 173           else
 174             {
 175               gimple_debug_bind_reset_value (stmt);
 176               update_stmt (stmt);
 177             }
 178         }
 179     }
 180 }
 181
 182 /* Adjust debug stmts as scheduled before.  */
 183
 184 static void
 185 adjust_vec_debug_stmts (void)
 186 {
 187   if (!MAY_HAVE_DEBUG_STMTS)
 188     return;
 189
 190   gcc_assert (adjust_vec);
 191
 192   while (!VEC_empty (adjust_info, adjust_vec))
 193     {
 194       adjust_debug_stmts_now (VEC_last (adjust_info, adjust_vec));
 195       VEC_pop (adjust_info, adjust_vec);
 196     }
 197
 198   VEC_free (adjust_info, stack, adjust_vec);
 199 }
 200
 201 /* Adjust any debug stmts that referenced FROM values to use the
 202    loop-closed TO, if the references are dominated by BB and not by
 203    the definition of FROM.  If adjust_vec is non-NULL, adjustments
 204    will be postponed until adjust_vec_debug_stmts is called.  */
 205
 206 static void
 207 adjust_debug_stmts (tree from, tree to, basic_block bb)
 208 {
 209   adjust_info ai;
 210
 211   if (MAY_HAVE_DEBUG_STMTS && TREE_CODE (from) == SSA_NAME
 212       && SSA_NAME_VAR (from) != gimple_vop (cfun))
 213     {
 214       ai.from = from;
 215       ai.to = to;
 216       ai.bb = bb;
 217
 218       if (adjust_vec)
 219         VEC_safe_push (adjust_info, stack, adjust_vec, &ai);
 220       else
 221         adjust_debug_stmts_now (&ai);
 222     }
 223 }
 224
 225 /* Change E's phi arg in UPDATE_PHI to NEW_DEF, and record information
 226    to adjust any debug stmts that referenced the old phi arg,
 227    presumably non-loop-closed references left over from other
 228    transformations.  */
 229
 230 static void
 231 adjust_phi_and_debug_stmts (gimple update_phi, edge e, tree new_def)
 232 {
 233   tree orig_def = PHI_ARG_DEF_FROM_EDGE (update_phi, e);
 234
 235   SET_PHI_ARG_DEF (update_phi, e->dest_idx, new_def);
 236
 237   if (MAY_HAVE_DEBUG_STMTS)
 238     adjust_debug_stmts (orig_def, PHI_RESULT (update_phi),
 239                         gimple_bb (update_phi));
 240 }
 241
 242
 243 /* Update the PHI nodes of NEW_LOOP.
 244
 245    NEW_LOOP is a duplicate of ORIG_LOOP.
 246    AFTER indicates whether NEW_LOOP executes before or after ORIG_LOOP:
 247    AFTER is true if NEW_LOOP executes after ORIG_LOOP, and false if it
 248    executes before it.  */
 249
 250 static void
 251 slpeel_update_phis_for_duplicate_loop (struct loop *orig_loop,
 252                                        struct loop *new_loop, bool after)
 253 {
 254   tree new_ssa_name;
 255   gimple phi_new, phi_orig;
 256   tree def;
 257   edge orig_loop_latch = loop_latch_edge (orig_loop);
 258   edge orig_entry_e = loop_preheader_edge (orig_loop);
 259   edge new_loop_exit_e = single_exit (new_loop);
 260   edge new_loop_entry_e = loop_preheader_edge (new_loop);
 261   edge entry_arg_e = (after ? orig_loop_latch : orig_entry_e);
 262   gimple_stmt_iterator gsi_new, gsi_orig;
 263
 264   /*
 265      step 1. For each loop-header-phi:
 266              Add the first phi argument for the phi in NEW_LOOP
 267             (the one associated with the entry of NEW_LOOP)
 268
 269      step 2. For each loop-header-phi:
 270              Add the second phi argument for the phi in NEW_LOOP
 271             (the one associated with the latch of NEW_LOOP)
 272
 273      step 3. Update the phis in the successor block of NEW_LOOP.
 274
 275         case 1: NEW_LOOP was placed before ORIG_LOOP:
 276                 The successor block of NEW_LOOP is the header of ORIG_LOOP.
 277                 Updating the phis in the successor block can therefore be done
 278                 along with the scanning of the loop header phis, because the
 279                 header blocks of ORIG_LOOP and NEW_LOOP have exactly the same
 280                 phi nodes, organized in the same order.
 281
 282         case 2: NEW_LOOP was placed after ORIG_LOOP:
 283                 The successor block of NEW_LOOP is the original exit block of
 284                 ORIG_LOOP - the phis to be updated are the loop-closed-ssa phis.
 285                 We postpone updating these phis to a later stage (when
 286                 loop guards are added).
 287    */
 288
 289
 290   /* Scan the phis in the headers of the old and new loops
 291      (they are organized in exactly the same order).  */
 292
 293   for (gsi_new = gsi_start_phis (new_loop->header),
 294        gsi_orig = gsi_start_phis (orig_loop->header);
 295        !gsi_end_p (gsi_new) && !gsi_end_p (gsi_orig);
 296        gsi_next (&gsi_new), gsi_next (&gsi_orig))
 297     {
 298       source_location locus;
 299       phi_new = gsi_stmt (gsi_new);
 300       phi_orig = gsi_stmt (gsi_orig);
 301
 302       /* step 1.  */
 303       def = PHI_ARG_DEF_FROM_EDGE (phi_orig, entry_arg_e);
 304       locus = gimple_phi_arg_location_from_edge (phi_orig, entry_arg_e);
 305       add_phi_arg (phi_new, def, new_loop_entry_e, locus);
 306
 307       /* step 2.  */
 308       def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_loop_latch);
 309       locus = gimple_phi_arg_location_from_edge (phi_orig, orig_loop_latch);
 310       if (TREE_CODE (def) != SSA_NAME)
 311         continue;
 312
 313       new_ssa_name = get_current_def (def);
 314       if (!new_ssa_name)
 315         {
 316           /* This only happens if there are no definitions
 317              inside the loop. use the phi_result in this case.  */
 318           new_ssa_name = PHI_RESULT (phi_new);
 319         }
 320
 321       /* An ordinary ssa name defined in the loop.  */
 322       add_phi_arg (phi_new, new_ssa_name, loop_latch_edge (new_loop), locus);
 323
 324       /* Drop any debug references outside the loop, if they would
 325          become ill-formed SSA.  */
 326       adjust_debug_stmts (def, NULL, single_exit (orig_loop)->dest);
 327
 328       /* step 3 (case 1).  */
 329       if (!after)
 330         {
 331           gcc_assert (new_loop_exit_e == orig_entry_e);
 332           adjust_phi_and_debug_stmts (phi_orig, new_loop_exit_e, new_ssa_name);
 333         }
 334     }
 335 }
 336
 337
 338 /* Update PHI nodes for a guard of the LOOP.
 339
 340    Input:
 341    - LOOP, GUARD_EDGE: LOOP is a loop for which we added guard code that
 342         controls whether LOOP is to be executed.  GUARD_EDGE is the edge that
 343         originates from the guard-bb, skips LOOP and reaches the (unique) exit
 344         bb of LOOP.  This loop-exit-bb is an empty bb with one successor.
 345         We denote this bb NEW_MERGE_BB because before the guard code was added
 346         it had a single predecessor (the LOOP header), and now it became a merge
 347         point of two paths - the path that ends with the LOOP exit-edge, and
 348         the path that ends with GUARD_EDGE.
 349    - NEW_EXIT_BB: New basic block that is added by this function between LOOP
 350         and NEW_MERGE_BB. It is used to place loop-closed-ssa-form exit-phis.
 351
 352    ===> The CFG before the guard-code was added:
 353         LOOP_header_bb:
 354           loop_body
 355           if (exit_loop) goto update_bb
 356           else           goto LOOP_header_bb
 357         update_bb:
 358
 359    ==> The CFG after the guard-code was added:
 360         guard_bb:
 361           if (LOOP_guard_condition) goto new_merge_bb
 362           else                      goto LOOP_header_bb
 363         LOOP_header_bb:
 364           loop_body
 365           if (exit_loop_condition) goto new_merge_bb
 366           else                     goto LOOP_header_bb
 367         new_merge_bb:
 368           goto update_bb
 369         update_bb:
 370
 371    ==> The CFG after this function:
 372         guard_bb:
 373           if (LOOP_guard_condition) goto new_merge_bb
 374           else                      goto LOOP_header_bb
 375         LOOP_header_bb:
 376           loop_body
 377           if (exit_loop_condition) goto new_exit_bb
 378           else                     goto LOOP_header_bb
 379         new_exit_bb:
 380         new_merge_bb:
 381           goto update_bb
 382         update_bb:
 383
 384    This function:
 385    1. creates and updates the relevant phi nodes to account for the new
 386       incoming edge (GUARD_EDGE) into NEW_MERGE_BB. This involves:
 387       1.1. Create phi nodes at NEW_MERGE_BB.
 388       1.2. Update the phi nodes at the successor of NEW_MERGE_BB (denoted
 389            UPDATE_BB).  UPDATE_BB was the exit-bb of LOOP before NEW_MERGE_BB
 390    2. preserves loop-closed-ssa-form by creating the required phi nodes
 391       at the exit of LOOP (i.e, in NEW_EXIT_BB).
 392
 393    There are two flavors to this function:
 394
 395    slpeel_update_phi_nodes_for_guard1:
 396      Here the guard controls whether we enter or skip LOOP, where LOOP is a
 397      prolog_loop (loop1 below), and the new phis created in NEW_MERGE_BB are
 398      for variables that have phis in the loop header.
 399
 400    slpeel_update_phi_nodes_for_guard2:
 401      Here the guard controls whether we enter or skip LOOP, where LOOP is an
 402      epilog_loop (loop2 below), and the new phis created in NEW_MERGE_BB are
 403      for variables that have phis in the loop exit.
 404
 405    I.E., the overall structure is:
 406
 407         loop1_preheader_bb:
 408                 guard1 (goto loop1/merge1_bb)
 409         loop1
 410         loop1_exit_bb:
 411                 guard2 (goto merge1_bb/merge2_bb)
 412         merge1_bb
 413         loop2
 414         loop2_exit_bb
 415         merge2_bb
 416         next_bb
 417
 418    slpeel_update_phi_nodes_for_guard1 takes care of creating phis in
 419    loop1_exit_bb and merge1_bb. These are entry phis (phis for the vars
 420    that have phis in loop1->header).
 421
 422    slpeel_update_phi_nodes_for_guard2 takes care of creating phis in
 423    loop2_exit_bb and merge2_bb. These are exit phis (phis for the vars
 424    that have phis in next_bb). It also adds some of these phis to
 425    loop1_exit_bb.
 426
 427    slpeel_update_phi_nodes_for_guard1 is always called before
 428    slpeel_update_phi_nodes_for_guard2. They are both needed in order
 429    to create correct data-flow and loop-closed-ssa-form.
 430
 431    Generally slpeel_update_phi_nodes_for_guard1 creates phis for variables
 432    that change between iterations of a loop (and therefore have a phi-node
 433    at the loop entry), whereas slpeel_update_phi_nodes_for_guard2 creates
 434    phis for variables that are used out of the loop (and therefore have
 435    loop-closed exit phis). Some variables may be both updated between
 436    iterations and used after the loop. This is why in loop1_exit_bb we
 437    may need both entry_phis (created by slpeel_update_phi_nodes_for_guard1)
 438    and exit phis (created by slpeel_update_phi_nodes_for_guard2).
 439
 440    - IS_NEW_LOOP: if IS_NEW_LOOP is true, then LOOP is a newly created copy of
 441      an original loop. i.e., we have:
 442
 443            orig_loop
 444            guard_bb (goto LOOP/new_merge)
 445            new_loop <-- LOOP
 446            new_exit
 447            new_merge
 448            next_bb
 449
 450      If IS_NEW_LOOP is false, then LOOP is an original loop, in which case we
 451      have:
 452
 453            new_loop
 454            guard_bb (goto LOOP/new_merge)
 455            orig_loop <-- LOOP
 456            new_exit
 457            new_merge
 458            next_bb
 459
 460      The SSA names defined in the original loop have a current
 461      reaching definition that that records the corresponding new
 462      ssa-name used in the new duplicated loop copy.
 463   */
 464
 465 /* Function slpeel_update_phi_nodes_for_guard1
 466
 467    Input:
 468    - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
 469    - DEFS - a bitmap of ssa names to mark new names for which we recorded
 470             information.
 471
 472    In the context of the overall structure, we have:
 473
 474         loop1_preheader_bb:
 475                 guard1 (goto loop1/merge1_bb)
 476 LOOP->  loop1
 477         loop1_exit_bb:
 478                 guard2 (goto merge1_bb/merge2_bb)
 479         merge1_bb
 480         loop2
 481         loop2_exit_bb
 482         merge2_bb
 483         next_bb
 484
 485    For each name updated between loop iterations (i.e - for each name that has
 486    an entry (loop-header) phi in LOOP) we create a new phi in:
 487    1. merge1_bb (to account for the edge from guard1)
 488    2. loop1_exit_bb (an exit-phi to keep LOOP in loop-closed form)
 489 */
 490
 491 static void
 492 slpeel_update_phi_nodes_for_guard1 (edge guard_edge, struct loop *loop,
 493                                     bool is_new_loop, basic_block *new_exit_bb,
 494                                     bitmap *defs)
 495 {
 496   gimple orig_phi, new_phi;
 497   gimple update_phi, update_phi2;
 498   tree guard_arg, loop_arg;
 499   basic_block new_merge_bb = guard_edge->dest;
 500   edge e = EDGE_SUCC (new_merge_bb, 0);
 501   basic_block update_bb = e->dest;
 502   basic_block orig_bb = loop->header;
 503   edge new_exit_e;
 504   tree current_new_name;
 505   gimple_stmt_iterator gsi_orig, gsi_update;
 506
 507   /* Create new bb between loop and new_merge_bb.  */
 508   *new_exit_bb = split_edge (single_exit (loop));
 509
 510   new_exit_e = EDGE_SUCC (*new_exit_bb, 0);
 511
 512   for (gsi_orig = gsi_start_phis (orig_bb),
 513        gsi_update = gsi_start_phis (update_bb);
 514        !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
 515        gsi_next (&gsi_orig), gsi_next (&gsi_update))
 516     {
 517       source_location loop_locus, guard_locus;;
 518       orig_phi = gsi_stmt (gsi_orig);
 519       update_phi = gsi_stmt (gsi_update);
 520
 521       /** 1. Handle new-merge-point phis  **/
 522
 523       /* 1.1. Generate new phi node in NEW_MERGE_BB:  */
 524       new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
 525                                  new_merge_bb);
 526
 527       /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
 528             of LOOP. Set the two phi args in NEW_PHI for these edges:  */
 529       loop_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, EDGE_SUCC (loop->latch, 0));
 530       loop_locus = gimple_phi_arg_location_from_edge (orig_phi,
 531                                                       EDGE_SUCC (loop->latch,
 532                                                                  0));
 533       guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, loop_preheader_edge (loop));
 534       guard_locus
 535         = gimple_phi_arg_location_from_edge (orig_phi,
 536                                              loop_preheader_edge (loop));
 537
 538       add_phi_arg (new_phi, loop_arg, new_exit_e, loop_locus);
 539       add_phi_arg (new_phi, guard_arg, guard_edge, guard_locus);
 540
 541       /* 1.3. Update phi in successor block.  */
 542       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == loop_arg
 543                   || PHI_ARG_DEF_FROM_EDGE (update_phi, e) == guard_arg);
 544       adjust_phi_and_debug_stmts (update_phi, e, PHI_RESULT (new_phi));
 545       update_phi2 = new_phi;
 546
 547
 548       /** 2. Handle loop-closed-ssa-form phis  **/
 549
 550       if (!is_gimple_reg (PHI_RESULT (orig_phi)))
 551         continue;
 552
 553       /* 2.1. Generate new phi node in NEW_EXIT_BB:  */
 554       new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
 555                                  *new_exit_bb);
 556
 557       /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop.  */
 558       add_phi_arg (new_phi, loop_arg, single_exit (loop), loop_locus);
 559
 560       /* 2.3. Update phi in successor of NEW_EXIT_BB:  */
 561       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
 562       adjust_phi_and_debug_stmts (update_phi2, new_exit_e,
 563                                   PHI_RESULT (new_phi));
 564
 565       /* 2.4. Record the newly created name with set_current_def.
 566          We want to find a name such that
 567                 name = get_current_def (orig_loop_name)
 568          and to set its current definition as follows:
 569                 set_current_def (name, new_phi_name)
 570
 571          If LOOP is a new loop then loop_arg is already the name we're
 572          looking for. If LOOP is the original loop, then loop_arg is
 573          the orig_loop_name and the relevant name is recorded in its
 574          current reaching definition.  */
 575       if (is_new_loop)
 576         current_new_name = loop_arg;
 577       else
 578         {
 579           current_new_name = get_current_def (loop_arg);
 580           /* current_def is not available only if the variable does not
 581              change inside the loop, in which case we also don't care
 582              about recording a current_def for it because we won't be
 583              trying to create loop-exit-phis for it.  */
 584           if (!current_new_name)
 585             continue;
 586         }
 587       gcc_assert (get_current_def (current_new_name) == NULL_TREE);
 588
 589       set_current_def (current_new_name, PHI_RESULT (new_phi));
 590       bitmap_set_bit (*defs, SSA_NAME_VERSION (current_new_name));
 591     }
 592 }
 593
 594
 595 /* Function slpeel_update_phi_nodes_for_guard2
 596
 597    Input:
 598    - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
 599
 600    In the context of the overall structure, we have:
 601
 602         loop1_preheader_bb:
 603                 guard1 (goto loop1/merge1_bb)
 604         loop1
 605         loop1_exit_bb:
 606                 guard2 (goto merge1_bb/merge2_bb)
 607         merge1_bb
 608 LOOP->  loop2
 609         loop2_exit_bb
 610         merge2_bb
 611         next_bb
 612
 613    For each name used out side the loop (i.e - for each name that has an exit
 614    phi in next_bb) we create a new phi in:
 615    1. merge2_bb (to account for the edge from guard_bb)
 616    2. loop2_exit_bb (an exit-phi to keep LOOP in loop-closed form)
 617    3. guard2 bb (an exit phi to keep the preceding loop in loop-closed form),
 618       if needed (if it wasn't handled by slpeel_update_phis_nodes_for_phi1).
 619 */
 620
 621 static void
 622 slpeel_update_phi_nodes_for_guard2 (edge guard_edge, struct loop *loop,
 623                                     bool is_new_loop, basic_block *new_exit_bb)
 624 {
 625   gimple orig_phi, new_phi;
 626   gimple update_phi, update_phi2;
 627   tree guard_arg, loop_arg;
 628   basic_block new_merge_bb = guard_edge->dest;
 629   edge e = EDGE_SUCC (new_merge_bb, 0);
 630   basic_block update_bb = e->dest;
 631   edge new_exit_e;
 632   tree orig_def, orig_def_new_name;
 633   tree new_name, new_name2;
 634   tree arg;
 635   gimple_stmt_iterator gsi;
 636
 637   /* Create new bb between loop and new_merge_bb.  */
 638   *new_exit_bb = split_edge (single_exit (loop));
 639
 640   new_exit_e = EDGE_SUCC (*new_exit_bb, 0);
 641
 642   for (gsi = gsi_start_phis (update_bb); !gsi_end_p (gsi); gsi_next (&gsi))
 643     {
 644       update_phi = gsi_stmt (gsi);
 645       orig_phi = update_phi;
 646       orig_def = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
 647       /* This loop-closed-phi actually doesn't represent a use
 648          out of the loop - the phi arg is a constant.  */
 649       if (TREE_CODE (orig_def) != SSA_NAME)
 650         continue;
 651       orig_def_new_name = get_current_def (orig_def);
 652       arg = NULL_TREE;
 653
 654       /** 1. Handle new-merge-point phis  **/
 655
 656       /* 1.1. Generate new phi node in NEW_MERGE_BB:  */
 657       new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
 658                                  new_merge_bb);
 659
 660       /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
 661             of LOOP. Set the two PHI args in NEW_PHI for these edges:  */
 662       new_name = orig_def;
 663       new_name2 = NULL_TREE;
 664       if (orig_def_new_name)
 665         {
 666           new_name = orig_def_new_name;
 667           /* Some variables have both loop-entry-phis and loop-exit-phis.
 668              Such variables were given yet newer names by phis placed in
 669              guard_bb by slpeel_update_phi_nodes_for_guard1. I.e:
 670              new_name2 = get_current_def (get_current_def (orig_name)).  */
 671           new_name2 = get_current_def (new_name);
 672         }
 673
 674       if (is_new_loop)
 675         {
 676           guard_arg = orig_def;
 677           loop_arg = new_name;
 678         }
 679       else
 680         {
 681           guard_arg = new_name;
 682           loop_arg = orig_def;
 683         }
 684       if (new_name2)
 685         guard_arg = new_name2;
 686
 687       add_phi_arg (new_phi, loop_arg, new_exit_e, UNKNOWN_LOCATION);
 688       add_phi_arg (new_phi, guard_arg, guard_edge, UNKNOWN_LOCATION);
 689
 690       /* 1.3. Update phi in successor block.  */
 691       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == orig_def);
 692       adjust_phi_and_debug_stmts (update_phi, e, PHI_RESULT (new_phi));
 693       update_phi2 = new_phi;
 694
 695
 696       /** 2. Handle loop-closed-ssa-form phis  **/
 697
 698       /* 2.1. Generate new phi node in NEW_EXIT_BB:  */
 699       new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
 700                                  *new_exit_bb);
 701
 702       /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop.  */
 703       add_phi_arg (new_phi, loop_arg, single_exit (loop), UNKNOWN_LOCATION);
 704
 705       /* 2.3. Update phi in successor of NEW_EXIT_BB:  */
 706       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
 707       adjust_phi_and_debug_stmts (update_phi2, new_exit_e,
 708                                   PHI_RESULT (new_phi));
 709
 710
 711       /** 3. Handle loop-closed-ssa-form phis for first loop  **/
 712
 713       /* 3.1. Find the relevant names that need an exit-phi in
 714          GUARD_BB, i.e. names for which
 715          slpeel_update_phi_nodes_for_guard1 had not already created a
 716          phi node. This is the case for names that are used outside
 717          the loop (and therefore need an exit phi) but are not updated
 718          across loop iterations (and therefore don't have a
 719          loop-header-phi).
 720
 721          slpeel_update_phi_nodes_for_guard1 is responsible for
 722          creating loop-exit phis in GUARD_BB for names that have a
 723          loop-header-phi.  When such a phi is created we also record
 724          the new name in its current definition.  If this new name
 725          exists, then guard_arg was set to this new name (see 1.2
 726          above).  Therefore, if guard_arg is not this new name, this
 727          is an indication that an exit-phi in GUARD_BB was not yet
 728          created, so we take care of it here.  */
 729       if (guard_arg == new_name2)
 730         continue;
 731       arg = guard_arg;
 732
 733       /* 3.2. Generate new phi node in GUARD_BB:  */
 734       new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
 735                                  guard_edge->src);
 736
 737       /* 3.3. GUARD_BB has one incoming edge:  */
 738       gcc_assert (EDGE_COUNT (guard_edge->src->preds) == 1);
 739       add_phi_arg (new_phi, arg, EDGE_PRED (guard_edge->src, 0),
 740                    UNKNOWN_LOCATION);
 741
 742       /* 3.4. Update phi in successor of GUARD_BB:  */
 743       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, guard_edge)
 744                                                                 == guard_arg);
 745       adjust_phi_and_debug_stmts (update_phi2, guard_edge,
 746                                   PHI_RESULT (new_phi));
 747     }
 748 }
 749
 750
 751 /* Make the LOOP iterate NITERS times. This is done by adding a new IV
 752    that starts at zero, increases by one and its limit is NITERS.
 753
 754    Assumption: the exit-condition of LOOP is the last stmt in the loop.  */
 755
 756 void
 757 slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
 758 {
 759   tree indx_before_incr, indx_after_incr;
 760   gimple cond_stmt;
 761   gimple orig_cond;
 762   edge exit_edge = single_exit (loop);
 763   gimple_stmt_iterator loop_cond_gsi;
 764   gimple_stmt_iterator incr_gsi;
 765   bool insert_after;
 766   tree init = build_int_cst (TREE_TYPE (niters), 0);
 767   tree step = build_int_cst (TREE_TYPE (niters), 1);
 768   LOC loop_loc;
 769   enum tree_code code;
 770
 771   orig_cond = get_loop_exit_condition (loop);
 772   gcc_assert (orig_cond);
 773   loop_cond_gsi = gsi_for_stmt (orig_cond);
 774
 775   standard_iv_increment_position (loop, &incr_gsi, &insert_after);
 776   create_iv (init, step, NULL_TREE, loop,
 777              &incr_gsi, insert_after, &indx_before_incr, &indx_after_incr);
 778
 779   indx_after_incr = force_gimple_operand_gsi (&loop_cond_gsi, indx_after_incr,
 780                                               true, NULL_TREE, true,
 781                                               GSI_SAME_STMT);
 782   niters = force_gimple_operand_gsi (&loop_cond_gsi, niters, true, NULL_TREE,
 783                                      true, GSI_SAME_STMT);
 784
 785   code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR;
 786   cond_stmt = gimple_build_cond (code, indx_after_incr, niters, NULL_TREE,
 787                                  NULL_TREE);
 788
 789   gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);
 790
 791   /* Remove old loop exit test:  */
 792   gsi_remove (&loop_cond_gsi, true);
 793
 794   loop_loc = find_loop_location (loop);
 795   if (dump_file && (dump_flags & TDF_DETAILS))
 796     {
 797       if (loop_loc != UNKNOWN_LOC)
 798         fprintf (dump_file, "\nloop at %s:%d: ",
 799                  LOC_FILE (loop_loc), LOC_LINE (loop_loc));
 800       print_gimple_stmt (dump_file, cond_stmt, 0, TDF_SLIM);
 801     }
 802
 803   loop->nb_iterations = niters;
 804 }
 805
 806
 807 /* Given LOOP this function generates a new copy of it and puts it
 808    on E which is either the entry or exit of LOOP.  */
 809
 810 struct loop *
 811 slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, edge e)
 812 {
 813   struct loop *new_loop;
 814   basic_block *new_bbs, *bbs;
 815   bool at_exit;
 816   bool was_imm_dom;
 817   basic_block exit_dest;
 818   gimple phi;
 819   tree phi_arg;
 820   edge exit, new_exit;
 821   gimple_stmt_iterator gsi;
 822
 823   at_exit = (e == single_exit (loop));
 824   if (!at_exit && e != loop_preheader_edge (loop))
 825     return NULL;
 826
 827   bbs = get_loop_body (loop);
 828
 829   /* Check whether duplication is possible.  */
 830   if (!can_copy_bbs_p (bbs, loop->num_nodes))
 831     {
 832       free (bbs);
 833       return NULL;
 834     }
 835
 836   /* Generate new loop structure.  */
 837   new_loop = duplicate_loop (loop, loop_outer (loop));
 838   if (!new_loop)
 839     {
 840       free (bbs);
 841       return NULL;
 842     }
 843
 844   exit_dest = single_exit (loop)->dest;
 845   was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
 846                                           exit_dest) == loop->header ?
 847                  true : false);
 848
 849   new_bbs = XNEWVEC (basic_block, loop->num_nodes);
 850
 851   exit = single_exit (loop);
 852   copy_bbs (bbs, loop->num_nodes, new_bbs,
 853             &exit, 1, &new_exit, NULL,
 854             e->src);
 855
 856   /* Duplicating phi args at exit bbs as coming
 857      also from exit of duplicated loop.  */
 858   for (gsi = gsi_start_phis (exit_dest); !gsi_end_p (gsi); gsi_next (&gsi))
 859     {
 860       phi = gsi_stmt (gsi);
 861       phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, single_exit (loop));
 862       if (phi_arg)
 863         {
 864           edge new_loop_exit_edge;
 865           source_location locus;
 866
 867           locus = gimple_phi_arg_location_from_edge (phi, single_exit (loop));
 868           if (EDGE_SUCC (new_loop->header, 0)->dest == new_loop->latch)
 869             new_loop_exit_edge = EDGE_SUCC (new_loop->header, 1);
 870           else
 871             new_loop_exit_edge = EDGE_SUCC (new_loop->header, 0);
 872
 873           add_phi_arg (phi, phi_arg, new_loop_exit_edge, locus);
 874         }
 875     }
 876
 877   if (at_exit) /* Add the loop copy at exit.  */
 878     {
 879       redirect_edge_and_branch_force (e, new_loop->header);
 880       PENDING_STMT (e) = NULL;
 881       set_immediate_dominator (CDI_DOMINATORS, new_loop->header, e->src);
 882       if (was_imm_dom)
 883         set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_loop->header);
 884     }
 885   else /* Add the copy at entry.  */
 886     {
 887       edge new_exit_e;
 888       edge entry_e = loop_preheader_edge (loop);
 889       basic_block preheader = entry_e->src;
 890
 891       if (!flow_bb_inside_loop_p (new_loop,
 892                                   EDGE_SUCC (new_loop->header, 0)->dest))
 893         new_exit_e = EDGE_SUCC (new_loop->header, 0);
 894       else
 895         new_exit_e = EDGE_SUCC (new_loop->header, 1);
 896
 897       redirect_edge_and_branch_force (new_exit_e, loop->header);
 898       PENDING_STMT (new_exit_e) = NULL;
 899       set_immediate_dominator (CDI_DOMINATORS, loop->header,
 900                                new_exit_e->src);
 901
 902       /* We have to add phi args to the loop->header here as coming
 903          from new_exit_e edge.  */
 904       for (gsi = gsi_start_phis (loop->header);
 905            !gsi_end_p (gsi);
 906            gsi_next (&gsi))
 907         {
 908           phi = gsi_stmt (gsi);
 909           phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, entry_e);
 910           if (phi_arg)
 911             add_phi_arg (phi, phi_arg, new_exit_e,
 912                          gimple_phi_arg_location_from_edge (phi, entry_e));
 913         }
 914
 915       redirect_edge_and_branch_force (entry_e, new_loop->header);
 916       PENDING_STMT (entry_e) = NULL;
 917       set_immediate_dominator (CDI_DOMINATORS, new_loop->header, preheader);
 918     }
 919
 920   free (new_bbs);
 921   free (bbs);
 922
 923   return new_loop;
 924 }
 925
 926
 927 /* Given the condition statement COND, put it as the last statement
 928    of GUARD_BB; EXIT_BB is the basic block to skip the loop;
 929    Assumes that this is the single exit of the guarded loop.
 930    Returns the skip edge, inserts new stmts on the COND_EXPR_STMT_LIST.  */
 931
 932 static edge
 933 slpeel_add_loop_guard (basic_block guard_bb, tree cond,
 934                        gimple_seq cond_expr_stmt_list,
 935                        basic_block exit_bb, basic_block dom_bb)
 936 {
 937   gimple_stmt_iterator gsi;
 938   edge new_e, enter_e;
 939   gimple cond_stmt;
 940   gimple_seq gimplify_stmt_list = NULL;
 941
 942   enter_e = EDGE_SUCC (guard_bb, 0);
 943   enter_e->flags &= ~EDGE_FALLTHRU;
 944   enter_e->flags |= EDGE_FALSE_VALUE;
 945   gsi = gsi_last_bb (guard_bb);
 946
 947   cond = force_gimple_operand (cond, &gimplify_stmt_list, true, NULL_TREE);
 948   if (gimplify_stmt_list)
 949     gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
 950   cond_stmt = gimple_build_cond (NE_EXPR,
 951                                  cond, build_int_cst (TREE_TYPE (cond), 0),
 952                                  NULL_TREE, NULL_TREE);
 953   if (cond_expr_stmt_list)
 954     gsi_insert_seq_after (&gsi, cond_expr_stmt_list, GSI_NEW_STMT);
 955
 956   gsi = gsi_last_bb (guard_bb);
 957   gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
 958
 959   /* Add new edge to connect guard block to the merge/loop-exit block.  */
 960   new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);
 961   set_immediate_dominator (CDI_DOMINATORS, exit_bb, dom_bb);
 962   return new_e;
 963 }
 964
 965
 966 /* This function verifies that the following restrictions apply to LOOP:
 967    (1) it is innermost
 968    (2) it consists of exactly 2 basic blocks - header, and an empty latch.
 969    (3) it is single entry, single exit
 970    (4) its exit condition is the last stmt in the header
 971    (5) E is the entry/exit edge of LOOP.
 972  */
 973
 974 bool
 975 slpeel_can_duplicate_loop_p (const struct loop *loop, const_edge e)
 976 {
 977   edge exit_e = single_exit (loop);
 978   edge entry_e = loop_preheader_edge (loop);
 979   gimple orig_cond = get_loop_exit_condition (loop);
 980   gimple_stmt_iterator loop_exit_gsi = gsi_last_bb (exit_e->src);
 981
 982   if (need_ssa_update_p (cfun))
 983     return false;
 984
 985   if (loop->inner
 986       /* All loops have an outer scope; the only case loop->outer is NULL is for
 987          the function itself.  */
 988       || !loop_outer (loop)
 989       || loop->num_nodes != 2
 990       || !empty_block_p (loop->latch)
 991       || !single_exit (loop)
 992       /* Verify that new loop exit condition can be trivially modified.  */
 993       || (!orig_cond || orig_cond != gsi_stmt (loop_exit_gsi))
 994       || (e != exit_e && e != entry_e))
 995     return false;
 996
 997   return true;
 998 }
 999
1000 #ifdef ENABLE_CHECKING
1001 static void
1002 slpeel_verify_cfg_after_peeling (struct loop *first_loop,
1003                                  struct loop *second_loop)
1004 {
1005   basic_block loop1_exit_bb = single_exit (first_loop)->dest;
1006   basic_block loop2_entry_bb = loop_preheader_edge (second_loop)->src;
1007   basic_block loop1_entry_bb = loop_preheader_edge (first_loop)->src;
1008
1009   /* A guard that controls whether the second_loop is to be executed or skipped
1010      is placed in first_loop->exit.  first_loop->exit therefore has two
1011      successors - one is the preheader of second_loop, and the other is a bb
1012      after second_loop.
1013    */
1014   gcc_assert (EDGE_COUNT (loop1_exit_bb->succs) == 2);
1015
1016   /* 1. Verify that one of the successors of first_loop->exit is the preheader
1017         of second_loop.  */
1018
1019   /* The preheader of new_loop is expected to have two predecessors:
1020      first_loop->exit and the block that precedes first_loop.  */
1021
1022   gcc_assert (EDGE_COUNT (loop2_entry_bb->preds) == 2
1023               && ((EDGE_PRED (loop2_entry_bb, 0)->src == loop1_exit_bb
1024                    && EDGE_PRED (loop2_entry_bb, 1)->src == loop1_entry_bb)
1025                || (EDGE_PRED (loop2_entry_bb, 1)->src ==  loop1_exit_bb
1026                    && EDGE_PRED (loop2_entry_bb, 0)->src == loop1_entry_bb)));
1027
1028   /* Verify that the other successor of first_loop->exit is after the
1029      second_loop.  */
1030   /* TODO */
1031 }
1032 #endif
1033
1034 /* If the run time cost model check determines that vectorization is
1035    not profitable and hence scalar loop should be generated then set
1036    FIRST_NITERS to prologue peeled iterations. This will allow all the
1037    iterations to be executed in the prologue peeled scalar loop.  */
1038
1039 static void
1040 set_prologue_iterations (basic_block bb_before_first_loop,
1041                          tree first_niters,
1042                          struct loop *loop,
1043                          unsigned int th)
1044 {
1045   edge e;
1046   basic_block cond_bb, then_bb;
1047   tree var, prologue_after_cost_adjust_name;
1048   gimple_stmt_iterator gsi;
1049   gimple newphi;
1050   edge e_true, e_false, e_fallthru;
1051   gimple cond_stmt;
1052   gimple_seq gimplify_stmt_list = NULL, stmts = NULL;
1053   tree cost_pre_condition = NULL_TREE;
1054   tree scalar_loop_iters =
1055     unshare_expr (LOOP_VINFO_NITERS_UNCHANGED (loop_vec_info_for_loop (loop)));
1056
1057   e = single_pred_edge (bb_before_first_loop);
1058   cond_bb = split_edge(e);
1059
1060   e = single_pred_edge (bb_before_first_loop);
1061   then_bb = split_edge(e);
1062   set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
1063
1064   e_false = make_single_succ_edge (cond_bb, bb_before_first_loop,
1065                                    EDGE_FALSE_VALUE);
1066   set_immediate_dominator (CDI_DOMINATORS, bb_before_first_loop, cond_bb);
1067
1068   e_true = EDGE_PRED (then_bb, 0);
1069   e_true->flags &= ~EDGE_FALLTHRU;
1070   e_true->flags |= EDGE_TRUE_VALUE;
1071
1072   e_fallthru = EDGE_SUCC (then_bb, 0);
1073
1074   cost_pre_condition =
1075     fold_build2 (LE_EXPR, boolean_type_node, scalar_loop_iters,
1076                  build_int_cst (TREE_TYPE (scalar_loop_iters), th));
1077   cost_pre_condition =
1078     force_gimple_operand (cost_pre_condition, &gimplify_stmt_list,
1079                           true, NULL_TREE);
1080   cond_stmt = gimple_build_cond (NE_EXPR, cost_pre_condition,
1081                                  build_int_cst (TREE_TYPE (cost_pre_condition),
1082                                                 0), NULL_TREE, NULL_TREE);
1083
1084   gsi = gsi_last_bb (cond_bb);
1085   if (gimplify_stmt_list)
1086     gsi_insert_seq_after (&gsi, gimplify_stmt_list, GSI_NEW_STMT);
1087
1088   gsi = gsi_last_bb (cond_bb);
1089   gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
1090
1091   var = create_tmp_var (TREE_TYPE (scalar_loop_iters),
1092                         "prologue_after_cost_adjust");
1093   add_referenced_var (var);
1094   prologue_after_cost_adjust_name =
1095     force_gimple_operand (scalar_loop_iters, &stmts, false, var);
1096
1097   gsi = gsi_last_bb (then_bb);
1098   if (stmts)
1099     gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT);
1100
1101   newphi = create_phi_node (var, bb_before_first_loop);
1102   add_phi_arg (newphi, prologue_after_cost_adjust_name, e_fallthru,
1103                UNKNOWN_LOCATION);
1104   add_phi_arg (newphi, first_niters, e_false, UNKNOWN_LOCATION);
1105
1106   first_niters = PHI_RESULT (newphi);
1107 }
1108
1109
1110 /* Remove dead assignments from loop NEW_LOOP.  */
1111
1112 static void
1113 remove_dead_stmts_from_loop (struct loop *new_loop)
1114 {
1115   basic_block *bbs = get_loop_body (new_loop);
1116   unsigned i;
1117   for (i = 0; i < new_loop->num_nodes; ++i)
1118     {
1119       gimple_stmt_iterator gsi;
1120       for (gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi);)
1121         {
1122           gimple stmt = gsi_stmt (gsi);
1123           if (is_gimple_assign (stmt)
1124               && TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME
1125               && has_zero_uses (gimple_assign_lhs (stmt)))
1126             {
1127               gsi_remove (&gsi, true);
1128               release_defs (stmt);
1129             }
1130           else
1131             gsi_next (&gsi);
1132         }
1133     }
1134   free (bbs);
1135 }
1136
1137
1138 /* Function slpeel_tree_peel_loop_to_edge.
1139
1140    Peel the first (last) iterations of LOOP into a new prolog (epilog) loop
1141    that is placed on the entry (exit) edge E of LOOP. After this transformation
1142    we have two loops one after the other - first-loop iterates FIRST_NITERS
1143    times, and second-loop iterates the remainder NITERS - FIRST_NITERS times.
1144    If the cost model indicates that it is profitable to emit a scalar
1145    loop instead of the vector one, then the prolog (epilog) loop will iterate
1146    for the entire unchanged scalar iterations of the loop.
1147
1148    Input:
1149    - LOOP: the loop to be peeled.
1150    - E: the exit or entry edge of LOOP.
1151         If it is the entry edge, we peel the first iterations of LOOP. In this
1152         case first-loop is LOOP, and second-loop is the newly created loop.
1153         If it is the exit edge, we peel the last iterations of LOOP. In this
1154         case, first-loop is the newly created loop, and second-loop is LOOP.
1155    - NITERS: the number of iterations that LOOP iterates.
1156    - FIRST_NITERS: the number of iterations that the first-loop should iterate.
1157    - UPDATE_FIRST_LOOP_COUNT:  specified whether this function is responsible
1158         for updating the loop bound of the first-loop to FIRST_NITERS.  If it
1159         is false, the caller of this function may want to take care of this
1160         (this can be useful if we don't want new stmts added to first-loop).
1161    - TH: cost model profitability threshold of iterations for vectorization.
1162    - CHECK_PROFITABILITY: specify whether cost model check has not occurred
1163                           during versioning and hence needs to occur during
1164                           prologue generation or whether cost model check
1165                           has not occurred during prologue generation and hence
1166                           needs to occur during epilogue generation.
1167
1168
1169    Output:
1170    The function returns a pointer to the new loop-copy, or NULL if it failed
1171    to perform the transformation.
1172
1173    The function generates two if-then-else guards: one before the first loop,
1174    and the other before the second loop:
1175    The first guard is:
1176      if (FIRST_NITERS == 0) then skip the first loop,
1177      and go directly to the second loop.
1178    The second guard is:
1179      if (FIRST_NITERS == NITERS) then skip the second loop.
1180
1181    If the optional COND_EXPR and COND_EXPR_STMT_LIST arguments are given
1182    then the generated condition is combined with COND_EXPR and the
1183    statements in COND_EXPR_STMT_LIST are emitted together with it.
1184
1185    FORNOW only simple loops are supported (see slpeel_can_duplicate_loop_p).
1186    FORNOW the resulting code will not be in loop-closed-ssa form.
1187 */
1188
1189 static struct loop*
1190 slpeel_tree_peel_loop_to_edge (struct loop *loop,
1191                                edge e, tree first_niters,
1192                                tree niters, bool update_first_loop_count,
1193                                unsigned int th, bool check_profitability,
1194                                tree cond_expr, gimple_seq cond_expr_stmt_list)
1195 {
1196   struct loop *new_loop = NULL, *first_loop, *second_loop;
1197   edge skip_e;
1198   tree pre_condition = NULL_TREE;
1199   bitmap definitions;
1200   basic_block bb_before_second_loop, bb_after_second_loop;
1201   basic_block bb_before_first_loop;
1202   basic_block bb_between_loops;
1203   basic_block new_exit_bb;
1204   edge exit_e = single_exit (loop);
1205   LOC loop_loc;
1206   tree cost_pre_condition = NULL_TREE;
1207
1208   if (!slpeel_can_duplicate_loop_p (loop, e))
1209     return NULL;
1210
1211   /* We have to initialize cfg_hooks. Then, when calling
1212    cfg_hooks->split_edge, the function tree_split_edge
1213    is actually called and, when calling cfg_hooks->duplicate_block,
1214    the function tree_duplicate_bb is called.  */
1215   gimple_register_cfg_hooks ();
1216
1217
1218   /* 1. Generate a copy of LOOP and put it on E (E is the entry/exit of LOOP).
1219         Resulting CFG would be:
1220
1221         first_loop:
1222         do {
1223         } while ...
1224
1225         second_loop:
1226         do {
1227         } while ...
1228
1229         orig_exit_bb:
1230    */
1231
1232   if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, e)))
1233     {
1234       loop_loc = find_loop_location (loop);
1235       if (dump_file && (dump_flags & TDF_DETAILS))
1236         {
1237           if (loop_loc != UNKNOWN_LOC)
1238             fprintf (dump_file, "\n%s:%d: note: ",
1239                      LOC_FILE (loop_loc), LOC_LINE (loop_loc));
1240           fprintf (dump_file, "tree_duplicate_loop_to_edge_cfg failed.\n");
1241         }
1242       return NULL;
1243     }
1244
1245   if (MAY_HAVE_DEBUG_STMTS)
1246     {
1247       gcc_assert (!adjust_vec);
1248       adjust_vec = VEC_alloc (adjust_info, stack, 32);
1249     }
1250
1251   if (e == exit_e)
1252     {
1253       /* NEW_LOOP was placed after LOOP.  */
1254       first_loop = loop;
1255       second_loop = new_loop;
1256     }
1257   else
1258     {
1259       /* NEW_LOOP was placed before LOOP.  */
1260       first_loop = new_loop;
1261       second_loop = loop;
1262     }
1263
1264   definitions = ssa_names_to_replace ();
1265   slpeel_update_phis_for_duplicate_loop (loop, new_loop, e == exit_e);
1266   rename_variables_in_loop (new_loop);
1267
1268
1269   /* 2.  Add the guard code in one of the following ways:
1270
1271      2.a Add the guard that controls whether the first loop is executed.
1272          This occurs when this function is invoked for prologue or epilogue
1273          generation and when the cost model check can be done at compile time.
1274
1275          Resulting CFG would be:
1276
1277          bb_before_first_loop:
1278          if (FIRST_NITERS == 0) GOTO bb_before_second_loop
1279                                 GOTO first-loop
1280
1281          first_loop:
1282          do {
1283          } while ...
1284
1285          bb_before_second_loop:
1286
1287          second_loop:
1288          do {
1289          } while ...
1290
1291          orig_exit_bb:
1292
1293      2.b Add the cost model check that allows the prologue
1294          to iterate for the entire unchanged scalar
1295          iterations of the loop in the event that the cost
1296          model indicates that the scalar loop is more
1297          profitable than the vector one. This occurs when
1298          this function is invoked for prologue generation
1299          and the cost model check needs to be done at run
1300          time.
1301
1302          Resulting CFG after prologue peeling would be:
1303
1304          if (scalar_loop_iterations <= th)
1305            FIRST_NITERS = scalar_loop_iterations
1306
1307          bb_before_first_loop:
1308          if (FIRST_NITERS == 0) GOTO bb_before_second_loop
1309                                 GOTO first-loop
1310
1311          first_loop:
1312          do {
1313          } while ...
1314
1315          bb_before_second_loop:
1316
1317          second_loop:
1318          do {
1319          } while ...
1320
1321          orig_exit_bb:
1322
1323      2.c Add the cost model check that allows the epilogue
1324          to iterate for the entire unchanged scalar
1325          iterations of the loop in the event that the cost
1326          model indicates that the scalar loop is more
1327          profitable than the vector one. This occurs when
1328          this function is invoked for epilogue generation
1329          and the cost model check needs to be done at run
1330          time.  This check is combined with any pre-existing
1331          check in COND_EXPR to avoid versioning.
1332
1333          Resulting CFG after prologue peeling would be:
1334
1335          bb_before_first_loop:
1336          if ((scalar_loop_iterations <= th)
1337              ||
1338              FIRST_NITERS == 0) GOTO bb_before_second_loop
1339                                 GOTO first-loop
1340
1341          first_loop:
1342          do {
1343          } while ...
1344
1345          bb_before_second_loop:
1346
1347          second_loop:
1348          do {
1349          } while ...
1350
1351          orig_exit_bb:
1352   */
1353
1354   bb_before_first_loop = split_edge (loop_preheader_edge (first_loop));
1355   bb_before_second_loop = split_edge (single_exit (first_loop));
1356
1357   /* Epilogue peeling.  */
1358   if (!update_first_loop_count)
1359     {
1360       pre_condition =
1361         fold_build2 (LE_EXPR, boolean_type_node, first_niters,
1362                      build_int_cst (TREE_TYPE (first_niters), 0));
1363       if (check_profitability)
1364         {
1365           tree scalar_loop_iters
1366             = unshare_expr (LOOP_VINFO_NITERS_UNCHANGED
1367                                         (loop_vec_info_for_loop (loop)));
1368           cost_pre_condition =
1369             fold_build2 (LE_EXPR, boolean_type_node, scalar_loop_iters,
1370                          build_int_cst (TREE_TYPE (scalar_loop_iters), th));
1371
1372           pre_condition = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
1373                                        cost_pre_condition, pre_condition);
1374         }
1375       if (cond_expr)
1376         {
1377           pre_condition =
1378             fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
1379                          pre_condition,
1380                          fold_build1 (TRUTH_NOT_EXPR, boolean_type_node,
1381                                       cond_expr));
1382         }
1383     }
1384
1385   /* Prologue peeling.  */
1386   else
1387     {
1388       if (check_profitability)
1389         set_prologue_iterations (bb_before_first_loop, first_niters,
1390                                  loop, th);
1391
1392       pre_condition =
1393         fold_build2 (LE_EXPR, boolean_type_node, first_niters,
1394                      build_int_cst (TREE_TYPE (first_niters), 0));
1395     }
1396
1397   skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
1398                                   cond_expr_stmt_list,
1399                                   bb_before_second_loop, bb_before_first_loop);
1400   slpeel_update_phi_nodes_for_guard1 (skip_e, first_loop,
1401                                       first_loop == new_loop,
1402                                       &new_exit_bb, &definitions);
1403
1404
1405   /* 3. Add the guard that controls whether the second loop is executed.
1406         Resulting CFG would be:
1407
1408         bb_before_first_loop:
1409         if (FIRST_NITERS == 0) GOTO bb_before_second_loop (skip first loop)
1410                                GOTO first-loop
1411
1412         first_loop:
1413         do {
1414         } while ...
1415
1416         bb_between_loops:
1417         if (FIRST_NITERS == NITERS) GOTO bb_after_second_loop (skip second loop)
1418                                     GOTO bb_before_second_loop
1419
1420         bb_before_second_loop:
1421
1422         second_loop:
1423         do {
1424         } while ...
1425
1426         bb_after_second_loop:
1427
1428         orig_exit_bb:
1429    */
1430
1431   bb_between_loops = new_exit_bb;
1432   bb_after_second_loop = split_edge (single_exit (second_loop));
1433
1434   pre_condition =
1435         fold_build2 (EQ_EXPR, boolean_type_node, first_niters, niters);
1436   skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition, NULL,
1437                                   bb_after_second_loop, bb_before_first_loop);
1438   slpeel_update_phi_nodes_for_guard2 (skip_e, second_loop,
1439                                      second_loop == new_loop, &new_exit_bb);
1440
1441   /* 4. Make first-loop iterate FIRST_NITERS times, if requested.
1442    */
1443   if (update_first_loop_count)
1444     slpeel_make_loop_iterate_ntimes (first_loop, first_niters);
1445
1446   /* Remove all pattern statements from the loop copy.  They will confuse
1447      the expander if DCE is disabled.
1448      ???  The pattern recognizer should be split into an analysis and
1449      a transformation phase that is then run only on the loop that is
1450      going to be transformed.  */
1451   remove_dead_stmts_from_loop (new_loop);
1452
1453   adjust_vec_debug_stmts ();
1454
1455   BITMAP_FREE (definitions);
1456   delete_update_ssa ();
1457
1458   return new_loop;
1459 }
1460
1461 /* Function vect_get_loop_location.
1462
1463    Extract the location of the loop in the source code.
1464    If the loop is not well formed for vectorization, an estimated
1465    location is calculated.
1466    Return the loop location if succeed and NULL if not.  */
1467
1468 LOC
1469 find_loop_location (struct loop *loop)
1470 {
1471   gimple stmt = NULL;
1472   basic_block bb;
1473   gimple_stmt_iterator si;
1474
1475   if (!loop)
1476     return UNKNOWN_LOC;
1477
1478   stmt = get_loop_exit_condition (loop);
1479
1480   if (stmt && gimple_location (stmt) != UNKNOWN_LOC)
1481     return gimple_location (stmt);
1482
1483   /* If we got here the loop is probably not "well formed",
1484      try to estimate the loop location */
1485
1486   if (!loop->header)
1487     return UNKNOWN_LOC;
1488
1489   bb = loop->header;
1490
1491   for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1492     {
1493       stmt = gsi_stmt (si);
1494       if (gimple_location (stmt) != UNKNOWN_LOC)
1495         return gimple_location (stmt);
1496     }
1497
1498   return UNKNOWN_LOC;
1499 }
1500
1501
1502 /* This function builds ni_name = number of iterations loop executes
1503    on the loop preheader.  If SEQ is given the stmt is instead emitted
1504    there.  */
1505
1506 static tree
1507 vect_build_loop_niters (loop_vec_info loop_vinfo, gimple_seq seq)
1508 {
1509   tree ni_name, var;
1510   gimple_seq stmts = NULL;
1511   edge pe;
1512   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1513   tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
1514
1515   var = create_tmp_var (TREE_TYPE (ni), "niters");
1516   add_referenced_var (var);
1517   ni_name = force_gimple_operand (ni, &stmts, false, var);
1518
1519   pe = loop_preheader_edge (loop);
1520   if (stmts)
1521     {
1522       if (seq)
1523         gimple_seq_add_seq (&seq, stmts);
1524       else
1525         {
1526           basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
1527           gcc_assert (!new_bb);
1528         }
1529     }
1530
1531   return ni_name;
1532 }
1533
1534
1535 /* This function generates the following statements:
1536
1537  ni_name = number of iterations loop executes
1538  ratio = ni_name / vf
1539  ratio_mult_vf_name = ratio * vf
1540
1541  and places them at the loop preheader edge or in COND_EXPR_STMT_LIST
1542  if that is non-NULL.  */
1543
1544 static void
1545 vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
1546                                  tree *ni_name_ptr,
1547                                  tree *ratio_mult_vf_name_ptr,
1548                                  tree *ratio_name_ptr,
1549                                  gimple_seq cond_expr_stmt_list)
1550 {
1551
1552   edge pe;
1553   basic_block new_bb;
1554   gimple_seq stmts;
1555   tree ni_name;
1556   tree var;
1557   tree ratio_name;
1558   tree ratio_mult_vf_name;
1559   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1560   tree ni = LOOP_VINFO_NITERS (loop_vinfo);
1561   int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1562   tree log_vf;
1563
1564   pe = loop_preheader_edge (loop);
1565
1566   /* Generate temporary variable that contains
1567      number of iterations loop executes.  */
1568
1569   ni_name = vect_build_loop_niters (loop_vinfo, cond_expr_stmt_list);
1570   log_vf = build_int_cst (TREE_TYPE (ni), exact_log2 (vf));
1571
1572   /* Create: ratio = ni >> log2(vf) */
1573
1574   ratio_name = fold_build2 (RSHIFT_EXPR, TREE_TYPE (ni_name), ni_name, log_vf);
1575   if (!is_gimple_val (ratio_name))
1576     {
1577       var = create_tmp_var (TREE_TYPE (ni), "bnd");
1578       add_referenced_var (var);
1579
1580       stmts = NULL;
1581       ratio_name = force_gimple_operand (ratio_name, &stmts, true, var);
1582       if (cond_expr_stmt_list)
1583         gimple_seq_add_seq (&cond_expr_stmt_list, stmts);
1584       else
1585         {
1586           pe = loop_preheader_edge (loop);
1587           new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
1588           gcc_assert (!new_bb);
1589         }
1590     }
1591
1592   /* Create: ratio_mult_vf = ratio << log2 (vf).  */
1593
1594   ratio_mult_vf_name = fold_build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name),
1595                                     ratio_name, log_vf);
1596   if (!is_gimple_val (ratio_mult_vf_name))
1597     {
1598       var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
1599       add_referenced_var (var);
1600
1601       stmts = NULL;
1602       ratio_mult_vf_name = force_gimple_operand (ratio_mult_vf_name, &stmts,
1603                                                  true, var);
1604       if (cond_expr_stmt_list)
1605         gimple_seq_add_seq (&cond_expr_stmt_list, stmts);
1606       else
1607         {
1608           pe = loop_preheader_edge (loop);
1609           new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
1610           gcc_assert (!new_bb);
1611         }
1612     }
1613
1614   *ni_name_ptr = ni_name;
1615   *ratio_mult_vf_name_ptr = ratio_mult_vf_name;
1616   *ratio_name_ptr = ratio_name;
1617
1618   return;
1619 }
1620
1621 /* Function vect_can_advance_ivs_p
1622
1623    In case the number of iterations that LOOP iterates is unknown at compile
1624    time, an epilog loop will be generated, and the loop induction variables
1625    (IVs) will be "advanced" to the value they are supposed to take just before
1626    the epilog loop.  Here we check that the access function of the loop IVs
1627    and the expression that represents the loop bound are simple enough.
1628    These restrictions will be relaxed in the future.  */
1629
1630 bool
1631 vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
1632 {
1633   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1634   basic_block bb = loop->header;
1635   gimple phi;
1636   gimple_stmt_iterator gsi;
1637
1638   /* Analyze phi functions of the loop header.  */
1639
1640   if (vect_print_dump_info (REPORT_DETAILS))
1641     fprintf (vect_dump, "vect_can_advance_ivs_p:");
1642
1643   for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1644     {
1645       tree access_fn = NULL;
1646       tree evolution_part;
1647
1648       phi = gsi_stmt (gsi);
1649       if (vect_print_dump_info (REPORT_DETAILS))
1650         {
1651           fprintf (vect_dump, "Analyze phi: ");
1652           print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
1653         }
1654
1655       /* Skip virtual phi's. The data dependences that are associated with
1656          virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */
1657
1658       if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
1659         {
1660           if (vect_print_dump_info (REPORT_DETAILS))
1661             fprintf (vect_dump, "virtual phi. skip.");
1662           continue;
1663         }
1664
1665       /* Skip reduction phis.  */
1666
1667       if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
1668         {
1669           if (vect_print_dump_info (REPORT_DETAILS))
1670             fprintf (vect_dump, "reduc phi. skip.");
1671           continue;
1672         }
1673
1674       /* Analyze the evolution function.  */
1675
1676       access_fn = instantiate_parameters
1677         (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
1678
1679       if (!access_fn)
1680         {
1681           if (vect_print_dump_info (REPORT_DETAILS))
1682             fprintf (vect_dump, "No Access function.");
1683           return false;
1684         }
1685
1686       if (vect_print_dump_info (REPORT_DETAILS))
1687         {
1688           fprintf (vect_dump, "Access function of PHI: ");
1689           print_generic_expr (vect_dump, access_fn, TDF_SLIM);
1690         }
1691
1692       evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
1693
1694       if (evolution_part == NULL_TREE)
1695         {
1696           if (vect_print_dump_info (REPORT_DETAILS))
1697             fprintf (vect_dump, "No evolution.");
1698           return false;
1699         }
1700
1701       /* FORNOW: We do not transform initial conditions of IVs
1702          which evolution functions are a polynomial of degree >= 2.  */
1703
1704       if (tree_is_chrec (evolution_part))
1705         return false;
1706     }
1707
1708   return true;
1709 }
1710
1711
1712 /*   Function vect_update_ivs_after_vectorizer.
1713
1714      "Advance" the induction variables of LOOP to the value they should take
1715      after the execution of LOOP.  This is currently necessary because the
1716      vectorizer does not handle induction variables that are used after the
1717      loop.  Such a situation occurs when the last iterations of LOOP are
1718      peeled, because:
1719      1. We introduced new uses after LOOP for IVs that were not originally used
1720         after LOOP: the IVs of LOOP are now used by an epilog loop.
1721      2. LOOP is going to be vectorized; this means that it will iterate N/VF
1722         times, whereas the loop IVs should be bumped N times.
1723
1724      Input:
1725      - LOOP - a loop that is going to be vectorized. The last few iterations
1726               of LOOP were peeled.
1727      - NITERS - the number of iterations that LOOP executes (before it is
1728                 vectorized). i.e, the number of times the ivs should be bumped.
1729      - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
1730                   coming out from LOOP on which there are uses of the LOOP ivs
1731                   (this is the path from LOOP->exit to epilog_loop->preheader).
1732
1733                   The new definitions of the ivs are placed in LOOP->exit.
1734                   The phi args associated with the edge UPDATE_E in the bb
1735                   UPDATE_E->dest are updated accordingly.
1736
1737      Assumption 1: Like the rest of the vectorizer, this function assumes
1738      a single loop exit that has a single predecessor.
1739
1740      Assumption 2: The phi nodes in the LOOP header and in update_bb are
1741      organized in the same order.
1742
1743      Assumption 3: The access function of the ivs is simple enough (see
1744      vect_can_advance_ivs_p).  This assumption will be relaxed in the future.
1745
1746      Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
1747      coming out of LOOP on which the ivs of LOOP are used (this is the path
1748      that leads to the epilog loop; other paths skip the epilog loop).  This
1749      path starts with the edge UPDATE_E, and its destination (denoted update_bb)
1750      needs to have its phis updated.
1751  */
1752
1753 static void
1754 vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters,
1755                                   edge update_e)
1756 {
1757   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1758   basic_block exit_bb = single_exit (loop)->dest;
1759   gimple phi, phi1;
1760   gimple_stmt_iterator gsi, gsi1;
1761   basic_block update_bb = update_e->dest;
1762
1763   /* gcc_assert (vect_can_advance_ivs_p (loop_vinfo)); */
1764
1765   /* Make sure there exists a single-predecessor exit bb:  */
1766   gcc_assert (single_pred_p (exit_bb));
1767
1768   for (gsi = gsi_start_phis (loop->header), gsi1 = gsi_start_phis (update_bb);
1769        !gsi_end_p (gsi) && !gsi_end_p (gsi1);
1770        gsi_next (&gsi), gsi_next (&gsi1))
1771     {
1772       tree access_fn = NULL;
1773       tree evolution_part;
1774       tree init_expr;
1775       tree step_expr, off;
1776       tree type;
1777       tree var, ni, ni_name;
1778       gimple_stmt_iterator last_gsi;
1779
1780       phi = gsi_stmt (gsi);
1781       phi1 = gsi_stmt (gsi1);
1782       if (vect_print_dump_info (REPORT_DETAILS))
1783         {
1784           fprintf (vect_dump, "vect_update_ivs_after_vectorizer: phi: ");
1785           print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
1786         }
1787
1788       /* Skip virtual phi's.  */
1789       if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
1790         {
1791           if (vect_print_dump_info (REPORT_DETAILS))
1792             fprintf (vect_dump, "virtual phi. skip.");
1793           continue;
1794         }
1795
1796       /* Skip reduction phis.  */
1797       if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
1798         {
1799           if (vect_print_dump_info (REPORT_DETAILS))
1800             fprintf (vect_dump, "reduc phi. skip.");
1801           continue;
1802         }
1803
1804       access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi));
1805       gcc_assert (access_fn);
1806       /* We can end up with an access_fn like
1807            (short int) {(short unsigned int) i_49, +, 1}_1
1808          for further analysis we need to strip the outer cast but we
1809          need to preserve the original type.  */
1810       type = TREE_TYPE (access_fn);
1811       STRIP_NOPS (access_fn);
1812       evolution_part =
1813          unshare_expr (evolution_part_in_loop_num (access_fn, loop->num));
1814       gcc_assert (evolution_part != NULL_TREE);
1815
1816       /* FORNOW: We do not support IVs whose evolution function is a polynomial
1817          of degree >= 2 or exponential.  */
1818       gcc_assert (!tree_is_chrec (evolution_part));
1819
1820       step_expr = evolution_part;
1821       init_expr = unshare_expr (initial_condition_in_loop_num (access_fn,
1822                                                                loop->num));
1823       init_expr = fold_convert (type, init_expr);
1824
1825       off = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
1826                          fold_convert (TREE_TYPE (step_expr), niters),
1827                          step_expr);
1828       if (POINTER_TYPE_P (TREE_TYPE (init_expr)))
1829         ni = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (init_expr),
1830                           init_expr,
1831                           fold_convert (sizetype, off));
1832       else
1833         ni = fold_build2 (PLUS_EXPR, TREE_TYPE (init_expr),
1834                           init_expr,
1835                           fold_convert (TREE_TYPE (init_expr), off));
1836
1837       var = create_tmp_var (TREE_TYPE (init_expr), "tmp");
1838       add_referenced_var (var);
1839
1840       last_gsi = gsi_last_bb (exit_bb);
1841       ni_name = force_gimple_operand_gsi (&last_gsi, ni, false, var,
1842                                           true, GSI_SAME_STMT);
1843
1844       /* Fix phi expressions in the successor bb.  */
1845       adjust_phi_and_debug_stmts (phi1, update_e, ni_name);
1846     }
1847 }
1848
1849 /* Return the more conservative threshold between the
1850    min_profitable_iters returned by the cost model and the user
1851    specified threshold, if provided.  */
1852
1853 static unsigned int
1854 conservative_cost_threshold (loop_vec_info loop_vinfo,
1855                              int min_profitable_iters)
1856 {
1857   unsigned int th;
1858   int min_scalar_loop_bound;
1859
1860   min_scalar_loop_bound = ((PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)
1861                             * LOOP_VINFO_VECT_FACTOR (loop_vinfo)) - 1);
1862
1863   /* Use the cost model only if it is more conservative than user specified
1864      threshold.  */
1865   th = (unsigned) min_scalar_loop_bound;
1866   if (min_profitable_iters
1867       && (!min_scalar_loop_bound
1868           || min_profitable_iters > min_scalar_loop_bound))
1869     th = (unsigned) min_profitable_iters;
1870
1871   if (th && vect_print_dump_info (REPORT_COST))
1872     fprintf (vect_dump, "Profitability threshold is %u loop iterations.", th);
1873
1874   return th;
1875 }
1876
1877 /* Function vect_do_peeling_for_loop_bound
1878
1879    Peel the last iterations of the loop represented by LOOP_VINFO.
1880    The peeled iterations form a new epilog loop.  Given that the loop now
1881    iterates NITERS times, the new epilog loop iterates
1882    NITERS % VECTORIZATION_FACTOR times.
1883
1884    The original loop will later be made to iterate
1885    NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO).
1886
1887    COND_EXPR and COND_EXPR_STMT_LIST are combined with a new generated
1888    test.  */
1889
1890 void
1891 vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio,
1892                                 tree cond_expr, gimple_seq cond_expr_stmt_list)
1893 {
1894   tree ni_name, ratio_mult_vf_name;
1895   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1896   struct loop *new_loop;
1897   edge update_e;
1898   basic_block preheader;
1899   int loop_num;
1900   bool check_profitability = false;
1901   unsigned int th = 0;
1902   int min_profitable_iters;
1903
1904   if (vect_print_dump_info (REPORT_DETAILS))
1905     fprintf (vect_dump, "=== vect_do_peeling_for_loop_bound ===");
1906
1907   initialize_original_copy_tables ();
1908
1909   /* Generate the following variables on the preheader of original loop:
1910
1911      ni_name = number of iteration the original loop executes
1912      ratio = ni_name / vf
1913      ratio_mult_vf_name = ratio * vf  */
1914   vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
1915                                    &ratio_mult_vf_name, ratio,
1916                                    cond_expr_stmt_list);
1917
1918   loop_num  = loop->num;
1919
1920   /* If cost model check not done during versioning and
1921      peeling for alignment.  */
1922   if (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
1923       && !LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo)
1924       && !LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo)
1925       && !cond_expr)
1926     {
1927       check_profitability = true;
1928
1929       /* Get profitability threshold for vectorized loop.  */
1930       min_profitable_iters = LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo);
1931
1932       th = conservative_cost_threshold (loop_vinfo,
1933                                         min_profitable_iters);
1934     }
1935
1936   new_loop = slpeel_tree_peel_loop_to_edge (loop, single_exit (loop),
1937                                             ratio_mult_vf_name, ni_name, false,
1938                                             th, check_profitability,
1939                                             cond_expr, cond_expr_stmt_list);
1940   gcc_assert (new_loop);
1941   gcc_assert (loop_num == loop->num);
1942 #ifdef ENABLE_CHECKING
1943   slpeel_verify_cfg_after_peeling (loop, new_loop);
1944 #endif
1945
1946   /* A guard that controls whether the new_loop is to be executed or skipped
1947      is placed in LOOP->exit.  LOOP->exit therefore has two successors - one
1948      is the preheader of NEW_LOOP, where the IVs from LOOP are used.  The other
1949      is a bb after NEW_LOOP, where these IVs are not used.  Find the edge that
1950      is on the path where the LOOP IVs are used and need to be updated.  */
1951
1952   preheader = loop_preheader_edge (new_loop)->src;
1953   if (EDGE_PRED (preheader, 0)->src == single_exit (loop)->dest)
1954     update_e = EDGE_PRED (preheader, 0);
1955   else
1956     update_e = EDGE_PRED (preheader, 1);
1957
1958   /* Update IVs of original loop as if they were advanced
1959      by ratio_mult_vf_name steps.  */
1960   vect_update_ivs_after_vectorizer (loop_vinfo, ratio_mult_vf_name, update_e);
1961
1962   /* After peeling we have to reset scalar evolution analyzer.  */
1963   scev_reset ();
1964
1965   free_original_copy_tables ();
1966 }
1967
1968
1969 /* Function vect_gen_niters_for_prolog_loop
1970
1971    Set the number of iterations for the loop represented by LOOP_VINFO
1972    to the minimum between LOOP_NITERS (the original iteration count of the loop)
1973    and the misalignment of DR - the data reference recorded in
1974    LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO).  As a result, after the execution of
1975    this loop, the data reference DR will refer to an aligned location.
1976
1977    The following computation is generated:
1978
1979    If the misalignment of DR is known at compile time:
1980      addr_mis = int mis = DR_MISALIGNMENT (dr);
1981    Else, compute address misalignment in bytes:
1982      addr_mis = addr & (vectype_size - 1)
1983
1984    prolog_niters = min (LOOP_NITERS, ((VF - addr_mis/elem_size)&(VF-1))/step)
1985
1986    (elem_size = element type size; an element is the scalar element whose type
1987    is the inner type of the vectype)
1988
1989    When the step of the data-ref in the loop is not 1 (as in interleaved data
1990    and SLP), the number of iterations of the prolog must be divided by the step
1991    (which is equal to the size of interleaved group).
1992
1993    The above formulas assume that VF == number of elements in the vector. This
1994    may not hold when there are multiple-types in the loop.
1995    In this case, for some data-references in the loop the VF does not represent
1996    the number of elements that fit in the vector.  Therefore, instead of VF we
1997    use TYPE_VECTOR_SUBPARTS.  */
1998
1999 static tree
2000 vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters,
2001                                  tree *wide_prolog_niters)
2002 {
2003   struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
2004   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2005   tree var;
2006   gimple_seq stmts;
2007   tree iters, iters_name;
2008   edge pe;
2009   basic_block new_bb;
2010   gimple dr_stmt = DR_STMT (dr);
2011   stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
2012   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2013   int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
2014   tree niters_type = TREE_TYPE (loop_niters);
2015   int nelements = TYPE_VECTOR_SUBPARTS (vectype);
2016
2017   pe = loop_preheader_edge (loop);
2018
2019   if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
2020     {
2021       int npeel = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
2022
2023       if (vect_print_dump_info (REPORT_DETAILS))
2024         fprintf (vect_dump, "known peeling = %d.", npeel);
2025
2026       iters = build_int_cst (niters_type, npeel);
2027     }
2028   else
2029     {
2030       gimple_seq new_stmts = NULL;
2031       bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
2032       tree offset = negative
2033           ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : NULL_TREE;
2034       tree start_addr = vect_create_addr_base_for_vector_ref (dr_stmt,
2035                                                 &new_stmts, offset, loop);
2036       tree ptr_type = TREE_TYPE (start_addr);
2037       tree size = TYPE_SIZE (ptr_type);
2038       tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
2039       tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1);
2040       tree elem_size_log =
2041         build_int_cst (type, exact_log2 (vectype_align/nelements));
2042       tree nelements_minus_1 = build_int_cst (type, nelements - 1);
2043       tree nelements_tree = build_int_cst (type, nelements);
2044       tree byte_misalign;
2045       tree elem_misalign;
2046
2047       new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmts);
2048       gcc_assert (!new_bb);
2049
2050       /* Create:  byte_misalign = addr & (vectype_size - 1)  */
2051       byte_misalign =
2052         fold_build2 (BIT_AND_EXPR, type, fold_convert (type, start_addr),
2053                      vectype_size_minus_1);
2054
2055       /* Create:  elem_misalign = byte_misalign / element_size  */
2056       elem_misalign =
2057         fold_build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log);
2058
2059       /* Create:  (niters_type) (nelements - elem_misalign)&(nelements - 1)  */
2060       if (negative)
2061         iters = fold_build2 (MINUS_EXPR, type, elem_misalign, nelements_tree);
2062       else
2063         iters = fold_build2 (MINUS_EXPR, type, nelements_tree, elem_misalign);
2064       iters = fold_build2 (BIT_AND_EXPR, type, iters, nelements_minus_1);
2065       iters = fold_convert (niters_type, iters);
2066     }
2067
2068   /* Create:  prolog_loop_niters = min (iters, loop_niters) */
2069   /* If the loop bound is known at compile time we already verified that it is
2070      greater than vf; since the misalignment ('iters') is at most vf, there's
2071      no need to generate the MIN_EXPR in this case.  */
2072   if (TREE_CODE (loop_niters) != INTEGER_CST)
2073     iters = fold_build2 (MIN_EXPR, niters_type, iters, loop_niters);
2074
2075   if (vect_print_dump_info (REPORT_DETAILS))
2076     {
2077       fprintf (vect_dump, "niters for prolog loop: ");
2078       print_generic_expr (vect_dump, iters, TDF_SLIM);
2079     }
2080
2081   var = create_tmp_var (niters_type, "prolog_loop_niters");
2082   add_referenced_var (var);
2083   stmts = NULL;
2084   iters_name = force_gimple_operand (iters, &stmts, false, var);
2085   if (types_compatible_p (sizetype, niters_type))
2086     *wide_prolog_niters = iters_name;
2087   else
2088     {
2089       gimple_seq seq = NULL;
2090       tree wide_iters = fold_convert (sizetype, iters);
2091       var = create_tmp_var (sizetype, "prolog_loop_niters");
2092       add_referenced_var (var);
2093       *wide_prolog_niters = force_gimple_operand (wide_iters, &seq, false,
2094                                                   var);
2095       if (seq)
2096         gimple_seq_add_seq (&stmts, seq);
2097     }
2098
2099   /* Insert stmt on loop preheader edge.  */
2100   if (stmts)
2101     {
2102       basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
2103       gcc_assert (!new_bb);
2104     }
2105
2106   return iters_name;
2107 }
2108
2109
2110 /* Function vect_update_init_of_dr
2111
2112    NITERS iterations were peeled from LOOP.  DR represents a data reference
2113    in LOOP.  This function updates the information recorded in DR to
2114    account for the fact that the first NITERS iterations had already been
2115    executed.  Specifically, it updates the OFFSET field of DR.  */
2116
2117 static void
2118 vect_update_init_of_dr (struct data_reference *dr, tree niters)
2119 {
2120   tree offset = DR_OFFSET (dr);
2121
2122   niters = fold_build2 (MULT_EXPR, sizetype,
2123                         fold_convert (sizetype, niters),
2124                         fold_convert (sizetype, DR_STEP (dr)));
2125   offset = fold_build2 (PLUS_EXPR, sizetype,
2126                         fold_convert (sizetype, offset), niters);
2127   DR_OFFSET (dr) = offset;
2128 }
2129
2130
2131 /* Function vect_update_inits_of_drs
2132
2133    NITERS iterations were peeled from the loop represented by LOOP_VINFO.
2134    This function updates the information recorded for the data references in
2135    the loop to account for the fact that the first NITERS iterations had
2136    already been executed.  Specifically, it updates the initial_condition of
2137    the access_function of all the data_references in the loop.  */
2138
2139 static void
2140 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
2141 {
2142   unsigned int i;
2143   VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2144   struct data_reference *dr;
2145
2146   if (vect_print_dump_info (REPORT_DETAILS))
2147     fprintf (vect_dump, "=== vect_update_inits_of_dr ===");
2148
2149   FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
2150     vect_update_init_of_dr (dr, niters);
2151 }
2152
2153
2154 /* Function vect_do_peeling_for_alignment
2155
2156    Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
2157    'niters' is set to the misalignment of one of the data references in the
2158    loop, thereby forcing it to refer to an aligned location at the beginning
2159    of the execution of this loop.  The data reference for which we are
2160    peeling is recorded in LOOP_VINFO_UNALIGNED_DR.  */
2161
2162 void
2163 vect_do_peeling_for_alignment (loop_vec_info loop_vinfo)
2164 {
2165   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2166   tree niters_of_prolog_loop, ni_name;
2167   tree n_iters;
2168   tree wide_prolog_niters;
2169   struct loop *new_loop;
2170   unsigned int th = 0;
2171   int min_profitable_iters;
2172
2173   if (vect_print_dump_info (REPORT_DETAILS))
2174     fprintf (vect_dump, "=== vect_do_peeling_for_alignment ===");
2175
2176   initialize_original_copy_tables ();
2177
2178   ni_name = vect_build_loop_niters (loop_vinfo, NULL);
2179   niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name,
2180                                                            &wide_prolog_niters);
2181
2182
2183   /* Get profitability threshold for vectorized loop.  */
2184   min_profitable_iters = LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo);
2185   th = conservative_cost_threshold (loop_vinfo,
2186                                     min_profitable_iters);
2187
2188   /* Peel the prolog loop and iterate it niters_of_prolog_loop.  */
2189   new_loop =
2190     slpeel_tree_peel_loop_to_edge (loop, loop_preheader_edge (loop),
2191                                    niters_of_prolog_loop, ni_name, true,
2192                                    th, true, NULL_TREE, NULL);
2193
2194   gcc_assert (new_loop);
2195 #ifdef ENABLE_CHECKING
2196   slpeel_verify_cfg_after_peeling (new_loop, loop);
2197 #endif
2198
2199   /* Update number of times loop executes.  */
2200   n_iters = LOOP_VINFO_NITERS (loop_vinfo);
2201   LOOP_VINFO_NITERS (loop_vinfo) = fold_build2 (MINUS_EXPR,
2202                 TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop);
2203
2204   /* Update the init conditions of the access functions of all data refs.  */
2205   vect_update_inits_of_drs (loop_vinfo, wide_prolog_niters);
2206
2207   /* After peeling we have to reset scalar evolution analyzer.  */
2208   scev_reset ();
2209
2210   free_original_copy_tables ();
2211 }
2212
2213
2214 /* Function vect_create_cond_for_align_checks.
2215
2216    Create a conditional expression that represents the alignment checks for
2217    all of data references (array element references) whose alignment must be
2218    checked at runtime.
2219
2220    Input:
2221    COND_EXPR  - input conditional expression.  New conditions will be chained
2222                 with logical AND operation.
2223    LOOP_VINFO - two fields of the loop information are used.
2224                 LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
2225                 LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
2226
2227    Output:
2228    COND_EXPR_STMT_LIST - statements needed to construct the conditional
2229                          expression.
2230    The returned value is the conditional expression to be used in the if
2231    statement that controls which version of the loop gets executed at runtime.
2232
2233    The algorithm makes two assumptions:
2234      1) The number of bytes "n" in a vector is a power of 2.
2235      2) An address "a" is aligned if a%n is zero and that this
2236         test can be done as a&(n-1) == 0.  For example, for 16
2237         byte vectors the test is a&0xf == 0.  */
2238
2239 static void
2240 vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
2241                                    tree *cond_expr,
2242                                    gimple_seq *cond_expr_stmt_list)
2243 {
2244   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2245   VEC(gimple,heap) *may_misalign_stmts
2246     = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
2247   gimple ref_stmt;
2248   int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
2249   tree mask_cst;
2250   unsigned int i;
2251   tree psize;
2252   tree int_ptrsize_type;
2253   char tmp_name[20];
2254   tree or_tmp_name = NULL_TREE;
2255   tree and_tmp, and_tmp_name;
2256   gimple and_stmt;
2257   tree ptrsize_zero;
2258   tree part_cond_expr;
2259
2260   /* Check that mask is one less than a power of 2, i.e., mask is
2261      all zeros followed by all ones.  */
2262   gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));
2263
2264   /* CHECKME: what is the best integer or unsigned type to use to hold a
2265      cast from a pointer value?  */
2266   psize = TYPE_SIZE (ptr_type_node);
2267   int_ptrsize_type
2268     = lang_hooks.types.type_for_size (tree_low_cst (psize, 1), 0);
2269
2270   /* Create expression (mask & (dr_1 || ... || dr_n)) where dr_i is the address
2271      of the first vector of the i'th data reference. */
2272
2273   FOR_EACH_VEC_ELT (gimple, may_misalign_stmts, i, ref_stmt)
2274     {
2275       gimple_seq new_stmt_list = NULL;
2276       tree addr_base;
2277       tree addr_tmp, addr_tmp_name;
2278       tree or_tmp, new_or_tmp_name;
2279       gimple addr_stmt, or_stmt;
2280       stmt_vec_info stmt_vinfo = vinfo_for_stmt (ref_stmt);
2281       tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2282       bool negative = tree_int_cst_compare
2283         (DR_STEP (STMT_VINFO_DATA_REF (stmt_vinfo)), size_zero_node) < 0;
2284       tree offset = negative
2285         ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : NULL_TREE;
2286
2287       /* create: addr_tmp = (int)(address_of_first_vector) */
2288       addr_base =
2289         vect_create_addr_base_for_vector_ref (ref_stmt, &new_stmt_list,
2290                                               offset, loop);
2291       if (new_stmt_list != NULL)
2292         gimple_seq_add_seq (cond_expr_stmt_list, new_stmt_list);
2293
2294       sprintf (tmp_name, "%s%d", "addr2int", i);
2295       addr_tmp = create_tmp_var (int_ptrsize_type, tmp_name);
2296       add_referenced_var (addr_tmp);
2297       addr_tmp_name = make_ssa_name (addr_tmp, NULL);
2298       addr_stmt = gimple_build_assign_with_ops (NOP_EXPR, addr_tmp_name,
2299                                                 addr_base, NULL_TREE);
2300       SSA_NAME_DEF_STMT (addr_tmp_name) = addr_stmt;
2301       gimple_seq_add_stmt (cond_expr_stmt_list, addr_stmt);
2302
2303       /* The addresses are OR together.  */
2304
2305       if (or_tmp_name != NULL_TREE)
2306         {
2307           /* create: or_tmp = or_tmp | addr_tmp */
2308           sprintf (tmp_name, "%s%d", "orptrs", i);
2309           or_tmp = create_tmp_var (int_ptrsize_type, tmp_name);
2310           add_referenced_var (or_tmp);
2311           new_or_tmp_name = make_ssa_name (or_tmp, NULL);
2312           or_stmt = gimple_build_assign_with_ops (BIT_IOR_EXPR,
2313                                                   new_or_tmp_name,
2314                                                   or_tmp_name, addr_tmp_name);
2315           SSA_NAME_DEF_STMT (new_or_tmp_name) = or_stmt;
2316           gimple_seq_add_stmt (cond_expr_stmt_list, or_stmt);
2317           or_tmp_name = new_or_tmp_name;
2318         }
2319       else
2320         or_tmp_name = addr_tmp_name;
2321
2322     } /* end for i */
2323
2324   mask_cst = build_int_cst (int_ptrsize_type, mask);
2325
2326   /* create: and_tmp = or_tmp & mask  */
2327   and_tmp = create_tmp_var (int_ptrsize_type, "andmask" );
2328   add_referenced_var (and_tmp);
2329   and_tmp_name = make_ssa_name (and_tmp, NULL);
2330
2331   and_stmt = gimple_build_assign_with_ops (BIT_AND_EXPR, and_tmp_name,
2332                                            or_tmp_name, mask_cst);
2333   SSA_NAME_DEF_STMT (and_tmp_name) = and_stmt;
2334   gimple_seq_add_stmt (cond_expr_stmt_list, and_stmt);
2335
2336   /* Make and_tmp the left operand of the conditional test against zero.
2337      if and_tmp has a nonzero bit then some address is unaligned.  */
2338   ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
2339   part_cond_expr = fold_build2 (EQ_EXPR, boolean_type_node,
2340                                 and_tmp_name, ptrsize_zero);
2341   if (*cond_expr)
2342     *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2343                               *cond_expr, part_cond_expr);
2344   else
2345     *cond_expr = part_cond_expr;
2346 }
2347
2348
2349 /* Function vect_vfa_segment_size.
2350
2351    Create an expression that computes the size of segment
2352    that will be accessed for a data reference.  The functions takes into
2353    account that realignment loads may access one more vector.
2354
2355    Input:
2356      DR: The data reference.
2357      VECT_FACTOR: vectorization factor.
2358
2359    Return an expression whose value is the size of segment which will be
2360    accessed by DR.  */
2361
2362 static tree
2363 vect_vfa_segment_size (struct data_reference *dr, tree vect_factor)
2364 {
2365   tree segment_length = fold_build2 (MULT_EXPR, integer_type_node,
2366                                      DR_STEP (dr), vect_factor);
2367
2368   if (vect_supportable_dr_alignment (dr, false)
2369         == dr_explicit_realign_optimized)
2370     {
2371       tree vector_size = TYPE_SIZE_UNIT
2372                           (STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr))));
2373
2374       segment_length = fold_build2 (PLUS_EXPR, integer_type_node,
2375                                     segment_length, vector_size);
2376     }
2377   return fold_convert (sizetype, segment_length);
2378 }
2379
2380
2381 /* Function vect_create_cond_for_alias_checks.
2382
2383    Create a conditional expression that represents the run-time checks for
2384    overlapping of address ranges represented by a list of data references
2385    relations passed as input.
2386
2387    Input:
2388    COND_EXPR  - input conditional expression.  New conditions will be chained
2389                 with logical AND operation.
2390    LOOP_VINFO - field LOOP_VINFO_MAY_ALIAS_STMTS contains the list of ddrs
2391                 to be checked.
2392
2393    Output:
2394    COND_EXPR - conditional expression.
2395    COND_EXPR_STMT_LIST - statements needed to construct the conditional
2396                          expression.
2397
2398
2399    The returned value is the conditional expression to be used in the if
2400    statement that controls which version of the loop gets executed at runtime.
2401 */
2402
2403 static void
2404 vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo,
2405                                    tree * cond_expr,
2406                                    gimple_seq * cond_expr_stmt_list)
2407 {
2408   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2409   VEC (ddr_p, heap) * may_alias_ddrs =
2410     LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
2411   tree vect_factor =
2412     build_int_cst (integer_type_node, LOOP_VINFO_VECT_FACTOR (loop_vinfo));
2413
2414   ddr_p ddr;
2415   unsigned int i;
2416   tree part_cond_expr;
2417
2418   /* Create expression
2419      ((store_ptr_0 + store_segment_length_0) < load_ptr_0)
2420      || (load_ptr_0 + load_segment_length_0) < store_ptr_0))
2421      &&
2422      ...
2423      &&
2424      ((store_ptr_n + store_segment_length_n) < load_ptr_n)
2425      || (load_ptr_n + load_segment_length_n) < store_ptr_n))  */
2426
2427   if (VEC_empty (ddr_p, may_alias_ddrs))
2428     return;
2429
2430   FOR_EACH_VEC_ELT (ddr_p, may_alias_ddrs, i, ddr)
2431     {
2432       struct data_reference *dr_a, *dr_b;
2433       gimple dr_group_first_a, dr_group_first_b;
2434       tree addr_base_a, addr_base_b;
2435       tree segment_length_a, segment_length_b;
2436       gimple stmt_a, stmt_b;
2437       tree seg_a_min, seg_a_max, seg_b_min, seg_b_max;
2438
2439       dr_a = DDR_A (ddr);
2440       stmt_a = DR_STMT (DDR_A (ddr));
2441       dr_group_first_a = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_a));
2442       if (dr_group_first_a)
2443         {
2444           stmt_a = dr_group_first_a;
2445           dr_a = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt_a));
2446         }
2447
2448       dr_b = DDR_B (ddr);
2449       stmt_b = DR_STMT (DDR_B (ddr));
2450       dr_group_first_b = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_b));
2451       if (dr_group_first_b)
2452         {
2453           stmt_b = dr_group_first_b;
2454           dr_b = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt_b));
2455         }
2456
2457       addr_base_a =
2458         vect_create_addr_base_for_vector_ref (stmt_a, cond_expr_stmt_list,
2459                                               NULL_TREE, loop);
2460       addr_base_b =
2461         vect_create_addr_base_for_vector_ref (stmt_b, cond_expr_stmt_list,
2462                                               NULL_TREE, loop);
2463
2464       segment_length_a = vect_vfa_segment_size (dr_a, vect_factor);
2465       segment_length_b = vect_vfa_segment_size (dr_b, vect_factor);
2466
2467       if (vect_print_dump_info (REPORT_DR_DETAILS))
2468         {
2469           fprintf (vect_dump,
2470                    "create runtime check for data references ");
2471           print_generic_expr (vect_dump, DR_REF (dr_a), TDF_SLIM);
2472           fprintf (vect_dump, " and ");
2473           print_generic_expr (vect_dump, DR_REF (dr_b), TDF_SLIM);
2474         }
2475
2476       seg_a_min = addr_base_a;
2477       seg_a_max = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (addr_base_a),
2478                                addr_base_a, segment_length_a);
2479       if (tree_int_cst_compare (DR_STEP (dr_a), size_zero_node) < 0)
2480         seg_a_min = seg_a_max, seg_a_max = addr_base_a;
2481
2482       seg_b_min = addr_base_b;
2483       seg_b_max = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (addr_base_b),
2484                                addr_base_b, segment_length_b);
2485       if (tree_int_cst_compare (DR_STEP (dr_b), size_zero_node) < 0)
2486         seg_b_min = seg_b_max, seg_b_max = addr_base_b;
2487
2488       part_cond_expr =
2489         fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
2490           fold_build2 (LT_EXPR, boolean_type_node, seg_a_max, seg_b_min),
2491           fold_build2 (LT_EXPR, boolean_type_node, seg_b_max, seg_a_min));
2492
2493       if (*cond_expr)
2494         *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2495                                   *cond_expr, part_cond_expr);
2496       else
2497         *cond_expr = part_cond_expr;
2498     }
2499
2500   if (vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS))
2501     fprintf (vect_dump, "created %u versioning for alias checks.\n",
2502              VEC_length (ddr_p, may_alias_ddrs));
2503 }
2504
2505
2506 /* Function vect_loop_versioning.
2507
2508    If the loop has data references that may or may not be aligned or/and
2509    has data reference relations whose independence was not proven then
2510    two versions of the loop need to be generated, one which is vectorized
2511    and one which isn't.  A test is then generated to control which of the
2512    loops is executed.  The test checks for the alignment of all of the
2513    data references that may or may not be aligned.  An additional
2514    sequence of runtime tests is generated for each pairs of DDRs whose
2515    independence was not proven.  The vectorized version of loop is
2516    executed only if both alias and alignment tests are passed.
2517
2518    The test generated to check which version of loop is executed
2519    is modified to also check for profitability as indicated by the
2520    cost model initially.
2521
2522    The versioning precondition(s) are placed in *COND_EXPR and
2523    *COND_EXPR_STMT_LIST.  If DO_VERSIONING is true versioning is
2524    also performed, otherwise only the conditions are generated.  */
2525
2526 void
2527 vect_loop_versioning (loop_vec_info loop_vinfo, bool do_versioning,
2528                       tree *cond_expr, gimple_seq *cond_expr_stmt_list)
2529 {
2530   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2531   basic_block condition_bb;
2532   gimple_stmt_iterator gsi, cond_exp_gsi;
2533   basic_block merge_bb;
2534   basic_block new_exit_bb;
2535   edge new_exit_e, e;
2536   gimple orig_phi, new_phi;
2537   tree arg;
2538   unsigned prob = 4 * REG_BR_PROB_BASE / 5;
2539   gimple_seq gimplify_stmt_list = NULL;
2540   tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo);
2541   int min_profitable_iters = 0;
2542   unsigned int th;
2543
2544   /* Get profitability threshold for vectorized loop.  */
2545   min_profitable_iters = LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo);
2546
2547   th = conservative_cost_threshold (loop_vinfo,
2548                                     min_profitable_iters);
2549
2550   *cond_expr =
2551     fold_build2 (GT_EXPR, boolean_type_node, scalar_loop_iters,
2552                  build_int_cst (TREE_TYPE (scalar_loop_iters), th));
2553
2554   *cond_expr = force_gimple_operand (*cond_expr, cond_expr_stmt_list,
2555                                      false, NULL_TREE);
2556
2557   if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
2558       vect_create_cond_for_align_checks (loop_vinfo, cond_expr,
2559                                          cond_expr_stmt_list);
2560
2561   if (LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
2562     vect_create_cond_for_alias_checks (loop_vinfo, cond_expr,
2563                                        cond_expr_stmt_list);
2564
2565   *cond_expr =
2566     fold_build2 (NE_EXPR, boolean_type_node, *cond_expr, integer_zero_node);
2567   *cond_expr =
2568     force_gimple_operand (*cond_expr, &gimplify_stmt_list, true, NULL_TREE);
2569   gimple_seq_add_seq (cond_expr_stmt_list, gimplify_stmt_list);
2570
2571   /* If we only needed the extra conditions and a new loop copy
2572      bail out here.  */
2573   if (!do_versioning)
2574     return;
2575
2576   initialize_original_copy_tables ();
2577   loop_version (loop, *cond_expr, &condition_bb,
2578                 prob, prob, REG_BR_PROB_BASE - prob, true);
2579   free_original_copy_tables();
2580
2581   /* Loop versioning violates an assumption we try to maintain during
2582      vectorization - that the loop exit block has a single predecessor.
2583      After versioning, the exit block of both loop versions is the same
2584      basic block (i.e. it has two predecessors). Just in order to simplify
2585      following transformations in the vectorizer, we fix this situation
2586      here by adding a new (empty) block on the exit-edge of the loop,
2587      with the proper loop-exit phis to maintain loop-closed-form.  */
2588
2589   merge_bb = single_exit (loop)->dest;
2590   gcc_assert (EDGE_COUNT (merge_bb->preds) == 2);
2591   new_exit_bb = split_edge (single_exit (loop));
2592   new_exit_e = single_exit (loop);
2593   e = EDGE_SUCC (new_exit_bb, 0);
2594
2595   for (gsi = gsi_start_phis (merge_bb); !gsi_end_p (gsi); gsi_next (&gsi))
2596     {
2597       orig_phi = gsi_stmt (gsi);
2598       new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
2599                                   new_exit_bb);
2600       arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
2601       add_phi_arg (new_phi, arg, new_exit_e,
2602                    gimple_phi_arg_location_from_edge (orig_phi, e));
2603       adjust_phi_and_debug_stmts (orig_phi, e, PHI_RESULT (new_phi));
2604     }
2605
2606   /* End loop-exit-fixes after versioning.  */
2607
2608   update_ssa (TODO_update_ssa);
2609   if (*cond_expr_stmt_list)
2610     {
2611       cond_exp_gsi = gsi_last_bb (condition_bb);
2612       gsi_insert_seq_before (&cond_exp_gsi, *cond_expr_stmt_list,
2613                              GSI_SAME_STMT);
2614       *cond_expr_stmt_list = NULL;
2615     }
2616   *cond_expr = NULL_TREE;
2617 }
2618