gcc/tree-vectorizer.c

   1 /* Loop Vectorization
   2    Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
   3    Contributed by Dorit Naishlos <dorit@il.ibm.com>
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 2, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING.  If not, write to the Free
  19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
  20 02111-1307, USA.  */
  21
  22 /* Loop Vectorization Pass.
  23
  24    This pass tries to vectorize loops. This first implementation focuses on
  25    simple inner-most loops, with no conditional control flow, and a set of
  26    simple operations which vector form can be expressed using existing
  27    tree codes (PLUS, MULT etc).
  28
  29    For example, the vectorizer transforms the following simple loop:
  30
  31         short a[N]; short b[N]; short c[N]; int i;
  32
  33         for (i=0; i<N; i++){
  34           a[i] = b[i] + c[i];
  35         }
  36
  37    as if it was manually vectorized by rewriting the source code into:
  38
  39         typedef int __attribute__((mode(V8HI))) v8hi;
  40         short a[N];  short b[N]; short c[N];   int i;
  41         v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
  42         v8hi va, vb, vc;
  43
  44         for (i=0; i<N/8; i++){
  45           vb = pb[i];
  46           vc = pc[i];
  47           va = vb + vc;
  48           pa[i] = va;
  49         }
  50
  51         The main entry to this pass is vectorize_loops(), in which
  52    the vectorizer applies a set of analyses on a given set of loops,
  53    followed by the actual vectorization transformation for the loops that
  54    had successfully passed the analysis phase.
  55
  56         Throughout this pass we make a distinction between two types of
  57    data: scalars (which are represented by SSA_NAMES), and memory references
  58    ("data-refs"). These two types of data require different handling both
  59    during analysis and transformation. The types of data-refs that the
  60    vectorizer currently supports are ARRAY_REFS which base is an array DECL
  61    (not a pointer), and INDIRECT_REFS through pointers; both array and pointer
  62    accesses are required to have a  simple (consecutive) access pattern.
  63
  64    Analysis phase:
  65    ===============
  66         The driver for the analysis phase is vect_analyze_loop_nest().
  67    It applies a set of analyses, some of which rely on the scalar evolution
  68    analyzer (scev) developed by Sebastian Pop.
  69
  70         During the analysis phase the vectorizer records some information
  71    per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
  72    loop, as well as general information about the loop as a whole, which is
  73    recorded in a "loop_vec_info" struct attached to each loop.
  74
  75    Transformation phase:
  76    =====================
  77         The loop transformation phase scans all the stmts in the loop, and
  78    creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
  79    the loop that needs to be vectorized. It insert the vector code sequence
  80    just before the scalar stmt S, and records a pointer to the vector code
  81    in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
  82    attached to S). This pointer will be used for the vectorization of following
  83    stmts which use the def of stmt S. Stmt S is removed if it writes to memory;
  84    otherwise, we rely on dead code elimination for removing it.
  85
  86         For example, say stmt S1 was vectorized into stmt VS1:
  87
  88    VS1: vb = px[i];
  89    S1:  b = x[i];    STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
  90    S2:  a = b;
  91
  92    To vectorize stmt S2, the vectorizer first finds the stmt that defines
  93    the operand 'b' (S1), and gets the relevant vector def 'vb' from the
  94    vector stmt VS1 pointed by STMT_VINFO_VEC_STMT (stmt_info (S1)). The
  95    resulting sequence would be:
  96
  97    VS1: vb = px[i];
  98    S1:  b = x[i];       STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
  99    VS2: va = vb;
 100    S2:  a = b;          STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2
 101
 102         Operands that are not SSA_NAMEs, are data-refs that appear in
 103    load/store operations (like 'x[i]' in S1), and are handled differently.
 104
 105    Target modeling:
 106    =================
 107         Currently the only target specific information that is used is the
 108    size of the vector (in bytes) - "UNITS_PER_SIMD_WORD". Targets that can
 109    support different sizes of vectors, for now will need to specify one value
 110    for "UNITS_PER_SIMD_WORD". More flexibility will be added in the future.
 111
 112         Since we only vectorize operations which vector form can be
 113    expressed using existing tree codes, to verify that an operation is
 114    supported, the vectorizer checks the relevant optab at the relevant
 115    machine_mode (e.g, add_optab->handlers[(int) V8HImode].insn_code). If
 116    the value found is CODE_FOR_nothing, then there's no target support, and
 117    we can't vectorize the stmt.
 118
 119    For additional information on this project see:
 120    http://gcc.gnu.org/projects/tree-ssa/vectorization.html
 121 */
 122
 123 #include "config.h"
 124 #include "system.h"
 125 #include "coretypes.h"
 126 #include "tm.h"
 127 #include "ggc.h"
 128 #include "tree.h"
 129 #include "target.h"
 130 #include "rtl.h"
 131 #include "basic-block.h"
 132 #include "diagnostic.h"
 133 #include "tree-flow.h"
 134 #include "tree-dump.h"
 135 #include "timevar.h"
 136 #include "cfgloop.h"
 137 #include "cfglayout.h"
 138 #include "expr.h"
 139 #include "optabs.h"
 140 #include "toplev.h"
 141 #include "tree-chrec.h"
 142 #include "tree-data-ref.h"
 143 #include "tree-scalar-evolution.h"
 144 #include "input.h"
 145 #include "tree-vectorizer.h"
 146 #include "tree-pass.h"
 147
 148 /*************************************************************************
 149   Simple Loop Peeling Utilities
 150  *************************************************************************/
 151 static struct loop *slpeel_tree_duplicate_loop_to_edge_cfg
 152   (struct loop *, struct loops *, edge);
 153 static void slpeel_update_phis_for_duplicate_loop
 154   (struct loop *, struct loop *, bool after);
 155 static void slpeel_update_phi_nodes_for_guard1
 156   (edge, struct loop *, bool, basic_block *, bitmap *);
 157 static void slpeel_update_phi_nodes_for_guard2
 158   (edge, struct loop *, bool, basic_block *);
 159 static edge slpeel_add_loop_guard (basic_block, tree, basic_block, basic_block);
 160
 161 static void rename_use_op (use_operand_p);
 162 static void rename_variables_in_bb (basic_block);
 163 static void rename_variables_in_loop (struct loop *);
 164
 165 /*************************************************************************
 166   General Vectorization Utilities
 167  *************************************************************************/
 168 static void vect_set_dump_settings (void);
 169
 170 /* vect_dump will be set to stderr or dump_file if exist.  */
 171 FILE *vect_dump;
 172
 173 /* vect_verbosity_level set to an invalid value
 174    to mark that it's uninitialized.  */
 175 enum verbosity_levels vect_verbosity_level = MAX_VERBOSITY_LEVEL;
 176
 177 /* Number of loops, at the beginning of vectorization.  */
 178 unsigned int vect_loops_num;
 179 \f
 180 /*************************************************************************
 181   Simple Loop Peeling Utilities
 182
 183   Utilities to support loop peeling for vectorization purposes.
 184  *************************************************************************/
 185
 186
 187 /* Renames the use *OP_P.  */
 188
 189 static void
 190 rename_use_op (use_operand_p op_p)
 191 {
 192   tree new_name;
 193
 194   if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
 195     return;
 196
 197   new_name = get_current_def (USE_FROM_PTR (op_p));
 198
 199   /* Something defined outside of the loop.  */
 200   if (!new_name)
 201     return;
 202
 203   /* An ordinary ssa name defined in the loop.  */
 204
 205   SET_USE (op_p, new_name);
 206 }
 207
 208
 209 /* Renames the variables in basic block BB.  */
 210
 211 static void
 212 rename_variables_in_bb (basic_block bb)
 213 {
 214   tree phi;
 215   block_stmt_iterator bsi;
 216   tree stmt;
 217   use_operand_p use_p;
 218   ssa_op_iter iter;
 219   edge e;
 220   edge_iterator ei;
 221   struct loop *loop = bb->loop_father;
 222
 223   for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
 224     {
 225       stmt = bsi_stmt (bsi);
 226       FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter,
 227                                  (SSA_OP_ALL_USES | SSA_OP_ALL_KILLS))
 228         rename_use_op (use_p);
 229     }
 230
 231   FOR_EACH_EDGE (e, ei, bb->succs)
 232     {
 233       if (!flow_bb_inside_loop_p (loop, e->dest))
 234         continue;
 235       for (phi = phi_nodes (e->dest); phi; phi = PHI_CHAIN (phi))
 236         rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e));
 237     }
 238 }
 239
 240
 241 /* Renames variables in new generated LOOP.  */
 242
 243 static void
 244 rename_variables_in_loop (struct loop *loop)
 245 {
 246   unsigned i;
 247   basic_block *bbs;
 248
 249   bbs = get_loop_body (loop);
 250
 251   for (i = 0; i < loop->num_nodes; i++)
 252     rename_variables_in_bb (bbs[i]);
 253
 254   free (bbs);
 255 }
 256
 257
 258 /* Update the PHI nodes of NEW_LOOP.
 259
 260    NEW_LOOP is a duplicate of ORIG_LOOP.
 261    AFTER indicates whether NEW_LOOP executes before or after ORIG_LOOP:
 262    AFTER is true if NEW_LOOP executes after ORIG_LOOP, and false if it
 263    executes before it.  */
 264
 265 static void
 266 slpeel_update_phis_for_duplicate_loop (struct loop *orig_loop,
 267                                        struct loop *new_loop, bool after)
 268 {
 269   tree new_ssa_name;
 270   tree phi_new, phi_orig;
 271   tree def;
 272   edge orig_loop_latch = loop_latch_edge (orig_loop);
 273   edge orig_entry_e = loop_preheader_edge (orig_loop);
 274   edge new_loop_exit_e = new_loop->single_exit;
 275   edge new_loop_entry_e = loop_preheader_edge (new_loop);
 276   edge entry_arg_e = (after ? orig_loop_latch : orig_entry_e);
 277
 278   /*
 279      step 1. For each loop-header-phi:
 280              Add the first phi argument for the phi in NEW_LOOP
 281             (the one associated with the entry of NEW_LOOP)
 282
 283      step 2. For each loop-header-phi:
 284              Add the second phi argument for the phi in NEW_LOOP
 285             (the one associated with the latch of NEW_LOOP)
 286
 287      step 3. Update the phis in the successor block of NEW_LOOP.
 288
 289         case 1: NEW_LOOP was placed before ORIG_LOOP:
 290                 The successor block of NEW_LOOP is the header of ORIG_LOOP.
 291                 Updating the phis in the successor block can therefore be done
 292                 along with the scanning of the loop header phis, because the
 293                 header blocks of ORIG_LOOP and NEW_LOOP have exactly the same
 294                 phi nodes, organized in the same order.
 295
 296         case 2: NEW_LOOP was placed after ORIG_LOOP:
 297                 The successor block of NEW_LOOP is the original exit block of
 298                 ORIG_LOOP - the phis to be updated are the loop-closed-ssa phis.
 299                 We postpone updating these phis to a later stage (when
 300                 loop guards are added).
 301    */
 302
 303
 304   /* Scan the phis in the headers of the old and new loops
 305      (they are organized in exactly the same order).  */
 306
 307   for (phi_new = phi_nodes (new_loop->header),
 308        phi_orig = phi_nodes (orig_loop->header);
 309        phi_new && phi_orig;
 310        phi_new = PHI_CHAIN (phi_new), phi_orig = PHI_CHAIN (phi_orig))
 311     {
 312       /* step 1.  */
 313       def = PHI_ARG_DEF_FROM_EDGE (phi_orig, entry_arg_e);
 314       add_phi_arg (phi_new, def, new_loop_entry_e);
 315
 316       /* step 2.  */
 317       def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_loop_latch);
 318       if (TREE_CODE (def) != SSA_NAME)
 319         continue;
 320
 321       new_ssa_name = get_current_def (def);
 322       if (!new_ssa_name)
 323         {
 324           /* This only happens if there are no definitions
 325              inside the loop. use the phi_result in this case.  */
 326           new_ssa_name = PHI_RESULT (phi_new);
 327         }
 328
 329       /* An ordinary ssa name defined in the loop.  */
 330       add_phi_arg (phi_new, new_ssa_name, loop_latch_edge (new_loop));
 331
 332       /* step 3 (case 1).  */
 333       if (!after)
 334         {
 335           gcc_assert (new_loop_exit_e == orig_entry_e);
 336           SET_PHI_ARG_DEF (phi_orig,
 337                            new_loop_exit_e->dest_idx,
 338                            new_ssa_name);
 339         }
 340     }
 341 }
 342
 343
 344 /* Update PHI nodes for a guard of the LOOP.
 345
 346    Input:
 347    - LOOP, GUARD_EDGE: LOOP is a loop for which we added guard code that
 348         controls whether LOOP is to be executed.  GUARD_EDGE is the edge that
 349         originates from the guard-bb, skips LOOP and reaches the (unique) exit
 350         bb of LOOP.  This loop-exit-bb is an empty bb with one successor.
 351         We denote this bb NEW_MERGE_BB because before the guard code was added
 352         it had a single predecessor (the LOOP header), and now it became a merge
 353         point of two paths - the path that ends with the LOOP exit-edge, and
 354         the path that ends with GUARD_EDGE.
 355    - NEW_EXIT_BB: New basic block that is added by this function between LOOP
 356         and NEW_MERGE_BB. It is used to place loop-closed-ssa-form exit-phis.
 357
 358    ===> The CFG before the guard-code was added:
 359         LOOP_header_bb:
 360           loop_body
 361           if (exit_loop) goto update_bb
 362           else           goto LOOP_header_bb
 363         update_bb:
 364
 365    ==> The CFG after the guard-code was added:
 366         guard_bb:
 367           if (LOOP_guard_condition) goto new_merge_bb
 368           else                      goto LOOP_header_bb
 369         LOOP_header_bb:
 370           loop_body
 371           if (exit_loop_condition) goto new_merge_bb
 372           else                     goto LOOP_header_bb
 373         new_merge_bb:
 374           goto update_bb
 375         update_bb:
 376
 377    ==> The CFG after this function:
 378         guard_bb:
 379           if (LOOP_guard_condition) goto new_merge_bb
 380           else                      goto LOOP_header_bb
 381         LOOP_header_bb:
 382           loop_body
 383           if (exit_loop_condition) goto new_exit_bb
 384           else                     goto LOOP_header_bb
 385         new_exit_bb:
 386         new_merge_bb:
 387           goto update_bb
 388         update_bb:
 389
 390    This function:
 391    1. creates and updates the relevant phi nodes to account for the new
 392       incoming edge (GUARD_EDGE) into NEW_MERGE_BB. This involves:
 393       1.1. Create phi nodes at NEW_MERGE_BB.
 394       1.2. Update the phi nodes at the successor of NEW_MERGE_BB (denoted
 395            UPDATE_BB).  UPDATE_BB was the exit-bb of LOOP before NEW_MERGE_BB
 396    2. preserves loop-closed-ssa-form by creating the required phi nodes
 397       at the exit of LOOP (i.e, in NEW_EXIT_BB).
 398
 399    There are two flavors to this function:
 400
 401    slpeel_update_phi_nodes_for_guard1:
 402      Here the guard controls whether we enter or skip LOOP, where LOOP is a
 403      prolog_loop (loop1 below), and the new phis created in NEW_MERGE_BB are
 404      for variables that have phis in the loop header.
 405
 406    slpeel_update_phi_nodes_for_guard2:
 407      Here the guard controls whether we enter or skip LOOP, where LOOP is an
 408      epilog_loop (loop2 below), and the new phis created in NEW_MERGE_BB are
 409      for variables that have phis in the loop exit.
 410
 411    I.E., the overall structure is:
 412
 413         loop1_preheader_bb:
 414                 guard1 (goto loop1/merg1_bb)
 415         loop1
 416         loop1_exit_bb:
 417                 guard2 (goto merge1_bb/merge2_bb)
 418         merge1_bb
 419         loop2
 420         loop2_exit_bb
 421         merge2_bb
 422         next_bb
 423
 424    slpeel_update_phi_nodes_for_guard1 takes care of creating phis in
 425    loop1_exit_bb and merge1_bb. These are entry phis (phis for the vars
 426    that have phis in loop1->header).
 427
 428    slpeel_update_phi_nodes_for_guard2 takes care of creating phis in
 429    loop2_exit_bb and merge2_bb. These are exit phis (phis for the vars
 430    that have phis in next_bb). It also adds some of these phis to
 431    loop1_exit_bb.
 432
 433    slpeel_update_phi_nodes_for_guard1 is always called before
 434    slpeel_update_phi_nodes_for_guard2. They are both needed in order
 435    to create correct data-flow and loop-closed-ssa-form.
 436
 437    Generally slpeel_update_phi_nodes_for_guard1 creates phis for variables
 438    that change between iterations of a loop (and therefore have a phi-node
 439    at the loop entry), whereas slpeel_update_phi_nodes_for_guard2 creates
 440    phis for variables that are used out of the loop (and therefore have
 441    loop-closed exit phis). Some variables may be both updated between
 442    iterations and used after the loop. This is why in loop1_exit_bb we
 443    may need both entry_phis (created by slpeel_update_phi_nodes_for_guard1)
 444    and exit phis (created by slpeel_update_phi_nodes_for_guard2).
 445
 446    - IS_NEW_LOOP: if IS_NEW_LOOP is true, then LOOP is a newly created copy of
 447      an original loop. i.e., we have:
 448
 449            orig_loop
 450            guard_bb (goto LOOP/new_merge)
 451            new_loop <-- LOOP
 452            new_exit
 453            new_merge
 454            next_bb
 455
 456      If IS_NEW_LOOP is false, then LOOP is an original loop, in which case we
 457      have:
 458
 459            new_loop
 460            guard_bb (goto LOOP/new_merge)
 461            orig_loop <-- LOOP
 462            new_exit
 463            new_merge
 464            next_bb
 465
 466      The SSA names defined in the original loop have a current
 467      reaching definition that that records the corresponding new
 468      ssa-name used in the new duplicated loop copy.
 469   */
 470
 471 /* Function slpeel_update_phi_nodes_for_guard1
 472
 473    Input:
 474    - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
 475    - DEFS - a bitmap of ssa names to mark new names for which we recorded
 476             information.
 477
 478    In the context of the overall structure, we have:
 479
 480         loop1_preheader_bb:
 481                 guard1 (goto loop1/merg1_bb)
 482 LOOP->  loop1
 483         loop1_exit_bb:
 484                 guard2 (goto merge1_bb/merge2_bb)
 485         merge1_bb
 486         loop2
 487         loop2_exit_bb
 488         merge2_bb
 489         next_bb
 490
 491    For each name updated between loop iterations (i.e - for each name that has
 492    an entry (loop-header) phi in LOOP) we create a new phi in:
 493    1. merge1_bb (to account for the edge from guard1)
 494    2. loop1_exit_bb (an exit-phi to keep LOOP in loop-closed form)
 495 */
 496
 497 static void
 498 slpeel_update_phi_nodes_for_guard1 (edge guard_edge, struct loop *loop,
 499                                     bool is_new_loop, basic_block *new_exit_bb,
 500                                     bitmap *defs)
 501 {
 502   tree orig_phi, new_phi;
 503   tree update_phi, update_phi2;
 504   tree guard_arg, loop_arg;
 505   basic_block new_merge_bb = guard_edge->dest;
 506   edge e = EDGE_SUCC (new_merge_bb, 0);
 507   basic_block update_bb = e->dest;
 508   basic_block orig_bb = loop->header;
 509   edge new_exit_e;
 510   tree current_new_name;
 511
 512   /* Create new bb between loop and new_merge_bb.  */
 513   *new_exit_bb = split_edge (loop->single_exit);
 514   add_bb_to_loop (*new_exit_bb, loop->outer);
 515
 516   new_exit_e = EDGE_SUCC (*new_exit_bb, 0);
 517
 518   for (orig_phi = phi_nodes (orig_bb), update_phi = phi_nodes (update_bb);
 519        orig_phi && update_phi;
 520        orig_phi = PHI_CHAIN (orig_phi), update_phi = PHI_CHAIN (update_phi))
 521     {
 522       /** 1. Handle new-merge-point phis  **/
 523
 524       /* 1.1. Generate new phi node in NEW_MERGE_BB:  */
 525       new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
 526                                  new_merge_bb);
 527
 528       /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
 529             of LOOP. Set the two phi args in NEW_PHI for these edges:  */
 530       loop_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, EDGE_SUCC (loop->latch, 0));
 531       guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, loop_preheader_edge (loop));
 532
 533       add_phi_arg (new_phi, loop_arg, new_exit_e);
 534       add_phi_arg (new_phi, guard_arg, guard_edge);
 535
 536       /* 1.3. Update phi in successor block.  */
 537       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == loop_arg
 538                   || PHI_ARG_DEF_FROM_EDGE (update_phi, e) == guard_arg);
 539       SET_PHI_ARG_DEF (update_phi, e->dest_idx, PHI_RESULT (new_phi));
 540       update_phi2 = new_phi;
 541
 542
 543       /** 2. Handle loop-closed-ssa-form phis  **/
 544
 545       /* 2.1. Generate new phi node in NEW_EXIT_BB:  */
 546       new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
 547                                  *new_exit_bb);
 548
 549       /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop.  */
 550       add_phi_arg (new_phi, loop_arg, loop->single_exit);
 551
 552       /* 2.3. Update phi in successor of NEW_EXIT_BB:  */
 553       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
 554       SET_PHI_ARG_DEF (update_phi2, new_exit_e->dest_idx, PHI_RESULT (new_phi));
 555
 556       /* 2.4. Record the newly created name with set_current_def.
 557          We want to find a name such that
 558                 name = get_current_def (orig_loop_name)
 559          and to set its current definition as follows:
 560                 set_current_def (name, new_phi_name)
 561
 562          If LOOP is a new loop then loop_arg is already the name we're
 563          looking for. If LOOP is the original loop, then loop_arg is
 564          the orig_loop_name and the relevant name is recorded in its
 565          current reaching definition.  */
 566       if (is_new_loop)
 567         current_new_name = loop_arg;
 568       else
 569         {
 570           current_new_name = get_current_def (loop_arg);
 571           /* current_def is not available only if the variable does not
 572              change inside the loop, in which case we also don't care
 573              about recording a current_def for it because we won't be
 574              trying to create loop-exit-phis for it.  */
 575           if (!current_new_name)
 576             continue;
 577         }
 578 #ifdef ENABLE_CHECKING
 579       gcc_assert (get_current_def (current_new_name) == NULL_TREE);
 580 #endif
 581
 582       set_current_def (current_new_name, PHI_RESULT (new_phi));
 583       bitmap_set_bit (*defs, SSA_NAME_VERSION (current_new_name));
 584     }
 585
 586   set_phi_nodes (new_merge_bb, phi_reverse (phi_nodes (new_merge_bb)));
 587 }
 588
 589
 590 /* Function slpeel_update_phi_nodes_for_guard2
 591
 592    Input:
 593    - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
 594
 595    In the context of the overall structure, we have:
 596
 597         loop1_preheader_bb:
 598                 guard1 (goto loop1/merg1_bb)
 599         loop1
 600         loop1_exit_bb:
 601                 guard2 (goto merge1_bb/merge2_bb)
 602         merge1_bb
 603 LOOP->  loop2
 604         loop2_exit_bb
 605         merge2_bb
 606         next_bb
 607
 608    For each name used out side the loop (i.e - for each name that has an exit
 609    phi in next_bb) we create a new phi in:
 610    1. merge2_bb (to account for the edge from guard_bb)
 611    2. loop2_exit_bb (an exit-phi to keep LOOP in loop-closed form)
 612    3. guard2 bb (an exit phi to keep the preceding loop in loop-closed form),
 613       if needed (if it wasn't handled by slpeel_update_phis_nodes_for_phi1).
 614 */
 615
 616 static void
 617 slpeel_update_phi_nodes_for_guard2 (edge guard_edge, struct loop *loop,
 618                                     bool is_new_loop, basic_block *new_exit_bb)
 619 {
 620   tree orig_phi, new_phi;
 621   tree update_phi, update_phi2;
 622   tree guard_arg, loop_arg;
 623   basic_block new_merge_bb = guard_edge->dest;
 624   edge e = EDGE_SUCC (new_merge_bb, 0);
 625   basic_block update_bb = e->dest;
 626   edge new_exit_e;
 627   tree orig_def, orig_def_new_name;
 628   tree new_name, new_name2;
 629   tree arg;
 630
 631   /* Create new bb between loop and new_merge_bb.  */
 632   *new_exit_bb = split_edge (loop->single_exit);
 633   add_bb_to_loop (*new_exit_bb, loop->outer);
 634
 635   new_exit_e = EDGE_SUCC (*new_exit_bb, 0);
 636
 637   for (update_phi = phi_nodes (update_bb); update_phi;
 638        update_phi = PHI_CHAIN (update_phi))
 639     {
 640       orig_phi = update_phi;
 641       orig_def = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
 642       orig_def_new_name = get_current_def (orig_def);
 643       arg = NULL_TREE;
 644
 645       /** 1. Handle new-merge-point phis  **/
 646
 647       /* 1.1. Generate new phi node in NEW_MERGE_BB:  */
 648       new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
 649                                  new_merge_bb);
 650
 651       /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
 652             of LOOP. Set the two PHI args in NEW_PHI for these edges:  */
 653       new_name = orig_def;
 654       new_name2 = NULL_TREE;
 655       if (orig_def_new_name)
 656         {
 657           new_name = orig_def_new_name;
 658           /* Some variables have both loop-entry-phis and loop-exit-phis.
 659              Such variables were given yet newer names by phis placed in
 660              guard_bb by slpeel_update_phi_nodes_for_guard1. I.e:
 661              new_name2 = get_current_def (get_current_def (orig_name)).  */
 662           new_name2 = get_current_def (new_name);
 663         }
 664
 665       if (is_new_loop)
 666         {
 667           guard_arg = orig_def;
 668           loop_arg = new_name;
 669         }
 670       else
 671         {
 672           guard_arg = new_name;
 673           loop_arg = orig_def;
 674         }
 675       if (new_name2)
 676         guard_arg = new_name2;
 677
 678       add_phi_arg (new_phi, loop_arg, new_exit_e);
 679       add_phi_arg (new_phi, guard_arg, guard_edge);
 680
 681       /* 1.3. Update phi in successor block.  */
 682       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == orig_def);
 683       SET_PHI_ARG_DEF (update_phi, e->dest_idx, PHI_RESULT (new_phi));
 684       update_phi2 = new_phi;
 685
 686
 687       /** 2. Handle loop-closed-ssa-form phis  **/
 688
 689       /* 2.1. Generate new phi node in NEW_EXIT_BB:  */
 690       new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
 691                                  *new_exit_bb);
 692
 693       /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop.  */
 694       add_phi_arg (new_phi, loop_arg, loop->single_exit);
 695
 696       /* 2.3. Update phi in successor of NEW_EXIT_BB:  */
 697       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
 698       SET_PHI_ARG_DEF (update_phi2, new_exit_e->dest_idx, PHI_RESULT (new_phi));
 699
 700
 701       /** 3. Handle loop-closed-ssa-form phis for first loop  **/
 702
 703       /* 3.1. Find the relevant names that need an exit-phi in
 704          GUARD_BB, i.e. names for which
 705          slpeel_update_phi_nodes_for_guard1 had not already created a
 706          phi node. This is the case for names that are used outside
 707          the loop (and therefore need an exit phi) but are not updated
 708          across loop iterations (and therefore don't have a
 709          loop-header-phi).
 710
 711          slpeel_update_phi_nodes_for_guard1 is responsible for
 712          creating loop-exit phis in GUARD_BB for names that have a
 713          loop-header-phi.  When such a phi is created we also record
 714          the new name in its current definition.  If this new name
 715          exists, then guard_arg was set to this new name (see 1.2
 716          above).  Therefore, if guard_arg is not this new name, this
 717          is an indication that an exit-phi in GUARD_BB was not yet
 718          created, so we take care of it here.  */
 719       if (guard_arg == new_name2)
 720         continue;
 721       arg = guard_arg;
 722
 723       /* 3.2. Generate new phi node in GUARD_BB:  */
 724       new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
 725                                  guard_edge->src);
 726
 727       /* 3.3. GUARD_BB has one incoming edge:  */
 728       gcc_assert (EDGE_COUNT (guard_edge->src->preds) == 1);
 729       add_phi_arg (new_phi, arg, EDGE_PRED (guard_edge->src, 0));
 730
 731       /* 3.4. Update phi in successor of GUARD_BB:  */
 732       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, guard_edge)
 733                                                                 == guard_arg);
 734       SET_PHI_ARG_DEF (update_phi2, guard_edge->dest_idx, PHI_RESULT (new_phi));
 735     }
 736
 737   set_phi_nodes (new_merge_bb, phi_reverse (phi_nodes (new_merge_bb)));
 738 }
 739
 740
 741 /* Make the LOOP iterate NITERS times. This is done by adding a new IV
 742    that starts at zero, increases by one and its limit is NITERS.
 743
 744    Assumption: the exit-condition of LOOP is the last stmt in the loop.  */
 745
 746 void
 747 slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
 748 {
 749   tree indx_before_incr, indx_after_incr, cond_stmt, cond;
 750   tree orig_cond;
 751   edge exit_edge = loop->single_exit;
 752   block_stmt_iterator loop_cond_bsi;
 753   block_stmt_iterator incr_bsi;
 754   bool insert_after;
 755   tree begin_label = tree_block_label (loop->latch);
 756   tree exit_label = tree_block_label (loop->single_exit->dest);
 757   tree init = build_int_cst (TREE_TYPE (niters), 0);
 758   tree step = build_int_cst (TREE_TYPE (niters), 1);
 759   tree then_label;
 760   tree else_label;
 761   LOC loop_loc;
 762
 763   orig_cond = get_loop_exit_condition (loop);
 764 #ifdef ENABLE_CHECKING
 765   gcc_assert (orig_cond);
 766 #endif
 767   loop_cond_bsi = bsi_for_stmt (orig_cond);
 768
 769   standard_iv_increment_position (loop, &incr_bsi, &insert_after);
 770   create_iv (init, step, NULL_TREE, loop,
 771              &incr_bsi, insert_after, &indx_before_incr, &indx_after_incr);
 772
 773   if (exit_edge->flags & EDGE_TRUE_VALUE) /* 'then' edge exits the loop.  */
 774     {
 775       cond = build2 (GE_EXPR, boolean_type_node, indx_after_incr, niters);
 776       then_label = build1 (GOTO_EXPR, void_type_node, exit_label);
 777       else_label = build1 (GOTO_EXPR, void_type_node, begin_label);
 778     }
 779   else /* 'then' edge loops back.  */
 780     {
 781       cond = build2 (LT_EXPR, boolean_type_node, indx_after_incr, niters);
 782       then_label = build1 (GOTO_EXPR, void_type_node, begin_label);
 783       else_label = build1 (GOTO_EXPR, void_type_node, exit_label);
 784     }
 785
 786   cond_stmt = build3 (COND_EXPR, TREE_TYPE (orig_cond), cond,
 787                      then_label, else_label);
 788   bsi_insert_before (&loop_cond_bsi, cond_stmt, BSI_SAME_STMT);
 789
 790   /* Remove old loop exit test:  */
 791   bsi_remove (&loop_cond_bsi);
 792
 793   loop_loc = find_loop_location (loop);
 794   if (dump_file && (dump_flags & TDF_DETAILS))
 795     {
 796       if (loop_loc != UNKNOWN_LOC)
 797         fprintf (dump_file, "\nloop at %s:%d: ",
 798                  LOC_FILE (loop_loc), LOC_LINE (loop_loc));
 799       print_generic_expr (dump_file, cond_stmt, TDF_SLIM);
 800     }
 801
 802   loop->nb_iterations = niters;
 803 }
 804
 805
 806 /* Given LOOP this function generates a new copy of it and puts it
 807    on E which is either the entry or exit of LOOP.  */
 808
 809 static struct loop *
 810 slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, struct loops *loops,
 811                                         edge e)
 812 {
 813   struct loop *new_loop;
 814   basic_block *new_bbs, *bbs;
 815   bool at_exit;
 816   bool was_imm_dom;
 817   basic_block exit_dest;
 818   tree phi, phi_arg;
 819
 820   at_exit = (e == loop->single_exit);
 821   if (!at_exit && e != loop_preheader_edge (loop))
 822     return NULL;
 823
 824   bbs = get_loop_body (loop);
 825
 826   /* Check whether duplication is possible.  */
 827   if (!can_copy_bbs_p (bbs, loop->num_nodes))
 828     {
 829       free (bbs);
 830       return NULL;
 831     }
 832
 833   /* Generate new loop structure.  */
 834   new_loop = duplicate_loop (loops, loop, loop->outer);
 835   if (!new_loop)
 836     {
 837       free (bbs);
 838       return NULL;
 839     }
 840
 841   exit_dest = loop->single_exit->dest;
 842   was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
 843                                           exit_dest) == loop->header ?
 844                  true : false);
 845
 846   new_bbs = xmalloc (sizeof (basic_block) * loop->num_nodes);
 847
 848   copy_bbs (bbs, loop->num_nodes, new_bbs,
 849             &loop->single_exit, 1, &new_loop->single_exit, NULL);
 850
 851   /* Duplicating phi args at exit bbs as coming
 852      also from exit of duplicated loop.  */
 853   for (phi = phi_nodes (exit_dest); phi; phi = PHI_CHAIN (phi))
 854     {
 855       phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, loop->single_exit);
 856       if (phi_arg)
 857         {
 858           edge new_loop_exit_edge;
 859
 860           if (EDGE_SUCC (new_loop->header, 0)->dest == new_loop->latch)
 861             new_loop_exit_edge = EDGE_SUCC (new_loop->header, 1);
 862           else
 863             new_loop_exit_edge = EDGE_SUCC (new_loop->header, 0);
 864
 865           add_phi_arg (phi, phi_arg, new_loop_exit_edge);
 866         }
 867     }
 868
 869   if (at_exit) /* Add the loop copy at exit.  */
 870     {
 871       redirect_edge_and_branch_force (e, new_loop->header);
 872       set_immediate_dominator (CDI_DOMINATORS, new_loop->header, e->src);
 873       if (was_imm_dom)
 874         set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_loop->header);
 875     }
 876   else /* Add the copy at entry.  */
 877     {
 878       edge new_exit_e;
 879       edge entry_e = loop_preheader_edge (loop);
 880       basic_block preheader = entry_e->src;
 881
 882       if (!flow_bb_inside_loop_p (new_loop,
 883                                   EDGE_SUCC (new_loop->header, 0)->dest))
 884         new_exit_e = EDGE_SUCC (new_loop->header, 0);
 885       else
 886         new_exit_e = EDGE_SUCC (new_loop->header, 1);
 887
 888       redirect_edge_and_branch_force (new_exit_e, loop->header);
 889       set_immediate_dominator (CDI_DOMINATORS, loop->header,
 890                                new_exit_e->src);
 891
 892       /* We have to add phi args to the loop->header here as coming
 893          from new_exit_e edge.  */
 894       for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
 895         {
 896           phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, entry_e);
 897           if (phi_arg)
 898             add_phi_arg (phi, phi_arg, new_exit_e);
 899         }
 900
 901       redirect_edge_and_branch_force (entry_e, new_loop->header);
 902       set_immediate_dominator (CDI_DOMINATORS, new_loop->header, preheader);
 903     }
 904
 905   free (new_bbs);
 906   free (bbs);
 907
 908   return new_loop;
 909 }
 910
 911
 912 /* Given the condition statement COND, put it as the last statement
 913    of GUARD_BB; EXIT_BB is the basic block to skip the loop;
 914    Assumes that this is the single exit of the guarded loop.
 915    Returns the skip edge.  */
 916
 917 static edge
 918 slpeel_add_loop_guard (basic_block guard_bb, tree cond, basic_block exit_bb,
 919                         basic_block dom_bb)
 920 {
 921   block_stmt_iterator bsi;
 922   edge new_e, enter_e;
 923   tree cond_stmt, then_label, else_label;
 924
 925   enter_e = EDGE_SUCC (guard_bb, 0);
 926   enter_e->flags &= ~EDGE_FALLTHRU;
 927   enter_e->flags |= EDGE_FALSE_VALUE;
 928   bsi = bsi_last (guard_bb);
 929
 930   then_label = build1 (GOTO_EXPR, void_type_node,
 931                        tree_block_label (exit_bb));
 932   else_label = build1 (GOTO_EXPR, void_type_node,
 933                        tree_block_label (enter_e->dest));
 934   cond_stmt = build3 (COND_EXPR, void_type_node, cond,
 935                      then_label, else_label);
 936   bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT);
 937   /* Add new edge to connect guard block to the merge/loop-exit block.  */
 938   new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);
 939   set_immediate_dominator (CDI_DOMINATORS, exit_bb, dom_bb);
 940   return new_e;
 941 }
 942
 943
 944 /* This function verifies that the following restrictions apply to LOOP:
 945    (1) it is innermost
 946    (2) it consists of exactly 2 basic blocks - header, and an empty latch.
 947    (3) it is single entry, single exit
 948    (4) its exit condition is the last stmt in the header
 949    (5) E is the entry/exit edge of LOOP.
 950  */
 951
 952 bool
 953 slpeel_can_duplicate_loop_p (struct loop *loop, edge e)
 954 {
 955   edge exit_e = loop->single_exit;
 956   edge entry_e = loop_preheader_edge (loop);
 957   tree orig_cond = get_loop_exit_condition (loop);
 958   block_stmt_iterator loop_exit_bsi = bsi_last (exit_e->src);
 959
 960   if (need_ssa_update_p ())
 961     return false;
 962
 963   if (loop->inner
 964       /* All loops have an outer scope; the only case loop->outer is NULL is for
 965          the function itself.  */
 966       || !loop->outer
 967       || loop->num_nodes != 2
 968       || !empty_block_p (loop->latch)
 969       || !loop->single_exit
 970       /* Verify that new loop exit condition can be trivially modified.  */
 971       || (!orig_cond || orig_cond != bsi_stmt (loop_exit_bsi))
 972       || (e != exit_e && e != entry_e))
 973     return false;
 974
 975   return true;
 976 }
 977
 978 #ifdef ENABLE_CHECKING
 979 void
 980 slpeel_verify_cfg_after_peeling (struct loop *first_loop,
 981                                  struct loop *second_loop)
 982 {
 983   basic_block loop1_exit_bb = first_loop->single_exit->dest;
 984   basic_block loop2_entry_bb = loop_preheader_edge (second_loop)->src;
 985   basic_block loop1_entry_bb = loop_preheader_edge (first_loop)->src;
 986
 987   /* A guard that controls whether the second_loop is to be executed or skipped
 988      is placed in first_loop->exit.  first_loopt->exit therefore has two
 989      successors - one is the preheader of second_loop, and the other is a bb
 990      after second_loop.
 991    */
 992   gcc_assert (EDGE_COUNT (loop1_exit_bb->succs) == 2);
 993
 994   /* 1. Verify that one of the successors of first_loopt->exit is the preheader
 995         of second_loop.  */
 996
 997   /* The preheader of new_loop is expected to have two predecessors:
 998      first_loop->exit and the block that precedes first_loop.  */
 999
1000   gcc_assert (EDGE_COUNT (loop2_entry_bb->preds) == 2
1001               && ((EDGE_PRED (loop2_entry_bb, 0)->src == loop1_exit_bb
1002                    && EDGE_PRED (loop2_entry_bb, 1)->src == loop1_entry_bb)
1003                || (EDGE_PRED (loop2_entry_bb, 1)->src ==  loop1_exit_bb
1004                    && EDGE_PRED (loop2_entry_bb, 0)->src == loop1_entry_bb)));
1005
1006   /* Verify that the other successor of first_loopt->exit is after the
1007      second_loop.  */
1008   /* TODO */
1009 }
1010 #endif
1011
1012 /* Function slpeel_tree_peel_loop_to_edge.
1013
1014    Peel the first (last) iterations of LOOP into a new prolog (epilog) loop
1015    that is placed on the entry (exit) edge E of LOOP. After this transformation
1016    we have two loops one after the other - first-loop iterates FIRST_NITERS
1017    times, and second-loop iterates the remainder NITERS - FIRST_NITERS times.
1018
1019    Input:
1020    - LOOP: the loop to be peeled.
1021    - E: the exit or entry edge of LOOP.
1022         If it is the entry edge, we peel the first iterations of LOOP. In this
1023         case first-loop is LOOP, and second-loop is the newly created loop.
1024         If it is the exit edge, we peel the last iterations of LOOP. In this
1025         case, first-loop is the newly created loop, and second-loop is LOOP.
1026    - NITERS: the number of iterations that LOOP iterates.
1027    - FIRST_NITERS: the number of iterations that the first-loop should iterate.
1028    - UPDATE_FIRST_LOOP_COUNT:  specified whether this function is responsible
1029         for updating the loop bound of the first-loop to FIRST_NITERS.  If it
1030         is false, the caller of this function may want to take care of this
1031         (this can be useful if we don't want new stmts added to first-loop).
1032
1033    Output:
1034    The function returns a pointer to the new loop-copy, or NULL if it failed
1035    to perform the transformation.
1036
1037    The function generates two if-then-else guards: one before the first loop,
1038    and the other before the second loop:
1039    The first guard is:
1040      if (FIRST_NITERS == 0) then skip the first loop,
1041      and go directly to the second loop.
1042    The second guard is:
1043      if (FIRST_NITERS == NITERS) then skip the second loop.
1044
1045    FORNOW only simple loops are supported (see slpeel_can_duplicate_loop_p).
1046    FORNOW the resulting code will not be in loop-closed-ssa form.
1047 */
1048
1049 struct loop*
1050 slpeel_tree_peel_loop_to_edge (struct loop *loop, struct loops *loops,
1051                                edge e, tree first_niters,
1052                                tree niters, bool update_first_loop_count)
1053 {
1054   struct loop *new_loop = NULL, *first_loop, *second_loop;
1055   edge skip_e;
1056   tree pre_condition;
1057   bitmap definitions;
1058   basic_block bb_before_second_loop, bb_after_second_loop;
1059   basic_block bb_before_first_loop;
1060   basic_block bb_between_loops;
1061   basic_block new_exit_bb;
1062   edge exit_e = loop->single_exit;
1063   LOC loop_loc;
1064
1065   if (!slpeel_can_duplicate_loop_p (loop, e))
1066     return NULL;
1067
1068   /* We have to initialize cfg_hooks. Then, when calling
1069    cfg_hooks->split_edge, the function tree_split_edge
1070    is actually called and, when calling cfg_hooks->duplicate_block,
1071    the function tree_duplicate_bb is called.  */
1072   tree_register_cfg_hooks ();
1073
1074
1075   /* 1. Generate a copy of LOOP and put it on E (E is the entry/exit of LOOP).
1076         Resulting CFG would be:
1077
1078         first_loop:
1079         do {
1080         } while ...
1081
1082         second_loop:
1083         do {
1084         } while ...
1085
1086         orig_exit_bb:
1087    */
1088
1089   if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, loops, e)))
1090     {
1091       loop_loc = find_loop_location (loop);
1092       if (dump_file && (dump_flags & TDF_DETAILS))
1093         {
1094           if (loop_loc != UNKNOWN_LOC)
1095             fprintf (dump_file, "\n%s:%d: note: ",
1096                      LOC_FILE (loop_loc), LOC_LINE (loop_loc));
1097           fprintf (dump_file, "tree_duplicate_loop_to_edge_cfg failed.\n");
1098         }
1099       return NULL;
1100     }
1101
1102   if (e == exit_e)
1103     {
1104       /* NEW_LOOP was placed after LOOP.  */
1105       first_loop = loop;
1106       second_loop = new_loop;
1107     }
1108   else
1109     {
1110       /* NEW_LOOP was placed before LOOP.  */
1111       first_loop = new_loop;
1112       second_loop = loop;
1113     }
1114
1115   definitions = ssa_names_to_replace ();
1116   slpeel_update_phis_for_duplicate_loop (loop, new_loop, e == exit_e);
1117   rename_variables_in_loop (new_loop);
1118
1119
1120   /* 2. Add the guard that controls whether the first loop is executed.
1121         Resulting CFG would be:
1122
1123         bb_before_first_loop:
1124         if (FIRST_NITERS == 0) GOTO bb_before_second_loop
1125                                GOTO first-loop
1126
1127         first_loop:
1128         do {
1129         } while ...
1130
1131         bb_before_second_loop:
1132
1133         second_loop:
1134         do {
1135         } while ...
1136
1137         orig_exit_bb:
1138    */
1139
1140   bb_before_first_loop = split_edge (loop_preheader_edge (first_loop));
1141   add_bb_to_loop (bb_before_first_loop, first_loop->outer);
1142   bb_before_second_loop = split_edge (first_loop->single_exit);
1143   add_bb_to_loop (bb_before_second_loop, first_loop->outer);
1144
1145   pre_condition =
1146     fold (build2 (LE_EXPR, boolean_type_node, first_niters, integer_zero_node));
1147   skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
1148                                   bb_before_second_loop, bb_before_first_loop);
1149   slpeel_update_phi_nodes_for_guard1 (skip_e, first_loop,
1150                                       first_loop == new_loop,
1151                                       &new_exit_bb, &definitions);
1152
1153
1154   /* 3. Add the guard that controls whether the second loop is executed.
1155         Resulting CFG would be:
1156
1157         bb_before_first_loop:
1158         if (FIRST_NITERS == 0) GOTO bb_before_second_loop (skip first loop)
1159                                GOTO first-loop
1160
1161         first_loop:
1162         do {
1163         } while ...
1164
1165         bb_between_loops:
1166         if (FIRST_NITERS == NITERS) GOTO bb_after_second_loop (skip second loop)
1167                                     GOTO bb_before_second_loop
1168
1169         bb_before_second_loop:
1170
1171         second_loop:
1172         do {
1173         } while ...
1174
1175         bb_after_second_loop:
1176
1177         orig_exit_bb:
1178    */
1179
1180   bb_between_loops = new_exit_bb;
1181   bb_after_second_loop = split_edge (second_loop->single_exit);
1182   add_bb_to_loop (bb_after_second_loop, second_loop->outer);
1183
1184   pre_condition =
1185         fold (build2 (EQ_EXPR, boolean_type_node, first_niters, niters));
1186   skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition,
1187                                   bb_after_second_loop, bb_before_first_loop);
1188   slpeel_update_phi_nodes_for_guard2 (skip_e, second_loop,
1189                                      second_loop == new_loop, &new_exit_bb);
1190
1191   /* 4. Make first-loop iterate FIRST_NITERS times, if requested.
1192    */
1193   if (update_first_loop_count)
1194     slpeel_make_loop_iterate_ntimes (first_loop, first_niters);
1195
1196   BITMAP_FREE (definitions);
1197   delete_update_ssa ();
1198
1199   return new_loop;
1200 }
1201
1202 /* Function vect_get_loop_location.
1203
1204    Extract the location of the loop in the source code.
1205    If the loop is not well formed for vectorization, an estimated
1206    location is calculated.
1207    Return the loop location if succeed and NULL if not.  */
1208
1209 LOC
1210 find_loop_location (struct loop *loop)
1211 {
1212   tree node = NULL_TREE;
1213   basic_block bb;
1214   block_stmt_iterator si;
1215
1216   if (!loop)
1217     return UNKNOWN_LOC;
1218
1219   node = get_loop_exit_condition (loop);
1220
1221   if (node && EXPR_P (node) && EXPR_HAS_LOCATION (node)
1222       && EXPR_FILENAME (node) && EXPR_LINENO (node))
1223     return EXPR_LOC (node);
1224
1225   /* If we got here the loop is probably not "well formed",
1226      try to estimate the loop location */
1227
1228   if (!loop->header)
1229     return UNKNOWN_LOC;
1230
1231   bb = loop->header;
1232
1233   for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1234     {
1235       node = bsi_stmt (si);
1236       if (node && EXPR_P (node) && EXPR_HAS_LOCATION (node))
1237         return EXPR_LOC (node);
1238     }
1239
1240   return UNKNOWN_LOC;
1241 }
1242
1243
1244 /*************************************************************************
1245   Vectorization Debug Information.
1246  *************************************************************************/
1247
1248 /* Function vect_set_verbosity_level.
1249
1250    Called from toplev.c upon detection of the
1251    -ftree-vectorizer-verbose=N option.  */
1252
1253 void
1254 vect_set_verbosity_level (const char *val)
1255 {
1256    unsigned int vl;
1257
1258    vl = atoi (val);
1259    if (vl < MAX_VERBOSITY_LEVEL)
1260      vect_verbosity_level = vl;
1261    else
1262      vect_verbosity_level = MAX_VERBOSITY_LEVEL - 1;
1263 }
1264
1265
1266 /* Function vect_set_dump_settings.
1267
1268    Fix the verbosity level of the vectorizer if the
1269    requested level was not set explicitly using the flag
1270    -ftree-vectorizer-verbose=N.
1271    Decide where to print the debugging information (dump_file/stderr).
1272    If the user defined the verbosity level, but there is no dump file,
1273    print to stderr, otherwise print to the dump file.  */
1274
1275 static void
1276 vect_set_dump_settings (void)
1277 {
1278   vect_dump = dump_file;
1279
1280   /* Check if the verbosity level was defined by the user:  */
1281   if (vect_verbosity_level != MAX_VERBOSITY_LEVEL)
1282     {
1283       /* If there is no dump file, print to stderr.  */
1284       if (!dump_file)
1285         vect_dump = stderr;
1286       return;
1287     }
1288
1289   /* User didn't specify verbosity level:  */
1290   if (dump_file && (dump_flags & TDF_DETAILS))
1291     vect_verbosity_level = REPORT_DETAILS;
1292   else if (dump_file && (dump_flags & TDF_STATS))
1293     vect_verbosity_level = REPORT_UNVECTORIZED_LOOPS;
1294   else
1295     vect_verbosity_level = REPORT_NONE;
1296
1297   gcc_assert (dump_file || vect_verbosity_level == REPORT_NONE);
1298 }
1299
1300
1301 /* Function debug_loop_details.
1302
1303    For vectorization debug dumps.  */
1304
1305 bool
1306 vect_print_dump_info (enum verbosity_levels vl, LOC loc)
1307 {
1308   if (vl > vect_verbosity_level)
1309     return false;
1310
1311   if (loc == UNKNOWN_LOC)
1312     fprintf (vect_dump, "\n%s:%d: note: ",
1313                  DECL_SOURCE_FILE (current_function_decl),
1314                  DECL_SOURCE_LINE (current_function_decl));
1315   else
1316     fprintf (vect_dump, "\n%s:%d: note: ", LOC_FILE (loc), LOC_LINE (loc));
1317
1318
1319   return true;
1320 }
1321
1322
1323 /*************************************************************************
1324   Vectorization Utilities.
1325  *************************************************************************/
1326
1327 /* Function new_stmt_vec_info.
1328
1329    Create and initialize a new stmt_vec_info struct for STMT.  */
1330
1331 stmt_vec_info
1332 new_stmt_vec_info (tree stmt, loop_vec_info loop_vinfo)
1333 {
1334   stmt_vec_info res;
1335   res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
1336
1337   STMT_VINFO_TYPE (res) = undef_vec_info_type;
1338   STMT_VINFO_STMT (res) = stmt;
1339   STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
1340   STMT_VINFO_RELEVANT_P (res) = 0;
1341   STMT_VINFO_VECTYPE (res) = NULL;
1342   STMT_VINFO_VEC_STMT (res) = NULL;
1343   STMT_VINFO_DATA_REF (res) = NULL;
1344   STMT_VINFO_MEMTAG (res) = NULL;
1345   STMT_VINFO_PTR_INFO (res) = NULL;
1346   STMT_VINFO_SUBVARS (res) = NULL;
1347   STMT_VINFO_VECT_DR_BASE_ADDRESS (res) = NULL;
1348   STMT_VINFO_VECT_INIT_OFFSET (res) = NULL_TREE;
1349   STMT_VINFO_VECT_STEP (res) = NULL_TREE;
1350   STMT_VINFO_VECT_BASE_ALIGNED_P (res) = false;
1351   STMT_VINFO_VECT_MISALIGNMENT (res) = NULL_TREE;
1352
1353   return res;
1354 }
1355
1356
1357 /* Function new_loop_vec_info.
1358
1359    Create and initialize a new loop_vec_info struct for LOOP, as well as
1360    stmt_vec_info structs for all the stmts in LOOP.  */
1361
1362 loop_vec_info
1363 new_loop_vec_info (struct loop *loop)
1364 {
1365   loop_vec_info res;
1366   basic_block *bbs;
1367   block_stmt_iterator si;
1368   unsigned int i;
1369
1370   res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));
1371
1372   bbs = get_loop_body (loop);
1373
1374   /* Create stmt_info for all stmts in the loop.  */
1375   for (i = 0; i < loop->num_nodes; i++)
1376     {
1377       basic_block bb = bbs[i];
1378       for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1379         {
1380           tree stmt = bsi_stmt (si);
1381           stmt_ann_t ann;
1382
1383           ann = stmt_ann (stmt);
1384           set_stmt_info (ann, new_stmt_vec_info (stmt, res));
1385         }
1386     }
1387
1388   LOOP_VINFO_LOOP (res) = loop;
1389   LOOP_VINFO_BBS (res) = bbs;
1390   LOOP_VINFO_EXIT_COND (res) = NULL;
1391   LOOP_VINFO_NITERS (res) = NULL;
1392   LOOP_VINFO_VECTORIZABLE_P (res) = 0;
1393   LOOP_PEELING_FOR_ALIGNMENT (res) = 0;
1394   LOOP_VINFO_VECT_FACTOR (res) = 0;
1395   VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_WRITES (res), 20,
1396                            "loop_write_datarefs");
1397   VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_READS (res), 20,
1398                            "loop_read_datarefs");
1399   LOOP_VINFO_UNALIGNED_DR (res) = NULL;
1400   LOOP_VINFO_LOC (res) = UNKNOWN_LOC;
1401
1402   return res;
1403 }
1404
1405
1406 /* Function destroy_loop_vec_info.
1407
1408    Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the
1409    stmts in the loop.  */
1410
1411 void
1412 destroy_loop_vec_info (loop_vec_info loop_vinfo)
1413 {
1414   struct loop *loop;
1415   basic_block *bbs;
1416   int nbbs;
1417   block_stmt_iterator si;
1418   int j;
1419
1420   if (!loop_vinfo)
1421     return;
1422
1423   loop = LOOP_VINFO_LOOP (loop_vinfo);
1424
1425   bbs = LOOP_VINFO_BBS (loop_vinfo);
1426   nbbs = loop->num_nodes;
1427
1428   for (j = 0; j < nbbs; j++)
1429     {
1430       basic_block bb = bbs[j];
1431       for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1432         {
1433           tree stmt = bsi_stmt (si);
1434           stmt_ann_t ann = stmt_ann (stmt);
1435           stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1436           free (stmt_info);
1437           set_stmt_info (ann, NULL);
1438         }
1439     }
1440
1441   free (LOOP_VINFO_BBS (loop_vinfo));
1442   varray_clear (LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
1443   varray_clear (LOOP_VINFO_DATAREF_READS (loop_vinfo));
1444
1445   free (loop_vinfo);
1446 }
1447
1448
1449 /* Function vect_strip_conversions
1450
1451    Strip conversions that don't narrow the mode.  */
1452
1453 tree
1454 vect_strip_conversion (tree expr)
1455 {
1456   tree to, ti, oprnd0;
1457
1458   while (TREE_CODE (expr) == NOP_EXPR || TREE_CODE (expr) == CONVERT_EXPR)
1459     {
1460       to = TREE_TYPE (expr);
1461       oprnd0 = TREE_OPERAND (expr, 0);
1462       ti = TREE_TYPE (oprnd0);
1463
1464       if (!INTEGRAL_TYPE_P (to) || !INTEGRAL_TYPE_P (ti))
1465         return NULL_TREE;
1466       if (GET_MODE_SIZE (TYPE_MODE (to)) < GET_MODE_SIZE (TYPE_MODE (ti)))
1467         return NULL_TREE;
1468
1469       expr = oprnd0;
1470     }
1471   return expr;
1472 }
1473
1474
1475 /* Function vect_force_dr_alignment_p.
1476
1477    Returns whether the alignment of a DECL can be forced to be aligned
1478    on ALIGNMENT bit boundary.  */
1479
1480 bool
1481 vect_can_force_dr_alignment_p (tree decl, unsigned int alignment)
1482 {
1483   if (TREE_CODE (decl) != VAR_DECL)
1484     return false;
1485
1486   if (DECL_EXTERNAL (decl))
1487     return false;
1488
1489   if (TREE_ASM_WRITTEN (decl))
1490     return false;
1491
1492   if (TREE_STATIC (decl))
1493     return (alignment <= MAX_OFILE_ALIGNMENT);
1494   else
1495     /* This is not 100% correct.  The absolute correct stack alignment
1496        is STACK_BOUNDARY.  We're supposed to hope, but not assume, that
1497        PREFERRED_STACK_BOUNDARY is honored by all translation units.
1498        However, until someone implements forced stack alignment, SSE
1499        isn't really usable without this.  */
1500     return (alignment <= PREFERRED_STACK_BOUNDARY);
1501 }
1502
1503
1504 /* Function get_vectype_for_scalar_type.
1505
1506    Returns the vector type corresponding to SCALAR_TYPE as supported
1507    by the target.  */
1508
1509 tree
1510 get_vectype_for_scalar_type (tree scalar_type)
1511 {
1512   enum machine_mode inner_mode = TYPE_MODE (scalar_type);
1513   int nbytes = GET_MODE_SIZE (inner_mode);
1514   int nunits;
1515   tree vectype;
1516
1517   if (nbytes == 0 || nbytes >= UNITS_PER_SIMD_WORD)
1518     return NULL_TREE;
1519
1520   /* FORNOW: Only a single vector size per target (UNITS_PER_SIMD_WORD)
1521      is expected.  */
1522   nunits = UNITS_PER_SIMD_WORD / nbytes;
1523
1524   vectype = build_vector_type (scalar_type, nunits);
1525   if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1526     {
1527       fprintf (vect_dump, "get vectype with %d units of type ", nunits);
1528       print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
1529     }
1530
1531   if (!vectype)
1532     return NULL_TREE;
1533
1534   if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1535     {
1536       fprintf (vect_dump, "vectype: ");
1537       print_generic_expr (vect_dump, vectype, TDF_SLIM);
1538     }
1539
1540   if (!VECTOR_MODE_P (TYPE_MODE (vectype))
1541       && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
1542     {
1543       if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1544         fprintf (vect_dump, "mode not supported by target.");
1545       return NULL_TREE;
1546     }
1547
1548   return vectype;
1549 }
1550
1551
1552 /* Function vect_supportable_dr_alignment
1553
1554    Return whether the data reference DR is supported with respect to its
1555    alignment.  */
1556
1557 enum dr_alignment_support
1558 vect_supportable_dr_alignment (struct data_reference *dr)
1559 {
1560   tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr)));
1561   enum machine_mode mode = (int) TYPE_MODE (vectype);
1562
1563   if (aligned_access_p (dr))
1564     return dr_aligned;
1565
1566   /* Possibly unaligned access.  */
1567
1568   if (DR_IS_READ (dr))
1569     {
1570       if (vec_realign_load_optab->handlers[mode].insn_code != CODE_FOR_nothing
1571           && (!targetm.vectorize.builtin_mask_for_load
1572               || targetm.vectorize.builtin_mask_for_load ()))
1573         return dr_unaligned_software_pipeline;
1574
1575       if (movmisalign_optab->handlers[mode].insn_code != CODE_FOR_nothing)
1576         /* Can't software pipeline the loads, but can at least do them.  */
1577         return dr_unaligned_supported;
1578     }
1579
1580   /* Unsupported.  */
1581   return dr_unaligned_unsupported;
1582 }
1583
1584
1585 /* Function vect_is_simple_use.
1586
1587    Input:
1588    LOOP - the loop that is being vectorized.
1589    OPERAND - operand of a stmt in LOOP.
1590    DEF - the defining stmt in case OPERAND is an SSA_NAME.
1591
1592    Returns whether a stmt with OPERAND can be vectorized.
1593    Supportable operands are constants, loop invariants, and operands that are
1594    defined by the current iteration of the loop. Unsupportable operands are
1595    those that are defined by a previous iteration of the loop (as is the case
1596    in reduction/induction computations).  */
1597
1598 bool
1599 vect_is_simple_use (tree operand, loop_vec_info loop_vinfo, tree *def)
1600 {
1601   tree def_stmt;
1602   basic_block bb;
1603   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1604
1605   if (def)
1606     *def = NULL_TREE;
1607
1608   if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
1609     return true;
1610
1611   if (TREE_CODE (operand) != SSA_NAME)
1612     return false;
1613
1614   def_stmt = SSA_NAME_DEF_STMT (operand);
1615   if (def_stmt == NULL_TREE )
1616     {
1617       if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1618         fprintf (vect_dump, "no def_stmt.");
1619       return false;
1620     }
1621
1622   /* empty stmt is expected only in case of a function argument.
1623      (Otherwise - we expect a phi_node or a modify_expr).  */
1624   if (IS_EMPTY_STMT (def_stmt))
1625     {
1626       tree arg = TREE_OPERAND (def_stmt, 0);
1627       if (TREE_CODE (arg) == INTEGER_CST || TREE_CODE (arg) == REAL_CST)
1628         return true;
1629       if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1630         {
1631           fprintf (vect_dump, "Unexpected empty stmt: ");
1632           print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
1633         }
1634       return false;
1635     }
1636
1637   /* phi_node inside the loop indicates an induction/reduction pattern.
1638      This is not supported yet.  */
1639   bb = bb_for_stmt (def_stmt);
1640   if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb))
1641     {
1642       if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1643         fprintf (vect_dump, "reduction/induction - unsupported.");
1644       return false; /* FORNOW: not supported yet.  */
1645     }
1646
1647   /* Expecting a modify_expr or a phi_node.  */
1648   if (TREE_CODE (def_stmt) == MODIFY_EXPR
1649       || TREE_CODE (def_stmt) == PHI_NODE)
1650     {
1651       if (def)
1652         *def = def_stmt;
1653       return true;
1654     }
1655
1656   return false;
1657 }
1658
1659
1660 /* Function vect_is_simple_iv_evolution.
1661
1662    FORNOW: A simple evolution of an induction variables in the loop is
1663    considered a polynomial evolution with constant step.  */
1664
1665 bool
1666 vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
1667                              tree * step)
1668 {
1669   tree init_expr;
1670   tree step_expr;
1671
1672   tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
1673
1674   /* When there is no evolution in this loop, the evolution function
1675      is not "simple".  */
1676   if (evolution_part == NULL_TREE)
1677     return false;
1678
1679   /* When the evolution is a polynomial of degree >= 2
1680      the evolution function is not "simple".  */
1681   if (tree_is_chrec (evolution_part))
1682     return false;
1683
1684   step_expr = evolution_part;
1685   init_expr = unshare_expr (initial_condition_in_loop_num (access_fn,
1686                                                            loop_nb));
1687
1688   if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1689     {
1690       fprintf (vect_dump, "step: ");
1691       print_generic_expr (vect_dump, step_expr, TDF_SLIM);
1692       fprintf (vect_dump, ",  init: ");
1693       print_generic_expr (vect_dump, init_expr, TDF_SLIM);
1694     }
1695
1696   *init = init_expr;
1697   *step = step_expr;
1698
1699   if (TREE_CODE (step_expr) != INTEGER_CST)
1700     {
1701       if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1702         fprintf (vect_dump, "step unknown.");
1703       return false;
1704     }
1705
1706   return true;
1707 }
1708
1709
1710 /* Function vectorize_loops.
1711
1712    Entry Point to loop vectorization phase.  */
1713
1714 void
1715 vectorize_loops (struct loops *loops)
1716 {
1717   unsigned int i;
1718   unsigned int num_vectorized_loops = 0;
1719
1720   /* Fix the verbosity level if not defined explicitly by the user.  */
1721   vect_set_dump_settings ();
1722
1723   /*  ----------- Analyze loops. -----------  */
1724
1725   /* If some loop was duplicated, it gets bigger number
1726      than all previously defined loops. This fact allows us to run
1727      only over initial loops skipping newly generated ones.  */
1728   vect_loops_num = loops->num;
1729   for (i = 1; i < vect_loops_num; i++)
1730     {
1731       loop_vec_info loop_vinfo;
1732       struct loop *loop = loops->parray[i];
1733
1734       if (!loop)
1735         continue;
1736
1737       loop_vinfo = vect_analyze_loop (loop);
1738       loop->aux = loop_vinfo;
1739
1740       if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
1741         continue;
1742
1743       vect_transform_loop (loop_vinfo, loops);
1744       num_vectorized_loops++;
1745     }
1746
1747   if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS, UNKNOWN_LOC))
1748     fprintf (vect_dump, "vectorized %u loops in function.\n",
1749              num_vectorized_loops);
1750
1751   /*  ----------- Finalize. -----------  */
1752
1753   for (i = 1; i < vect_loops_num; i++)
1754     {
1755       struct loop *loop = loops->parray[i];
1756       loop_vec_info loop_vinfo;
1757
1758       if (!loop)
1759         continue;
1760       loop_vinfo = loop->aux;
1761       destroy_loop_vec_info (loop_vinfo);
1762       loop->aux = NULL;
1763     }
1764 }