gcc/tree-vectorizer.c

   1 /* Loop Vectorization
   2    Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
   3    Contributed by Dorit Naishlos <dorit@il.ibm.com>
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 2, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING.  If not, write to the Free
  19 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
  20 02110-1301, USA.  */
  21
  22 /* Loop Vectorization Pass.
  23
  24    This pass tries to vectorize loops. This first implementation focuses on
  25    simple inner-most loops, with no conditional control flow, and a set of
  26    simple operations which vector form can be expressed using existing
  27    tree codes (PLUS, MULT etc).
  28
  29    For example, the vectorizer transforms the following simple loop:
  30
  31         short a[N]; short b[N]; short c[N]; int i;
  32
  33         for (i=0; i<N; i++){
  34           a[i] = b[i] + c[i];
  35         }
  36
  37    as if it was manually vectorized by rewriting the source code into:
  38
  39         typedef int __attribute__((mode(V8HI))) v8hi;
  40         short a[N];  short b[N]; short c[N];   int i;
  41         v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
  42         v8hi va, vb, vc;
  43
  44         for (i=0; i<N/8; i++){
  45           vb = pb[i];
  46           vc = pc[i];
  47           va = vb + vc;
  48           pa[i] = va;
  49         }
  50
  51         The main entry to this pass is vectorize_loops(), in which
  52    the vectorizer applies a set of analyses on a given set of loops,
  53    followed by the actual vectorization transformation for the loops that
  54    had successfully passed the analysis phase.
  55
  56         Throughout this pass we make a distinction between two types of
  57    data: scalars (which are represented by SSA_NAMES), and memory references
  58    ("data-refs"). These two types of data require different handling both
  59    during analysis and transformation. The types of data-refs that the
  60    vectorizer currently supports are ARRAY_REFS which base is an array DECL
  61    (not a pointer), and INDIRECT_REFS through pointers; both array and pointer
  62    accesses are required to have a  simple (consecutive) access pattern.
  63
  64    Analysis phase:
  65    ===============
  66         The driver for the analysis phase is vect_analyze_loop_nest().
  67    It applies a set of analyses, some of which rely on the scalar evolution
  68    analyzer (scev) developed by Sebastian Pop.
  69
  70         During the analysis phase the vectorizer records some information
  71    per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
  72    loop, as well as general information about the loop as a whole, which is
  73    recorded in a "loop_vec_info" struct attached to each loop.
  74
  75    Transformation phase:
  76    =====================
  77         The loop transformation phase scans all the stmts in the loop, and
  78    creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
  79    the loop that needs to be vectorized. It insert the vector code sequence
  80    just before the scalar stmt S, and records a pointer to the vector code
  81    in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
  82    attached to S). This pointer will be used for the vectorization of following
  83    stmts which use the def of stmt S. Stmt S is removed if it writes to memory;
  84    otherwise, we rely on dead code elimination for removing it.
  85
  86         For example, say stmt S1 was vectorized into stmt VS1:
  87
  88    VS1: vb = px[i];
  89    S1:  b = x[i];    STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
  90    S2:  a = b;
  91
  92    To vectorize stmt S2, the vectorizer first finds the stmt that defines
  93    the operand 'b' (S1), and gets the relevant vector def 'vb' from the
  94    vector stmt VS1 pointed to by STMT_VINFO_VEC_STMT (stmt_info (S1)). The
  95    resulting sequence would be:
  96
  97    VS1: vb = px[i];
  98    S1:  b = x[i];       STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
  99    VS2: va = vb;
 100    S2:  a = b;          STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2
 101
 102         Operands that are not SSA_NAMEs, are data-refs that appear in
 103    load/store operations (like 'x[i]' in S1), and are handled differently.
 104
 105    Target modeling:
 106    =================
 107         Currently the only target specific information that is used is the
 108    size of the vector (in bytes) - "UNITS_PER_SIMD_WORD". Targets that can
 109    support different sizes of vectors, for now will need to specify one value
 110    for "UNITS_PER_SIMD_WORD". More flexibility will be added in the future.
 111
 112         Since we only vectorize operations which vector form can be
 113    expressed using existing tree codes, to verify that an operation is
 114    supported, the vectorizer checks the relevant optab at the relevant
 115    machine_mode (e.g, add_optab->handlers[(int) V8HImode].insn_code). If
 116    the value found is CODE_FOR_nothing, then there's no target support, and
 117    we can't vectorize the stmt.
 118
 119    For additional information on this project see:
 120    http://gcc.gnu.org/projects/tree-ssa/vectorization.html
 121 */
 122
 123 #include "config.h"
 124 #include "system.h"
 125 #include "coretypes.h"
 126 #include "tm.h"
 127 #include "ggc.h"
 128 #include "tree.h"
 129 #include "target.h"
 130 #include "rtl.h"
 131 #include "basic-block.h"
 132 #include "diagnostic.h"
 133 #include "tree-flow.h"
 134 #include "tree-dump.h"
 135 #include "timevar.h"
 136 #include "cfgloop.h"
 137 #include "cfglayout.h"
 138 #include "expr.h"
 139 #include "optabs.h"
 140 #include "params.h"
 141 #include "toplev.h"
 142 #include "tree-chrec.h"
 143 #include "tree-data-ref.h"
 144 #include "tree-scalar-evolution.h"
 145 #include "input.h"
 146 #include "tree-vectorizer.h"
 147 #include "tree-pass.h"
 148
 149 /*************************************************************************
 150   Simple Loop Peeling Utilities
 151  *************************************************************************/
 152 static struct loop *slpeel_tree_duplicate_loop_to_edge_cfg
 153   (struct loop *, struct loops *, edge);
 154 static void slpeel_update_phis_for_duplicate_loop
 155   (struct loop *, struct loop *, bool after);
 156 static void slpeel_update_phi_nodes_for_guard1
 157   (edge, struct loop *, bool, basic_block *, bitmap *);
 158 static void slpeel_update_phi_nodes_for_guard2
 159   (edge, struct loop *, bool, basic_block *);
 160 static edge slpeel_add_loop_guard (basic_block, tree, basic_block, basic_block);
 161
 162 static void rename_use_op (use_operand_p);
 163 static void rename_variables_in_bb (basic_block);
 164 static void rename_variables_in_loop (struct loop *);
 165
 166 /*************************************************************************
 167   General Vectorization Utilities
 168  *************************************************************************/
 169 static void vect_set_dump_settings (void);
 170
 171 /* vect_dump will be set to stderr or dump_file if exist.  */
 172 FILE *vect_dump;
 173
 174 /* vect_verbosity_level set to an invalid value
 175    to mark that it's uninitialized.  */
 176 enum verbosity_levels vect_verbosity_level = MAX_VERBOSITY_LEVEL;
 177
 178 /* Number of loops, at the beginning of vectorization.  */
 179 unsigned int vect_loops_num;
 180
 181 /* Loop location.  */
 182 static LOC vect_loop_location;
 183
 184 /* Bitmap of virtual variables to be renamed.  */
 185 bitmap vect_vnames_to_rename;
 186 \f
 187 /*************************************************************************
 188   Simple Loop Peeling Utilities
 189
 190   Utilities to support loop peeling for vectorization purposes.
 191  *************************************************************************/
 192
 193
 194 /* Renames the use *OP_P.  */
 195
 196 static void
 197 rename_use_op (use_operand_p op_p)
 198 {
 199   tree new_name;
 200
 201   if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
 202     return;
 203
 204   new_name = get_current_def (USE_FROM_PTR (op_p));
 205
 206   /* Something defined outside of the loop.  */
 207   if (!new_name)
 208     return;
 209
 210   /* An ordinary ssa name defined in the loop.  */
 211
 212   SET_USE (op_p, new_name);
 213 }
 214
 215
 216 /* Renames the variables in basic block BB.  */
 217
 218 static void
 219 rename_variables_in_bb (basic_block bb)
 220 {
 221   tree phi;
 222   block_stmt_iterator bsi;
 223   tree stmt;
 224   use_operand_p use_p;
 225   ssa_op_iter iter;
 226   edge e;
 227   edge_iterator ei;
 228   struct loop *loop = bb->loop_father;
 229
 230   for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
 231     {
 232       stmt = bsi_stmt (bsi);
 233       FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter,
 234                                  (SSA_OP_ALL_USES | SSA_OP_ALL_KILLS))
 235         rename_use_op (use_p);
 236     }
 237
 238   FOR_EACH_EDGE (e, ei, bb->succs)
 239     {
 240       if (!flow_bb_inside_loop_p (loop, e->dest))
 241         continue;
 242       for (phi = phi_nodes (e->dest); phi; phi = PHI_CHAIN (phi))
 243         rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e));
 244     }
 245 }
 246
 247
 248 /* Renames variables in new generated LOOP.  */
 249
 250 static void
 251 rename_variables_in_loop (struct loop *loop)
 252 {
 253   unsigned i;
 254   basic_block *bbs;
 255
 256   bbs = get_loop_body (loop);
 257
 258   for (i = 0; i < loop->num_nodes; i++)
 259     rename_variables_in_bb (bbs[i]);
 260
 261   free (bbs);
 262 }
 263
 264
 265 /* Update the PHI nodes of NEW_LOOP.
 266
 267    NEW_LOOP is a duplicate of ORIG_LOOP.
 268    AFTER indicates whether NEW_LOOP executes before or after ORIG_LOOP:
 269    AFTER is true if NEW_LOOP executes after ORIG_LOOP, and false if it
 270    executes before it.  */
 271
 272 static void
 273 slpeel_update_phis_for_duplicate_loop (struct loop *orig_loop,
 274                                        struct loop *new_loop, bool after)
 275 {
 276   tree new_ssa_name;
 277   tree phi_new, phi_orig;
 278   tree def;
 279   edge orig_loop_latch = loop_latch_edge (orig_loop);
 280   edge orig_entry_e = loop_preheader_edge (orig_loop);
 281   edge new_loop_exit_e = new_loop->single_exit;
 282   edge new_loop_entry_e = loop_preheader_edge (new_loop);
 283   edge entry_arg_e = (after ? orig_loop_latch : orig_entry_e);
 284
 285   /*
 286      step 1. For each loop-header-phi:
 287              Add the first phi argument for the phi in NEW_LOOP
 288             (the one associated with the entry of NEW_LOOP)
 289
 290      step 2. For each loop-header-phi:
 291              Add the second phi argument for the phi in NEW_LOOP
 292             (the one associated with the latch of NEW_LOOP)
 293
 294      step 3. Update the phis in the successor block of NEW_LOOP.
 295
 296         case 1: NEW_LOOP was placed before ORIG_LOOP:
 297                 The successor block of NEW_LOOP is the header of ORIG_LOOP.
 298                 Updating the phis in the successor block can therefore be done
 299                 along with the scanning of the loop header phis, because the
 300                 header blocks of ORIG_LOOP and NEW_LOOP have exactly the same
 301                 phi nodes, organized in the same order.
 302
 303         case 2: NEW_LOOP was placed after ORIG_LOOP:
 304                 The successor block of NEW_LOOP is the original exit block of
 305                 ORIG_LOOP - the phis to be updated are the loop-closed-ssa phis.
 306                 We postpone updating these phis to a later stage (when
 307                 loop guards are added).
 308    */
 309
 310
 311   /* Scan the phis in the headers of the old and new loops
 312      (they are organized in exactly the same order).  */
 313
 314   for (phi_new = phi_nodes (new_loop->header),
 315        phi_orig = phi_nodes (orig_loop->header);
 316        phi_new && phi_orig;
 317        phi_new = PHI_CHAIN (phi_new), phi_orig = PHI_CHAIN (phi_orig))
 318     {
 319       /* step 1.  */
 320       def = PHI_ARG_DEF_FROM_EDGE (phi_orig, entry_arg_e);
 321       add_phi_arg (phi_new, def, new_loop_entry_e);
 322
 323       /* step 2.  */
 324       def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_loop_latch);
 325       if (TREE_CODE (def) != SSA_NAME)
 326         continue;
 327
 328       new_ssa_name = get_current_def (def);
 329       if (!new_ssa_name)
 330         {
 331           /* This only happens if there are no definitions
 332              inside the loop. use the phi_result in this case.  */
 333           new_ssa_name = PHI_RESULT (phi_new);
 334         }
 335
 336       /* An ordinary ssa name defined in the loop.  */
 337       add_phi_arg (phi_new, new_ssa_name, loop_latch_edge (new_loop));
 338
 339       /* step 3 (case 1).  */
 340       if (!after)
 341         {
 342           gcc_assert (new_loop_exit_e == orig_entry_e);
 343           SET_PHI_ARG_DEF (phi_orig,
 344                            new_loop_exit_e->dest_idx,
 345                            new_ssa_name);
 346         }
 347     }
 348 }
 349
 350
 351 /* Update PHI nodes for a guard of the LOOP.
 352
 353    Input:
 354    - LOOP, GUARD_EDGE: LOOP is a loop for which we added guard code that
 355         controls whether LOOP is to be executed.  GUARD_EDGE is the edge that
 356         originates from the guard-bb, skips LOOP and reaches the (unique) exit
 357         bb of LOOP.  This loop-exit-bb is an empty bb with one successor.
 358         We denote this bb NEW_MERGE_BB because before the guard code was added
 359         it had a single predecessor (the LOOP header), and now it became a merge
 360         point of two paths - the path that ends with the LOOP exit-edge, and
 361         the path that ends with GUARD_EDGE.
 362    - NEW_EXIT_BB: New basic block that is added by this function between LOOP
 363         and NEW_MERGE_BB. It is used to place loop-closed-ssa-form exit-phis.
 364
 365    ===> The CFG before the guard-code was added:
 366         LOOP_header_bb:
 367           loop_body
 368           if (exit_loop) goto update_bb
 369           else           goto LOOP_header_bb
 370         update_bb:
 371
 372    ==> The CFG after the guard-code was added:
 373         guard_bb:
 374           if (LOOP_guard_condition) goto new_merge_bb
 375           else                      goto LOOP_header_bb
 376         LOOP_header_bb:
 377           loop_body
 378           if (exit_loop_condition) goto new_merge_bb
 379           else                     goto LOOP_header_bb
 380         new_merge_bb:
 381           goto update_bb
 382         update_bb:
 383
 384    ==> The CFG after this function:
 385         guard_bb:
 386           if (LOOP_guard_condition) goto new_merge_bb
 387           else                      goto LOOP_header_bb
 388         LOOP_header_bb:
 389           loop_body
 390           if (exit_loop_condition) goto new_exit_bb
 391           else                     goto LOOP_header_bb
 392         new_exit_bb:
 393         new_merge_bb:
 394           goto update_bb
 395         update_bb:
 396
 397    This function:
 398    1. creates and updates the relevant phi nodes to account for the new
 399       incoming edge (GUARD_EDGE) into NEW_MERGE_BB. This involves:
 400       1.1. Create phi nodes at NEW_MERGE_BB.
 401       1.2. Update the phi nodes at the successor of NEW_MERGE_BB (denoted
 402            UPDATE_BB).  UPDATE_BB was the exit-bb of LOOP before NEW_MERGE_BB
 403    2. preserves loop-closed-ssa-form by creating the required phi nodes
 404       at the exit of LOOP (i.e, in NEW_EXIT_BB).
 405
 406    There are two flavors to this function:
 407
 408    slpeel_update_phi_nodes_for_guard1:
 409      Here the guard controls whether we enter or skip LOOP, where LOOP is a
 410      prolog_loop (loop1 below), and the new phis created in NEW_MERGE_BB are
 411      for variables that have phis in the loop header.
 412
 413    slpeel_update_phi_nodes_for_guard2:
 414      Here the guard controls whether we enter or skip LOOP, where LOOP is an
 415      epilog_loop (loop2 below), and the new phis created in NEW_MERGE_BB are
 416      for variables that have phis in the loop exit.
 417
 418    I.E., the overall structure is:
 419
 420         loop1_preheader_bb:
 421                 guard1 (goto loop1/merg1_bb)
 422         loop1
 423         loop1_exit_bb:
 424                 guard2 (goto merge1_bb/merge2_bb)
 425         merge1_bb
 426         loop2
 427         loop2_exit_bb
 428         merge2_bb
 429         next_bb
 430
 431    slpeel_update_phi_nodes_for_guard1 takes care of creating phis in
 432    loop1_exit_bb and merge1_bb. These are entry phis (phis for the vars
 433    that have phis in loop1->header).
 434
 435    slpeel_update_phi_nodes_for_guard2 takes care of creating phis in
 436    loop2_exit_bb and merge2_bb. These are exit phis (phis for the vars
 437    that have phis in next_bb). It also adds some of these phis to
 438    loop1_exit_bb.
 439
 440    slpeel_update_phi_nodes_for_guard1 is always called before
 441    slpeel_update_phi_nodes_for_guard2. They are both needed in order
 442    to create correct data-flow and loop-closed-ssa-form.
 443
 444    Generally slpeel_update_phi_nodes_for_guard1 creates phis for variables
 445    that change between iterations of a loop (and therefore have a phi-node
 446    at the loop entry), whereas slpeel_update_phi_nodes_for_guard2 creates
 447    phis for variables that are used out of the loop (and therefore have
 448    loop-closed exit phis). Some variables may be both updated between
 449    iterations and used after the loop. This is why in loop1_exit_bb we
 450    may need both entry_phis (created by slpeel_update_phi_nodes_for_guard1)
 451    and exit phis (created by slpeel_update_phi_nodes_for_guard2).
 452
 453    - IS_NEW_LOOP: if IS_NEW_LOOP is true, then LOOP is a newly created copy of
 454      an original loop. i.e., we have:
 455
 456            orig_loop
 457            guard_bb (goto LOOP/new_merge)
 458            new_loop <-- LOOP
 459            new_exit
 460            new_merge
 461            next_bb
 462
 463      If IS_NEW_LOOP is false, then LOOP is an original loop, in which case we
 464      have:
 465
 466            new_loop
 467            guard_bb (goto LOOP/new_merge)
 468            orig_loop <-- LOOP
 469            new_exit
 470            new_merge
 471            next_bb
 472
 473      The SSA names defined in the original loop have a current
 474      reaching definition that that records the corresponding new
 475      ssa-name used in the new duplicated loop copy.
 476   */
 477
 478 /* Function slpeel_update_phi_nodes_for_guard1
 479
 480    Input:
 481    - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
 482    - DEFS - a bitmap of ssa names to mark new names for which we recorded
 483             information.
 484
 485    In the context of the overall structure, we have:
 486
 487         loop1_preheader_bb:
 488                 guard1 (goto loop1/merg1_bb)
 489 LOOP->  loop1
 490         loop1_exit_bb:
 491                 guard2 (goto merge1_bb/merge2_bb)
 492         merge1_bb
 493         loop2
 494         loop2_exit_bb
 495         merge2_bb
 496         next_bb
 497
 498    For each name updated between loop iterations (i.e - for each name that has
 499    an entry (loop-header) phi in LOOP) we create a new phi in:
 500    1. merge1_bb (to account for the edge from guard1)
 501    2. loop1_exit_bb (an exit-phi to keep LOOP in loop-closed form)
 502 */
 503
 504 static void
 505 slpeel_update_phi_nodes_for_guard1 (edge guard_edge, struct loop *loop,
 506                                     bool is_new_loop, basic_block *new_exit_bb,
 507                                     bitmap *defs)
 508 {
 509   tree orig_phi, new_phi;
 510   tree update_phi, update_phi2;
 511   tree guard_arg, loop_arg;
 512   basic_block new_merge_bb = guard_edge->dest;
 513   edge e = EDGE_SUCC (new_merge_bb, 0);
 514   basic_block update_bb = e->dest;
 515   basic_block orig_bb = loop->header;
 516   edge new_exit_e;
 517   tree current_new_name;
 518   tree name;
 519
 520   /* Create new bb between loop and new_merge_bb.  */
 521   *new_exit_bb = split_edge (loop->single_exit);
 522   add_bb_to_loop (*new_exit_bb, loop->outer);
 523
 524   new_exit_e = EDGE_SUCC (*new_exit_bb, 0);
 525
 526   for (orig_phi = phi_nodes (orig_bb), update_phi = phi_nodes (update_bb);
 527        orig_phi && update_phi;
 528        orig_phi = PHI_CHAIN (orig_phi), update_phi = PHI_CHAIN (update_phi))
 529     {
 530       /* Virtual phi; Mark it for renaming. We actually want to call
 531          mar_sym_for_renaming, but since all ssa renaming datastructures
 532          are going to be freed before we get to call ssa_upate, we just
 533          record this name for now in a bitmap, and will mark it for
 534          renaming later.  */
 535       name = PHI_RESULT (orig_phi);
 536       if (!is_gimple_reg (SSA_NAME_VAR (name)))
 537         bitmap_set_bit (vect_vnames_to_rename, SSA_NAME_VERSION (name));
 538
 539       /** 1. Handle new-merge-point phis  **/
 540
 541       /* 1.1. Generate new phi node in NEW_MERGE_BB:  */
 542       new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
 543                                  new_merge_bb);
 544
 545       /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
 546             of LOOP. Set the two phi args in NEW_PHI for these edges:  */
 547       loop_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, EDGE_SUCC (loop->latch, 0));
 548       guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, loop_preheader_edge (loop));
 549
 550       add_phi_arg (new_phi, loop_arg, new_exit_e);
 551       add_phi_arg (new_phi, guard_arg, guard_edge);
 552
 553       /* 1.3. Update phi in successor block.  */
 554       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == loop_arg
 555                   || PHI_ARG_DEF_FROM_EDGE (update_phi, e) == guard_arg);
 556       SET_PHI_ARG_DEF (update_phi, e->dest_idx, PHI_RESULT (new_phi));
 557       update_phi2 = new_phi;
 558
 559
 560       /** 2. Handle loop-closed-ssa-form phis  **/
 561
 562       /* 2.1. Generate new phi node in NEW_EXIT_BB:  */
 563       new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
 564                                  *new_exit_bb);
 565
 566       /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop.  */
 567       add_phi_arg (new_phi, loop_arg, loop->single_exit);
 568
 569       /* 2.3. Update phi in successor of NEW_EXIT_BB:  */
 570       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
 571       SET_PHI_ARG_DEF (update_phi2, new_exit_e->dest_idx, PHI_RESULT (new_phi));
 572
 573       /* 2.4. Record the newly created name with set_current_def.
 574          We want to find a name such that
 575                 name = get_current_def (orig_loop_name)
 576          and to set its current definition as follows:
 577                 set_current_def (name, new_phi_name)
 578
 579          If LOOP is a new loop then loop_arg is already the name we're
 580          looking for. If LOOP is the original loop, then loop_arg is
 581          the orig_loop_name and the relevant name is recorded in its
 582          current reaching definition.  */
 583       if (is_new_loop)
 584         current_new_name = loop_arg;
 585       else
 586         {
 587           current_new_name = get_current_def (loop_arg);
 588           /* current_def is not available only if the variable does not
 589              change inside the loop, in which case we also don't care
 590              about recording a current_def for it because we won't be
 591              trying to create loop-exit-phis for it.  */
 592           if (!current_new_name)
 593             continue;
 594         }
 595       gcc_assert (get_current_def (current_new_name) == NULL_TREE);
 596
 597       set_current_def (current_new_name, PHI_RESULT (new_phi));
 598       bitmap_set_bit (*defs, SSA_NAME_VERSION (current_new_name));
 599     }
 600
 601   set_phi_nodes (new_merge_bb, phi_reverse (phi_nodes (new_merge_bb)));
 602 }
 603
 604
 605 /* Function slpeel_update_phi_nodes_for_guard2
 606
 607    Input:
 608    - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
 609
 610    In the context of the overall structure, we have:
 611
 612         loop1_preheader_bb:
 613                 guard1 (goto loop1/merg1_bb)
 614         loop1
 615         loop1_exit_bb:
 616                 guard2 (goto merge1_bb/merge2_bb)
 617         merge1_bb
 618 LOOP->  loop2
 619         loop2_exit_bb
 620         merge2_bb
 621         next_bb
 622
 623    For each name used out side the loop (i.e - for each name that has an exit
 624    phi in next_bb) we create a new phi in:
 625    1. merge2_bb (to account for the edge from guard_bb)
 626    2. loop2_exit_bb (an exit-phi to keep LOOP in loop-closed form)
 627    3. guard2 bb (an exit phi to keep the preceding loop in loop-closed form),
 628       if needed (if it wasn't handled by slpeel_update_phis_nodes_for_phi1).
 629 */
 630
 631 static void
 632 slpeel_update_phi_nodes_for_guard2 (edge guard_edge, struct loop *loop,
 633                                     bool is_new_loop, basic_block *new_exit_bb)
 634 {
 635   tree orig_phi, new_phi;
 636   tree update_phi, update_phi2;
 637   tree guard_arg, loop_arg;
 638   basic_block new_merge_bb = guard_edge->dest;
 639   edge e = EDGE_SUCC (new_merge_bb, 0);
 640   basic_block update_bb = e->dest;
 641   edge new_exit_e;
 642   tree orig_def, orig_def_new_name;
 643   tree new_name, new_name2;
 644   tree arg;
 645
 646   /* Create new bb between loop and new_merge_bb.  */
 647   *new_exit_bb = split_edge (loop->single_exit);
 648   add_bb_to_loop (*new_exit_bb, loop->outer);
 649
 650   new_exit_e = EDGE_SUCC (*new_exit_bb, 0);
 651
 652   for (update_phi = phi_nodes (update_bb); update_phi;
 653        update_phi = PHI_CHAIN (update_phi))
 654     {
 655       orig_phi = update_phi;
 656       orig_def = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
 657       /* This loop-closed-phi actually doesn't represent a use
 658          out of the loop - the phi arg is a constant.  */
 659       if (TREE_CODE (orig_def) != SSA_NAME)
 660         continue;
 661       orig_def_new_name = get_current_def (orig_def);
 662       arg = NULL_TREE;
 663
 664       /** 1. Handle new-merge-point phis  **/
 665
 666       /* 1.1. Generate new phi node in NEW_MERGE_BB:  */
 667       new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
 668                                  new_merge_bb);
 669
 670       /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
 671             of LOOP. Set the two PHI args in NEW_PHI for these edges:  */
 672       new_name = orig_def;
 673       new_name2 = NULL_TREE;
 674       if (orig_def_new_name)
 675         {
 676           new_name = orig_def_new_name;
 677           /* Some variables have both loop-entry-phis and loop-exit-phis.
 678              Such variables were given yet newer names by phis placed in
 679              guard_bb by slpeel_update_phi_nodes_for_guard1. I.e:
 680              new_name2 = get_current_def (get_current_def (orig_name)).  */
 681           new_name2 = get_current_def (new_name);
 682         }
 683
 684       if (is_new_loop)
 685         {
 686           guard_arg = orig_def;
 687           loop_arg = new_name;
 688         }
 689       else
 690         {
 691           guard_arg = new_name;
 692           loop_arg = orig_def;
 693         }
 694       if (new_name2)
 695         guard_arg = new_name2;
 696
 697       add_phi_arg (new_phi, loop_arg, new_exit_e);
 698       add_phi_arg (new_phi, guard_arg, guard_edge);
 699
 700       /* 1.3. Update phi in successor block.  */
 701       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == orig_def);
 702       SET_PHI_ARG_DEF (update_phi, e->dest_idx, PHI_RESULT (new_phi));
 703       update_phi2 = new_phi;
 704
 705
 706       /** 2. Handle loop-closed-ssa-form phis  **/
 707
 708       /* 2.1. Generate new phi node in NEW_EXIT_BB:  */
 709       new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
 710                                  *new_exit_bb);
 711
 712       /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop.  */
 713       add_phi_arg (new_phi, loop_arg, loop->single_exit);
 714
 715       /* 2.3. Update phi in successor of NEW_EXIT_BB:  */
 716       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
 717       SET_PHI_ARG_DEF (update_phi2, new_exit_e->dest_idx, PHI_RESULT (new_phi));
 718
 719
 720       /** 3. Handle loop-closed-ssa-form phis for first loop  **/
 721
 722       /* 3.1. Find the relevant names that need an exit-phi in
 723          GUARD_BB, i.e. names for which
 724          slpeel_update_phi_nodes_for_guard1 had not already created a
 725          phi node. This is the case for names that are used outside
 726          the loop (and therefore need an exit phi) but are not updated
 727          across loop iterations (and therefore don't have a
 728          loop-header-phi).
 729
 730          slpeel_update_phi_nodes_for_guard1 is responsible for
 731          creating loop-exit phis in GUARD_BB for names that have a
 732          loop-header-phi.  When such a phi is created we also record
 733          the new name in its current definition.  If this new name
 734          exists, then guard_arg was set to this new name (see 1.2
 735          above).  Therefore, if guard_arg is not this new name, this
 736          is an indication that an exit-phi in GUARD_BB was not yet
 737          created, so we take care of it here.  */
 738       if (guard_arg == new_name2)
 739         continue;
 740       arg = guard_arg;
 741
 742       /* 3.2. Generate new phi node in GUARD_BB:  */
 743       new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
 744                                  guard_edge->src);
 745
 746       /* 3.3. GUARD_BB has one incoming edge:  */
 747       gcc_assert (EDGE_COUNT (guard_edge->src->preds) == 1);
 748       add_phi_arg (new_phi, arg, EDGE_PRED (guard_edge->src, 0));
 749
 750       /* 3.4. Update phi in successor of GUARD_BB:  */
 751       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, guard_edge)
 752                                                                 == guard_arg);
 753       SET_PHI_ARG_DEF (update_phi2, guard_edge->dest_idx, PHI_RESULT (new_phi));
 754     }
 755
 756   set_phi_nodes (new_merge_bb, phi_reverse (phi_nodes (new_merge_bb)));
 757 }
 758
 759
 760 /* Make the LOOP iterate NITERS times. This is done by adding a new IV
 761    that starts at zero, increases by one and its limit is NITERS.
 762
 763    Assumption: the exit-condition of LOOP is the last stmt in the loop.  */
 764
 765 void
 766 slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
 767 {
 768   tree indx_before_incr, indx_after_incr, cond_stmt, cond;
 769   tree orig_cond;
 770   edge exit_edge = loop->single_exit;
 771   block_stmt_iterator loop_cond_bsi;
 772   block_stmt_iterator incr_bsi;
 773   bool insert_after;
 774   tree begin_label = tree_block_label (loop->latch);
 775   tree exit_label = tree_block_label (loop->single_exit->dest);
 776   tree init = build_int_cst (TREE_TYPE (niters), 0);
 777   tree step = build_int_cst (TREE_TYPE (niters), 1);
 778   tree then_label;
 779   tree else_label;
 780   LOC loop_loc;
 781
 782   orig_cond = get_loop_exit_condition (loop);
 783   gcc_assert (orig_cond);
 784   loop_cond_bsi = bsi_for_stmt (orig_cond);
 785
 786   standard_iv_increment_position (loop, &incr_bsi, &insert_after);
 787   create_iv (init, step, NULL_TREE, loop,
 788              &incr_bsi, insert_after, &indx_before_incr, &indx_after_incr);
 789
 790   if (exit_edge->flags & EDGE_TRUE_VALUE) /* 'then' edge exits the loop.  */
 791     {
 792       cond = build2 (GE_EXPR, boolean_type_node, indx_after_incr, niters);
 793       then_label = build1 (GOTO_EXPR, void_type_node, exit_label);
 794       else_label = build1 (GOTO_EXPR, void_type_node, begin_label);
 795     }
 796   else /* 'then' edge loops back.  */
 797     {
 798       cond = build2 (LT_EXPR, boolean_type_node, indx_after_incr, niters);
 799       then_label = build1 (GOTO_EXPR, void_type_node, begin_label);
 800       else_label = build1 (GOTO_EXPR, void_type_node, exit_label);
 801     }
 802
 803   cond_stmt = build3 (COND_EXPR, TREE_TYPE (orig_cond), cond,
 804                      then_label, else_label);
 805   bsi_insert_before (&loop_cond_bsi, cond_stmt, BSI_SAME_STMT);
 806
 807   /* Remove old loop exit test:  */
 808   bsi_remove (&loop_cond_bsi);
 809
 810   loop_loc = find_loop_location (loop);
 811   if (dump_file && (dump_flags & TDF_DETAILS))
 812     {
 813       if (loop_loc != UNKNOWN_LOC)
 814         fprintf (dump_file, "\nloop at %s:%d: ",
 815                  LOC_FILE (loop_loc), LOC_LINE (loop_loc));
 816       print_generic_expr (dump_file, cond_stmt, TDF_SLIM);
 817     }
 818
 819   loop->nb_iterations = niters;
 820 }
 821
 822
 823 /* Given LOOP this function generates a new copy of it and puts it
 824    on E which is either the entry or exit of LOOP.  */
 825
 826 static struct loop *
 827 slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, struct loops *loops,
 828                                         edge e)
 829 {
 830   struct loop *new_loop;
 831   basic_block *new_bbs, *bbs;
 832   bool at_exit;
 833   bool was_imm_dom;
 834   basic_block exit_dest;
 835   tree phi, phi_arg;
 836
 837   at_exit = (e == loop->single_exit);
 838   if (!at_exit && e != loop_preheader_edge (loop))
 839     return NULL;
 840
 841   bbs = get_loop_body (loop);
 842
 843   /* Check whether duplication is possible.  */
 844   if (!can_copy_bbs_p (bbs, loop->num_nodes))
 845     {
 846       free (bbs);
 847       return NULL;
 848     }
 849
 850   /* Generate new loop structure.  */
 851   new_loop = duplicate_loop (loops, loop, loop->outer);
 852   if (!new_loop)
 853     {
 854       free (bbs);
 855       return NULL;
 856     }
 857
 858   exit_dest = loop->single_exit->dest;
 859   was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
 860                                           exit_dest) == loop->header ?
 861                  true : false);
 862
 863   new_bbs = xmalloc (sizeof (basic_block) * loop->num_nodes);
 864
 865   copy_bbs (bbs, loop->num_nodes, new_bbs,
 866             &loop->single_exit, 1, &new_loop->single_exit, NULL,
 867             e->src);
 868
 869   /* Duplicating phi args at exit bbs as coming
 870      also from exit of duplicated loop.  */
 871   for (phi = phi_nodes (exit_dest); phi; phi = PHI_CHAIN (phi))
 872     {
 873       phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, loop->single_exit);
 874       if (phi_arg)
 875         {
 876           edge new_loop_exit_edge;
 877
 878           if (EDGE_SUCC (new_loop->header, 0)->dest == new_loop->latch)
 879             new_loop_exit_edge = EDGE_SUCC (new_loop->header, 1);
 880           else
 881             new_loop_exit_edge = EDGE_SUCC (new_loop->header, 0);
 882
 883           add_phi_arg (phi, phi_arg, new_loop_exit_edge);
 884         }
 885     }
 886
 887   if (at_exit) /* Add the loop copy at exit.  */
 888     {
 889       redirect_edge_and_branch_force (e, new_loop->header);
 890       set_immediate_dominator (CDI_DOMINATORS, new_loop->header, e->src);
 891       if (was_imm_dom)
 892         set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_loop->header);
 893     }
 894   else /* Add the copy at entry.  */
 895     {
 896       edge new_exit_e;
 897       edge entry_e = loop_preheader_edge (loop);
 898       basic_block preheader = entry_e->src;
 899
 900       if (!flow_bb_inside_loop_p (new_loop,
 901                                   EDGE_SUCC (new_loop->header, 0)->dest))
 902         new_exit_e = EDGE_SUCC (new_loop->header, 0);
 903       else
 904         new_exit_e = EDGE_SUCC (new_loop->header, 1);
 905
 906       redirect_edge_and_branch_force (new_exit_e, loop->header);
 907       set_immediate_dominator (CDI_DOMINATORS, loop->header,
 908                                new_exit_e->src);
 909
 910       /* We have to add phi args to the loop->header here as coming
 911          from new_exit_e edge.  */
 912       for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
 913         {
 914           phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, entry_e);
 915           if (phi_arg)
 916             add_phi_arg (phi, phi_arg, new_exit_e);
 917         }
 918
 919       redirect_edge_and_branch_force (entry_e, new_loop->header);
 920       set_immediate_dominator (CDI_DOMINATORS, new_loop->header, preheader);
 921     }
 922
 923   free (new_bbs);
 924   free (bbs);
 925
 926   return new_loop;
 927 }
 928
 929
 930 /* Given the condition statement COND, put it as the last statement
 931    of GUARD_BB; EXIT_BB is the basic block to skip the loop;
 932    Assumes that this is the single exit of the guarded loop.
 933    Returns the skip edge.  */
 934
 935 static edge
 936 slpeel_add_loop_guard (basic_block guard_bb, tree cond, basic_block exit_bb,
 937                         basic_block dom_bb)
 938 {
 939   block_stmt_iterator bsi;
 940   edge new_e, enter_e;
 941   tree cond_stmt, then_label, else_label;
 942
 943   enter_e = EDGE_SUCC (guard_bb, 0);
 944   enter_e->flags &= ~EDGE_FALLTHRU;
 945   enter_e->flags |= EDGE_FALSE_VALUE;
 946   bsi = bsi_last (guard_bb);
 947
 948   then_label = build1 (GOTO_EXPR, void_type_node,
 949                        tree_block_label (exit_bb));
 950   else_label = build1 (GOTO_EXPR, void_type_node,
 951                        tree_block_label (enter_e->dest));
 952   cond_stmt = build3 (COND_EXPR, void_type_node, cond,
 953                      then_label, else_label);
 954   bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT);
 955   /* Add new edge to connect guard block to the merge/loop-exit block.  */
 956   new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);
 957   set_immediate_dominator (CDI_DOMINATORS, exit_bb, dom_bb);
 958   return new_e;
 959 }
 960
 961
 962 /* This function verifies that the following restrictions apply to LOOP:
 963    (1) it is innermost
 964    (2) it consists of exactly 2 basic blocks - header, and an empty latch.
 965    (3) it is single entry, single exit
 966    (4) its exit condition is the last stmt in the header
 967    (5) E is the entry/exit edge of LOOP.
 968  */
 969
 970 bool
 971 slpeel_can_duplicate_loop_p (struct loop *loop, edge e)
 972 {
 973   edge exit_e = loop->single_exit;
 974   edge entry_e = loop_preheader_edge (loop);
 975   tree orig_cond = get_loop_exit_condition (loop);
 976   block_stmt_iterator loop_exit_bsi = bsi_last (exit_e->src);
 977
 978   if (need_ssa_update_p ())
 979     return false;
 980
 981   if (loop->inner
 982       /* All loops have an outer scope; the only case loop->outer is NULL is for
 983          the function itself.  */
 984       || !loop->outer
 985       || loop->num_nodes != 2
 986       || !empty_block_p (loop->latch)
 987       || !loop->single_exit
 988       /* Verify that new loop exit condition can be trivially modified.  */
 989       || (!orig_cond || orig_cond != bsi_stmt (loop_exit_bsi))
 990       || (e != exit_e && e != entry_e))
 991     return false;
 992
 993   return true;
 994 }
 995
 996 #ifdef ENABLE_CHECKING
 997 void
 998 slpeel_verify_cfg_after_peeling (struct loop *first_loop,
 999                                  struct loop *second_loop)
1000 {
1001   basic_block loop1_exit_bb = first_loop->single_exit->dest;
1002   basic_block loop2_entry_bb = loop_preheader_edge (second_loop)->src;
1003   basic_block loop1_entry_bb = loop_preheader_edge (first_loop)->src;
1004
1005   /* A guard that controls whether the second_loop is to be executed or skipped
1006      is placed in first_loop->exit.  first_loopt->exit therefore has two
1007      successors - one is the preheader of second_loop, and the other is a bb
1008      after second_loop.
1009    */
1010   gcc_assert (EDGE_COUNT (loop1_exit_bb->succs) == 2);
1011
1012   /* 1. Verify that one of the successors of first_loopt->exit is the preheader
1013         of second_loop.  */
1014
1015   /* The preheader of new_loop is expected to have two predecessors:
1016      first_loop->exit and the block that precedes first_loop.  */
1017
1018   gcc_assert (EDGE_COUNT (loop2_entry_bb->preds) == 2
1019               && ((EDGE_PRED (loop2_entry_bb, 0)->src == loop1_exit_bb
1020                    && EDGE_PRED (loop2_entry_bb, 1)->src == loop1_entry_bb)
1021                || (EDGE_PRED (loop2_entry_bb, 1)->src ==  loop1_exit_bb
1022                    && EDGE_PRED (loop2_entry_bb, 0)->src == loop1_entry_bb)));
1023
1024   /* Verify that the other successor of first_loopt->exit is after the
1025      second_loop.  */
1026   /* TODO */
1027 }
1028 #endif
1029
1030 /* Function slpeel_tree_peel_loop_to_edge.
1031
1032    Peel the first (last) iterations of LOOP into a new prolog (epilog) loop
1033    that is placed on the entry (exit) edge E of LOOP. After this transformation
1034    we have two loops one after the other - first-loop iterates FIRST_NITERS
1035    times, and second-loop iterates the remainder NITERS - FIRST_NITERS times.
1036
1037    Input:
1038    - LOOP: the loop to be peeled.
1039    - E: the exit or entry edge of LOOP.
1040         If it is the entry edge, we peel the first iterations of LOOP. In this
1041         case first-loop is LOOP, and second-loop is the newly created loop.
1042         If it is the exit edge, we peel the last iterations of LOOP. In this
1043         case, first-loop is the newly created loop, and second-loop is LOOP.
1044    - NITERS: the number of iterations that LOOP iterates.
1045    - FIRST_NITERS: the number of iterations that the first-loop should iterate.
1046    - UPDATE_FIRST_LOOP_COUNT:  specified whether this function is responsible
1047         for updating the loop bound of the first-loop to FIRST_NITERS.  If it
1048         is false, the caller of this function may want to take care of this
1049         (this can be useful if we don't want new stmts added to first-loop).
1050
1051    Output:
1052    The function returns a pointer to the new loop-copy, or NULL if it failed
1053    to perform the transformation.
1054
1055    The function generates two if-then-else guards: one before the first loop,
1056    and the other before the second loop:
1057    The first guard is:
1058      if (FIRST_NITERS == 0) then skip the first loop,
1059      and go directly to the second loop.
1060    The second guard is:
1061      if (FIRST_NITERS == NITERS) then skip the second loop.
1062
1063    FORNOW only simple loops are supported (see slpeel_can_duplicate_loop_p).
1064    FORNOW the resulting code will not be in loop-closed-ssa form.
1065 */
1066
1067 struct loop*
1068 slpeel_tree_peel_loop_to_edge (struct loop *loop, struct loops *loops,
1069                                edge e, tree first_niters,
1070                                tree niters, bool update_first_loop_count)
1071 {
1072   struct loop *new_loop = NULL, *first_loop, *second_loop;
1073   edge skip_e;
1074   tree pre_condition;
1075   bitmap definitions;
1076   basic_block bb_before_second_loop, bb_after_second_loop;
1077   basic_block bb_before_first_loop;
1078   basic_block bb_between_loops;
1079   basic_block new_exit_bb;
1080   edge exit_e = loop->single_exit;
1081   LOC loop_loc;
1082
1083   if (!slpeel_can_duplicate_loop_p (loop, e))
1084     return NULL;
1085
1086   /* We have to initialize cfg_hooks. Then, when calling
1087    cfg_hooks->split_edge, the function tree_split_edge
1088    is actually called and, when calling cfg_hooks->duplicate_block,
1089    the function tree_duplicate_bb is called.  */
1090   tree_register_cfg_hooks ();
1091
1092
1093   /* 1. Generate a copy of LOOP and put it on E (E is the entry/exit of LOOP).
1094         Resulting CFG would be:
1095
1096         first_loop:
1097         do {
1098         } while ...
1099
1100         second_loop:
1101         do {
1102         } while ...
1103
1104         orig_exit_bb:
1105    */
1106
1107   if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, loops, e)))
1108     {
1109       loop_loc = find_loop_location (loop);
1110       if (dump_file && (dump_flags & TDF_DETAILS))
1111         {
1112           if (loop_loc != UNKNOWN_LOC)
1113             fprintf (dump_file, "\n%s:%d: note: ",
1114                      LOC_FILE (loop_loc), LOC_LINE (loop_loc));
1115           fprintf (dump_file, "tree_duplicate_loop_to_edge_cfg failed.\n");
1116         }
1117       return NULL;
1118     }
1119
1120   if (e == exit_e)
1121     {
1122       /* NEW_LOOP was placed after LOOP.  */
1123       first_loop = loop;
1124       second_loop = new_loop;
1125     }
1126   else
1127     {
1128       /* NEW_LOOP was placed before LOOP.  */
1129       first_loop = new_loop;
1130       second_loop = loop;
1131     }
1132
1133   definitions = ssa_names_to_replace ();
1134   slpeel_update_phis_for_duplicate_loop (loop, new_loop, e == exit_e);
1135   rename_variables_in_loop (new_loop);
1136
1137
1138   /* 2. Add the guard that controls whether the first loop is executed.
1139         Resulting CFG would be:
1140
1141         bb_before_first_loop:
1142         if (FIRST_NITERS == 0) GOTO bb_before_second_loop
1143                                GOTO first-loop
1144
1145         first_loop:
1146         do {
1147         } while ...
1148
1149         bb_before_second_loop:
1150
1151         second_loop:
1152         do {
1153         } while ...
1154
1155         orig_exit_bb:
1156    */
1157
1158   bb_before_first_loop = split_edge (loop_preheader_edge (first_loop));
1159   add_bb_to_loop (bb_before_first_loop, first_loop->outer);
1160   bb_before_second_loop = split_edge (first_loop->single_exit);
1161   add_bb_to_loop (bb_before_second_loop, first_loop->outer);
1162
1163   pre_condition =
1164     fold_build2 (LE_EXPR, boolean_type_node, first_niters,
1165                  build_int_cst (TREE_TYPE (first_niters), 0));
1166   skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
1167                                   bb_before_second_loop, bb_before_first_loop);
1168   slpeel_update_phi_nodes_for_guard1 (skip_e, first_loop,
1169                                       first_loop == new_loop,
1170                                       &new_exit_bb, &definitions);
1171
1172
1173   /* 3. Add the guard that controls whether the second loop is executed.
1174         Resulting CFG would be:
1175
1176         bb_before_first_loop:
1177         if (FIRST_NITERS == 0) GOTO bb_before_second_loop (skip first loop)
1178                                GOTO first-loop
1179
1180         first_loop:
1181         do {
1182         } while ...
1183
1184         bb_between_loops:
1185         if (FIRST_NITERS == NITERS) GOTO bb_after_second_loop (skip second loop)
1186                                     GOTO bb_before_second_loop
1187
1188         bb_before_second_loop:
1189
1190         second_loop:
1191         do {
1192         } while ...
1193
1194         bb_after_second_loop:
1195
1196         orig_exit_bb:
1197    */
1198
1199   bb_between_loops = new_exit_bb;
1200   bb_after_second_loop = split_edge (second_loop->single_exit);
1201   add_bb_to_loop (bb_after_second_loop, second_loop->outer);
1202
1203   pre_condition =
1204         fold_build2 (EQ_EXPR, boolean_type_node, first_niters, niters);
1205   skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition,
1206                                   bb_after_second_loop, bb_before_first_loop);
1207   slpeel_update_phi_nodes_for_guard2 (skip_e, second_loop,
1208                                      second_loop == new_loop, &new_exit_bb);
1209
1210   /* 4. Make first-loop iterate FIRST_NITERS times, if requested.
1211    */
1212   if (update_first_loop_count)
1213     slpeel_make_loop_iterate_ntimes (first_loop, first_niters);
1214
1215   BITMAP_FREE (definitions);
1216   delete_update_ssa ();
1217
1218   return new_loop;
1219 }
1220
1221 /* Function vect_get_loop_location.
1222
1223    Extract the location of the loop in the source code.
1224    If the loop is not well formed for vectorization, an estimated
1225    location is calculated.
1226    Return the loop location if succeed and NULL if not.  */
1227
1228 LOC
1229 find_loop_location (struct loop *loop)
1230 {
1231   tree node = NULL_TREE;
1232   basic_block bb;
1233   block_stmt_iterator si;
1234
1235   if (!loop)
1236     return UNKNOWN_LOC;
1237
1238   node = get_loop_exit_condition (loop);
1239
1240   if (node && EXPR_P (node) && EXPR_HAS_LOCATION (node)
1241       && EXPR_FILENAME (node) && EXPR_LINENO (node))
1242     return EXPR_LOC (node);
1243
1244   /* If we got here the loop is probably not "well formed",
1245      try to estimate the loop location */
1246
1247   if (!loop->header)
1248     return UNKNOWN_LOC;
1249
1250   bb = loop->header;
1251
1252   for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1253     {
1254       node = bsi_stmt (si);
1255       if (node && EXPR_P (node) && EXPR_HAS_LOCATION (node))
1256         return EXPR_LOC (node);
1257     }
1258
1259   return UNKNOWN_LOC;
1260 }
1261
1262
1263 /*************************************************************************
1264   Vectorization Debug Information.
1265  *************************************************************************/
1266
1267 /* Function vect_set_verbosity_level.
1268
1269    Called from toplev.c upon detection of the
1270    -ftree-vectorizer-verbose=N option.  */
1271
1272 void
1273 vect_set_verbosity_level (const char *val)
1274 {
1275    unsigned int vl;
1276
1277    vl = atoi (val);
1278    if (vl < MAX_VERBOSITY_LEVEL)
1279      vect_verbosity_level = vl;
1280    else
1281      vect_verbosity_level = MAX_VERBOSITY_LEVEL - 1;
1282 }
1283
1284
1285 /* Function vect_set_dump_settings.
1286
1287    Fix the verbosity level of the vectorizer if the
1288    requested level was not set explicitly using the flag
1289    -ftree-vectorizer-verbose=N.
1290    Decide where to print the debugging information (dump_file/stderr).
1291    If the user defined the verbosity level, but there is no dump file,
1292    print to stderr, otherwise print to the dump file.  */
1293
1294 static void
1295 vect_set_dump_settings (void)
1296 {
1297   vect_dump = dump_file;
1298
1299   /* Check if the verbosity level was defined by the user:  */
1300   if (vect_verbosity_level != MAX_VERBOSITY_LEVEL)
1301     {
1302       /* If there is no dump file, print to stderr.  */
1303       if (!dump_file)
1304         vect_dump = stderr;
1305       return;
1306     }
1307
1308   /* User didn't specify verbosity level:  */
1309   if (dump_file && (dump_flags & TDF_DETAILS))
1310     vect_verbosity_level = REPORT_DETAILS;
1311   else if (dump_file && (dump_flags & TDF_STATS))
1312     vect_verbosity_level = REPORT_UNVECTORIZED_LOOPS;
1313   else
1314     vect_verbosity_level = REPORT_NONE;
1315
1316   gcc_assert (dump_file || vect_verbosity_level == REPORT_NONE);
1317 }
1318
1319
1320 /* Function debug_loop_details.
1321
1322    For vectorization debug dumps.  */
1323
1324 bool
1325 vect_print_dump_info (enum verbosity_levels vl)
1326 {
1327   if (vl > vect_verbosity_level)
1328     return false;
1329
1330   if (vect_loop_location == UNKNOWN_LOC)
1331     fprintf (vect_dump, "\n%s:%d: note: ",
1332                  DECL_SOURCE_FILE (current_function_decl),
1333                  DECL_SOURCE_LINE (current_function_decl));
1334   else
1335     fprintf (vect_dump, "\n%s:%d: note: ",
1336              LOC_FILE (vect_loop_location), LOC_LINE (vect_loop_location));
1337
1338
1339   return true;
1340 }
1341
1342
1343 /*************************************************************************
1344   Vectorization Utilities.
1345  *************************************************************************/
1346
1347 /* Function new_stmt_vec_info.
1348
1349    Create and initialize a new stmt_vec_info struct for STMT.  */
1350
1351 stmt_vec_info
1352 new_stmt_vec_info (tree stmt, loop_vec_info loop_vinfo)
1353 {
1354   stmt_vec_info res;
1355   res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
1356
1357   STMT_VINFO_TYPE (res) = undef_vec_info_type;
1358   STMT_VINFO_STMT (res) = stmt;
1359   STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
1360   STMT_VINFO_RELEVANT_P (res) = 0;
1361   STMT_VINFO_LIVE_P (res) = 0;
1362   STMT_VINFO_VECTYPE (res) = NULL;
1363   STMT_VINFO_VEC_STMT (res) = NULL;
1364   STMT_VINFO_DATA_REF (res) = NULL;
1365   if (TREE_CODE (stmt) == PHI_NODE)
1366     STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
1367   else
1368     STMT_VINFO_DEF_TYPE (res) = vect_loop_def;
1369   STMT_VINFO_SAME_ALIGN_REFS (res) = VEC_alloc (dr_p, heap, 5);
1370
1371   return res;
1372 }
1373
1374
1375 /* Function new_loop_vec_info.
1376
1377    Create and initialize a new loop_vec_info struct for LOOP, as well as
1378    stmt_vec_info structs for all the stmts in LOOP.  */
1379
1380 loop_vec_info
1381 new_loop_vec_info (struct loop *loop)
1382 {
1383   loop_vec_info res;
1384   basic_block *bbs;
1385   block_stmt_iterator si;
1386   unsigned int i;
1387
1388   res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));
1389
1390   bbs = get_loop_body (loop);
1391
1392   /* Create stmt_info for all stmts in the loop.  */
1393   for (i = 0; i < loop->num_nodes; i++)
1394     {
1395       basic_block bb = bbs[i];
1396       tree phi;
1397
1398       for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1399         {
1400           tree_ann_t ann = get_tree_ann (phi);
1401           set_stmt_info (ann, new_stmt_vec_info (phi, res));
1402         }
1403
1404       for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1405         {
1406           tree stmt = bsi_stmt (si);
1407           stmt_ann_t ann;
1408
1409           ann = stmt_ann (stmt);
1410           set_stmt_info ((tree_ann_t)ann, new_stmt_vec_info (stmt, res));
1411         }
1412     }
1413
1414   LOOP_VINFO_LOOP (res) = loop;
1415   LOOP_VINFO_BBS (res) = bbs;
1416   LOOP_VINFO_EXIT_COND (res) = NULL;
1417   LOOP_VINFO_NITERS (res) = NULL;
1418   LOOP_VINFO_VECTORIZABLE_P (res) = 0;
1419   LOOP_PEELING_FOR_ALIGNMENT (res) = 0;
1420   LOOP_VINFO_VECT_FACTOR (res) = 0;
1421   VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREFS (res), 20, "loop_datarefs");
1422   VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DDRS (res), 20, "loop_ddrs");
1423   LOOP_VINFO_UNALIGNED_DR (res) = NULL;
1424   LOOP_VINFO_MAY_MISALIGN_STMTS (res)
1425     = VEC_alloc (tree, heap, PARAM_VALUE (PARAM_VECT_MAX_VERSION_CHECKS));
1426
1427   return res;
1428 }
1429
1430
1431 /* Function destroy_loop_vec_info.
1432
1433    Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the
1434    stmts in the loop.  */
1435
1436 void
1437 destroy_loop_vec_info (loop_vec_info loop_vinfo)
1438 {
1439   struct loop *loop;
1440   basic_block *bbs;
1441   int nbbs;
1442   block_stmt_iterator si;
1443   int j;
1444
1445   if (!loop_vinfo)
1446     return;
1447
1448   loop = LOOP_VINFO_LOOP (loop_vinfo);
1449
1450   bbs = LOOP_VINFO_BBS (loop_vinfo);
1451   nbbs = loop->num_nodes;
1452
1453   for (j = 0; j < nbbs; j++)
1454     {
1455       basic_block bb = bbs[j];
1456       tree phi;
1457       stmt_vec_info stmt_info;
1458
1459       for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1460         {
1461           tree_ann_t ann = get_tree_ann (phi);
1462
1463           stmt_info = vinfo_for_stmt (phi);
1464           free (stmt_info);
1465           set_stmt_info (ann, NULL);
1466         }
1467
1468       for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1469         {
1470           tree stmt = bsi_stmt (si);
1471           stmt_ann_t ann = stmt_ann (stmt);
1472           stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1473
1474           if (stmt_info)
1475             {
1476               VEC_free (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
1477               free (stmt_info);
1478               set_stmt_info ((tree_ann_t)ann, NULL);
1479             }
1480         }
1481     }
1482
1483   free (LOOP_VINFO_BBS (loop_vinfo));
1484   varray_clear (LOOP_VINFO_DATAREFS (loop_vinfo));
1485   varray_clear (LOOP_VINFO_DDRS (loop_vinfo));
1486   VEC_free (tree, heap, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo));
1487
1488   free (loop_vinfo);
1489 }
1490
1491
1492 /* Function vect_force_dr_alignment_p.
1493
1494    Returns whether the alignment of a DECL can be forced to be aligned
1495    on ALIGNMENT bit boundary.  */
1496
1497 bool
1498 vect_can_force_dr_alignment_p (tree decl, unsigned int alignment)
1499 {
1500   if (TREE_CODE (decl) != VAR_DECL)
1501     return false;
1502
1503   if (DECL_EXTERNAL (decl))
1504     return false;
1505
1506   if (TREE_ASM_WRITTEN (decl))
1507     return false;
1508
1509   if (TREE_STATIC (decl))
1510     return (alignment <= MAX_OFILE_ALIGNMENT);
1511   else
1512     /* This is not 100% correct.  The absolute correct stack alignment
1513        is STACK_BOUNDARY.  We're supposed to hope, but not assume, that
1514        PREFERRED_STACK_BOUNDARY is honored by all translation units.
1515        However, until someone implements forced stack alignment, SSE
1516        isn't really usable without this.  */
1517     return (alignment <= PREFERRED_STACK_BOUNDARY);
1518 }
1519
1520
1521 /* Function get_vectype_for_scalar_type.
1522
1523    Returns the vector type corresponding to SCALAR_TYPE as supported
1524    by the target.  */
1525
1526 tree
1527 get_vectype_for_scalar_type (tree scalar_type)
1528 {
1529   enum machine_mode inner_mode = TYPE_MODE (scalar_type);
1530   int nbytes = GET_MODE_SIZE (inner_mode);
1531   int nunits;
1532   tree vectype;
1533
1534   if (nbytes == 0 || nbytes >= UNITS_PER_SIMD_WORD)
1535     return NULL_TREE;
1536
1537   /* FORNOW: Only a single vector size per target (UNITS_PER_SIMD_WORD)
1538      is expected.  */
1539   nunits = UNITS_PER_SIMD_WORD / nbytes;
1540
1541   vectype = build_vector_type (scalar_type, nunits);
1542   if (vect_print_dump_info (REPORT_DETAILS))
1543     {
1544       fprintf (vect_dump, "get vectype with %d units of type ", nunits);
1545       print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
1546     }
1547
1548   if (!vectype)
1549     return NULL_TREE;
1550
1551   if (vect_print_dump_info (REPORT_DETAILS))
1552     {
1553       fprintf (vect_dump, "vectype: ");
1554       print_generic_expr (vect_dump, vectype, TDF_SLIM);
1555     }
1556
1557   if (!VECTOR_MODE_P (TYPE_MODE (vectype))
1558       && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
1559     {
1560       if (vect_print_dump_info (REPORT_DETAILS))
1561         fprintf (vect_dump, "mode not supported by target.");
1562       return NULL_TREE;
1563     }
1564
1565   return vectype;
1566 }
1567
1568
1569 /* Function vect_supportable_dr_alignment
1570
1571    Return whether the data reference DR is supported with respect to its
1572    alignment.  */
1573
1574 enum dr_alignment_support
1575 vect_supportable_dr_alignment (struct data_reference *dr)
1576 {
1577   tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr)));
1578   enum machine_mode mode = (int) TYPE_MODE (vectype);
1579
1580   if (aligned_access_p (dr))
1581     return dr_aligned;
1582
1583   /* Possibly unaligned access.  */
1584
1585   if (DR_IS_READ (dr))
1586     {
1587       if (vec_realign_load_optab->handlers[mode].insn_code != CODE_FOR_nothing
1588           && (!targetm.vectorize.builtin_mask_for_load
1589               || targetm.vectorize.builtin_mask_for_load ()))
1590         return dr_unaligned_software_pipeline;
1591
1592       if (movmisalign_optab->handlers[mode].insn_code != CODE_FOR_nothing)
1593         /* Can't software pipeline the loads, but can at least do them.  */
1594         return dr_unaligned_supported;
1595     }
1596
1597   /* Unsupported.  */
1598   return dr_unaligned_unsupported;
1599 }
1600
1601
1602 /* Function vect_is_simple_use.
1603
1604    Input:
1605    LOOP - the loop that is being vectorized.
1606    OPERAND - operand of a stmt in LOOP.
1607    DEF - the defining stmt in case OPERAND is an SSA_NAME.
1608
1609    Returns whether a stmt with OPERAND can be vectorized.
1610    Supportable operands are constants, loop invariants, and operands that are
1611    defined by the current iteration of the loop. Unsupportable operands are
1612    those that are defined by a previous iteration of the loop (as is the case
1613    in reduction/induction computations).  */
1614
1615 bool
1616 vect_is_simple_use (tree operand, loop_vec_info loop_vinfo, tree *def_stmt,
1617                     tree *def, enum vect_def_type *dt)
1618 {
1619   basic_block bb;
1620   stmt_vec_info stmt_vinfo;
1621   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1622
1623   *def_stmt = NULL_TREE;
1624   *def = NULL_TREE;
1625
1626   if (vect_print_dump_info (REPORT_DETAILS))
1627     {
1628       fprintf (vect_dump, "vect_is_simple_use: operand ");
1629       print_generic_expr (vect_dump, operand, TDF_SLIM);
1630     }
1631
1632   if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
1633     {
1634       *dt = vect_constant_def;
1635       return true;
1636     }
1637
1638   if (TREE_CODE (operand) != SSA_NAME)
1639     {
1640       if (vect_print_dump_info (REPORT_DETAILS))
1641         fprintf (vect_dump, "not ssa-name.");
1642       return false;
1643     }
1644
1645   *def_stmt = SSA_NAME_DEF_STMT (operand);
1646   if (*def_stmt == NULL_TREE )
1647     {
1648       if (vect_print_dump_info (REPORT_DETAILS))
1649         fprintf (vect_dump, "no def_stmt.");
1650       return false;
1651     }
1652
1653   if (vect_print_dump_info (REPORT_DETAILS))
1654     {
1655       fprintf (vect_dump, "def_stmt: ");
1656       print_generic_expr (vect_dump, *def_stmt, TDF_SLIM);
1657     }
1658
1659   /* empty stmt is expected only in case of a function argument.
1660      (Otherwise - we expect a phi_node or a modify_expr).  */
1661   if (IS_EMPTY_STMT (*def_stmt))
1662     {
1663       tree arg = TREE_OPERAND (*def_stmt, 0);
1664       if (TREE_CODE (arg) == INTEGER_CST || TREE_CODE (arg) == REAL_CST)
1665         {
1666           *def = operand;
1667           *dt = vect_invariant_def;
1668           return true;
1669         }
1670
1671       if (vect_print_dump_info (REPORT_DETAILS))
1672         fprintf (vect_dump, "Unexpected empty stmt.");
1673       return false;
1674     }
1675
1676   bb = bb_for_stmt (*def_stmt);
1677   if (!flow_bb_inside_loop_p (loop, bb))
1678     *dt = vect_invariant_def;
1679   else
1680     {
1681       stmt_vinfo = vinfo_for_stmt (*def_stmt);
1682       *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
1683     }
1684
1685   if (*dt == vect_unknown_def_type)
1686     {
1687       if (vect_print_dump_info (REPORT_DETAILS))
1688         fprintf (vect_dump, "Unsupported pattern.");
1689       return false;
1690     }
1691
1692   /* stmts inside the loop that have been identified as performing
1693      a reduction operation cannot have uses in the loop.  */
1694   if (*dt == vect_reduction_def && TREE_CODE (*def_stmt) != PHI_NODE)
1695     {
1696       if (vect_print_dump_info (REPORT_DETAILS))
1697         fprintf (vect_dump, "reduction used in loop.");
1698       return false;
1699     }
1700
1701   if (vect_print_dump_info (REPORT_DETAILS))
1702     fprintf (vect_dump, "type of def: %d.",*dt);
1703
1704   switch (TREE_CODE (*def_stmt))
1705     {
1706     case PHI_NODE:
1707       *def = PHI_RESULT (*def_stmt);
1708       gcc_assert (*dt == vect_induction_def || *dt == vect_reduction_def
1709                   || *dt == vect_invariant_def);
1710       break;
1711
1712     case MODIFY_EXPR:
1713       *def = TREE_OPERAND (*def_stmt, 0);
1714       gcc_assert (*dt == vect_loop_def || *dt == vect_invariant_def);
1715       break;
1716
1717     default:
1718       if (vect_print_dump_info (REPORT_DETAILS))
1719         fprintf (vect_dump, "unsupported defining stmt: ");
1720       return false;
1721     }
1722
1723   if (*dt == vect_induction_def)
1724     {
1725       if (vect_print_dump_info (REPORT_DETAILS))
1726         fprintf (vect_dump, "induction not supported.");
1727       return false;
1728     }
1729
1730   return true;
1731 }
1732
1733
1734 /* Function reduction_code_for_scalar_code
1735
1736    Input:
1737    CODE - tree_code of a reduction operations.
1738
1739    Output:
1740    REDUC_CODE - the corresponding tree-code to be used to reduce the
1741       vector of partial results into a single scalar result (which
1742       will also reside in a vector).
1743
1744    Return TRUE if a corresponding REDUC_CODE was found, FALSE otherwise.  */
1745
1746 bool
1747 reduction_code_for_scalar_code (enum tree_code code,
1748                                 enum tree_code *reduc_code)
1749 {
1750   switch (code)
1751   {
1752   case MAX_EXPR:
1753     *reduc_code = REDUC_MAX_EXPR;
1754     return true;
1755
1756   case MIN_EXPR:
1757     *reduc_code = REDUC_MIN_EXPR;
1758     return true;
1759
1760   case PLUS_EXPR:
1761     *reduc_code = REDUC_PLUS_EXPR;
1762     return true;
1763
1764   default:
1765     return false;
1766   }
1767 }
1768
1769
1770 /* Function vect_is_simple_reduction
1771
1772    Detect a cross-iteration def-use cucle that represents a simple
1773    reduction computation. We look for the following pattern:
1774
1775    loop_header:
1776      a1 = phi < a0, a2 >
1777      a3 = ...
1778      a2 = operation (a3, a1)
1779
1780    such that:
1781    1. operation is commutative and associative and it is safe to
1782       change the order of the computation.
1783    2. no uses for a2 in the loop (a2 is used out of the loop)
1784    3. no uses of a1 in the loop besides the reduction operation.
1785
1786    Condition 1 is tested here.
1787    Conditions 2,3 are tested in vect_mark_stmts_to_be_vectorized.  */
1788
1789 tree
1790 vect_is_simple_reduction (struct loop *loop, tree phi)
1791 {
1792   edge latch_e = loop_latch_edge (loop);
1793   tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
1794   tree def_stmt, def1, def2;
1795   enum tree_code code;
1796   int op_type;
1797   tree operation, op1, op2;
1798   tree type;
1799
1800   if (TREE_CODE (loop_arg) != SSA_NAME)
1801     {
1802       if (vect_print_dump_info (REPORT_DETAILS))
1803         {
1804           fprintf (vect_dump, "reduction: not ssa_name: ");
1805           print_generic_expr (vect_dump, loop_arg, TDF_SLIM);
1806         }
1807       return NULL_TREE;
1808     }
1809
1810   def_stmt = SSA_NAME_DEF_STMT (loop_arg);
1811   if (!def_stmt)
1812     {
1813       if (vect_print_dump_info (REPORT_DETAILS))
1814         fprintf (vect_dump, "reduction: no def_stmt.");
1815       return NULL_TREE;
1816     }
1817
1818   if (TREE_CODE (def_stmt) != MODIFY_EXPR)
1819     {
1820       if (vect_print_dump_info (REPORT_DETAILS))
1821         {
1822           print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
1823         }
1824       return NULL_TREE;
1825     }
1826
1827   operation = TREE_OPERAND (def_stmt, 1);
1828   code = TREE_CODE (operation);
1829   if (!commutative_tree_code (code) || !associative_tree_code (code))
1830     {
1831       if (vect_print_dump_info (REPORT_DETAILS))
1832         {
1833           fprintf (vect_dump, "reduction: not commutative/associative: ");
1834           print_generic_expr (vect_dump, operation, TDF_SLIM);
1835         }
1836       return NULL_TREE;
1837     }
1838
1839   op_type = TREE_CODE_LENGTH (code);
1840   if (op_type != binary_op)
1841     {
1842       if (vect_print_dump_info (REPORT_DETAILS))
1843         {
1844           fprintf (vect_dump, "reduction: not binary operation: ");
1845           print_generic_expr (vect_dump, operation, TDF_SLIM);
1846         }
1847       return NULL_TREE;
1848     }
1849
1850   op1 = TREE_OPERAND (operation, 0);
1851   op2 = TREE_OPERAND (operation, 1);
1852   if (TREE_CODE (op1) != SSA_NAME || TREE_CODE (op2) != SSA_NAME)
1853     {
1854       if (vect_print_dump_info (REPORT_DETAILS))
1855         {
1856           fprintf (vect_dump, "reduction: uses not ssa_names: ");
1857           print_generic_expr (vect_dump, operation, TDF_SLIM);
1858         }
1859       return NULL_TREE;
1860     }
1861
1862   /* Check that it's ok to change the order of the computation.  */
1863   type = TREE_TYPE (operation);
1864   if (TYPE_MAIN_VARIANT (type) != TYPE_MAIN_VARIANT (TREE_TYPE (op1))
1865       || TYPE_MAIN_VARIANT (type) != TYPE_MAIN_VARIANT (TREE_TYPE (op2)))
1866     {
1867       if (vect_print_dump_info (REPORT_DETAILS))
1868         {
1869           fprintf (vect_dump, "reduction: multiple types: operation type: ");
1870           print_generic_expr (vect_dump, type, TDF_SLIM);
1871           fprintf (vect_dump, ", operands types: ");
1872           print_generic_expr (vect_dump, TREE_TYPE (op1), TDF_SLIM);
1873           fprintf (vect_dump, ",");
1874           print_generic_expr (vect_dump, TREE_TYPE (op2), TDF_SLIM);
1875         }
1876       return NULL_TREE;
1877     }
1878
1879   /* CHECKME: check for !flag_finite_math_only too?  */
1880   if (SCALAR_FLOAT_TYPE_P (type) && !flag_unsafe_math_optimizations)
1881     {
1882       /* Changing the order of operations changes the semantics.  */
1883       if (vect_print_dump_info (REPORT_DETAILS))
1884         {
1885           fprintf (vect_dump, "reduction: unsafe fp math optimization: ");
1886           print_generic_expr (vect_dump, operation, TDF_SLIM);
1887         }
1888       return NULL_TREE;
1889     }
1890   else if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type) && flag_trapv)
1891     {
1892       /* Changing the order of operations changes the semantics.  */
1893       if (vect_print_dump_info (REPORT_DETAILS))
1894         {
1895           fprintf (vect_dump, "reduction: unsafe int math optimization: ");
1896           print_generic_expr (vect_dump, operation, TDF_SLIM);
1897         }
1898       return NULL_TREE;
1899     }
1900
1901   /* reduction is safe. we're dealing with one of the following:
1902      1) integer arithmetic and no trapv
1903      2) floating point arithmetic, and special flags permit this optimization.
1904    */
1905   def1 = SSA_NAME_DEF_STMT (op1);
1906   def2 = SSA_NAME_DEF_STMT (op2);
1907   if (!def1 || !def2)
1908     {
1909       if (vect_print_dump_info (REPORT_DETAILS))
1910         {
1911           fprintf (vect_dump, "reduction: no defs for operands: ");
1912           print_generic_expr (vect_dump, operation, TDF_SLIM);
1913         }
1914       return NULL_TREE;
1915     }
1916
1917   if (TREE_CODE (def1) == MODIFY_EXPR
1918       && flow_bb_inside_loop_p (loop, bb_for_stmt (def1))
1919       && def2 == phi)
1920     {
1921       if (vect_print_dump_info (REPORT_DETAILS))
1922         {
1923           fprintf (vect_dump, "detected reduction:");
1924           print_generic_expr (vect_dump, operation, TDF_SLIM);
1925         }
1926       return def_stmt;
1927     }
1928   else if (TREE_CODE (def2) == MODIFY_EXPR
1929       && flow_bb_inside_loop_p (loop, bb_for_stmt (def2))
1930       && def1 == phi)
1931     {
1932       /* Swap operands (just for simplicity - so that the rest of the code
1933          can assume that the reduction variable is always the last (second)
1934          argument).  */
1935       if (vect_print_dump_info (REPORT_DETAILS))
1936         {
1937           fprintf (vect_dump, "detected reduction: need to swap operands:");
1938           print_generic_expr (vect_dump, operation, TDF_SLIM);
1939         }
1940       swap_tree_operands (def_stmt, &TREE_OPERAND (operation, 0),
1941                                     &TREE_OPERAND (operation, 1));
1942       return def_stmt;
1943     }
1944   else
1945     {
1946       if (vect_print_dump_info (REPORT_DETAILS))
1947         {
1948           fprintf (vect_dump, "reduction: unknown pattern.");
1949           print_generic_expr (vect_dump, operation, TDF_SLIM);
1950         }
1951       return NULL_TREE;
1952     }
1953 }
1954
1955
1956 /* Function vect_is_simple_iv_evolution.
1957
1958    FORNOW: A simple evolution of an induction variables in the loop is
1959    considered a polynomial evolution with constant step.  */
1960
1961 bool
1962 vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
1963                              tree * step)
1964 {
1965   tree init_expr;
1966   tree step_expr;
1967
1968   tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
1969
1970   /* When there is no evolution in this loop, the evolution function
1971      is not "simple".  */
1972   if (evolution_part == NULL_TREE)
1973     return false;
1974
1975   /* When the evolution is a polynomial of degree >= 2
1976      the evolution function is not "simple".  */
1977   if (tree_is_chrec (evolution_part))
1978     return false;
1979
1980   step_expr = evolution_part;
1981   init_expr = unshare_expr (initial_condition_in_loop_num (access_fn,
1982                                                            loop_nb));
1983
1984   if (vect_print_dump_info (REPORT_DETAILS))
1985     {
1986       fprintf (vect_dump, "step: ");
1987       print_generic_expr (vect_dump, step_expr, TDF_SLIM);
1988       fprintf (vect_dump, ",  init: ");
1989       print_generic_expr (vect_dump, init_expr, TDF_SLIM);
1990     }
1991
1992   *init = init_expr;
1993   *step = step_expr;
1994
1995   if (TREE_CODE (step_expr) != INTEGER_CST)
1996     {
1997       if (vect_print_dump_info (REPORT_DETAILS))
1998         fprintf (vect_dump, "step unknown.");
1999       return false;
2000     }
2001
2002   return true;
2003 }
2004
2005
2006 /* Function vectorize_loops.
2007
2008    Entry Point to loop vectorization phase.  */
2009
2010 void
2011 vectorize_loops (struct loops *loops)
2012 {
2013   unsigned int i;
2014   unsigned int num_vectorized_loops = 0;
2015
2016   /* Fix the verbosity level if not defined explicitly by the user.  */
2017   vect_set_dump_settings ();
2018
2019   /* Allocate the bitmap that records which virtual variables that
2020      need to be renamed.  */
2021   vect_vnames_to_rename = BITMAP_ALLOC (NULL);
2022
2023   /*  ----------- Analyze loops. -----------  */
2024
2025   /* If some loop was duplicated, it gets bigger number
2026      than all previously defined loops. This fact allows us to run
2027      only over initial loops skipping newly generated ones.  */
2028   vect_loops_num = loops->num;
2029   for (i = 1; i < vect_loops_num; i++)
2030     {
2031       loop_vec_info loop_vinfo;
2032       struct loop *loop = loops->parray[i];
2033
2034       if (!loop)
2035         continue;
2036
2037       vect_loop_location = find_loop_location (loop);
2038       loop_vinfo = vect_analyze_loop (loop);
2039       loop->aux = loop_vinfo;
2040
2041       if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
2042         continue;
2043
2044       vect_transform_loop (loop_vinfo, loops);
2045       num_vectorized_loops++;
2046     }
2047
2048   if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS))
2049     fprintf (vect_dump, "vectorized %u loops in function.\n",
2050              num_vectorized_loops);
2051
2052   /*  ----------- Finalize. -----------  */
2053
2054   BITMAP_FREE (vect_vnames_to_rename);
2055
2056   for (i = 1; i < vect_loops_num; i++)
2057     {
2058       struct loop *loop = loops->parray[i];
2059       loop_vec_info loop_vinfo;
2060
2061       if (!loop)
2062         continue;
2063       loop_vinfo = loop->aux;
2064       destroy_loop_vec_info (loop_vinfo);
2065       loop->aux = NULL;
2066     }
2067 }