gcc/tree-vectorizer.c

   1 /* Loop Vectorization
   2    Copyright (C) 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
   3    Contributed by Dorit Naishlos <dorit@il.ibm.com>
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 2, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING.  If not, write to the Free
  19 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
  20 02110-1301, USA.  */
  21
  22 /* Loop Vectorization Pass.
  23
  24    This pass tries to vectorize loops. This first implementation focuses on
  25    simple inner-most loops, with no conditional control flow, and a set of
  26    simple operations which vector form can be expressed using existing
  27    tree codes (PLUS, MULT etc).
  28
  29    For example, the vectorizer transforms the following simple loop:
  30
  31         short a[N]; short b[N]; short c[N]; int i;
  32
  33         for (i=0; i<N; i++){
  34           a[i] = b[i] + c[i];
  35         }
  36
  37    as if it was manually vectorized by rewriting the source code into:
  38
  39         typedef int __attribute__((mode(V8HI))) v8hi;
  40         short a[N];  short b[N]; short c[N];   int i;
  41         v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
  42         v8hi va, vb, vc;
  43
  44         for (i=0; i<N/8; i++){
  45           vb = pb[i];
  46           vc = pc[i];
  47           va = vb + vc;
  48           pa[i] = va;
  49         }
  50
  51         The main entry to this pass is vectorize_loops(), in which
  52    the vectorizer applies a set of analyses on a given set of loops,
  53    followed by the actual vectorization transformation for the loops that
  54    had successfully passed the analysis phase.
  55
  56         Throughout this pass we make a distinction between two types of
  57    data: scalars (which are represented by SSA_NAMES), and memory references
  58    ("data-refs"). These two types of data require different handling both
  59    during analysis and transformation. The types of data-refs that the
  60    vectorizer currently supports are ARRAY_REFS which base is an array DECL
  61    (not a pointer), and INDIRECT_REFS through pointers; both array and pointer
  62    accesses are required to have a  simple (consecutive) access pattern.
  63
  64    Analysis phase:
  65    ===============
  66         The driver for the analysis phase is vect_analyze_loop_nest().
  67    It applies a set of analyses, some of which rely on the scalar evolution
  68    analyzer (scev) developed by Sebastian Pop.
  69
  70         During the analysis phase the vectorizer records some information
  71    per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
  72    loop, as well as general information about the loop as a whole, which is
  73    recorded in a "loop_vec_info" struct attached to each loop.
  74
  75    Transformation phase:
  76    =====================
  77         The loop transformation phase scans all the stmts in the loop, and
  78    creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
  79    the loop that needs to be vectorized. It insert the vector code sequence
  80    just before the scalar stmt S, and records a pointer to the vector code
  81    in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
  82    attached to S). This pointer will be used for the vectorization of following
  83    stmts which use the def of stmt S. Stmt S is removed if it writes to memory;
  84    otherwise, we rely on dead code elimination for removing it.
  85
  86         For example, say stmt S1 was vectorized into stmt VS1:
  87
  88    VS1: vb = px[i];
  89    S1:  b = x[i];    STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
  90    S2:  a = b;
  91
  92    To vectorize stmt S2, the vectorizer first finds the stmt that defines
  93    the operand 'b' (S1), and gets the relevant vector def 'vb' from the
  94    vector stmt VS1 pointed to by STMT_VINFO_VEC_STMT (stmt_info (S1)). The
  95    resulting sequence would be:
  96
  97    VS1: vb = px[i];
  98    S1:  b = x[i];       STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
  99    VS2: va = vb;
 100    S2:  a = b;          STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2
 101
 102         Operands that are not SSA_NAMEs, are data-refs that appear in
 103    load/store operations (like 'x[i]' in S1), and are handled differently.
 104
 105    Target modeling:
 106    =================
 107         Currently the only target specific information that is used is the
 108    size of the vector (in bytes) - "UNITS_PER_SIMD_WORD". Targets that can
 109    support different sizes of vectors, for now will need to specify one value
 110    for "UNITS_PER_SIMD_WORD". More flexibility will be added in the future.
 111
 112         Since we only vectorize operations which vector form can be
 113    expressed using existing tree codes, to verify that an operation is
 114    supported, the vectorizer checks the relevant optab at the relevant
 115    machine_mode (e.g, add_optab->handlers[(int) V8HImode].insn_code). If
 116    the value found is CODE_FOR_nothing, then there's no target support, and
 117    we can't vectorize the stmt.
 118
 119    For additional information on this project see:
 120    http://gcc.gnu.org/projects/tree-ssa/vectorization.html
 121 */
 122
 123 #include "config.h"
 124 #include "system.h"
 125 #include "coretypes.h"
 126 #include "tm.h"
 127 #include "ggc.h"
 128 #include "tree.h"
 129 #include "target.h"
 130 #include "rtl.h"
 131 #include "basic-block.h"
 132 #include "diagnostic.h"
 133 #include "tree-flow.h"
 134 #include "tree-dump.h"
 135 #include "timevar.h"
 136 #include "cfgloop.h"
 137 #include "cfglayout.h"
 138 #include "expr.h"
 139 #include "optabs.h"
 140 #include "params.h"
 141 #include "toplev.h"
 142 #include "tree-chrec.h"
 143 #include "tree-data-ref.h"
 144 #include "tree-scalar-evolution.h"
 145 #include "input.h"
 146 #include "tree-vectorizer.h"
 147 #include "tree-pass.h"
 148
 149 /*************************************************************************
 150   Simple Loop Peeling Utilities
 151  *************************************************************************/
 152 static struct loop *slpeel_tree_duplicate_loop_to_edge_cfg
 153   (struct loop *, struct loops *, edge);
 154 static void slpeel_update_phis_for_duplicate_loop
 155   (struct loop *, struct loop *, bool after);
 156 static void slpeel_update_phi_nodes_for_guard1
 157   (edge, struct loop *, bool, basic_block *, bitmap *);
 158 static void slpeel_update_phi_nodes_for_guard2
 159   (edge, struct loop *, bool, basic_block *);
 160 static edge slpeel_add_loop_guard (basic_block, tree, basic_block, basic_block);
 161
 162 static void rename_use_op (use_operand_p);
 163 static void rename_variables_in_bb (basic_block);
 164 static void rename_variables_in_loop (struct loop *);
 165
 166 /*************************************************************************
 167   General Vectorization Utilities
 168  *************************************************************************/
 169 static void vect_set_dump_settings (void);
 170
 171 /* vect_dump will be set to stderr or dump_file if exist.  */
 172 FILE *vect_dump;
 173
 174 /* vect_verbosity_level set to an invalid value
 175    to mark that it's uninitialized.  */
 176 enum verbosity_levels vect_verbosity_level = MAX_VERBOSITY_LEVEL;
 177
 178 /* Number of loops, at the beginning of vectorization.  */
 179 unsigned int vect_loops_num;
 180
 181 /* Loop location.  */
 182 static LOC vect_loop_location;
 183
 184 /* Bitmap of virtual variables to be renamed.  */
 185 bitmap vect_vnames_to_rename;
 186 \f
 187 /*************************************************************************
 188   Simple Loop Peeling Utilities
 189
 190   Utilities to support loop peeling for vectorization purposes.
 191  *************************************************************************/
 192
 193
 194 /* Renames the use *OP_P.  */
 195
 196 static void
 197 rename_use_op (use_operand_p op_p)
 198 {
 199   tree new_name;
 200
 201   if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
 202     return;
 203
 204   new_name = get_current_def (USE_FROM_PTR (op_p));
 205
 206   /* Something defined outside of the loop.  */
 207   if (!new_name)
 208     return;
 209
 210   /* An ordinary ssa name defined in the loop.  */
 211
 212   SET_USE (op_p, new_name);
 213 }
 214
 215
 216 /* Renames the variables in basic block BB.  */
 217
 218 static void
 219 rename_variables_in_bb (basic_block bb)
 220 {
 221   tree phi;
 222   block_stmt_iterator bsi;
 223   tree stmt;
 224   use_operand_p use_p;
 225   ssa_op_iter iter;
 226   edge e;
 227   edge_iterator ei;
 228   struct loop *loop = bb->loop_father;
 229
 230   for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
 231     {
 232       stmt = bsi_stmt (bsi);
 233       FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter,
 234                                  (SSA_OP_ALL_USES | SSA_OP_ALL_KILLS))
 235         rename_use_op (use_p);
 236     }
 237
 238   FOR_EACH_EDGE (e, ei, bb->succs)
 239     {
 240       if (!flow_bb_inside_loop_p (loop, e->dest))
 241         continue;
 242       for (phi = phi_nodes (e->dest); phi; phi = PHI_CHAIN (phi))
 243         rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e));
 244     }
 245 }
 246
 247
 248 /* Renames variables in new generated LOOP.  */
 249
 250 static void
 251 rename_variables_in_loop (struct loop *loop)
 252 {
 253   unsigned i;
 254   basic_block *bbs;
 255
 256   bbs = get_loop_body (loop);
 257
 258   for (i = 0; i < loop->num_nodes; i++)
 259     rename_variables_in_bb (bbs[i]);
 260
 261   free (bbs);
 262 }
 263
 264
 265 /* Update the PHI nodes of NEW_LOOP.
 266
 267    NEW_LOOP is a duplicate of ORIG_LOOP.
 268    AFTER indicates whether NEW_LOOP executes before or after ORIG_LOOP:
 269    AFTER is true if NEW_LOOP executes after ORIG_LOOP, and false if it
 270    executes before it.  */
 271
 272 static void
 273 slpeel_update_phis_for_duplicate_loop (struct loop *orig_loop,
 274                                        struct loop *new_loop, bool after)
 275 {
 276   tree new_ssa_name;
 277   tree phi_new, phi_orig;
 278   tree def;
 279   edge orig_loop_latch = loop_latch_edge (orig_loop);
 280   edge orig_entry_e = loop_preheader_edge (orig_loop);
 281   edge new_loop_exit_e = new_loop->single_exit;
 282   edge new_loop_entry_e = loop_preheader_edge (new_loop);
 283   edge entry_arg_e = (after ? orig_loop_latch : orig_entry_e);
 284
 285   /*
 286      step 1. For each loop-header-phi:
 287              Add the first phi argument for the phi in NEW_LOOP
 288             (the one associated with the entry of NEW_LOOP)
 289
 290      step 2. For each loop-header-phi:
 291              Add the second phi argument for the phi in NEW_LOOP
 292             (the one associated with the latch of NEW_LOOP)
 293
 294      step 3. Update the phis in the successor block of NEW_LOOP.
 295
 296         case 1: NEW_LOOP was placed before ORIG_LOOP:
 297                 The successor block of NEW_LOOP is the header of ORIG_LOOP.
 298                 Updating the phis in the successor block can therefore be done
 299                 along with the scanning of the loop header phis, because the
 300                 header blocks of ORIG_LOOP and NEW_LOOP have exactly the same
 301                 phi nodes, organized in the same order.
 302
 303         case 2: NEW_LOOP was placed after ORIG_LOOP:
 304                 The successor block of NEW_LOOP is the original exit block of
 305                 ORIG_LOOP - the phis to be updated are the loop-closed-ssa phis.
 306                 We postpone updating these phis to a later stage (when
 307                 loop guards are added).
 308    */
 309
 310
 311   /* Scan the phis in the headers of the old and new loops
 312      (they are organized in exactly the same order).  */
 313
 314   for (phi_new = phi_nodes (new_loop->header),
 315        phi_orig = phi_nodes (orig_loop->header);
 316        phi_new && phi_orig;
 317        phi_new = PHI_CHAIN (phi_new), phi_orig = PHI_CHAIN (phi_orig))
 318     {
 319       /* step 1.  */
 320       def = PHI_ARG_DEF_FROM_EDGE (phi_orig, entry_arg_e);
 321       add_phi_arg (phi_new, def, new_loop_entry_e);
 322
 323       /* step 2.  */
 324       def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_loop_latch);
 325       if (TREE_CODE (def) != SSA_NAME)
 326         continue;
 327
 328       new_ssa_name = get_current_def (def);
 329       if (!new_ssa_name)
 330         {
 331           /* This only happens if there are no definitions
 332              inside the loop. use the phi_result in this case.  */
 333           new_ssa_name = PHI_RESULT (phi_new);
 334         }
 335
 336       /* An ordinary ssa name defined in the loop.  */
 337       add_phi_arg (phi_new, new_ssa_name, loop_latch_edge (new_loop));
 338
 339       /* step 3 (case 1).  */
 340       if (!after)
 341         {
 342           gcc_assert (new_loop_exit_e == orig_entry_e);
 343           SET_PHI_ARG_DEF (phi_orig,
 344                            new_loop_exit_e->dest_idx,
 345                            new_ssa_name);
 346         }
 347     }
 348 }
 349
 350
 351 /* Update PHI nodes for a guard of the LOOP.
 352
 353    Input:
 354    - LOOP, GUARD_EDGE: LOOP is a loop for which we added guard code that
 355         controls whether LOOP is to be executed.  GUARD_EDGE is the edge that
 356         originates from the guard-bb, skips LOOP and reaches the (unique) exit
 357         bb of LOOP.  This loop-exit-bb is an empty bb with one successor.
 358         We denote this bb NEW_MERGE_BB because before the guard code was added
 359         it had a single predecessor (the LOOP header), and now it became a merge
 360         point of two paths - the path that ends with the LOOP exit-edge, and
 361         the path that ends with GUARD_EDGE.
 362    - NEW_EXIT_BB: New basic block that is added by this function between LOOP
 363         and NEW_MERGE_BB. It is used to place loop-closed-ssa-form exit-phis.
 364
 365    ===> The CFG before the guard-code was added:
 366         LOOP_header_bb:
 367           loop_body
 368           if (exit_loop) goto update_bb
 369           else           goto LOOP_header_bb
 370         update_bb:
 371
 372    ==> The CFG after the guard-code was added:
 373         guard_bb:
 374           if (LOOP_guard_condition) goto new_merge_bb
 375           else                      goto LOOP_header_bb
 376         LOOP_header_bb:
 377           loop_body
 378           if (exit_loop_condition) goto new_merge_bb
 379           else                     goto LOOP_header_bb
 380         new_merge_bb:
 381           goto update_bb
 382         update_bb:
 383
 384    ==> The CFG after this function:
 385         guard_bb:
 386           if (LOOP_guard_condition) goto new_merge_bb
 387           else                      goto LOOP_header_bb
 388         LOOP_header_bb:
 389           loop_body
 390           if (exit_loop_condition) goto new_exit_bb
 391           else                     goto LOOP_header_bb
 392         new_exit_bb:
 393         new_merge_bb:
 394           goto update_bb
 395         update_bb:
 396
 397    This function:
 398    1. creates and updates the relevant phi nodes to account for the new
 399       incoming edge (GUARD_EDGE) into NEW_MERGE_BB. This involves:
 400       1.1. Create phi nodes at NEW_MERGE_BB.
 401       1.2. Update the phi nodes at the successor of NEW_MERGE_BB (denoted
 402            UPDATE_BB).  UPDATE_BB was the exit-bb of LOOP before NEW_MERGE_BB
 403    2. preserves loop-closed-ssa-form by creating the required phi nodes
 404       at the exit of LOOP (i.e, in NEW_EXIT_BB).
 405
 406    There are two flavors to this function:
 407
 408    slpeel_update_phi_nodes_for_guard1:
 409      Here the guard controls whether we enter or skip LOOP, where LOOP is a
 410      prolog_loop (loop1 below), and the new phis created in NEW_MERGE_BB are
 411      for variables that have phis in the loop header.
 412
 413    slpeel_update_phi_nodes_for_guard2:
 414      Here the guard controls whether we enter or skip LOOP, where LOOP is an
 415      epilog_loop (loop2 below), and the new phis created in NEW_MERGE_BB are
 416      for variables that have phis in the loop exit.
 417
 418    I.E., the overall structure is:
 419
 420         loop1_preheader_bb:
 421                 guard1 (goto loop1/merg1_bb)
 422         loop1
 423         loop1_exit_bb:
 424                 guard2 (goto merge1_bb/merge2_bb)
 425         merge1_bb
 426         loop2
 427         loop2_exit_bb
 428         merge2_bb
 429         next_bb
 430
 431    slpeel_update_phi_nodes_for_guard1 takes care of creating phis in
 432    loop1_exit_bb and merge1_bb. These are entry phis (phis for the vars
 433    that have phis in loop1->header).
 434
 435    slpeel_update_phi_nodes_for_guard2 takes care of creating phis in
 436    loop2_exit_bb and merge2_bb. These are exit phis (phis for the vars
 437    that have phis in next_bb). It also adds some of these phis to
 438    loop1_exit_bb.
 439
 440    slpeel_update_phi_nodes_for_guard1 is always called before
 441    slpeel_update_phi_nodes_for_guard2. They are both needed in order
 442    to create correct data-flow and loop-closed-ssa-form.
 443
 444    Generally slpeel_update_phi_nodes_for_guard1 creates phis for variables
 445    that change between iterations of a loop (and therefore have a phi-node
 446    at the loop entry), whereas slpeel_update_phi_nodes_for_guard2 creates
 447    phis for variables that are used out of the loop (and therefore have
 448    loop-closed exit phis). Some variables may be both updated between
 449    iterations and used after the loop. This is why in loop1_exit_bb we
 450    may need both entry_phis (created by slpeel_update_phi_nodes_for_guard1)
 451    and exit phis (created by slpeel_update_phi_nodes_for_guard2).
 452
 453    - IS_NEW_LOOP: if IS_NEW_LOOP is true, then LOOP is a newly created copy of
 454      an original loop. i.e., we have:
 455
 456            orig_loop
 457            guard_bb (goto LOOP/new_merge)
 458            new_loop <-- LOOP
 459            new_exit
 460            new_merge
 461            next_bb
 462
 463      If IS_NEW_LOOP is false, then LOOP is an original loop, in which case we
 464      have:
 465
 466            new_loop
 467            guard_bb (goto LOOP/new_merge)
 468            orig_loop <-- LOOP
 469            new_exit
 470            new_merge
 471            next_bb
 472
 473      The SSA names defined in the original loop have a current
 474      reaching definition that that records the corresponding new
 475      ssa-name used in the new duplicated loop copy.
 476   */
 477
 478 /* Function slpeel_update_phi_nodes_for_guard1
 479
 480    Input:
 481    - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
 482    - DEFS - a bitmap of ssa names to mark new names for which we recorded
 483             information.
 484
 485    In the context of the overall structure, we have:
 486
 487         loop1_preheader_bb:
 488                 guard1 (goto loop1/merg1_bb)
 489 LOOP->  loop1
 490         loop1_exit_bb:
 491                 guard2 (goto merge1_bb/merge2_bb)
 492         merge1_bb
 493         loop2
 494         loop2_exit_bb
 495         merge2_bb
 496         next_bb
 497
 498    For each name updated between loop iterations (i.e - for each name that has
 499    an entry (loop-header) phi in LOOP) we create a new phi in:
 500    1. merge1_bb (to account for the edge from guard1)
 501    2. loop1_exit_bb (an exit-phi to keep LOOP in loop-closed form)
 502 */
 503
 504 static void
 505 slpeel_update_phi_nodes_for_guard1 (edge guard_edge, struct loop *loop,
 506                                     bool is_new_loop, basic_block *new_exit_bb,
 507                                     bitmap *defs)
 508 {
 509   tree orig_phi, new_phi;
 510   tree update_phi, update_phi2;
 511   tree guard_arg, loop_arg;
 512   basic_block new_merge_bb = guard_edge->dest;
 513   edge e = EDGE_SUCC (new_merge_bb, 0);
 514   basic_block update_bb = e->dest;
 515   basic_block orig_bb = loop->header;
 516   edge new_exit_e;
 517   tree current_new_name;
 518   tree name;
 519
 520   /* Create new bb between loop and new_merge_bb.  */
 521   *new_exit_bb = split_edge (loop->single_exit);
 522   add_bb_to_loop (*new_exit_bb, loop->outer);
 523
 524   new_exit_e = EDGE_SUCC (*new_exit_bb, 0);
 525
 526   for (orig_phi = phi_nodes (orig_bb), update_phi = phi_nodes (update_bb);
 527        orig_phi && update_phi;
 528        orig_phi = PHI_CHAIN (orig_phi), update_phi = PHI_CHAIN (update_phi))
 529     {
 530       /* Virtual phi; Mark it for renaming. We actually want to call
 531          mar_sym_for_renaming, but since all ssa renaming datastructures
 532          are going to be freed before we get to call ssa_upate, we just
 533          record this name for now in a bitmap, and will mark it for
 534          renaming later.  */
 535       name = PHI_RESULT (orig_phi);
 536       if (!is_gimple_reg (SSA_NAME_VAR (name)))
 537         bitmap_set_bit (vect_vnames_to_rename, SSA_NAME_VERSION (name));
 538
 539       /** 1. Handle new-merge-point phis  **/
 540
 541       /* 1.1. Generate new phi node in NEW_MERGE_BB:  */
 542       new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
 543                                  new_merge_bb);
 544
 545       /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
 546             of LOOP. Set the two phi args in NEW_PHI for these edges:  */
 547       loop_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, EDGE_SUCC (loop->latch, 0));
 548       guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, loop_preheader_edge (loop));
 549
 550       add_phi_arg (new_phi, loop_arg, new_exit_e);
 551       add_phi_arg (new_phi, guard_arg, guard_edge);
 552
 553       /* 1.3. Update phi in successor block.  */
 554       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == loop_arg
 555                   || PHI_ARG_DEF_FROM_EDGE (update_phi, e) == guard_arg);
 556       SET_PHI_ARG_DEF (update_phi, e->dest_idx, PHI_RESULT (new_phi));
 557       update_phi2 = new_phi;
 558
 559
 560       /** 2. Handle loop-closed-ssa-form phis  **/
 561
 562       /* 2.1. Generate new phi node in NEW_EXIT_BB:  */
 563       new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
 564                                  *new_exit_bb);
 565
 566       /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop.  */
 567       add_phi_arg (new_phi, loop_arg, loop->single_exit);
 568
 569       /* 2.3. Update phi in successor of NEW_EXIT_BB:  */
 570       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
 571       SET_PHI_ARG_DEF (update_phi2, new_exit_e->dest_idx, PHI_RESULT (new_phi));
 572
 573       /* 2.4. Record the newly created name with set_current_def.
 574          We want to find a name such that
 575                 name = get_current_def (orig_loop_name)
 576          and to set its current definition as follows:
 577                 set_current_def (name, new_phi_name)
 578
 579          If LOOP is a new loop then loop_arg is already the name we're
 580          looking for. If LOOP is the original loop, then loop_arg is
 581          the orig_loop_name and the relevant name is recorded in its
 582          current reaching definition.  */
 583       if (is_new_loop)
 584         current_new_name = loop_arg;
 585       else
 586         {
 587           current_new_name = get_current_def (loop_arg);
 588           /* current_def is not available only if the variable does not
 589              change inside the loop, in which case we also don't care
 590              about recording a current_def for it because we won't be
 591              trying to create loop-exit-phis for it.  */
 592           if (!current_new_name)
 593             continue;
 594         }
 595       gcc_assert (get_current_def (current_new_name) == NULL_TREE);
 596
 597       set_current_def (current_new_name, PHI_RESULT (new_phi));
 598       bitmap_set_bit (*defs, SSA_NAME_VERSION (current_new_name));
 599     }
 600
 601   set_phi_nodes (new_merge_bb, phi_reverse (phi_nodes (new_merge_bb)));
 602 }
 603
 604
 605 /* Function slpeel_update_phi_nodes_for_guard2
 606
 607    Input:
 608    - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
 609
 610    In the context of the overall structure, we have:
 611
 612         loop1_preheader_bb:
 613                 guard1 (goto loop1/merg1_bb)
 614         loop1
 615         loop1_exit_bb:
 616                 guard2 (goto merge1_bb/merge2_bb)
 617         merge1_bb
 618 LOOP->  loop2
 619         loop2_exit_bb
 620         merge2_bb
 621         next_bb
 622
 623    For each name used out side the loop (i.e - for each name that has an exit
 624    phi in next_bb) we create a new phi in:
 625    1. merge2_bb (to account for the edge from guard_bb)
 626    2. loop2_exit_bb (an exit-phi to keep LOOP in loop-closed form)
 627    3. guard2 bb (an exit phi to keep the preceding loop in loop-closed form),
 628       if needed (if it wasn't handled by slpeel_update_phis_nodes_for_phi1).
 629 */
 630
 631 static void
 632 slpeel_update_phi_nodes_for_guard2 (edge guard_edge, struct loop *loop,
 633                                     bool is_new_loop, basic_block *new_exit_bb)
 634 {
 635   tree orig_phi, new_phi;
 636   tree update_phi, update_phi2;
 637   tree guard_arg, loop_arg;
 638   basic_block new_merge_bb = guard_edge->dest;
 639   edge e = EDGE_SUCC (new_merge_bb, 0);
 640   basic_block update_bb = e->dest;
 641   edge new_exit_e;
 642   tree orig_def, orig_def_new_name;
 643   tree new_name, new_name2;
 644   tree arg;
 645
 646   /* Create new bb between loop and new_merge_bb.  */
 647   *new_exit_bb = split_edge (loop->single_exit);
 648   add_bb_to_loop (*new_exit_bb, loop->outer);
 649
 650   new_exit_e = EDGE_SUCC (*new_exit_bb, 0);
 651
 652   for (update_phi = phi_nodes (update_bb); update_phi;
 653        update_phi = PHI_CHAIN (update_phi))
 654     {
 655       orig_phi = update_phi;
 656       orig_def = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
 657       /* This loop-closed-phi actually doesn't represent a use
 658          out of the loop - the phi arg is a constant.  */
 659       if (TREE_CODE (orig_def) != SSA_NAME)
 660         continue;
 661       orig_def_new_name = get_current_def (orig_def);
 662       arg = NULL_TREE;
 663
 664       /** 1. Handle new-merge-point phis  **/
 665
 666       /* 1.1. Generate new phi node in NEW_MERGE_BB:  */
 667       new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
 668                                  new_merge_bb);
 669
 670       /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
 671             of LOOP. Set the two PHI args in NEW_PHI for these edges:  */
 672       new_name = orig_def;
 673       new_name2 = NULL_TREE;
 674       if (orig_def_new_name)
 675         {
 676           new_name = orig_def_new_name;
 677           /* Some variables have both loop-entry-phis and loop-exit-phis.
 678              Such variables were given yet newer names by phis placed in
 679              guard_bb by slpeel_update_phi_nodes_for_guard1. I.e:
 680              new_name2 = get_current_def (get_current_def (orig_name)).  */
 681           new_name2 = get_current_def (new_name);
 682         }
 683
 684       if (is_new_loop)
 685         {
 686           guard_arg = orig_def;
 687           loop_arg = new_name;
 688         }
 689       else
 690         {
 691           guard_arg = new_name;
 692           loop_arg = orig_def;
 693         }
 694       if (new_name2)
 695         guard_arg = new_name2;
 696
 697       add_phi_arg (new_phi, loop_arg, new_exit_e);
 698       add_phi_arg (new_phi, guard_arg, guard_edge);
 699
 700       /* 1.3. Update phi in successor block.  */
 701       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == orig_def);
 702       SET_PHI_ARG_DEF (update_phi, e->dest_idx, PHI_RESULT (new_phi));
 703       update_phi2 = new_phi;
 704
 705
 706       /** 2. Handle loop-closed-ssa-form phis  **/
 707
 708       /* 2.1. Generate new phi node in NEW_EXIT_BB:  */
 709       new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
 710                                  *new_exit_bb);
 711
 712       /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop.  */
 713       add_phi_arg (new_phi, loop_arg, loop->single_exit);
 714
 715       /* 2.3. Update phi in successor of NEW_EXIT_BB:  */
 716       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
 717       SET_PHI_ARG_DEF (update_phi2, new_exit_e->dest_idx, PHI_RESULT (new_phi));
 718
 719
 720       /** 3. Handle loop-closed-ssa-form phis for first loop  **/
 721
 722       /* 3.1. Find the relevant names that need an exit-phi in
 723          GUARD_BB, i.e. names for which
 724          slpeel_update_phi_nodes_for_guard1 had not already created a
 725          phi node. This is the case for names that are used outside
 726          the loop (and therefore need an exit phi) but are not updated
 727          across loop iterations (and therefore don't have a
 728          loop-header-phi).
 729
 730          slpeel_update_phi_nodes_for_guard1 is responsible for
 731          creating loop-exit phis in GUARD_BB for names that have a
 732          loop-header-phi.  When such a phi is created we also record
 733          the new name in its current definition.  If this new name
 734          exists, then guard_arg was set to this new name (see 1.2
 735          above).  Therefore, if guard_arg is not this new name, this
 736          is an indication that an exit-phi in GUARD_BB was not yet
 737          created, so we take care of it here.  */
 738       if (guard_arg == new_name2)
 739         continue;
 740       arg = guard_arg;
 741
 742       /* 3.2. Generate new phi node in GUARD_BB:  */
 743       new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
 744                                  guard_edge->src);
 745
 746       /* 3.3. GUARD_BB has one incoming edge:  */
 747       gcc_assert (EDGE_COUNT (guard_edge->src->preds) == 1);
 748       add_phi_arg (new_phi, arg, EDGE_PRED (guard_edge->src, 0));
 749
 750       /* 3.4. Update phi in successor of GUARD_BB:  */
 751       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, guard_edge)
 752                                                                 == guard_arg);
 753       SET_PHI_ARG_DEF (update_phi2, guard_edge->dest_idx, PHI_RESULT (new_phi));
 754     }
 755
 756   set_phi_nodes (new_merge_bb, phi_reverse (phi_nodes (new_merge_bb)));
 757 }
 758
 759
 760 /* Make the LOOP iterate NITERS times. This is done by adding a new IV
 761    that starts at zero, increases by one and its limit is NITERS.
 762
 763    Assumption: the exit-condition of LOOP is the last stmt in the loop.  */
 764
 765 void
 766 slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
 767 {
 768   tree indx_before_incr, indx_after_incr, cond_stmt, cond;
 769   tree orig_cond;
 770   edge exit_edge = loop->single_exit;
 771   block_stmt_iterator loop_cond_bsi;
 772   block_stmt_iterator incr_bsi;
 773   bool insert_after;
 774   tree begin_label = tree_block_label (loop->latch);
 775   tree exit_label = tree_block_label (loop->single_exit->dest);
 776   tree init = build_int_cst (TREE_TYPE (niters), 0);
 777   tree step = build_int_cst (TREE_TYPE (niters), 1);
 778   tree then_label;
 779   tree else_label;
 780   LOC loop_loc;
 781
 782   orig_cond = get_loop_exit_condition (loop);
 783   gcc_assert (orig_cond);
 784   loop_cond_bsi = bsi_for_stmt (orig_cond);
 785
 786   standard_iv_increment_position (loop, &incr_bsi, &insert_after);
 787   create_iv (init, step, NULL_TREE, loop,
 788              &incr_bsi, insert_after, &indx_before_incr, &indx_after_incr);
 789
 790   if (exit_edge->flags & EDGE_TRUE_VALUE) /* 'then' edge exits the loop.  */
 791     {
 792       cond = build2 (GE_EXPR, boolean_type_node, indx_after_incr, niters);
 793       then_label = build1 (GOTO_EXPR, void_type_node, exit_label);
 794       else_label = build1 (GOTO_EXPR, void_type_node, begin_label);
 795     }
 796   else /* 'then' edge loops back.  */
 797     {
 798       cond = build2 (LT_EXPR, boolean_type_node, indx_after_incr, niters);
 799       then_label = build1 (GOTO_EXPR, void_type_node, begin_label);
 800       else_label = build1 (GOTO_EXPR, void_type_node, exit_label);
 801     }
 802
 803   cond_stmt = build3 (COND_EXPR, TREE_TYPE (orig_cond), cond,
 804                      then_label, else_label);
 805   bsi_insert_before (&loop_cond_bsi, cond_stmt, BSI_SAME_STMT);
 806
 807   /* Remove old loop exit test:  */
 808   bsi_remove (&loop_cond_bsi, true);
 809
 810   loop_loc = find_loop_location (loop);
 811   if (dump_file && (dump_flags & TDF_DETAILS))
 812     {
 813       if (loop_loc != UNKNOWN_LOC)
 814         fprintf (dump_file, "\nloop at %s:%d: ",
 815                  LOC_FILE (loop_loc), LOC_LINE (loop_loc));
 816       print_generic_expr (dump_file, cond_stmt, TDF_SLIM);
 817     }
 818
 819   loop->nb_iterations = niters;
 820 }
 821
 822
 823 /* Given LOOP this function generates a new copy of it and puts it
 824    on E which is either the entry or exit of LOOP.  */
 825
 826 static struct loop *
 827 slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, struct loops *loops,
 828                                         edge e)
 829 {
 830   struct loop *new_loop;
 831   basic_block *new_bbs, *bbs;
 832   bool at_exit;
 833   bool was_imm_dom;
 834   basic_block exit_dest;
 835   tree phi, phi_arg;
 836
 837   at_exit = (e == loop->single_exit);
 838   if (!at_exit && e != loop_preheader_edge (loop))
 839     return NULL;
 840
 841   bbs = get_loop_body (loop);
 842
 843   /* Check whether duplication is possible.  */
 844   if (!can_copy_bbs_p (bbs, loop->num_nodes))
 845     {
 846       free (bbs);
 847       return NULL;
 848     }
 849
 850   /* Generate new loop structure.  */
 851   new_loop = duplicate_loop (loops, loop, loop->outer);
 852   if (!new_loop)
 853     {
 854       free (bbs);
 855       return NULL;
 856     }
 857
 858   exit_dest = loop->single_exit->dest;
 859   was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
 860                                           exit_dest) == loop->header ?
 861                  true : false);
 862
 863   new_bbs = XNEWVEC (basic_block, loop->num_nodes);
 864
 865   copy_bbs (bbs, loop->num_nodes, new_bbs,
 866             &loop->single_exit, 1, &new_loop->single_exit, NULL,
 867             e->src);
 868
 869   /* Duplicating phi args at exit bbs as coming
 870      also from exit of duplicated loop.  */
 871   for (phi = phi_nodes (exit_dest); phi; phi = PHI_CHAIN (phi))
 872     {
 873       phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, loop->single_exit);
 874       if (phi_arg)
 875         {
 876           edge new_loop_exit_edge;
 877
 878           if (EDGE_SUCC (new_loop->header, 0)->dest == new_loop->latch)
 879             new_loop_exit_edge = EDGE_SUCC (new_loop->header, 1);
 880           else
 881             new_loop_exit_edge = EDGE_SUCC (new_loop->header, 0);
 882
 883           add_phi_arg (phi, phi_arg, new_loop_exit_edge);
 884         }
 885     }
 886
 887   if (at_exit) /* Add the loop copy at exit.  */
 888     {
 889       redirect_edge_and_branch_force (e, new_loop->header);
 890       set_immediate_dominator (CDI_DOMINATORS, new_loop->header, e->src);
 891       if (was_imm_dom)
 892         set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_loop->header);
 893     }
 894   else /* Add the copy at entry.  */
 895     {
 896       edge new_exit_e;
 897       edge entry_e = loop_preheader_edge (loop);
 898       basic_block preheader = entry_e->src;
 899
 900       if (!flow_bb_inside_loop_p (new_loop,
 901                                   EDGE_SUCC (new_loop->header, 0)->dest))
 902         new_exit_e = EDGE_SUCC (new_loop->header, 0);
 903       else
 904         new_exit_e = EDGE_SUCC (new_loop->header, 1);
 905
 906       redirect_edge_and_branch_force (new_exit_e, loop->header);
 907       set_immediate_dominator (CDI_DOMINATORS, loop->header,
 908                                new_exit_e->src);
 909
 910       /* We have to add phi args to the loop->header here as coming
 911          from new_exit_e edge.  */
 912       for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
 913         {
 914           phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, entry_e);
 915           if (phi_arg)
 916             add_phi_arg (phi, phi_arg, new_exit_e);
 917         }
 918
 919       redirect_edge_and_branch_force (entry_e, new_loop->header);
 920       set_immediate_dominator (CDI_DOMINATORS, new_loop->header, preheader);
 921     }
 922
 923   free (new_bbs);
 924   free (bbs);
 925
 926   return new_loop;
 927 }
 928
 929
 930 /* Given the condition statement COND, put it as the last statement
 931    of GUARD_BB; EXIT_BB is the basic block to skip the loop;
 932    Assumes that this is the single exit of the guarded loop.
 933    Returns the skip edge.  */
 934
 935 static edge
 936 slpeel_add_loop_guard (basic_block guard_bb, tree cond, basic_block exit_bb,
 937                         basic_block dom_bb)
 938 {
 939   block_stmt_iterator bsi;
 940   edge new_e, enter_e;
 941   tree cond_stmt, then_label, else_label;
 942
 943   enter_e = EDGE_SUCC (guard_bb, 0);
 944   enter_e->flags &= ~EDGE_FALLTHRU;
 945   enter_e->flags |= EDGE_FALSE_VALUE;
 946   bsi = bsi_last (guard_bb);
 947
 948   then_label = build1 (GOTO_EXPR, void_type_node,
 949                        tree_block_label (exit_bb));
 950   else_label = build1 (GOTO_EXPR, void_type_node,
 951                        tree_block_label (enter_e->dest));
 952   cond_stmt = build3 (COND_EXPR, void_type_node, cond,
 953                      then_label, else_label);
 954   bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT);
 955   /* Add new edge to connect guard block to the merge/loop-exit block.  */
 956   new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);
 957   set_immediate_dominator (CDI_DOMINATORS, exit_bb, dom_bb);
 958   return new_e;
 959 }
 960
 961
 962 /* This function verifies that the following restrictions apply to LOOP:
 963    (1) it is innermost
 964    (2) it consists of exactly 2 basic blocks - header, and an empty latch.
 965    (3) it is single entry, single exit
 966    (4) its exit condition is the last stmt in the header
 967    (5) E is the entry/exit edge of LOOP.
 968  */
 969
 970 bool
 971 slpeel_can_duplicate_loop_p (struct loop *loop, edge e)
 972 {
 973   edge exit_e = loop->single_exit;
 974   edge entry_e = loop_preheader_edge (loop);
 975   tree orig_cond = get_loop_exit_condition (loop);
 976   block_stmt_iterator loop_exit_bsi = bsi_last (exit_e->src);
 977
 978   if (need_ssa_update_p ())
 979     return false;
 980
 981   if (loop->inner
 982       /* All loops have an outer scope; the only case loop->outer is NULL is for
 983          the function itself.  */
 984       || !loop->outer
 985       || loop->num_nodes != 2
 986       || !empty_block_p (loop->latch)
 987       || !loop->single_exit
 988       /* Verify that new loop exit condition can be trivially modified.  */
 989       || (!orig_cond || orig_cond != bsi_stmt (loop_exit_bsi))
 990       || (e != exit_e && e != entry_e))
 991     return false;
 992
 993   return true;
 994 }
 995
 996 #ifdef ENABLE_CHECKING
 997 void
 998 slpeel_verify_cfg_after_peeling (struct loop *first_loop,
 999                                  struct loop *second_loop)
1000 {
1001   basic_block loop1_exit_bb = first_loop->single_exit->dest;
1002   basic_block loop2_entry_bb = loop_preheader_edge (second_loop)->src;
1003   basic_block loop1_entry_bb = loop_preheader_edge (first_loop)->src;
1004
1005   /* A guard that controls whether the second_loop is to be executed or skipped
1006      is placed in first_loop->exit.  first_loopt->exit therefore has two
1007      successors - one is the preheader of second_loop, and the other is a bb
1008      after second_loop.
1009    */
1010   gcc_assert (EDGE_COUNT (loop1_exit_bb->succs) == 2);
1011
1012   /* 1. Verify that one of the successors of first_loopt->exit is the preheader
1013         of second_loop.  */
1014
1015   /* The preheader of new_loop is expected to have two predecessors:
1016      first_loop->exit and the block that precedes first_loop.  */
1017
1018   gcc_assert (EDGE_COUNT (loop2_entry_bb->preds) == 2
1019               && ((EDGE_PRED (loop2_entry_bb, 0)->src == loop1_exit_bb
1020                    && EDGE_PRED (loop2_entry_bb, 1)->src == loop1_entry_bb)
1021                || (EDGE_PRED (loop2_entry_bb, 1)->src ==  loop1_exit_bb
1022                    && EDGE_PRED (loop2_entry_bb, 0)->src == loop1_entry_bb)));
1023
1024   /* Verify that the other successor of first_loopt->exit is after the
1025      second_loop.  */
1026   /* TODO */
1027 }
1028 #endif
1029
1030 /* Function slpeel_tree_peel_loop_to_edge.
1031
1032    Peel the first (last) iterations of LOOP into a new prolog (epilog) loop
1033    that is placed on the entry (exit) edge E of LOOP. After this transformation
1034    we have two loops one after the other - first-loop iterates FIRST_NITERS
1035    times, and second-loop iterates the remainder NITERS - FIRST_NITERS times.
1036
1037    Input:
1038    - LOOP: the loop to be peeled.
1039    - E: the exit or entry edge of LOOP.
1040         If it is the entry edge, we peel the first iterations of LOOP. In this
1041         case first-loop is LOOP, and second-loop is the newly created loop.
1042         If it is the exit edge, we peel the last iterations of LOOP. In this
1043         case, first-loop is the newly created loop, and second-loop is LOOP.
1044    - NITERS: the number of iterations that LOOP iterates.
1045    - FIRST_NITERS: the number of iterations that the first-loop should iterate.
1046    - UPDATE_FIRST_LOOP_COUNT:  specified whether this function is responsible
1047         for updating the loop bound of the first-loop to FIRST_NITERS.  If it
1048         is false, the caller of this function may want to take care of this
1049         (this can be useful if we don't want new stmts added to first-loop).
1050
1051    Output:
1052    The function returns a pointer to the new loop-copy, or NULL if it failed
1053    to perform the transformation.
1054
1055    The function generates two if-then-else guards: one before the first loop,
1056    and the other before the second loop:
1057    The first guard is:
1058      if (FIRST_NITERS == 0) then skip the first loop,
1059      and go directly to the second loop.
1060    The second guard is:
1061      if (FIRST_NITERS == NITERS) then skip the second loop.
1062
1063    FORNOW only simple loops are supported (see slpeel_can_duplicate_loop_p).
1064    FORNOW the resulting code will not be in loop-closed-ssa form.
1065 */
1066
1067 struct loop*
1068 slpeel_tree_peel_loop_to_edge (struct loop *loop, struct loops *loops,
1069                                edge e, tree first_niters,
1070                                tree niters, bool update_first_loop_count)
1071 {
1072   struct loop *new_loop = NULL, *first_loop, *second_loop;
1073   edge skip_e;
1074   tree pre_condition;
1075   bitmap definitions;
1076   basic_block bb_before_second_loop, bb_after_second_loop;
1077   basic_block bb_before_first_loop;
1078   basic_block bb_between_loops;
1079   basic_block new_exit_bb;
1080   edge exit_e = loop->single_exit;
1081   LOC loop_loc;
1082
1083   if (!slpeel_can_duplicate_loop_p (loop, e))
1084     return NULL;
1085
1086   /* We have to initialize cfg_hooks. Then, when calling
1087    cfg_hooks->split_edge, the function tree_split_edge
1088    is actually called and, when calling cfg_hooks->duplicate_block,
1089    the function tree_duplicate_bb is called.  */
1090   tree_register_cfg_hooks ();
1091
1092
1093   /* 1. Generate a copy of LOOP and put it on E (E is the entry/exit of LOOP).
1094         Resulting CFG would be:
1095
1096         first_loop:
1097         do {
1098         } while ...
1099
1100         second_loop:
1101         do {
1102         } while ...
1103
1104         orig_exit_bb:
1105    */
1106
1107   if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, loops, e)))
1108     {
1109       loop_loc = find_loop_location (loop);
1110       if (dump_file && (dump_flags & TDF_DETAILS))
1111         {
1112           if (loop_loc != UNKNOWN_LOC)
1113             fprintf (dump_file, "\n%s:%d: note: ",
1114                      LOC_FILE (loop_loc), LOC_LINE (loop_loc));
1115           fprintf (dump_file, "tree_duplicate_loop_to_edge_cfg failed.\n");
1116         }
1117       return NULL;
1118     }
1119
1120   if (e == exit_e)
1121     {
1122       /* NEW_LOOP was placed after LOOP.  */
1123       first_loop = loop;
1124       second_loop = new_loop;
1125     }
1126   else
1127     {
1128       /* NEW_LOOP was placed before LOOP.  */
1129       first_loop = new_loop;
1130       second_loop = loop;
1131     }
1132
1133   definitions = ssa_names_to_replace ();
1134   slpeel_update_phis_for_duplicate_loop (loop, new_loop, e == exit_e);
1135   rename_variables_in_loop (new_loop);
1136
1137
1138   /* 2. Add the guard that controls whether the first loop is executed.
1139         Resulting CFG would be:
1140
1141         bb_before_first_loop:
1142         if (FIRST_NITERS == 0) GOTO bb_before_second_loop
1143                                GOTO first-loop
1144
1145         first_loop:
1146         do {
1147         } while ...
1148
1149         bb_before_second_loop:
1150
1151         second_loop:
1152         do {
1153         } while ...
1154
1155         orig_exit_bb:
1156    */
1157
1158   bb_before_first_loop = split_edge (loop_preheader_edge (first_loop));
1159   add_bb_to_loop (bb_before_first_loop, first_loop->outer);
1160   bb_before_second_loop = split_edge (first_loop->single_exit);
1161   add_bb_to_loop (bb_before_second_loop, first_loop->outer);
1162
1163   pre_condition =
1164     fold_build2 (LE_EXPR, boolean_type_node, first_niters,
1165                  build_int_cst (TREE_TYPE (first_niters), 0));
1166   skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
1167                                   bb_before_second_loop, bb_before_first_loop);
1168   slpeel_update_phi_nodes_for_guard1 (skip_e, first_loop,
1169                                       first_loop == new_loop,
1170                                       &new_exit_bb, &definitions);
1171
1172
1173   /* 3. Add the guard that controls whether the second loop is executed.
1174         Resulting CFG would be:
1175
1176         bb_before_first_loop:
1177         if (FIRST_NITERS == 0) GOTO bb_before_second_loop (skip first loop)
1178                                GOTO first-loop
1179
1180         first_loop:
1181         do {
1182         } while ...
1183
1184         bb_between_loops:
1185         if (FIRST_NITERS == NITERS) GOTO bb_after_second_loop (skip second loop)
1186                                     GOTO bb_before_second_loop
1187
1188         bb_before_second_loop:
1189
1190         second_loop:
1191         do {
1192         } while ...
1193
1194         bb_after_second_loop:
1195
1196         orig_exit_bb:
1197    */
1198
1199   bb_between_loops = new_exit_bb;
1200   bb_after_second_loop = split_edge (second_loop->single_exit);
1201   add_bb_to_loop (bb_after_second_loop, second_loop->outer);
1202
1203   pre_condition =
1204         fold_build2 (EQ_EXPR, boolean_type_node, first_niters, niters);
1205   skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition,
1206                                   bb_after_second_loop, bb_before_first_loop);
1207   slpeel_update_phi_nodes_for_guard2 (skip_e, second_loop,
1208                                      second_loop == new_loop, &new_exit_bb);
1209
1210   /* 4. Make first-loop iterate FIRST_NITERS times, if requested.
1211    */
1212   if (update_first_loop_count)
1213     slpeel_make_loop_iterate_ntimes (first_loop, first_niters);
1214
1215   BITMAP_FREE (definitions);
1216   delete_update_ssa ();
1217
1218   return new_loop;
1219 }
1220
1221 /* Function vect_get_loop_location.
1222
1223    Extract the location of the loop in the source code.
1224    If the loop is not well formed for vectorization, an estimated
1225    location is calculated.
1226    Return the loop location if succeed and NULL if not.  */
1227
1228 LOC
1229 find_loop_location (struct loop *loop)
1230 {
1231   tree node = NULL_TREE;
1232   basic_block bb;
1233   block_stmt_iterator si;
1234
1235   if (!loop)
1236     return UNKNOWN_LOC;
1237
1238   node = get_loop_exit_condition (loop);
1239
1240   if (node && EXPR_P (node) && EXPR_HAS_LOCATION (node)
1241       && EXPR_FILENAME (node) && EXPR_LINENO (node))
1242     return EXPR_LOC (node);
1243
1244   /* If we got here the loop is probably not "well formed",
1245      try to estimate the loop location */
1246
1247   if (!loop->header)
1248     return UNKNOWN_LOC;
1249
1250   bb = loop->header;
1251
1252   for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1253     {
1254       node = bsi_stmt (si);
1255       if (node && EXPR_P (node) && EXPR_HAS_LOCATION (node))
1256         return EXPR_LOC (node);
1257     }
1258
1259   return UNKNOWN_LOC;
1260 }
1261
1262
1263 /*************************************************************************
1264   Vectorization Debug Information.
1265  *************************************************************************/
1266
1267 /* Function vect_set_verbosity_level.
1268
1269    Called from toplev.c upon detection of the
1270    -ftree-vectorizer-verbose=N option.  */
1271
1272 void
1273 vect_set_verbosity_level (const char *val)
1274 {
1275    unsigned int vl;
1276
1277    vl = atoi (val);
1278    if (vl < MAX_VERBOSITY_LEVEL)
1279      vect_verbosity_level = vl;
1280    else
1281      vect_verbosity_level = MAX_VERBOSITY_LEVEL - 1;
1282 }
1283
1284
1285 /* Function vect_set_dump_settings.
1286
1287    Fix the verbosity level of the vectorizer if the
1288    requested level was not set explicitly using the flag
1289    -ftree-vectorizer-verbose=N.
1290    Decide where to print the debugging information (dump_file/stderr).
1291    If the user defined the verbosity level, but there is no dump file,
1292    print to stderr, otherwise print to the dump file.  */
1293
1294 static void
1295 vect_set_dump_settings (void)
1296 {
1297   vect_dump = dump_file;
1298
1299   /* Check if the verbosity level was defined by the user:  */
1300   if (vect_verbosity_level != MAX_VERBOSITY_LEVEL)
1301     {
1302       /* If there is no dump file, print to stderr.  */
1303       if (!dump_file)
1304         vect_dump = stderr;
1305       return;
1306     }
1307
1308   /* User didn't specify verbosity level:  */
1309   if (dump_file && (dump_flags & TDF_DETAILS))
1310     vect_verbosity_level = REPORT_DETAILS;
1311   else if (dump_file && (dump_flags & TDF_STATS))
1312     vect_verbosity_level = REPORT_UNVECTORIZED_LOOPS;
1313   else
1314     vect_verbosity_level = REPORT_NONE;
1315
1316   gcc_assert (dump_file || vect_verbosity_level == REPORT_NONE);
1317 }
1318
1319
1320 /* Function debug_loop_details.
1321
1322    For vectorization debug dumps.  */
1323
1324 bool
1325 vect_print_dump_info (enum verbosity_levels vl)
1326 {
1327   if (vl > vect_verbosity_level)
1328     return false;
1329
1330   if (vect_loop_location == UNKNOWN_LOC)
1331     fprintf (vect_dump, "\n%s:%d: note: ",
1332                  DECL_SOURCE_FILE (current_function_decl),
1333                  DECL_SOURCE_LINE (current_function_decl));
1334   else
1335     fprintf (vect_dump, "\n%s:%d: note: ",
1336              LOC_FILE (vect_loop_location), LOC_LINE (vect_loop_location));
1337
1338
1339   return true;
1340 }
1341
1342
1343 /*************************************************************************
1344   Vectorization Utilities.
1345  *************************************************************************/
1346
1347 /* Function new_stmt_vec_info.
1348
1349    Create and initialize a new stmt_vec_info struct for STMT.  */
1350
1351 stmt_vec_info
1352 new_stmt_vec_info (tree stmt, loop_vec_info loop_vinfo)
1353 {
1354   stmt_vec_info res;
1355   res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
1356
1357   STMT_VINFO_TYPE (res) = undef_vec_info_type;
1358   STMT_VINFO_STMT (res) = stmt;
1359   STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
1360   STMT_VINFO_RELEVANT_P (res) = 0;
1361   STMT_VINFO_LIVE_P (res) = 0;
1362   STMT_VINFO_VECTYPE (res) = NULL;
1363   STMT_VINFO_VEC_STMT (res) = NULL;
1364   STMT_VINFO_IN_PATTERN_P (res) = false;
1365   STMT_VINFO_RELATED_STMT (res) = NULL;
1366   STMT_VINFO_DATA_REF (res) = NULL;
1367   if (TREE_CODE (stmt) == PHI_NODE)
1368     STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
1369   else
1370     STMT_VINFO_DEF_TYPE (res) = vect_loop_def;
1371   STMT_VINFO_SAME_ALIGN_REFS (res) = VEC_alloc (dr_p, heap, 5);
1372
1373   return res;
1374 }
1375
1376
1377 /* Function new_loop_vec_info.
1378
1379    Create and initialize a new loop_vec_info struct for LOOP, as well as
1380    stmt_vec_info structs for all the stmts in LOOP.  */
1381
1382 loop_vec_info
1383 new_loop_vec_info (struct loop *loop)
1384 {
1385   loop_vec_info res;
1386   basic_block *bbs;
1387   block_stmt_iterator si;
1388   unsigned int i;
1389
1390   res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));
1391
1392   bbs = get_loop_body (loop);
1393
1394   /* Create stmt_info for all stmts in the loop.  */
1395   for (i = 0; i < loop->num_nodes; i++)
1396     {
1397       basic_block bb = bbs[i];
1398       tree phi;
1399
1400       for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1401         {
1402           tree_ann_t ann = get_tree_ann (phi);
1403           set_stmt_info (ann, new_stmt_vec_info (phi, res));
1404         }
1405
1406       for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1407         {
1408           tree stmt = bsi_stmt (si);
1409           stmt_ann_t ann;
1410
1411           ann = stmt_ann (stmt);
1412           set_stmt_info ((tree_ann_t)ann, new_stmt_vec_info (stmt, res));
1413         }
1414     }
1415
1416   LOOP_VINFO_LOOP (res) = loop;
1417   LOOP_VINFO_BBS (res) = bbs;
1418   LOOP_VINFO_EXIT_COND (res) = NULL;
1419   LOOP_VINFO_NITERS (res) = NULL;
1420   LOOP_VINFO_VECTORIZABLE_P (res) = 0;
1421   LOOP_PEELING_FOR_ALIGNMENT (res) = 0;
1422   LOOP_VINFO_VECT_FACTOR (res) = 0;
1423   LOOP_VINFO_DATAREFS (res) = VEC_alloc (data_reference_p, heap, 10);
1424   LOOP_VINFO_DDRS (res) = VEC_alloc (ddr_p, heap, 10 * 10);
1425   LOOP_VINFO_UNALIGNED_DR (res) = NULL;
1426   LOOP_VINFO_MAY_MISALIGN_STMTS (res)
1427     = VEC_alloc (tree, heap, PARAM_VALUE (PARAM_VECT_MAX_VERSION_CHECKS));
1428
1429   return res;
1430 }
1431
1432
1433 /* Function destroy_loop_vec_info.
1434
1435    Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the
1436    stmts in the loop.  */
1437
1438 void
1439 destroy_loop_vec_info (loop_vec_info loop_vinfo)
1440 {
1441   struct loop *loop;
1442   basic_block *bbs;
1443   int nbbs;
1444   block_stmt_iterator si;
1445   int j;
1446
1447   if (!loop_vinfo)
1448     return;
1449
1450   loop = LOOP_VINFO_LOOP (loop_vinfo);
1451
1452   bbs = LOOP_VINFO_BBS (loop_vinfo);
1453   nbbs = loop->num_nodes;
1454
1455   for (j = 0; j < nbbs; j++)
1456     {
1457       basic_block bb = bbs[j];
1458       tree phi;
1459       stmt_vec_info stmt_info;
1460
1461       for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1462         {
1463           tree_ann_t ann = get_tree_ann (phi);
1464
1465           stmt_info = vinfo_for_stmt (phi);
1466           free (stmt_info);
1467           set_stmt_info (ann, NULL);
1468         }
1469
1470       for (si = bsi_start (bb); !bsi_end_p (si); )
1471         {
1472           tree stmt = bsi_stmt (si);
1473           stmt_ann_t ann = stmt_ann (stmt);
1474           stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1475
1476           if (stmt_info)
1477             {
1478               /* Check if this is a "pattern stmt" (introduced by the
1479                  vectorizer during the pattern recognition pass).  */
1480               bool remove_stmt_p = false;
1481               tree orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
1482               if (orig_stmt)
1483                 {
1484                   stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
1485                   if (orig_stmt_info
1486                       && STMT_VINFO_IN_PATTERN_P (orig_stmt_info))
1487                     remove_stmt_p = true;
1488                 }
1489
1490               /* Free stmt_vec_info.  */
1491               VEC_free (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
1492               free (stmt_info);
1493               set_stmt_info ((tree_ann_t)ann, NULL);
1494
1495               /* Remove dead "pattern stmts".  */
1496               if (remove_stmt_p)
1497                 bsi_remove (&si, true);
1498             }
1499           bsi_next (&si);
1500         }
1501     }
1502
1503   free (LOOP_VINFO_BBS (loop_vinfo));
1504   free_data_refs (LOOP_VINFO_DATAREFS (loop_vinfo));
1505   free_dependence_relations (LOOP_VINFO_DDRS (loop_vinfo));
1506   VEC_free (tree, heap, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo));
1507
1508   free (loop_vinfo);
1509 }
1510
1511
1512 /* Function vect_force_dr_alignment_p.
1513
1514    Returns whether the alignment of a DECL can be forced to be aligned
1515    on ALIGNMENT bit boundary.  */
1516
1517 bool
1518 vect_can_force_dr_alignment_p (tree decl, unsigned int alignment)
1519 {
1520   if (TREE_CODE (decl) != VAR_DECL)
1521     return false;
1522
1523   if (DECL_EXTERNAL (decl))
1524     return false;
1525
1526   if (TREE_ASM_WRITTEN (decl))
1527     return false;
1528
1529   if (TREE_STATIC (decl))
1530     return (alignment <= MAX_OFILE_ALIGNMENT);
1531   else
1532     /* This is not 100% correct.  The absolute correct stack alignment
1533        is STACK_BOUNDARY.  We're supposed to hope, but not assume, that
1534        PREFERRED_STACK_BOUNDARY is honored by all translation units.
1535        However, until someone implements forced stack alignment, SSE
1536        isn't really usable without this.  */
1537     return (alignment <= PREFERRED_STACK_BOUNDARY);
1538 }
1539
1540
1541 /* Function get_vectype_for_scalar_type.
1542
1543    Returns the vector type corresponding to SCALAR_TYPE as supported
1544    by the target.  */
1545
1546 tree
1547 get_vectype_for_scalar_type (tree scalar_type)
1548 {
1549   enum machine_mode inner_mode = TYPE_MODE (scalar_type);
1550   int nbytes = GET_MODE_SIZE (inner_mode);
1551   int nunits;
1552   tree vectype;
1553
1554   if (nbytes == 0 || nbytes >= UNITS_PER_SIMD_WORD)
1555     return NULL_TREE;
1556
1557   /* FORNOW: Only a single vector size per target (UNITS_PER_SIMD_WORD)
1558      is expected.  */
1559   nunits = UNITS_PER_SIMD_WORD / nbytes;
1560
1561   vectype = build_vector_type (scalar_type, nunits);
1562   if (vect_print_dump_info (REPORT_DETAILS))
1563     {
1564       fprintf (vect_dump, "get vectype with %d units of type ", nunits);
1565       print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
1566     }
1567
1568   if (!vectype)
1569     return NULL_TREE;
1570
1571   if (vect_print_dump_info (REPORT_DETAILS))
1572     {
1573       fprintf (vect_dump, "vectype: ");
1574       print_generic_expr (vect_dump, vectype, TDF_SLIM);
1575     }
1576
1577   if (!VECTOR_MODE_P (TYPE_MODE (vectype))
1578       && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
1579     {
1580       if (vect_print_dump_info (REPORT_DETAILS))
1581         fprintf (vect_dump, "mode not supported by target.");
1582       return NULL_TREE;
1583     }
1584
1585   return vectype;
1586 }
1587
1588
1589 /* Function vect_supportable_dr_alignment
1590
1591    Return whether the data reference DR is supported with respect to its
1592    alignment.  */
1593
1594 enum dr_alignment_support
1595 vect_supportable_dr_alignment (struct data_reference *dr)
1596 {
1597   tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr)));
1598   enum machine_mode mode = (int) TYPE_MODE (vectype);
1599
1600   if (aligned_access_p (dr))
1601     return dr_aligned;
1602
1603   /* Possibly unaligned access.  */
1604
1605   if (DR_IS_READ (dr))
1606     {
1607       if (vec_realign_load_optab->handlers[mode].insn_code != CODE_FOR_nothing
1608           && (!targetm.vectorize.builtin_mask_for_load
1609               || targetm.vectorize.builtin_mask_for_load ()))
1610         return dr_unaligned_software_pipeline;
1611
1612       if (movmisalign_optab->handlers[mode].insn_code != CODE_FOR_nothing)
1613         /* Can't software pipeline the loads, but can at least do them.  */
1614         return dr_unaligned_supported;
1615     }
1616
1617   /* Unsupported.  */
1618   return dr_unaligned_unsupported;
1619 }
1620
1621
1622 /* Function vect_is_simple_use.
1623
1624    Input:
1625    LOOP - the loop that is being vectorized.
1626    OPERAND - operand of a stmt in LOOP.
1627    DEF - the defining stmt in case OPERAND is an SSA_NAME.
1628
1629    Returns whether a stmt with OPERAND can be vectorized.
1630    Supportable operands are constants, loop invariants, and operands that are
1631    defined by the current iteration of the loop. Unsupportable operands are
1632    those that are defined by a previous iteration of the loop (as is the case
1633    in reduction/induction computations).  */
1634
1635 bool
1636 vect_is_simple_use (tree operand, loop_vec_info loop_vinfo, tree *def_stmt,
1637                     tree *def, enum vect_def_type *dt)
1638 {
1639   basic_block bb;
1640   stmt_vec_info stmt_vinfo;
1641   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1642
1643   *def_stmt = NULL_TREE;
1644   *def = NULL_TREE;
1645
1646   if (vect_print_dump_info (REPORT_DETAILS))
1647     {
1648       fprintf (vect_dump, "vect_is_simple_use: operand ");
1649       print_generic_expr (vect_dump, operand, TDF_SLIM);
1650     }
1651
1652   if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
1653     {
1654       *dt = vect_constant_def;
1655       return true;
1656     }
1657
1658   if (TREE_CODE (operand) != SSA_NAME)
1659     {
1660       if (vect_print_dump_info (REPORT_DETAILS))
1661         fprintf (vect_dump, "not ssa-name.");
1662       return false;
1663     }
1664
1665   *def_stmt = SSA_NAME_DEF_STMT (operand);
1666   if (*def_stmt == NULL_TREE )
1667     {
1668       if (vect_print_dump_info (REPORT_DETAILS))
1669         fprintf (vect_dump, "no def_stmt.");
1670       return false;
1671     }
1672
1673   if (vect_print_dump_info (REPORT_DETAILS))
1674     {
1675       fprintf (vect_dump, "def_stmt: ");
1676       print_generic_expr (vect_dump, *def_stmt, TDF_SLIM);
1677     }
1678
1679   /* empty stmt is expected only in case of a function argument.
1680      (Otherwise - we expect a phi_node or a modify_expr).  */
1681   if (IS_EMPTY_STMT (*def_stmt))
1682     {
1683       tree arg = TREE_OPERAND (*def_stmt, 0);
1684       if (TREE_CODE (arg) == INTEGER_CST || TREE_CODE (arg) == REAL_CST)
1685         {
1686           *def = operand;
1687           *dt = vect_invariant_def;
1688           return true;
1689         }
1690
1691       if (vect_print_dump_info (REPORT_DETAILS))
1692         fprintf (vect_dump, "Unexpected empty stmt.");
1693       return false;
1694     }
1695
1696   bb = bb_for_stmt (*def_stmt);
1697   if (!flow_bb_inside_loop_p (loop, bb))
1698     *dt = vect_invariant_def;
1699   else
1700     {
1701       stmt_vinfo = vinfo_for_stmt (*def_stmt);
1702       *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
1703     }
1704
1705   if (*dt == vect_unknown_def_type)
1706     {
1707       if (vect_print_dump_info (REPORT_DETAILS))
1708         fprintf (vect_dump, "Unsupported pattern.");
1709       return false;
1710     }
1711
1712   /* stmts inside the loop that have been identified as performing
1713      a reduction operation cannot have uses in the loop.  */
1714   if (*dt == vect_reduction_def && TREE_CODE (*def_stmt) != PHI_NODE)
1715     {
1716       if (vect_print_dump_info (REPORT_DETAILS))
1717         fprintf (vect_dump, "reduction used in loop.");
1718       return false;
1719     }
1720
1721   if (vect_print_dump_info (REPORT_DETAILS))
1722     fprintf (vect_dump, "type of def: %d.",*dt);
1723
1724   switch (TREE_CODE (*def_stmt))
1725     {
1726     case PHI_NODE:
1727       *def = PHI_RESULT (*def_stmt);
1728       gcc_assert (*dt == vect_induction_def || *dt == vect_reduction_def
1729                   || *dt == vect_invariant_def);
1730       break;
1731
1732     case MODIFY_EXPR:
1733       *def = TREE_OPERAND (*def_stmt, 0);
1734       gcc_assert (*dt == vect_loop_def || *dt == vect_invariant_def);
1735       break;
1736
1737     default:
1738       if (vect_print_dump_info (REPORT_DETAILS))
1739         fprintf (vect_dump, "unsupported defining stmt: ");
1740       return false;
1741     }
1742
1743   if (*dt == vect_induction_def)
1744     {
1745       if (vect_print_dump_info (REPORT_DETAILS))
1746         fprintf (vect_dump, "induction not supported.");
1747       return false;
1748     }
1749
1750   return true;
1751 }
1752
1753
1754 /* Function reduction_code_for_scalar_code
1755
1756    Input:
1757    CODE - tree_code of a reduction operations.
1758
1759    Output:
1760    REDUC_CODE - the corresponding tree-code to be used to reduce the
1761       vector of partial results into a single scalar result (which
1762       will also reside in a vector).
1763
1764    Return TRUE if a corresponding REDUC_CODE was found, FALSE otherwise.  */
1765
1766 bool
1767 reduction_code_for_scalar_code (enum tree_code code,
1768                                 enum tree_code *reduc_code)
1769 {
1770   switch (code)
1771   {
1772   case MAX_EXPR:
1773     *reduc_code = REDUC_MAX_EXPR;
1774     return true;
1775
1776   case MIN_EXPR:
1777     *reduc_code = REDUC_MIN_EXPR;
1778     return true;
1779
1780   case PLUS_EXPR:
1781     *reduc_code = REDUC_PLUS_EXPR;
1782     return true;
1783
1784   default:
1785     return false;
1786   }
1787 }
1788
1789
1790 /* Function vect_is_simple_reduction
1791
1792    Detect a cross-iteration def-use cucle that represents a simple
1793    reduction computation. We look for the following pattern:
1794
1795    loop_header:
1796      a1 = phi < a0, a2 >
1797      a3 = ...
1798      a2 = operation (a3, a1)
1799
1800    such that:
1801    1. operation is commutative and associative and it is safe to
1802       change the order of the computation.
1803    2. no uses for a2 in the loop (a2 is used out of the loop)
1804    3. no uses of a1 in the loop besides the reduction operation.
1805
1806    Condition 1 is tested here.
1807    Conditions 2,3 are tested in vect_mark_stmts_to_be_vectorized.  */
1808
1809 tree
1810 vect_is_simple_reduction (struct loop *loop, tree phi)
1811 {
1812   edge latch_e = loop_latch_edge (loop);
1813   tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
1814   tree def_stmt, def1, def2;
1815   enum tree_code code;
1816   int op_type;
1817   tree operation, op1, op2;
1818   tree type;
1819
1820   if (TREE_CODE (loop_arg) != SSA_NAME)
1821     {
1822       if (vect_print_dump_info (REPORT_DETAILS))
1823         {
1824           fprintf (vect_dump, "reduction: not ssa_name: ");
1825           print_generic_expr (vect_dump, loop_arg, TDF_SLIM);
1826         }
1827       return NULL_TREE;
1828     }
1829
1830   def_stmt = SSA_NAME_DEF_STMT (loop_arg);
1831   if (!def_stmt)
1832     {
1833       if (vect_print_dump_info (REPORT_DETAILS))
1834         fprintf (vect_dump, "reduction: no def_stmt.");
1835       return NULL_TREE;
1836     }
1837
1838   if (TREE_CODE (def_stmt) != MODIFY_EXPR)
1839     {
1840       if (vect_print_dump_info (REPORT_DETAILS))
1841         {
1842           print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
1843         }
1844       return NULL_TREE;
1845     }
1846
1847   operation = TREE_OPERAND (def_stmt, 1);
1848   code = TREE_CODE (operation);
1849   if (!commutative_tree_code (code) || !associative_tree_code (code))
1850     {
1851       if (vect_print_dump_info (REPORT_DETAILS))
1852         {
1853           fprintf (vect_dump, "reduction: not commutative/associative: ");
1854           print_generic_expr (vect_dump, operation, TDF_SLIM);
1855         }
1856       return NULL_TREE;
1857     }
1858
1859   op_type = TREE_CODE_LENGTH (code);
1860   if (op_type != binary_op)
1861     {
1862       if (vect_print_dump_info (REPORT_DETAILS))
1863         {
1864           fprintf (vect_dump, "reduction: not binary operation: ");
1865           print_generic_expr (vect_dump, operation, TDF_SLIM);
1866         }
1867       return NULL_TREE;
1868     }
1869
1870   op1 = TREE_OPERAND (operation, 0);
1871   op2 = TREE_OPERAND (operation, 1);
1872   if (TREE_CODE (op1) != SSA_NAME || TREE_CODE (op2) != SSA_NAME)
1873     {
1874       if (vect_print_dump_info (REPORT_DETAILS))
1875         {
1876           fprintf (vect_dump, "reduction: uses not ssa_names: ");
1877           print_generic_expr (vect_dump, operation, TDF_SLIM);
1878         }
1879       return NULL_TREE;
1880     }
1881
1882   /* Check that it's ok to change the order of the computation.  */
1883   type = TREE_TYPE (operation);
1884   if (TYPE_MAIN_VARIANT (type) != TYPE_MAIN_VARIANT (TREE_TYPE (op1))
1885       || TYPE_MAIN_VARIANT (type) != TYPE_MAIN_VARIANT (TREE_TYPE (op2)))
1886     {
1887       if (vect_print_dump_info (REPORT_DETAILS))
1888         {
1889           fprintf (vect_dump, "reduction: multiple types: operation type: ");
1890           print_generic_expr (vect_dump, type, TDF_SLIM);
1891           fprintf (vect_dump, ", operands types: ");
1892           print_generic_expr (vect_dump, TREE_TYPE (op1), TDF_SLIM);
1893           fprintf (vect_dump, ",");
1894           print_generic_expr (vect_dump, TREE_TYPE (op2), TDF_SLIM);
1895         }
1896       return NULL_TREE;
1897     }
1898
1899   /* CHECKME: check for !flag_finite_math_only too?  */
1900   if (SCALAR_FLOAT_TYPE_P (type) && !flag_unsafe_math_optimizations)
1901     {
1902       /* Changing the order of operations changes the semantics.  */
1903       if (vect_print_dump_info (REPORT_DETAILS))
1904         {
1905           fprintf (vect_dump, "reduction: unsafe fp math optimization: ");
1906           print_generic_expr (vect_dump, operation, TDF_SLIM);
1907         }
1908       return NULL_TREE;
1909     }
1910   else if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type) && flag_trapv)
1911     {
1912       /* Changing the order of operations changes the semantics.  */
1913       if (vect_print_dump_info (REPORT_DETAILS))
1914         {
1915           fprintf (vect_dump, "reduction: unsafe int math optimization: ");
1916           print_generic_expr (vect_dump, operation, TDF_SLIM);
1917         }
1918       return NULL_TREE;
1919     }
1920
1921   /* reduction is safe. we're dealing with one of the following:
1922      1) integer arithmetic and no trapv
1923      2) floating point arithmetic, and special flags permit this optimization.
1924    */
1925   def1 = SSA_NAME_DEF_STMT (op1);
1926   def2 = SSA_NAME_DEF_STMT (op2);
1927   if (!def1 || !def2)
1928     {
1929       if (vect_print_dump_info (REPORT_DETAILS))
1930         {
1931           fprintf (vect_dump, "reduction: no defs for operands: ");
1932           print_generic_expr (vect_dump, operation, TDF_SLIM);
1933         }
1934       return NULL_TREE;
1935     }
1936
1937   if (TREE_CODE (def1) == MODIFY_EXPR
1938       && flow_bb_inside_loop_p (loop, bb_for_stmt (def1))
1939       && def2 == phi)
1940     {
1941       if (vect_print_dump_info (REPORT_DETAILS))
1942         {
1943           fprintf (vect_dump, "detected reduction:");
1944           print_generic_expr (vect_dump, operation, TDF_SLIM);
1945         }
1946       return def_stmt;
1947     }
1948   else if (TREE_CODE (def2) == MODIFY_EXPR
1949       && flow_bb_inside_loop_p (loop, bb_for_stmt (def2))
1950       && def1 == phi)
1951     {
1952       /* Swap operands (just for simplicity - so that the rest of the code
1953          can assume that the reduction variable is always the last (second)
1954          argument).  */
1955       if (vect_print_dump_info (REPORT_DETAILS))
1956         {
1957           fprintf (vect_dump, "detected reduction: need to swap operands:");
1958           print_generic_expr (vect_dump, operation, TDF_SLIM);
1959         }
1960       swap_tree_operands (def_stmt, &TREE_OPERAND (operation, 0),
1961                                     &TREE_OPERAND (operation, 1));
1962       return def_stmt;
1963     }
1964   else
1965     {
1966       if (vect_print_dump_info (REPORT_DETAILS))
1967         {
1968           fprintf (vect_dump, "reduction: unknown pattern.");
1969           print_generic_expr (vect_dump, operation, TDF_SLIM);
1970         }
1971       return NULL_TREE;
1972     }
1973 }
1974
1975
1976 /* Function vect_is_simple_iv_evolution.
1977
1978    FORNOW: A simple evolution of an induction variables in the loop is
1979    considered a polynomial evolution with constant step.  */
1980
1981 bool
1982 vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
1983                              tree * step)
1984 {
1985   tree init_expr;
1986   tree step_expr;
1987
1988   tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
1989
1990   /* When there is no evolution in this loop, the evolution function
1991      is not "simple".  */
1992   if (evolution_part == NULL_TREE)
1993     return false;
1994
1995   /* When the evolution is a polynomial of degree >= 2
1996      the evolution function is not "simple".  */
1997   if (tree_is_chrec (evolution_part))
1998     return false;
1999
2000   step_expr = evolution_part;
2001   init_expr = unshare_expr (initial_condition_in_loop_num (access_fn,
2002                                                            loop_nb));
2003
2004   if (vect_print_dump_info (REPORT_DETAILS))
2005     {
2006       fprintf (vect_dump, "step: ");
2007       print_generic_expr (vect_dump, step_expr, TDF_SLIM);
2008       fprintf (vect_dump, ",  init: ");
2009       print_generic_expr (vect_dump, init_expr, TDF_SLIM);
2010     }
2011
2012   *init = init_expr;
2013   *step = step_expr;
2014
2015   if (TREE_CODE (step_expr) != INTEGER_CST)
2016     {
2017       if (vect_print_dump_info (REPORT_DETAILS))
2018         fprintf (vect_dump, "step unknown.");
2019       return false;
2020     }
2021
2022   return true;
2023 }
2024
2025
2026 /* Function vectorize_loops.
2027
2028    Entry Point to loop vectorization phase.  */
2029
2030 void
2031 vectorize_loops (struct loops *loops)
2032 {
2033   unsigned int i;
2034   unsigned int num_vectorized_loops = 0;
2035
2036   /* Fix the verbosity level if not defined explicitly by the user.  */
2037   vect_set_dump_settings ();
2038
2039   /* Allocate the bitmap that records which virtual variables that
2040      need to be renamed.  */
2041   vect_vnames_to_rename = BITMAP_ALLOC (NULL);
2042
2043   /*  ----------- Analyze loops. -----------  */
2044
2045   /* If some loop was duplicated, it gets bigger number
2046      than all previously defined loops. This fact allows us to run
2047      only over initial loops skipping newly generated ones.  */
2048   vect_loops_num = loops->num;
2049   for (i = 1; i < vect_loops_num; i++)
2050     {
2051       loop_vec_info loop_vinfo;
2052       struct loop *loop = loops->parray[i];
2053
2054       if (!loop)
2055         continue;
2056
2057       vect_loop_location = find_loop_location (loop);
2058       loop_vinfo = vect_analyze_loop (loop);
2059       loop->aux = loop_vinfo;
2060
2061       if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
2062         continue;
2063
2064       vect_transform_loop (loop_vinfo, loops);
2065       num_vectorized_loops++;
2066     }
2067
2068   if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS))
2069     fprintf (vect_dump, "vectorized %u loops in function.\n",
2070              num_vectorized_loops);
2071
2072   /*  ----------- Finalize. -----------  */
2073
2074   BITMAP_FREE (vect_vnames_to_rename);
2075
2076   for (i = 1; i < vect_loops_num; i++)
2077     {
2078       struct loop *loop = loops->parray[i];
2079       loop_vec_info loop_vinfo;
2080
2081       if (!loop)
2082         continue;
2083       loop_vinfo = loop->aux;
2084       destroy_loop_vec_info (loop_vinfo);
2085       loop->aux = NULL;
2086     }
2087 }