gcc/tree-vectorizer.c

   1 /* Loop Vectorization
   2    Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
   3    Contributed by Dorit Naishlos <dorit@il.ibm.com>
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 2, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING.  If not, write to the Free
  19 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
  20 02110-1301, USA.  */
  21
  22 /* Loop Vectorization Pass.
  23
  24    This pass tries to vectorize loops. This first implementation focuses on
  25    simple inner-most loops, with no conditional control flow, and a set of
  26    simple operations which vector form can be expressed using existing
  27    tree codes (PLUS, MULT etc).
  28
  29    For example, the vectorizer transforms the following simple loop:
  30
  31         short a[N]; short b[N]; short c[N]; int i;
  32
  33         for (i=0; i<N; i++){
  34           a[i] = b[i] + c[i];
  35         }
  36
  37    as if it was manually vectorized by rewriting the source code into:
  38
  39         typedef int __attribute__((mode(V8HI))) v8hi;
  40         short a[N];  short b[N]; short c[N];   int i;
  41         v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
  42         v8hi va, vb, vc;
  43
  44         for (i=0; i<N/8; i++){
  45           vb = pb[i];
  46           vc = pc[i];
  47           va = vb + vc;
  48           pa[i] = va;
  49         }
  50
  51         The main entry to this pass is vectorize_loops(), in which
  52    the vectorizer applies a set of analyses on a given set of loops,
  53    followed by the actual vectorization transformation for the loops that
  54    had successfully passed the analysis phase.
  55
  56         Throughout this pass we make a distinction between two types of
  57    data: scalars (which are represented by SSA_NAMES), and memory references
  58    ("data-refs"). These two types of data require different handling both
  59    during analysis and transformation. The types of data-refs that the
  60    vectorizer currently supports are ARRAY_REFS which base is an array DECL
  61    (not a pointer), and INDIRECT_REFS through pointers; both array and pointer
  62    accesses are required to have a  simple (consecutive) access pattern.
  63
  64    Analysis phase:
  65    ===============
  66         The driver for the analysis phase is vect_analyze_loop_nest().
  67    It applies a set of analyses, some of which rely on the scalar evolution
  68    analyzer (scev) developed by Sebastian Pop.
  69
  70         During the analysis phase the vectorizer records some information
  71    per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
  72    loop, as well as general information about the loop as a whole, which is
  73    recorded in a "loop_vec_info" struct attached to each loop.
  74
  75    Transformation phase:
  76    =====================
  77         The loop transformation phase scans all the stmts in the loop, and
  78    creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
  79    the loop that needs to be vectorized. It insert the vector code sequence
  80    just before the scalar stmt S, and records a pointer to the vector code
  81    in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
  82    attached to S). This pointer will be used for the vectorization of following
  83    stmts which use the def of stmt S. Stmt S is removed if it writes to memory;
  84    otherwise, we rely on dead code elimination for removing it.
  85
  86         For example, say stmt S1 was vectorized into stmt VS1:
  87
  88    VS1: vb = px[i];
  89    S1:  b = x[i];    STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
  90    S2:  a = b;
  91
  92    To vectorize stmt S2, the vectorizer first finds the stmt that defines
  93    the operand 'b' (S1), and gets the relevant vector def 'vb' from the
  94    vector stmt VS1 pointed by STMT_VINFO_VEC_STMT (stmt_info (S1)). The
  95    resulting sequence would be:
  96
  97    VS1: vb = px[i];
  98    S1:  b = x[i];       STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
  99    VS2: va = vb;
 100    S2:  a = b;          STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2
 101
 102         Operands that are not SSA_NAMEs, are data-refs that appear in
 103    load/store operations (like 'x[i]' in S1), and are handled differently.
 104
 105    Target modeling:
 106    =================
 107         Currently the only target specific information that is used is the
 108    size of the vector (in bytes) - "UNITS_PER_SIMD_WORD". Targets that can
 109    support different sizes of vectors, for now will need to specify one value
 110    for "UNITS_PER_SIMD_WORD". More flexibility will be added in the future.
 111
 112         Since we only vectorize operations which vector form can be
 113    expressed using existing tree codes, to verify that an operation is
 114    supported, the vectorizer checks the relevant optab at the relevant
 115    machine_mode (e.g, add_optab->handlers[(int) V8HImode].insn_code). If
 116    the value found is CODE_FOR_nothing, then there's no target support, and
 117    we can't vectorize the stmt.
 118
 119    For additional information on this project see:
 120    http://gcc.gnu.org/projects/tree-ssa/vectorization.html
 121 */
 122
 123 #include "config.h"
 124 #include "system.h"
 125 #include "coretypes.h"
 126 #include "tm.h"
 127 #include "ggc.h"
 128 #include "tree.h"
 129 #include "target.h"
 130 #include "rtl.h"
 131 #include "basic-block.h"
 132 #include "diagnostic.h"
 133 #include "tree-flow.h"
 134 #include "tree-dump.h"
 135 #include "timevar.h"
 136 #include "cfgloop.h"
 137 #include "cfglayout.h"
 138 #include "expr.h"
 139 #include "optabs.h"
 140 #include "toplev.h"
 141 #include "tree-chrec.h"
 142 #include "tree-data-ref.h"
 143 #include "tree-scalar-evolution.h"
 144 #include "input.h"
 145 #include "tree-vectorizer.h"
 146 #include "tree-pass.h"
 147
 148 /*************************************************************************
 149   Simple Loop Peeling Utilities
 150  *************************************************************************/
 151 static struct loop *slpeel_tree_duplicate_loop_to_edge_cfg
 152   (struct loop *, struct loops *, edge);
 153 static void slpeel_update_phis_for_duplicate_loop
 154   (struct loop *, struct loop *, bool after);
 155 static void slpeel_update_phi_nodes_for_guard1
 156   (edge, struct loop *, bool, basic_block *, bitmap *);
 157 static void slpeel_update_phi_nodes_for_guard2
 158   (edge, struct loop *, bool, basic_block *);
 159 static edge slpeel_add_loop_guard (basic_block, tree, basic_block, basic_block);
 160
 161 static void rename_use_op (use_operand_p);
 162 static void rename_variables_in_bb (basic_block);
 163 static void rename_variables_in_loop (struct loop *);
 164
 165 /*************************************************************************
 166   General Vectorization Utilities
 167  *************************************************************************/
 168 static void vect_set_dump_settings (void);
 169
 170 /* vect_dump will be set to stderr or dump_file if exist.  */
 171 FILE *vect_dump;
 172
 173 /* vect_verbosity_level set to an invalid value
 174    to mark that it's uninitialized.  */
 175 enum verbosity_levels vect_verbosity_level = MAX_VERBOSITY_LEVEL;
 176
 177 /* Number of loops, at the beginning of vectorization.  */
 178 unsigned int vect_loops_num;
 179 \f
 180 /*************************************************************************
 181   Simple Loop Peeling Utilities
 182
 183   Utilities to support loop peeling for vectorization purposes.
 184  *************************************************************************/
 185
 186
 187 /* Renames the use *OP_P.  */
 188
 189 static void
 190 rename_use_op (use_operand_p op_p)
 191 {
 192   tree new_name;
 193
 194   if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
 195     return;
 196
 197   new_name = get_current_def (USE_FROM_PTR (op_p));
 198
 199   /* Something defined outside of the loop.  */
 200   if (!new_name)
 201     return;
 202
 203   /* An ordinary ssa name defined in the loop.  */
 204
 205   SET_USE (op_p, new_name);
 206 }
 207
 208
 209 /* Renames the variables in basic block BB.  */
 210
 211 static void
 212 rename_variables_in_bb (basic_block bb)
 213 {
 214   tree phi;
 215   block_stmt_iterator bsi;
 216   tree stmt;
 217   use_operand_p use_p;
 218   ssa_op_iter iter;
 219   edge e;
 220   edge_iterator ei;
 221   struct loop *loop = bb->loop_father;
 222
 223   for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
 224     {
 225       stmt = bsi_stmt (bsi);
 226       FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter,
 227                                  (SSA_OP_ALL_USES | SSA_OP_ALL_KILLS))
 228         rename_use_op (use_p);
 229     }
 230
 231   FOR_EACH_EDGE (e, ei, bb->succs)
 232     {
 233       if (!flow_bb_inside_loop_p (loop, e->dest))
 234         continue;
 235       for (phi = phi_nodes (e->dest); phi; phi = PHI_CHAIN (phi))
 236         rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e));
 237     }
 238 }
 239
 240
 241 /* Renames variables in new generated LOOP.  */
 242
 243 static void
 244 rename_variables_in_loop (struct loop *loop)
 245 {
 246   unsigned i;
 247   basic_block *bbs;
 248
 249   bbs = get_loop_body (loop);
 250
 251   for (i = 0; i < loop->num_nodes; i++)
 252     rename_variables_in_bb (bbs[i]);
 253
 254   free (bbs);
 255 }
 256
 257
 258 /* Update the PHI nodes of NEW_LOOP.
 259
 260    NEW_LOOP is a duplicate of ORIG_LOOP.
 261    AFTER indicates whether NEW_LOOP executes before or after ORIG_LOOP:
 262    AFTER is true if NEW_LOOP executes after ORIG_LOOP, and false if it
 263    executes before it.  */
 264
 265 static void
 266 slpeel_update_phis_for_duplicate_loop (struct loop *orig_loop,
 267                                        struct loop *new_loop, bool after)
 268 {
 269   tree new_ssa_name;
 270   tree phi_new, phi_orig;
 271   tree def;
 272   edge orig_loop_latch = loop_latch_edge (orig_loop);
 273   edge orig_entry_e = loop_preheader_edge (orig_loop);
 274   edge new_loop_exit_e = new_loop->single_exit;
 275   edge new_loop_entry_e = loop_preheader_edge (new_loop);
 276   edge entry_arg_e = (after ? orig_loop_latch : orig_entry_e);
 277
 278   /*
 279      step 1. For each loop-header-phi:
 280              Add the first phi argument for the phi in NEW_LOOP
 281             (the one associated with the entry of NEW_LOOP)
 282
 283      step 2. For each loop-header-phi:
 284              Add the second phi argument for the phi in NEW_LOOP
 285             (the one associated with the latch of NEW_LOOP)
 286
 287      step 3. Update the phis in the successor block of NEW_LOOP.
 288
 289         case 1: NEW_LOOP was placed before ORIG_LOOP:
 290                 The successor block of NEW_LOOP is the header of ORIG_LOOP.
 291                 Updating the phis in the successor block can therefore be done
 292                 along with the scanning of the loop header phis, because the
 293                 header blocks of ORIG_LOOP and NEW_LOOP have exactly the same
 294                 phi nodes, organized in the same order.
 295
 296         case 2: NEW_LOOP was placed after ORIG_LOOP:
 297                 The successor block of NEW_LOOP is the original exit block of
 298                 ORIG_LOOP - the phis to be updated are the loop-closed-ssa phis.
 299                 We postpone updating these phis to a later stage (when
 300                 loop guards are added).
 301    */
 302
 303
 304   /* Scan the phis in the headers of the old and new loops
 305      (they are organized in exactly the same order).  */
 306
 307   for (phi_new = phi_nodes (new_loop->header),
 308        phi_orig = phi_nodes (orig_loop->header);
 309        phi_new && phi_orig;
 310        phi_new = PHI_CHAIN (phi_new), phi_orig = PHI_CHAIN (phi_orig))
 311     {
 312       /* step 1.  */
 313       def = PHI_ARG_DEF_FROM_EDGE (phi_orig, entry_arg_e);
 314       add_phi_arg (phi_new, def, new_loop_entry_e);
 315
 316       /* step 2.  */
 317       def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_loop_latch);
 318       if (TREE_CODE (def) != SSA_NAME)
 319         continue;
 320
 321       new_ssa_name = get_current_def (def);
 322       if (!new_ssa_name)
 323         {
 324           /* This only happens if there are no definitions
 325              inside the loop. use the phi_result in this case.  */
 326           new_ssa_name = PHI_RESULT (phi_new);
 327         }
 328
 329       /* An ordinary ssa name defined in the loop.  */
 330       add_phi_arg (phi_new, new_ssa_name, loop_latch_edge (new_loop));
 331
 332       /* step 3 (case 1).  */
 333       if (!after)
 334         {
 335           gcc_assert (new_loop_exit_e == orig_entry_e);
 336           SET_PHI_ARG_DEF (phi_orig,
 337                            new_loop_exit_e->dest_idx,
 338                            new_ssa_name);
 339         }
 340     }
 341 }
 342
 343
 344 /* Update PHI nodes for a guard of the LOOP.
 345
 346    Input:
 347    - LOOP, GUARD_EDGE: LOOP is a loop for which we added guard code that
 348         controls whether LOOP is to be executed.  GUARD_EDGE is the edge that
 349         originates from the guard-bb, skips LOOP and reaches the (unique) exit
 350         bb of LOOP.  This loop-exit-bb is an empty bb with one successor.
 351         We denote this bb NEW_MERGE_BB because before the guard code was added
 352         it had a single predecessor (the LOOP header), and now it became a merge
 353         point of two paths - the path that ends with the LOOP exit-edge, and
 354         the path that ends with GUARD_EDGE.
 355    - NEW_EXIT_BB: New basic block that is added by this function between LOOP
 356         and NEW_MERGE_BB. It is used to place loop-closed-ssa-form exit-phis.
 357
 358    ===> The CFG before the guard-code was added:
 359         LOOP_header_bb:
 360           loop_body
 361           if (exit_loop) goto update_bb
 362           else           goto LOOP_header_bb
 363         update_bb:
 364
 365    ==> The CFG after the guard-code was added:
 366         guard_bb:
 367           if (LOOP_guard_condition) goto new_merge_bb
 368           else                      goto LOOP_header_bb
 369         LOOP_header_bb:
 370           loop_body
 371           if (exit_loop_condition) goto new_merge_bb
 372           else                     goto LOOP_header_bb
 373         new_merge_bb:
 374           goto update_bb
 375         update_bb:
 376
 377    ==> The CFG after this function:
 378         guard_bb:
 379           if (LOOP_guard_condition) goto new_merge_bb
 380           else                      goto LOOP_header_bb
 381         LOOP_header_bb:
 382           loop_body
 383           if (exit_loop_condition) goto new_exit_bb
 384           else                     goto LOOP_header_bb
 385         new_exit_bb:
 386         new_merge_bb:
 387           goto update_bb
 388         update_bb:
 389
 390    This function:
 391    1. creates and updates the relevant phi nodes to account for the new
 392       incoming edge (GUARD_EDGE) into NEW_MERGE_BB. This involves:
 393       1.1. Create phi nodes at NEW_MERGE_BB.
 394       1.2. Update the phi nodes at the successor of NEW_MERGE_BB (denoted
 395            UPDATE_BB).  UPDATE_BB was the exit-bb of LOOP before NEW_MERGE_BB
 396    2. preserves loop-closed-ssa-form by creating the required phi nodes
 397       at the exit of LOOP (i.e, in NEW_EXIT_BB).
 398
 399    There are two flavors to this function:
 400
 401    slpeel_update_phi_nodes_for_guard1:
 402      Here the guard controls whether we enter or skip LOOP, where LOOP is a
 403      prolog_loop (loop1 below), and the new phis created in NEW_MERGE_BB are
 404      for variables that have phis in the loop header.
 405
 406    slpeel_update_phi_nodes_for_guard2:
 407      Here the guard controls whether we enter or skip LOOP, where LOOP is an
 408      epilog_loop (loop2 below), and the new phis created in NEW_MERGE_BB are
 409      for variables that have phis in the loop exit.
 410
 411    I.E., the overall structure is:
 412
 413         loop1_preheader_bb:
 414                 guard1 (goto loop1/merg1_bb)
 415         loop1
 416         loop1_exit_bb:
 417                 guard2 (goto merge1_bb/merge2_bb)
 418         merge1_bb
 419         loop2
 420         loop2_exit_bb
 421         merge2_bb
 422         next_bb
 423
 424    slpeel_update_phi_nodes_for_guard1 takes care of creating phis in
 425    loop1_exit_bb and merge1_bb. These are entry phis (phis for the vars
 426    that have phis in loop1->header).
 427
 428    slpeel_update_phi_nodes_for_guard2 takes care of creating phis in
 429    loop2_exit_bb and merge2_bb. These are exit phis (phis for the vars
 430    that have phis in next_bb). It also adds some of these phis to
 431    loop1_exit_bb.
 432
 433    slpeel_update_phi_nodes_for_guard1 is always called before
 434    slpeel_update_phi_nodes_for_guard2. They are both needed in order
 435    to create correct data-flow and loop-closed-ssa-form.
 436
 437    Generally slpeel_update_phi_nodes_for_guard1 creates phis for variables
 438    that change between iterations of a loop (and therefore have a phi-node
 439    at the loop entry), whereas slpeel_update_phi_nodes_for_guard2 creates
 440    phis for variables that are used out of the loop (and therefore have
 441    loop-closed exit phis). Some variables may be both updated between
 442    iterations and used after the loop. This is why in loop1_exit_bb we
 443    may need both entry_phis (created by slpeel_update_phi_nodes_for_guard1)
 444    and exit phis (created by slpeel_update_phi_nodes_for_guard2).
 445
 446    - IS_NEW_LOOP: if IS_NEW_LOOP is true, then LOOP is a newly created copy of
 447      an original loop. i.e., we have:
 448
 449            orig_loop
 450            guard_bb (goto LOOP/new_merge)
 451            new_loop <-- LOOP
 452            new_exit
 453            new_merge
 454            next_bb
 455
 456      If IS_NEW_LOOP is false, then LOOP is an original loop, in which case we
 457      have:
 458
 459            new_loop
 460            guard_bb (goto LOOP/new_merge)
 461            orig_loop <-- LOOP
 462            new_exit
 463            new_merge
 464            next_bb
 465
 466      The SSA names defined in the original loop have a current
 467      reaching definition that that records the corresponding new
 468      ssa-name used in the new duplicated loop copy.
 469   */
 470
 471 /* Function slpeel_update_phi_nodes_for_guard1
 472
 473    Input:
 474    - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
 475    - DEFS - a bitmap of ssa names to mark new names for which we recorded
 476             information.
 477
 478    In the context of the overall structure, we have:
 479
 480         loop1_preheader_bb:
 481                 guard1 (goto loop1/merg1_bb)
 482 LOOP->  loop1
 483         loop1_exit_bb:
 484                 guard2 (goto merge1_bb/merge2_bb)
 485         merge1_bb
 486         loop2
 487         loop2_exit_bb
 488         merge2_bb
 489         next_bb
 490
 491    For each name updated between loop iterations (i.e - for each name that has
 492    an entry (loop-header) phi in LOOP) we create a new phi in:
 493    1. merge1_bb (to account for the edge from guard1)
 494    2. loop1_exit_bb (an exit-phi to keep LOOP in loop-closed form)
 495 */
 496
 497 static void
 498 slpeel_update_phi_nodes_for_guard1 (edge guard_edge, struct loop *loop,
 499                                     bool is_new_loop, basic_block *new_exit_bb,
 500                                     bitmap *defs)
 501 {
 502   tree orig_phi, new_phi;
 503   tree update_phi, update_phi2;
 504   tree guard_arg, loop_arg;
 505   basic_block new_merge_bb = guard_edge->dest;
 506   edge e = EDGE_SUCC (new_merge_bb, 0);
 507   basic_block update_bb = e->dest;
 508   basic_block orig_bb = loop->header;
 509   edge new_exit_e;
 510   tree current_new_name;
 511
 512   /* Create new bb between loop and new_merge_bb.  */
 513   *new_exit_bb = split_edge (loop->single_exit);
 514   add_bb_to_loop (*new_exit_bb, loop->outer);
 515
 516   new_exit_e = EDGE_SUCC (*new_exit_bb, 0);
 517
 518   for (orig_phi = phi_nodes (orig_bb), update_phi = phi_nodes (update_bb);
 519        orig_phi && update_phi;
 520        orig_phi = PHI_CHAIN (orig_phi), update_phi = PHI_CHAIN (update_phi))
 521     {
 522       /** 1. Handle new-merge-point phis  **/
 523
 524       /* 1.1. Generate new phi node in NEW_MERGE_BB:  */
 525       new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
 526                                  new_merge_bb);
 527
 528       /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
 529             of LOOP. Set the two phi args in NEW_PHI for these edges:  */
 530       loop_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, EDGE_SUCC (loop->latch, 0));
 531       guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, loop_preheader_edge (loop));
 532
 533       add_phi_arg (new_phi, loop_arg, new_exit_e);
 534       add_phi_arg (new_phi, guard_arg, guard_edge);
 535
 536       /* 1.3. Update phi in successor block.  */
 537       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == loop_arg
 538                   || PHI_ARG_DEF_FROM_EDGE (update_phi, e) == guard_arg);
 539       SET_PHI_ARG_DEF (update_phi, e->dest_idx, PHI_RESULT (new_phi));
 540       update_phi2 = new_phi;
 541
 542
 543       /** 2. Handle loop-closed-ssa-form phis  **/
 544
 545       /* 2.1. Generate new phi node in NEW_EXIT_BB:  */
 546       new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
 547                                  *new_exit_bb);
 548
 549       /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop.  */
 550       add_phi_arg (new_phi, loop_arg, loop->single_exit);
 551
 552       /* 2.3. Update phi in successor of NEW_EXIT_BB:  */
 553       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
 554       SET_PHI_ARG_DEF (update_phi2, new_exit_e->dest_idx, PHI_RESULT (new_phi));
 555
 556       /* 2.4. Record the newly created name with set_current_def.
 557          We want to find a name such that
 558                 name = get_current_def (orig_loop_name)
 559          and to set its current definition as follows:
 560                 set_current_def (name, new_phi_name)
 561
 562          If LOOP is a new loop then loop_arg is already the name we're
 563          looking for. If LOOP is the original loop, then loop_arg is
 564          the orig_loop_name and the relevant name is recorded in its
 565          current reaching definition.  */
 566       if (is_new_loop)
 567         current_new_name = loop_arg;
 568       else
 569         {
 570           current_new_name = get_current_def (loop_arg);
 571           /* current_def is not available only if the variable does not
 572              change inside the loop, in which case we also don't care
 573              about recording a current_def for it because we won't be
 574              trying to create loop-exit-phis for it.  */
 575           if (!current_new_name)
 576             continue;
 577         }
 578       gcc_assert (get_current_def (current_new_name) == NULL_TREE);
 579
 580       set_current_def (current_new_name, PHI_RESULT (new_phi));
 581       bitmap_set_bit (*defs, SSA_NAME_VERSION (current_new_name));
 582     }
 583
 584   set_phi_nodes (new_merge_bb, phi_reverse (phi_nodes (new_merge_bb)));
 585 }
 586
 587
 588 /* Function slpeel_update_phi_nodes_for_guard2
 589
 590    Input:
 591    - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
 592
 593    In the context of the overall structure, we have:
 594
 595         loop1_preheader_bb:
 596                 guard1 (goto loop1/merg1_bb)
 597         loop1
 598         loop1_exit_bb:
 599                 guard2 (goto merge1_bb/merge2_bb)
 600         merge1_bb
 601 LOOP->  loop2
 602         loop2_exit_bb
 603         merge2_bb
 604         next_bb
 605
 606    For each name used out side the loop (i.e - for each name that has an exit
 607    phi in next_bb) we create a new phi in:
 608    1. merge2_bb (to account for the edge from guard_bb)
 609    2. loop2_exit_bb (an exit-phi to keep LOOP in loop-closed form)
 610    3. guard2 bb (an exit phi to keep the preceding loop in loop-closed form),
 611       if needed (if it wasn't handled by slpeel_update_phis_nodes_for_phi1).
 612 */
 613
 614 static void
 615 slpeel_update_phi_nodes_for_guard2 (edge guard_edge, struct loop *loop,
 616                                     bool is_new_loop, basic_block *new_exit_bb)
 617 {
 618   tree orig_phi, new_phi;
 619   tree update_phi, update_phi2;
 620   tree guard_arg, loop_arg;
 621   basic_block new_merge_bb = guard_edge->dest;
 622   edge e = EDGE_SUCC (new_merge_bb, 0);
 623   basic_block update_bb = e->dest;
 624   edge new_exit_e;
 625   tree orig_def, orig_def_new_name;
 626   tree new_name, new_name2;
 627   tree arg;
 628
 629   /* Create new bb between loop and new_merge_bb.  */
 630   *new_exit_bb = split_edge (loop->single_exit);
 631   add_bb_to_loop (*new_exit_bb, loop->outer);
 632
 633   new_exit_e = EDGE_SUCC (*new_exit_bb, 0);
 634
 635   for (update_phi = phi_nodes (update_bb); update_phi;
 636        update_phi = PHI_CHAIN (update_phi))
 637     {
 638       orig_phi = update_phi;
 639       orig_def = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
 640       orig_def_new_name = get_current_def (orig_def);
 641       arg = NULL_TREE;
 642
 643       /** 1. Handle new-merge-point phis  **/
 644
 645       /* 1.1. Generate new phi node in NEW_MERGE_BB:  */
 646       new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
 647                                  new_merge_bb);
 648
 649       /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
 650             of LOOP. Set the two PHI args in NEW_PHI for these edges:  */
 651       new_name = orig_def;
 652       new_name2 = NULL_TREE;
 653       if (orig_def_new_name)
 654         {
 655           new_name = orig_def_new_name;
 656           /* Some variables have both loop-entry-phis and loop-exit-phis.
 657              Such variables were given yet newer names by phis placed in
 658              guard_bb by slpeel_update_phi_nodes_for_guard1. I.e:
 659              new_name2 = get_current_def (get_current_def (orig_name)).  */
 660           new_name2 = get_current_def (new_name);
 661         }
 662
 663       if (is_new_loop)
 664         {
 665           guard_arg = orig_def;
 666           loop_arg = new_name;
 667         }
 668       else
 669         {
 670           guard_arg = new_name;
 671           loop_arg = orig_def;
 672         }
 673       if (new_name2)
 674         guard_arg = new_name2;
 675
 676       add_phi_arg (new_phi, loop_arg, new_exit_e);
 677       add_phi_arg (new_phi, guard_arg, guard_edge);
 678
 679       /* 1.3. Update phi in successor block.  */
 680       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == orig_def);
 681       SET_PHI_ARG_DEF (update_phi, e->dest_idx, PHI_RESULT (new_phi));
 682       update_phi2 = new_phi;
 683
 684
 685       /** 2. Handle loop-closed-ssa-form phis  **/
 686
 687       /* 2.1. Generate new phi node in NEW_EXIT_BB:  */
 688       new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
 689                                  *new_exit_bb);
 690
 691       /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop.  */
 692       add_phi_arg (new_phi, loop_arg, loop->single_exit);
 693
 694       /* 2.3. Update phi in successor of NEW_EXIT_BB:  */
 695       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
 696       SET_PHI_ARG_DEF (update_phi2, new_exit_e->dest_idx, PHI_RESULT (new_phi));
 697
 698
 699       /** 3. Handle loop-closed-ssa-form phis for first loop  **/
 700
 701       /* 3.1. Find the relevant names that need an exit-phi in
 702          GUARD_BB, i.e. names for which
 703          slpeel_update_phi_nodes_for_guard1 had not already created a
 704          phi node. This is the case for names that are used outside
 705          the loop (and therefore need an exit phi) but are not updated
 706          across loop iterations (and therefore don't have a
 707          loop-header-phi).
 708
 709          slpeel_update_phi_nodes_for_guard1 is responsible for
 710          creating loop-exit phis in GUARD_BB for names that have a
 711          loop-header-phi.  When such a phi is created we also record
 712          the new name in its current definition.  If this new name
 713          exists, then guard_arg was set to this new name (see 1.2
 714          above).  Therefore, if guard_arg is not this new name, this
 715          is an indication that an exit-phi in GUARD_BB was not yet
 716          created, so we take care of it here.  */
 717       if (guard_arg == new_name2)
 718         continue;
 719       arg = guard_arg;
 720
 721       /* 3.2. Generate new phi node in GUARD_BB:  */
 722       new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
 723                                  guard_edge->src);
 724
 725       /* 3.3. GUARD_BB has one incoming edge:  */
 726       gcc_assert (EDGE_COUNT (guard_edge->src->preds) == 1);
 727       add_phi_arg (new_phi, arg, EDGE_PRED (guard_edge->src, 0));
 728
 729       /* 3.4. Update phi in successor of GUARD_BB:  */
 730       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, guard_edge)
 731                                                                 == guard_arg);
 732       SET_PHI_ARG_DEF (update_phi2, guard_edge->dest_idx, PHI_RESULT (new_phi));
 733     }
 734
 735   set_phi_nodes (new_merge_bb, phi_reverse (phi_nodes (new_merge_bb)));
 736 }
 737
 738
 739 /* Make the LOOP iterate NITERS times. This is done by adding a new IV
 740    that starts at zero, increases by one and its limit is NITERS.
 741
 742    Assumption: the exit-condition of LOOP is the last stmt in the loop.  */
 743
 744 void
 745 slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
 746 {
 747   tree indx_before_incr, indx_after_incr, cond_stmt, cond;
 748   tree orig_cond;
 749   edge exit_edge = loop->single_exit;
 750   block_stmt_iterator loop_cond_bsi;
 751   block_stmt_iterator incr_bsi;
 752   bool insert_after;
 753   tree begin_label = tree_block_label (loop->latch);
 754   tree exit_label = tree_block_label (loop->single_exit->dest);
 755   tree init = build_int_cst (TREE_TYPE (niters), 0);
 756   tree step = build_int_cst (TREE_TYPE (niters), 1);
 757   tree then_label;
 758   tree else_label;
 759   LOC loop_loc;
 760
 761   orig_cond = get_loop_exit_condition (loop);
 762   gcc_assert (orig_cond);
 763   loop_cond_bsi = bsi_for_stmt (orig_cond);
 764
 765   standard_iv_increment_position (loop, &incr_bsi, &insert_after);
 766   create_iv (init, step, NULL_TREE, loop,
 767              &incr_bsi, insert_after, &indx_before_incr, &indx_after_incr);
 768
 769   if (exit_edge->flags & EDGE_TRUE_VALUE) /* 'then' edge exits the loop.  */
 770     {
 771       cond = build2 (GE_EXPR, boolean_type_node, indx_after_incr, niters);
 772       then_label = build1 (GOTO_EXPR, void_type_node, exit_label);
 773       else_label = build1 (GOTO_EXPR, void_type_node, begin_label);
 774     }
 775   else /* 'then' edge loops back.  */
 776     {
 777       cond = build2 (LT_EXPR, boolean_type_node, indx_after_incr, niters);
 778       then_label = build1 (GOTO_EXPR, void_type_node, begin_label);
 779       else_label = build1 (GOTO_EXPR, void_type_node, exit_label);
 780     }
 781
 782   cond_stmt = build3 (COND_EXPR, TREE_TYPE (orig_cond), cond,
 783                      then_label, else_label);
 784   bsi_insert_before (&loop_cond_bsi, cond_stmt, BSI_SAME_STMT);
 785
 786   /* Remove old loop exit test:  */
 787   bsi_remove (&loop_cond_bsi);
 788
 789   loop_loc = find_loop_location (loop);
 790   if (dump_file && (dump_flags & TDF_DETAILS))
 791     {
 792       if (loop_loc != UNKNOWN_LOC)
 793         fprintf (dump_file, "\nloop at %s:%d: ",
 794                  LOC_FILE (loop_loc), LOC_LINE (loop_loc));
 795       print_generic_expr (dump_file, cond_stmt, TDF_SLIM);
 796     }
 797
 798   loop->nb_iterations = niters;
 799 }
 800
 801
 802 /* Given LOOP this function generates a new copy of it and puts it
 803    on E which is either the entry or exit of LOOP.  */
 804
 805 static struct loop *
 806 slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, struct loops *loops,
 807                                         edge e)
 808 {
 809   struct loop *new_loop;
 810   basic_block *new_bbs, *bbs;
 811   bool at_exit;
 812   bool was_imm_dom;
 813   basic_block exit_dest;
 814   tree phi, phi_arg;
 815
 816   at_exit = (e == loop->single_exit);
 817   if (!at_exit && e != loop_preheader_edge (loop))
 818     return NULL;
 819
 820   bbs = get_loop_body (loop);
 821
 822   /* Check whether duplication is possible.  */
 823   if (!can_copy_bbs_p (bbs, loop->num_nodes))
 824     {
 825       free (bbs);
 826       return NULL;
 827     }
 828
 829   /* Generate new loop structure.  */
 830   new_loop = duplicate_loop (loops, loop, loop->outer);
 831   if (!new_loop)
 832     {
 833       free (bbs);
 834       return NULL;
 835     }
 836
 837   exit_dest = loop->single_exit->dest;
 838   was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
 839                                           exit_dest) == loop->header ?
 840                  true : false);
 841
 842   new_bbs = xmalloc (sizeof (basic_block) * loop->num_nodes);
 843
 844   copy_bbs (bbs, loop->num_nodes, new_bbs,
 845             &loop->single_exit, 1, &new_loop->single_exit, NULL);
 846
 847   /* Duplicating phi args at exit bbs as coming
 848      also from exit of duplicated loop.  */
 849   for (phi = phi_nodes (exit_dest); phi; phi = PHI_CHAIN (phi))
 850     {
 851       phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, loop->single_exit);
 852       if (phi_arg)
 853         {
 854           edge new_loop_exit_edge;
 855
 856           if (EDGE_SUCC (new_loop->header, 0)->dest == new_loop->latch)
 857             new_loop_exit_edge = EDGE_SUCC (new_loop->header, 1);
 858           else
 859             new_loop_exit_edge = EDGE_SUCC (new_loop->header, 0);
 860
 861           add_phi_arg (phi, phi_arg, new_loop_exit_edge);
 862         }
 863     }
 864
 865   if (at_exit) /* Add the loop copy at exit.  */
 866     {
 867       redirect_edge_and_branch_force (e, new_loop->header);
 868       set_immediate_dominator (CDI_DOMINATORS, new_loop->header, e->src);
 869       if (was_imm_dom)
 870         set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_loop->header);
 871     }
 872   else /* Add the copy at entry.  */
 873     {
 874       edge new_exit_e;
 875       edge entry_e = loop_preheader_edge (loop);
 876       basic_block preheader = entry_e->src;
 877
 878       if (!flow_bb_inside_loop_p (new_loop,
 879                                   EDGE_SUCC (new_loop->header, 0)->dest))
 880         new_exit_e = EDGE_SUCC (new_loop->header, 0);
 881       else
 882         new_exit_e = EDGE_SUCC (new_loop->header, 1);
 883
 884       redirect_edge_and_branch_force (new_exit_e, loop->header);
 885       set_immediate_dominator (CDI_DOMINATORS, loop->header,
 886                                new_exit_e->src);
 887
 888       /* We have to add phi args to the loop->header here as coming
 889          from new_exit_e edge.  */
 890       for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
 891         {
 892           phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, entry_e);
 893           if (phi_arg)
 894             add_phi_arg (phi, phi_arg, new_exit_e);
 895         }
 896
 897       redirect_edge_and_branch_force (entry_e, new_loop->header);
 898       set_immediate_dominator (CDI_DOMINATORS, new_loop->header, preheader);
 899     }
 900
 901   free (new_bbs);
 902   free (bbs);
 903
 904   return new_loop;
 905 }
 906
 907
 908 /* Given the condition statement COND, put it as the last statement
 909    of GUARD_BB; EXIT_BB is the basic block to skip the loop;
 910    Assumes that this is the single exit of the guarded loop.
 911    Returns the skip edge.  */
 912
 913 static edge
 914 slpeel_add_loop_guard (basic_block guard_bb, tree cond, basic_block exit_bb,
 915                         basic_block dom_bb)
 916 {
 917   block_stmt_iterator bsi;
 918   edge new_e, enter_e;
 919   tree cond_stmt, then_label, else_label;
 920
 921   enter_e = EDGE_SUCC (guard_bb, 0);
 922   enter_e->flags &= ~EDGE_FALLTHRU;
 923   enter_e->flags |= EDGE_FALSE_VALUE;
 924   bsi = bsi_last (guard_bb);
 925
 926   then_label = build1 (GOTO_EXPR, void_type_node,
 927                        tree_block_label (exit_bb));
 928   else_label = build1 (GOTO_EXPR, void_type_node,
 929                        tree_block_label (enter_e->dest));
 930   cond_stmt = build3 (COND_EXPR, void_type_node, cond,
 931                      then_label, else_label);
 932   bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT);
 933   /* Add new edge to connect guard block to the merge/loop-exit block.  */
 934   new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);
 935   set_immediate_dominator (CDI_DOMINATORS, exit_bb, dom_bb);
 936   return new_e;
 937 }
 938
 939
 940 /* This function verifies that the following restrictions apply to LOOP:
 941    (1) it is innermost
 942    (2) it consists of exactly 2 basic blocks - header, and an empty latch.
 943    (3) it is single entry, single exit
 944    (4) its exit condition is the last stmt in the header
 945    (5) E is the entry/exit edge of LOOP.
 946  */
 947
 948 bool
 949 slpeel_can_duplicate_loop_p (struct loop *loop, edge e)
 950 {
 951   edge exit_e = loop->single_exit;
 952   edge entry_e = loop_preheader_edge (loop);
 953   tree orig_cond = get_loop_exit_condition (loop);
 954   block_stmt_iterator loop_exit_bsi = bsi_last (exit_e->src);
 955
 956   if (need_ssa_update_p ())
 957     return false;
 958
 959   if (loop->inner
 960       /* All loops have an outer scope; the only case loop->outer is NULL is for
 961          the function itself.  */
 962       || !loop->outer
 963       || loop->num_nodes != 2
 964       || !empty_block_p (loop->latch)
 965       || !loop->single_exit
 966       /* Verify that new loop exit condition can be trivially modified.  */
 967       || (!orig_cond || orig_cond != bsi_stmt (loop_exit_bsi))
 968       || (e != exit_e && e != entry_e))
 969     return false;
 970
 971   return true;
 972 }
 973
 974 #ifdef ENABLE_CHECKING
 975 void
 976 slpeel_verify_cfg_after_peeling (struct loop *first_loop,
 977                                  struct loop *second_loop)
 978 {
 979   basic_block loop1_exit_bb = first_loop->single_exit->dest;
 980   basic_block loop2_entry_bb = loop_preheader_edge (second_loop)->src;
 981   basic_block loop1_entry_bb = loop_preheader_edge (first_loop)->src;
 982
 983   /* A guard that controls whether the second_loop is to be executed or skipped
 984      is placed in first_loop->exit.  first_loopt->exit therefore has two
 985      successors - one is the preheader of second_loop, and the other is a bb
 986      after second_loop.
 987    */
 988   gcc_assert (EDGE_COUNT (loop1_exit_bb->succs) == 2);
 989
 990   /* 1. Verify that one of the successors of first_loopt->exit is the preheader
 991         of second_loop.  */
 992
 993   /* The preheader of new_loop is expected to have two predecessors:
 994      first_loop->exit and the block that precedes first_loop.  */
 995
 996   gcc_assert (EDGE_COUNT (loop2_entry_bb->preds) == 2
 997               && ((EDGE_PRED (loop2_entry_bb, 0)->src == loop1_exit_bb
 998                    && EDGE_PRED (loop2_entry_bb, 1)->src == loop1_entry_bb)
 999                || (EDGE_PRED (loop2_entry_bb, 1)->src ==  loop1_exit_bb
1000                    && EDGE_PRED (loop2_entry_bb, 0)->src == loop1_entry_bb)));
1001
1002   /* Verify that the other successor of first_loopt->exit is after the
1003      second_loop.  */
1004   /* TODO */
1005 }
1006 #endif
1007
1008 /* Function slpeel_tree_peel_loop_to_edge.
1009
1010    Peel the first (last) iterations of LOOP into a new prolog (epilog) loop
1011    that is placed on the entry (exit) edge E of LOOP. After this transformation
1012    we have two loops one after the other - first-loop iterates FIRST_NITERS
1013    times, and second-loop iterates the remainder NITERS - FIRST_NITERS times.
1014
1015    Input:
1016    - LOOP: the loop to be peeled.
1017    - E: the exit or entry edge of LOOP.
1018         If it is the entry edge, we peel the first iterations of LOOP. In this
1019         case first-loop is LOOP, and second-loop is the newly created loop.
1020         If it is the exit edge, we peel the last iterations of LOOP. In this
1021         case, first-loop is the newly created loop, and second-loop is LOOP.
1022    - NITERS: the number of iterations that LOOP iterates.
1023    - FIRST_NITERS: the number of iterations that the first-loop should iterate.
1024    - UPDATE_FIRST_LOOP_COUNT:  specified whether this function is responsible
1025         for updating the loop bound of the first-loop to FIRST_NITERS.  If it
1026         is false, the caller of this function may want to take care of this
1027         (this can be useful if we don't want new stmts added to first-loop).
1028
1029    Output:
1030    The function returns a pointer to the new loop-copy, or NULL if it failed
1031    to perform the transformation.
1032
1033    The function generates two if-then-else guards: one before the first loop,
1034    and the other before the second loop:
1035    The first guard is:
1036      if (FIRST_NITERS == 0) then skip the first loop,
1037      and go directly to the second loop.
1038    The second guard is:
1039      if (FIRST_NITERS == NITERS) then skip the second loop.
1040
1041    FORNOW only simple loops are supported (see slpeel_can_duplicate_loop_p).
1042    FORNOW the resulting code will not be in loop-closed-ssa form.
1043 */
1044
1045 struct loop*
1046 slpeel_tree_peel_loop_to_edge (struct loop *loop, struct loops *loops,
1047                                edge e, tree first_niters,
1048                                tree niters, bool update_first_loop_count)
1049 {
1050   struct loop *new_loop = NULL, *first_loop, *second_loop;
1051   edge skip_e;
1052   tree pre_condition;
1053   bitmap definitions;
1054   basic_block bb_before_second_loop, bb_after_second_loop;
1055   basic_block bb_before_first_loop;
1056   basic_block bb_between_loops;
1057   basic_block new_exit_bb;
1058   edge exit_e = loop->single_exit;
1059   LOC loop_loc;
1060
1061   if (!slpeel_can_duplicate_loop_p (loop, e))
1062     return NULL;
1063
1064   /* We have to initialize cfg_hooks. Then, when calling
1065    cfg_hooks->split_edge, the function tree_split_edge
1066    is actually called and, when calling cfg_hooks->duplicate_block,
1067    the function tree_duplicate_bb is called.  */
1068   tree_register_cfg_hooks ();
1069
1070
1071   /* 1. Generate a copy of LOOP and put it on E (E is the entry/exit of LOOP).
1072         Resulting CFG would be:
1073
1074         first_loop:
1075         do {
1076         } while ...
1077
1078         second_loop:
1079         do {
1080         } while ...
1081
1082         orig_exit_bb:
1083    */
1084
1085   if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, loops, e)))
1086     {
1087       loop_loc = find_loop_location (loop);
1088       if (dump_file && (dump_flags & TDF_DETAILS))
1089         {
1090           if (loop_loc != UNKNOWN_LOC)
1091             fprintf (dump_file, "\n%s:%d: note: ",
1092                      LOC_FILE (loop_loc), LOC_LINE (loop_loc));
1093           fprintf (dump_file, "tree_duplicate_loop_to_edge_cfg failed.\n");
1094         }
1095       return NULL;
1096     }
1097
1098   if (e == exit_e)
1099     {
1100       /* NEW_LOOP was placed after LOOP.  */
1101       first_loop = loop;
1102       second_loop = new_loop;
1103     }
1104   else
1105     {
1106       /* NEW_LOOP was placed before LOOP.  */
1107       first_loop = new_loop;
1108       second_loop = loop;
1109     }
1110
1111   definitions = ssa_names_to_replace ();
1112   slpeel_update_phis_for_duplicate_loop (loop, new_loop, e == exit_e);
1113   rename_variables_in_loop (new_loop);
1114
1115
1116   /* 2. Add the guard that controls whether the first loop is executed.
1117         Resulting CFG would be:
1118
1119         bb_before_first_loop:
1120         if (FIRST_NITERS == 0) GOTO bb_before_second_loop
1121                                GOTO first-loop
1122
1123         first_loop:
1124         do {
1125         } while ...
1126
1127         bb_before_second_loop:
1128
1129         second_loop:
1130         do {
1131         } while ...
1132
1133         orig_exit_bb:
1134    */
1135
1136   bb_before_first_loop = split_edge (loop_preheader_edge (first_loop));
1137   add_bb_to_loop (bb_before_first_loop, first_loop->outer);
1138   bb_before_second_loop = split_edge (first_loop->single_exit);
1139   add_bb_to_loop (bb_before_second_loop, first_loop->outer);
1140
1141   pre_condition =
1142     fold_build2 (LE_EXPR, boolean_type_node, first_niters, integer_zero_node);
1143   skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
1144                                   bb_before_second_loop, bb_before_first_loop);
1145   slpeel_update_phi_nodes_for_guard1 (skip_e, first_loop,
1146                                       first_loop == new_loop,
1147                                       &new_exit_bb, &definitions);
1148
1149
1150   /* 3. Add the guard that controls whether the second loop is executed.
1151         Resulting CFG would be:
1152
1153         bb_before_first_loop:
1154         if (FIRST_NITERS == 0) GOTO bb_before_second_loop (skip first loop)
1155                                GOTO first-loop
1156
1157         first_loop:
1158         do {
1159         } while ...
1160
1161         bb_between_loops:
1162         if (FIRST_NITERS == NITERS) GOTO bb_after_second_loop (skip second loop)
1163                                     GOTO bb_before_second_loop
1164
1165         bb_before_second_loop:
1166
1167         second_loop:
1168         do {
1169         } while ...
1170
1171         bb_after_second_loop:
1172
1173         orig_exit_bb:
1174    */
1175
1176   bb_between_loops = new_exit_bb;
1177   bb_after_second_loop = split_edge (second_loop->single_exit);
1178   add_bb_to_loop (bb_after_second_loop, second_loop->outer);
1179
1180   pre_condition =
1181         fold_build2 (EQ_EXPR, boolean_type_node, first_niters, niters);
1182   skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition,
1183                                   bb_after_second_loop, bb_before_first_loop);
1184   slpeel_update_phi_nodes_for_guard2 (skip_e, second_loop,
1185                                      second_loop == new_loop, &new_exit_bb);
1186
1187   /* 4. Make first-loop iterate FIRST_NITERS times, if requested.
1188    */
1189   if (update_first_loop_count)
1190     slpeel_make_loop_iterate_ntimes (first_loop, first_niters);
1191
1192   BITMAP_FREE (definitions);
1193   delete_update_ssa ();
1194
1195   return new_loop;
1196 }
1197
1198 /* Function vect_get_loop_location.
1199
1200    Extract the location of the loop in the source code.
1201    If the loop is not well formed for vectorization, an estimated
1202    location is calculated.
1203    Return the loop location if succeed and NULL if not.  */
1204
1205 LOC
1206 find_loop_location (struct loop *loop)
1207 {
1208   tree node = NULL_TREE;
1209   basic_block bb;
1210   block_stmt_iterator si;
1211
1212   if (!loop)
1213     return UNKNOWN_LOC;
1214
1215   node = get_loop_exit_condition (loop);
1216
1217   if (node && EXPR_P (node) && EXPR_HAS_LOCATION (node)
1218       && EXPR_FILENAME (node) && EXPR_LINENO (node))
1219     return EXPR_LOC (node);
1220
1221   /* If we got here the loop is probably not "well formed",
1222      try to estimate the loop location */
1223
1224   if (!loop->header)
1225     return UNKNOWN_LOC;
1226
1227   bb = loop->header;
1228
1229   for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1230     {
1231       node = bsi_stmt (si);
1232       if (node && EXPR_P (node) && EXPR_HAS_LOCATION (node))
1233         return EXPR_LOC (node);
1234     }
1235
1236   return UNKNOWN_LOC;
1237 }
1238
1239
1240 /*************************************************************************
1241   Vectorization Debug Information.
1242  *************************************************************************/
1243
1244 /* Function vect_set_verbosity_level.
1245
1246    Called from toplev.c upon detection of the
1247    -ftree-vectorizer-verbose=N option.  */
1248
1249 void
1250 vect_set_verbosity_level (const char *val)
1251 {
1252    unsigned int vl;
1253
1254    vl = atoi (val);
1255    if (vl < MAX_VERBOSITY_LEVEL)
1256      vect_verbosity_level = vl;
1257    else
1258      vect_verbosity_level = MAX_VERBOSITY_LEVEL - 1;
1259 }
1260
1261
1262 /* Function vect_set_dump_settings.
1263
1264    Fix the verbosity level of the vectorizer if the
1265    requested level was not set explicitly using the flag
1266    -ftree-vectorizer-verbose=N.
1267    Decide where to print the debugging information (dump_file/stderr).
1268    If the user defined the verbosity level, but there is no dump file,
1269    print to stderr, otherwise print to the dump file.  */
1270
1271 static void
1272 vect_set_dump_settings (void)
1273 {
1274   vect_dump = dump_file;
1275
1276   /* Check if the verbosity level was defined by the user:  */
1277   if (vect_verbosity_level != MAX_VERBOSITY_LEVEL)
1278     {
1279       /* If there is no dump file, print to stderr.  */
1280       if (!dump_file)
1281         vect_dump = stderr;
1282       return;
1283     }
1284
1285   /* User didn't specify verbosity level:  */
1286   if (dump_file && (dump_flags & TDF_DETAILS))
1287     vect_verbosity_level = REPORT_DETAILS;
1288   else if (dump_file && (dump_flags & TDF_STATS))
1289     vect_verbosity_level = REPORT_UNVECTORIZED_LOOPS;
1290   else
1291     vect_verbosity_level = REPORT_NONE;
1292
1293   gcc_assert (dump_file || vect_verbosity_level == REPORT_NONE);
1294 }
1295
1296
1297 /* Function debug_loop_details.
1298
1299    For vectorization debug dumps.  */
1300
1301 bool
1302 vect_print_dump_info (enum verbosity_levels vl, LOC loc)
1303 {
1304   if (vl > vect_verbosity_level)
1305     return false;
1306
1307   if (loc == UNKNOWN_LOC)
1308     fprintf (vect_dump, "\n%s:%d: note: ",
1309                  DECL_SOURCE_FILE (current_function_decl),
1310                  DECL_SOURCE_LINE (current_function_decl));
1311   else
1312     fprintf (vect_dump, "\n%s:%d: note: ", LOC_FILE (loc), LOC_LINE (loc));
1313
1314
1315   return true;
1316 }
1317
1318
1319 /*************************************************************************
1320   Vectorization Utilities.
1321  *************************************************************************/
1322
1323 /* Function new_stmt_vec_info.
1324
1325    Create and initialize a new stmt_vec_info struct for STMT.  */
1326
1327 stmt_vec_info
1328 new_stmt_vec_info (tree stmt, loop_vec_info loop_vinfo)
1329 {
1330   stmt_vec_info res;
1331   res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
1332
1333   STMT_VINFO_TYPE (res) = undef_vec_info_type;
1334   STMT_VINFO_STMT (res) = stmt;
1335   STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
1336   STMT_VINFO_RELEVANT_P (res) = 0;
1337   STMT_VINFO_LIVE_P (res) = 0;
1338   STMT_VINFO_VECTYPE (res) = NULL;
1339   STMT_VINFO_VEC_STMT (res) = NULL;
1340   STMT_VINFO_DATA_REF (res) = NULL;
1341   if (TREE_CODE (stmt) == PHI_NODE)
1342     STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
1343   else
1344     STMT_VINFO_DEF_TYPE (res) = vect_loop_def;
1345   STMT_VINFO_MEMTAG (res) = NULL;
1346   STMT_VINFO_PTR_INFO (res) = NULL;
1347   STMT_VINFO_SUBVARS (res) = NULL;
1348   STMT_VINFO_VECT_DR_BASE_ADDRESS (res) = NULL;
1349   STMT_VINFO_VECT_INIT_OFFSET (res) = NULL_TREE;
1350   STMT_VINFO_VECT_STEP (res) = NULL_TREE;
1351   STMT_VINFO_VECT_BASE_ALIGNED_P (res) = false;
1352   STMT_VINFO_VECT_MISALIGNMENT (res) = NULL_TREE;
1353   STMT_VINFO_SAME_ALIGN_REFS (res) = VEC_alloc (dr_p, heap, 5);
1354
1355   return res;
1356 }
1357
1358
1359 /* Function new_loop_vec_info.
1360
1361    Create and initialize a new loop_vec_info struct for LOOP, as well as
1362    stmt_vec_info structs for all the stmts in LOOP.  */
1363
1364 loop_vec_info
1365 new_loop_vec_info (struct loop *loop)
1366 {
1367   loop_vec_info res;
1368   basic_block *bbs;
1369   block_stmt_iterator si;
1370   unsigned int i;
1371
1372   res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));
1373
1374   bbs = get_loop_body (loop);
1375
1376   /* Create stmt_info for all stmts in the loop.  */
1377   for (i = 0; i < loop->num_nodes; i++)
1378     {
1379       basic_block bb = bbs[i];
1380       tree phi;
1381
1382       for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1383         {
1384           tree_ann_t ann = get_tree_ann (phi);
1385           set_stmt_info (ann, new_stmt_vec_info (phi, res));
1386         }
1387
1388       for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1389         {
1390           tree stmt = bsi_stmt (si);
1391           stmt_ann_t ann;
1392
1393           ann = stmt_ann (stmt);
1394           set_stmt_info ((tree_ann_t)ann, new_stmt_vec_info (stmt, res));
1395         }
1396     }
1397
1398   LOOP_VINFO_LOOP (res) = loop;
1399   LOOP_VINFO_BBS (res) = bbs;
1400   LOOP_VINFO_EXIT_COND (res) = NULL;
1401   LOOP_VINFO_NITERS (res) = NULL;
1402   LOOP_VINFO_VECTORIZABLE_P (res) = 0;
1403   LOOP_PEELING_FOR_ALIGNMENT (res) = 0;
1404   LOOP_VINFO_VECT_FACTOR (res) = 0;
1405   VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_WRITES (res), 20,
1406                            "loop_write_datarefs");
1407   VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_READS (res), 20,
1408                            "loop_read_datarefs");
1409   LOOP_VINFO_UNALIGNED_DR (res) = NULL;
1410   LOOP_VINFO_LOC (res) = UNKNOWN_LOC;
1411
1412   return res;
1413 }
1414
1415
1416 /* Function destroy_loop_vec_info.
1417
1418    Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the
1419    stmts in the loop.  */
1420
1421 void
1422 destroy_loop_vec_info (loop_vec_info loop_vinfo)
1423 {
1424   struct loop *loop;
1425   basic_block *bbs;
1426   int nbbs;
1427   block_stmt_iterator si;
1428   int j;
1429
1430   if (!loop_vinfo)
1431     return;
1432
1433   loop = LOOP_VINFO_LOOP (loop_vinfo);
1434
1435   bbs = LOOP_VINFO_BBS (loop_vinfo);
1436   nbbs = loop->num_nodes;
1437
1438   for (j = 0; j < nbbs; j++)
1439     {
1440       basic_block bb = bbs[j];
1441       tree phi;
1442       stmt_vec_info stmt_info;
1443
1444       for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1445         {
1446           tree_ann_t ann = get_tree_ann (phi);
1447
1448           stmt_info = vinfo_for_stmt (phi);
1449           free (stmt_info);
1450           set_stmt_info (ann, NULL);
1451         }
1452
1453       for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1454         {
1455           tree stmt = bsi_stmt (si);
1456           stmt_ann_t ann = stmt_ann (stmt);
1457           stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1458
1459           if (stmt_info)
1460             {
1461               VEC_free (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
1462               free (stmt_info);
1463               set_stmt_info ((tree_ann_t)ann, NULL);
1464             }
1465         }
1466     }
1467
1468   free (LOOP_VINFO_BBS (loop_vinfo));
1469   varray_clear (LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
1470   varray_clear (LOOP_VINFO_DATAREF_READS (loop_vinfo));
1471
1472   free (loop_vinfo);
1473 }
1474
1475
1476 /* Function vect_strip_conversions
1477
1478    Strip conversions that don't narrow the mode.  */
1479
1480 tree
1481 vect_strip_conversion (tree expr)
1482 {
1483   tree to, ti, oprnd0;
1484
1485   while (TREE_CODE (expr) == NOP_EXPR || TREE_CODE (expr) == CONVERT_EXPR)
1486     {
1487       to = TREE_TYPE (expr);
1488       oprnd0 = TREE_OPERAND (expr, 0);
1489       ti = TREE_TYPE (oprnd0);
1490
1491       if (!INTEGRAL_TYPE_P (to) || !INTEGRAL_TYPE_P (ti))
1492         return NULL_TREE;
1493       if (GET_MODE_SIZE (TYPE_MODE (to)) < GET_MODE_SIZE (TYPE_MODE (ti)))
1494         return NULL_TREE;
1495
1496       expr = oprnd0;
1497     }
1498   return expr;
1499 }
1500
1501
1502 /* Function vect_force_dr_alignment_p.
1503
1504    Returns whether the alignment of a DECL can be forced to be aligned
1505    on ALIGNMENT bit boundary.  */
1506
1507 bool
1508 vect_can_force_dr_alignment_p (tree decl, unsigned int alignment)
1509 {
1510   if (TREE_CODE (decl) != VAR_DECL)
1511     return false;
1512
1513   if (DECL_EXTERNAL (decl))
1514     return false;
1515
1516   if (TREE_ASM_WRITTEN (decl))
1517     return false;
1518
1519   if (TREE_STATIC (decl))
1520     return (alignment <= MAX_OFILE_ALIGNMENT);
1521   else
1522     /* This is not 100% correct.  The absolute correct stack alignment
1523        is STACK_BOUNDARY.  We're supposed to hope, but not assume, that
1524        PREFERRED_STACK_BOUNDARY is honored by all translation units.
1525        However, until someone implements forced stack alignment, SSE
1526        isn't really usable without this.  */
1527     return (alignment <= PREFERRED_STACK_BOUNDARY);
1528 }
1529
1530
1531 /* Function get_vectype_for_scalar_type.
1532
1533    Returns the vector type corresponding to SCALAR_TYPE as supported
1534    by the target.  */
1535
1536 tree
1537 get_vectype_for_scalar_type (tree scalar_type)
1538 {
1539   enum machine_mode inner_mode = TYPE_MODE (scalar_type);
1540   int nbytes = GET_MODE_SIZE (inner_mode);
1541   int nunits;
1542   tree vectype;
1543
1544   if (nbytes == 0 || nbytes >= UNITS_PER_SIMD_WORD)
1545     return NULL_TREE;
1546
1547   /* FORNOW: Only a single vector size per target (UNITS_PER_SIMD_WORD)
1548      is expected.  */
1549   nunits = UNITS_PER_SIMD_WORD / nbytes;
1550
1551   vectype = build_vector_type (scalar_type, nunits);
1552   if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1553     {
1554       fprintf (vect_dump, "get vectype with %d units of type ", nunits);
1555       print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
1556     }
1557
1558   if (!vectype)
1559     return NULL_TREE;
1560
1561   if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1562     {
1563       fprintf (vect_dump, "vectype: ");
1564       print_generic_expr (vect_dump, vectype, TDF_SLIM);
1565     }
1566
1567   if (!VECTOR_MODE_P (TYPE_MODE (vectype))
1568       && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
1569     {
1570       if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1571         fprintf (vect_dump, "mode not supported by target.");
1572       return NULL_TREE;
1573     }
1574
1575   return vectype;
1576 }
1577
1578
1579 /* Function vect_supportable_dr_alignment
1580
1581    Return whether the data reference DR is supported with respect to its
1582    alignment.  */
1583
1584 enum dr_alignment_support
1585 vect_supportable_dr_alignment (struct data_reference *dr)
1586 {
1587   tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr)));
1588   enum machine_mode mode = (int) TYPE_MODE (vectype);
1589
1590   if (aligned_access_p (dr))
1591     return dr_aligned;
1592
1593   /* Possibly unaligned access.  */
1594
1595   if (DR_IS_READ (dr))
1596     {
1597       if (vec_realign_load_optab->handlers[mode].insn_code != CODE_FOR_nothing
1598           && (!targetm.vectorize.builtin_mask_for_load
1599               || targetm.vectorize.builtin_mask_for_load ()))
1600         return dr_unaligned_software_pipeline;
1601
1602       if (movmisalign_optab->handlers[mode].insn_code != CODE_FOR_nothing)
1603         /* Can't software pipeline the loads, but can at least do them.  */
1604         return dr_unaligned_supported;
1605     }
1606
1607   /* Unsupported.  */
1608   return dr_unaligned_unsupported;
1609 }
1610
1611
1612 /* Function vect_is_simple_use.
1613
1614    Input:
1615    LOOP - the loop that is being vectorized.
1616    OPERAND - operand of a stmt in LOOP.
1617    DEF - the defining stmt in case OPERAND is an SSA_NAME.
1618
1619    Returns whether a stmt with OPERAND can be vectorized.
1620    Supportable operands are constants, loop invariants, and operands that are
1621    defined by the current iteration of the loop. Unsupportable operands are
1622    those that are defined by a previous iteration of the loop (as is the case
1623    in reduction/induction computations).  */
1624
1625 bool
1626 vect_is_simple_use (tree operand, loop_vec_info loop_vinfo, tree *def_stmt,
1627                     tree *def, enum vect_def_type *dt)
1628 {
1629   basic_block bb;
1630   stmt_vec_info stmt_vinfo;
1631   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1632
1633   *def_stmt = NULL_TREE;
1634   *def = NULL_TREE;
1635
1636   if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1637     {
1638       fprintf (vect_dump, "vect_is_simple_use: operand ");
1639       print_generic_expr (vect_dump, operand, TDF_SLIM);
1640     }
1641
1642   if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
1643     {
1644       *dt = vect_constant_def;
1645       return true;
1646     }
1647
1648   if (TREE_CODE (operand) != SSA_NAME)
1649     {
1650       if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1651         fprintf (vect_dump, "not ssa-name.");
1652       return false;
1653     }
1654
1655   *def_stmt = SSA_NAME_DEF_STMT (operand);
1656   if (*def_stmt == NULL_TREE )
1657     {
1658       if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1659         fprintf (vect_dump, "no def_stmt.");
1660       return false;
1661     }
1662
1663   if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1664     {
1665       fprintf (vect_dump, "def_stmt: ");
1666       print_generic_expr (vect_dump, *def_stmt, TDF_SLIM);
1667     }
1668
1669   /* empty stmt is expected only in case of a function argument.
1670      (Otherwise - we expect a phi_node or a modify_expr).  */
1671   if (IS_EMPTY_STMT (*def_stmt))
1672     {
1673       tree arg = TREE_OPERAND (*def_stmt, 0);
1674       if (TREE_CODE (arg) == INTEGER_CST || TREE_CODE (arg) == REAL_CST)
1675         {
1676           *def = operand;
1677           *dt = vect_invariant_def;
1678           return true;
1679         }
1680
1681       if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1682         fprintf (vect_dump, "Unexpected empty stmt.");
1683       return false;
1684     }
1685
1686   bb = bb_for_stmt (*def_stmt);
1687   if (!flow_bb_inside_loop_p (loop, bb))
1688     *dt = vect_invariant_def;
1689   else
1690     {
1691       stmt_vinfo = vinfo_for_stmt (*def_stmt);
1692       *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
1693     }
1694
1695   if (*dt == vect_unknown_def_type)
1696     {
1697       if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1698         fprintf (vect_dump, "Unsupported pattern.");
1699       return false;
1700     }
1701
1702   /* stmts inside the loop that have been identified as performing
1703      a reduction operation cannot have uses in the loop.  */
1704   if (*dt == vect_reduction_def && TREE_CODE (*def_stmt) != PHI_NODE)
1705     {
1706       if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1707         fprintf (vect_dump, "reduction used in loop.");
1708       return false;
1709     }
1710
1711   if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1712     fprintf (vect_dump, "type of def: %d.",*dt);
1713
1714   switch (TREE_CODE (*def_stmt))
1715     {
1716     case PHI_NODE:
1717       *def = PHI_RESULT (*def_stmt);
1718       gcc_assert (*dt == vect_induction_def || *dt == vect_reduction_def
1719                   || *dt == vect_invariant_def);
1720       break;
1721
1722     case MODIFY_EXPR:
1723       *def = TREE_OPERAND (*def_stmt, 0);
1724       gcc_assert (*dt == vect_loop_def || *dt == vect_invariant_def);
1725       break;
1726
1727     default:
1728       if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1729         fprintf (vect_dump, "unsupported defining stmt: ");
1730       return false;
1731     }
1732
1733   if (*dt == vect_induction_def)
1734     {
1735       if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1736         fprintf (vect_dump, "induction not supported.");
1737       return false;
1738     }
1739
1740   return true;
1741 }
1742
1743
1744 /* Function reduction_code_for_scalar_code
1745
1746    Input:
1747    CODE - tree_code of a reduction operations.
1748
1749    Output:
1750    REDUC_CODE - the corresponding tree-code to be used to reduce the
1751       vector of partial results into a single scalar result (which
1752       will also reside in a vector).
1753
1754    Return TRUE if a corresponding REDUC_CODE was found, FALSE otherwise.  */
1755
1756 bool
1757 reduction_code_for_scalar_code (enum tree_code code,
1758                                 enum tree_code *reduc_code)
1759 {
1760   switch (code)
1761   {
1762   case MAX_EXPR:
1763     *reduc_code = REDUC_MAX_EXPR;
1764     return true;
1765
1766   case MIN_EXPR:
1767     *reduc_code = REDUC_MIN_EXPR;
1768     return true;
1769
1770   case PLUS_EXPR:
1771     *reduc_code = REDUC_PLUS_EXPR;
1772     return true;
1773
1774   default:
1775     return false;
1776   }
1777 }
1778
1779
1780 /* Function vect_is_simple_reduction
1781
1782    Detect a cross-iteration def-use cucle that represents a simple
1783    reduction computation. We look for the following pattern:
1784
1785    loop_header:
1786      a1 = phi < a0, a2 >
1787      a3 = ...
1788      a2 = operation (a3, a1)
1789
1790    such that:
1791    1. operation is commutative and associative and it is safe to
1792       change the the order of the computation.
1793    2. no uses for a2 in the loop (a2 is used out of the loop)
1794    3. no uses of a1 in the loop besides the reduction operation.
1795
1796    Condition 1 is tested here.
1797    Conditions 2,3 are tested in vect_mark_stmts_to_be_vectorized.  */
1798
1799 tree
1800 vect_is_simple_reduction (struct loop *loop ATTRIBUTE_UNUSED,
1801                           tree phi ATTRIBUTE_UNUSED)
1802 {
1803   edge latch_e = loop_latch_edge (loop);
1804   tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
1805   tree def_stmt, def1, def2;
1806   enum tree_code code;
1807   int op_type;
1808   tree operation, op1, op2;
1809   tree type;
1810
1811   if (TREE_CODE (loop_arg) != SSA_NAME)
1812     {
1813       if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1814         {
1815           fprintf (vect_dump, "reduction: not ssa_name: ");
1816           print_generic_expr (vect_dump, loop_arg, TDF_SLIM);
1817         }
1818       return NULL_TREE;
1819     }
1820
1821   def_stmt = SSA_NAME_DEF_STMT (loop_arg);
1822   if (!def_stmt)
1823     {
1824       if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1825         fprintf (vect_dump, "reduction: no def_stmt.");
1826       return NULL_TREE;
1827     }
1828
1829   if (TREE_CODE (def_stmt) != MODIFY_EXPR)
1830     {
1831       if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1832         {
1833           print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
1834         }
1835       return NULL_TREE;
1836     }
1837
1838   operation = TREE_OPERAND (def_stmt, 1);
1839   code = TREE_CODE (operation);
1840   if (!commutative_tree_code (code) || !associative_tree_code (code))
1841     {
1842       if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1843         {
1844           fprintf (vect_dump, "reduction: not commutative/associative: ");
1845           print_generic_expr (vect_dump, operation, TDF_SLIM);
1846         }
1847       return NULL_TREE;
1848     }
1849
1850   op_type = TREE_CODE_LENGTH (code);
1851   if (op_type != binary_op)
1852     {
1853       if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1854         {
1855           fprintf (vect_dump, "reduction: not binary operation: ");
1856           print_generic_expr (vect_dump, operation, TDF_SLIM);
1857         }
1858       return NULL_TREE;
1859     }
1860
1861   op1 = TREE_OPERAND (operation, 0);
1862   op2 = TREE_OPERAND (operation, 1);
1863   if (TREE_CODE (op1) != SSA_NAME || TREE_CODE (op2) != SSA_NAME)
1864     {
1865       if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1866         {
1867           fprintf (vect_dump, "reduction: uses not ssa_names: ");
1868           print_generic_expr (vect_dump, operation, TDF_SLIM);
1869         }
1870       return NULL_TREE;
1871     }
1872
1873   /* Check that it's ok to change the order of the computation.  */
1874   type = TREE_TYPE (operation);
1875   if (TYPE_MAIN_VARIANT (type) != TYPE_MAIN_VARIANT (TREE_TYPE (op1))
1876       || TYPE_MAIN_VARIANT (type) != TYPE_MAIN_VARIANT (TREE_TYPE (op2)))
1877     {
1878       if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1879         {
1880           fprintf (vect_dump, "reduction: multiple types: operation type: ");
1881           print_generic_expr (vect_dump, type, TDF_SLIM);
1882           fprintf (vect_dump, ", operands types: ");
1883           print_generic_expr (vect_dump, TREE_TYPE (op1), TDF_SLIM);
1884           fprintf (vect_dump, ",");
1885           print_generic_expr (vect_dump, TREE_TYPE (op2), TDF_SLIM);
1886         }
1887       return NULL_TREE;
1888     }
1889
1890   /* CHECKME: check for !flag_finite_math_only too?  */
1891   if (SCALAR_FLOAT_TYPE_P (type) && !flag_unsafe_math_optimizations)
1892     {
1893       /* Changing the order of operations changes the sematics.  */
1894       if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1895         {
1896           fprintf (vect_dump, "reduction: unsafe fp math optimization: ");
1897           print_generic_expr (vect_dump, operation, TDF_SLIM);
1898         }
1899       return NULL_TREE;
1900     }
1901   else if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type) && flag_trapv)
1902     {
1903       /* Changing the order of operations changes the sematics.  */
1904       if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1905         {
1906           fprintf (vect_dump, "reduction: unsafe int math optimization: ");
1907           print_generic_expr (vect_dump, operation, TDF_SLIM);
1908         }
1909       return NULL_TREE;
1910     }
1911
1912   /* reduction is safe. we're dealing with one of the following:
1913      1) integer arithmetic and no trapv
1914      2) floating point arithmetic, and special flags permit this optimization.
1915    */
1916   def1 = SSA_NAME_DEF_STMT (op1);
1917   def2 = SSA_NAME_DEF_STMT (op2);
1918   if (!def1 || !def2)
1919     {
1920       if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1921         {
1922           fprintf (vect_dump, "reduction: no defs for operands: ");
1923           print_generic_expr (vect_dump, operation, TDF_SLIM);
1924         }
1925       return NULL_TREE;
1926     }
1927
1928   if (TREE_CODE (def1) == MODIFY_EXPR
1929       && flow_bb_inside_loop_p (loop, bb_for_stmt (def1))
1930       && def2 == phi)
1931     {
1932       if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1933         {
1934           fprintf (vect_dump, "detected reduction:");
1935           print_generic_expr (vect_dump, operation, TDF_SLIM);
1936         }
1937       return def_stmt;
1938     }
1939   else if (TREE_CODE (def2) == MODIFY_EXPR
1940       && flow_bb_inside_loop_p (loop, bb_for_stmt (def2))
1941       && def1 == phi)
1942     {
1943       use_operand_p use;
1944       ssa_op_iter iter;
1945
1946       /* Swap operands (just for simplicity - so that the rest of the code
1947          can assume that the reduction variable is always the last (second)
1948          argument).  */
1949       if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1950         {
1951           fprintf (vect_dump, "detected reduction: need to swap operands:");
1952           print_generic_expr (vect_dump, operation, TDF_SLIM);
1953         }
1954
1955       /* CHECKME */
1956       FOR_EACH_SSA_USE_OPERAND (use, def_stmt, iter, SSA_OP_USE)
1957         {
1958           tree tuse = USE_FROM_PTR (use);
1959           if (tuse == op1)
1960             SET_USE (use, op2);
1961           else if (tuse == op2)
1962             SET_USE (use, op1);
1963         }
1964       return def_stmt;
1965     }
1966   else
1967     {
1968       if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1969         {
1970           fprintf (vect_dump, "reduction: unknown pattern.");
1971           print_generic_expr (vect_dump, operation, TDF_SLIM);
1972         }
1973       return NULL_TREE;
1974     }
1975 }
1976
1977
1978 /* Function vect_is_simple_iv_evolution.
1979
1980    FORNOW: A simple evolution of an induction variables in the loop is
1981    considered a polynomial evolution with constant step.  */
1982
1983 bool
1984 vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
1985                              tree * step)
1986 {
1987   tree init_expr;
1988   tree step_expr;
1989
1990   tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
1991
1992   /* When there is no evolution in this loop, the evolution function
1993      is not "simple".  */
1994   if (evolution_part == NULL_TREE)
1995     return false;
1996
1997   /* When the evolution is a polynomial of degree >= 2
1998      the evolution function is not "simple".  */
1999   if (tree_is_chrec (evolution_part))
2000     return false;
2001
2002   step_expr = evolution_part;
2003   init_expr = unshare_expr (initial_condition_in_loop_num (access_fn,
2004                                                            loop_nb));
2005
2006   if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2007     {
2008       fprintf (vect_dump, "step: ");
2009       print_generic_expr (vect_dump, step_expr, TDF_SLIM);
2010       fprintf (vect_dump, ",  init: ");
2011       print_generic_expr (vect_dump, init_expr, TDF_SLIM);
2012     }
2013
2014   *init = init_expr;
2015   *step = step_expr;
2016
2017   if (TREE_CODE (step_expr) != INTEGER_CST)
2018     {
2019       if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2020         fprintf (vect_dump, "step unknown.");
2021       return false;
2022     }
2023
2024   return true;
2025 }
2026
2027
2028 /* Function vectorize_loops.
2029
2030    Entry Point to loop vectorization phase.  */
2031
2032 void
2033 vectorize_loops (struct loops *loops)
2034 {
2035   unsigned int i;
2036   unsigned int num_vectorized_loops = 0;
2037
2038   /* Fix the verbosity level if not defined explicitly by the user.  */
2039   vect_set_dump_settings ();
2040
2041   /*  ----------- Analyze loops. -----------  */
2042
2043   /* If some loop was duplicated, it gets bigger number
2044      than all previously defined loops. This fact allows us to run
2045      only over initial loops skipping newly generated ones.  */
2046   vect_loops_num = loops->num;
2047   for (i = 1; i < vect_loops_num; i++)
2048     {
2049       loop_vec_info loop_vinfo;
2050       struct loop *loop = loops->parray[i];
2051
2052       if (!loop)
2053         continue;
2054
2055       loop_vinfo = vect_analyze_loop (loop);
2056       loop->aux = loop_vinfo;
2057
2058       if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
2059         continue;
2060
2061       vect_transform_loop (loop_vinfo, loops);
2062       num_vectorized_loops++;
2063     }
2064
2065   if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS, UNKNOWN_LOC))
2066     fprintf (vect_dump, "vectorized %u loops in function.\n",
2067              num_vectorized_loops);
2068
2069   /*  ----------- Finalize. -----------  */
2070
2071   for (i = 1; i < vect_loops_num; i++)
2072     {
2073       struct loop *loop = loops->parray[i];
2074       loop_vec_info loop_vinfo;
2075
2076       if (!loop)
2077         continue;
2078       loop_vinfo = loop->aux;
2079       destroy_loop_vec_info (loop_vinfo);
2080       loop->aux = NULL;
2081     }
2082 }