gcc/tree-vectorizer.c

   1 /* Loop Vectorization
   2    Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
   3    Contributed by Dorit Naishlos <dorit@il.ibm.com>
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 2, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING.  If not, write to the Free
  19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
  20 02111-1307, USA.  */
  21
  22 /* Loop Vectorization Pass.
  23
  24    This pass tries to vectorize loops. This first implementation focuses on
  25    simple inner-most loops, with no conditional control flow, and a set of
  26    simple operations which vector form can be expressed using existing
  27    tree codes (PLUS, MULT etc).
  28
  29    For example, the vectorizer transforms the following simple loop:
  30
  31         short a[N]; short b[N]; short c[N]; int i;
  32
  33         for (i=0; i<N; i++){
  34           a[i] = b[i] + c[i];
  35         }
  36
  37    as if it was manually vectorized by rewriting the source code into:
  38
  39         typedef int __attribute__((mode(V8HI))) v8hi;
  40         short a[N];  short b[N]; short c[N];   int i;
  41         v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
  42         v8hi va, vb, vc;
  43
  44         for (i=0; i<N/8; i++){
  45           vb = pb[i];
  46           vc = pc[i];
  47           va = vb + vc;
  48           pa[i] = va;
  49         }
  50
  51         The main entry to this pass is vectorize_loops(), in which
  52    the vectorizer applies a set of analyses on a given set of loops,
  53    followed by the actual vectorization transformation for the loops that
  54    had successfully passed the analysis phase.
  55
  56         Throughout this pass we make a distinction between two types of
  57    data: scalars (which are represented by SSA_NAMES), and memory references
  58    ("data-refs"). These two types of data require different handling both
  59    during analysis and transformation. The types of data-refs that the
  60    vectorizer currently supports are ARRAY_REFS which base is an array DECL
  61    (not a pointer), and INDIRECT_REFS through pointers; both array and pointer
  62    accesses are required to have a  simple (consecutive) access pattern.
  63
  64    Analysis phase:
  65    ===============
  66         The driver for the analysis phase is vect_analyze_loop_nest().
  67    It applies a set of analyses, some of which rely on the scalar evolution
  68    analyzer (scev) developed by Sebastian Pop.
  69
  70         During the analysis phase the vectorizer records some information
  71    per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
  72    loop, as well as general information about the loop as a whole, which is
  73    recorded in a "loop_vec_info" struct attached to each loop.
  74
  75    Transformation phase:
  76    =====================
  77         The loop transformation phase scans all the stmts in the loop, and
  78    creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
  79    the loop that needs to be vectorized. It insert the vector code sequence
  80    just before the scalar stmt S, and records a pointer to the vector code
  81    in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
  82    attached to S). This pointer will be used for the vectorization of following
  83    stmts which use the def of stmt S. Stmt S is removed if it writes to memory;
  84    otherwise, we rely on dead code elimination for removing it.
  85
  86         For example, say stmt S1 was vectorized into stmt VS1:
  87
  88    VS1: vb = px[i];
  89    S1:  b = x[i];    STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
  90    S2:  a = b;
  91
  92    To vectorize stmt S2, the vectorizer first finds the stmt that defines
  93    the operand 'b' (S1), and gets the relevant vector def 'vb' from the
  94    vector stmt VS1 pointed by STMT_VINFO_VEC_STMT (stmt_info (S1)). The
  95    resulting sequence would be:
  96
  97    VS1: vb = px[i];
  98    S1:  b = x[i];       STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
  99    VS2: va = vb;
 100    S2:  a = b;          STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2
 101
 102         Operands that are not SSA_NAMEs, are data-refs that appear in
 103    load/store operations (like 'x[i]' in S1), and are handled differently.
 104
 105    Target modeling:
 106    =================
 107         Currently the only target specific information that is used is the
 108    size of the vector (in bytes) - "UNITS_PER_SIMD_WORD". Targets that can
 109    support different sizes of vectors, for now will need to specify one value
 110    for "UNITS_PER_SIMD_WORD". More flexibility will be added in the future.
 111
 112         Since we only vectorize operations which vector form can be
 113    expressed using existing tree codes, to verify that an operation is
 114    supported, the vectorizer checks the relevant optab at the relevant
 115    machine_mode (e.g, add_optab->handlers[(int) V8HImode].insn_code). If
 116    the value found is CODE_FOR_nothing, then there's no target support, and
 117    we can't vectorize the stmt.
 118
 119    For additional information on this project see:
 120    http://gcc.gnu.org/projects/tree-ssa/vectorization.html
 121 */
 122
 123 #include "config.h"
 124 #include "system.h"
 125 #include "coretypes.h"
 126 #include "tm.h"
 127 #include "ggc.h"
 128 #include "tree.h"
 129 #include "target.h"
 130 #include "rtl.h"
 131 #include "basic-block.h"
 132 #include "diagnostic.h"
 133 #include "tree-flow.h"
 134 #include "tree-dump.h"
 135 #include "timevar.h"
 136 #include "cfgloop.h"
 137 #include "cfglayout.h"
 138 #include "expr.h"
 139 #include "optabs.h"
 140 #include "toplev.h"
 141 #include "tree-chrec.h"
 142 #include "tree-data-ref.h"
 143 #include "tree-scalar-evolution.h"
 144 #include "input.h"
 145 #include "tree-vectorizer.h"
 146 #include "tree-pass.h"
 147
 148 /*************************************************************************
 149   Simple Loop Peeling Utilities
 150  *************************************************************************/
 151 static struct loop *slpeel_tree_duplicate_loop_to_edge_cfg
 152   (struct loop *, struct loops *, edge);
 153 static void slpeel_update_phis_for_duplicate_loop
 154   (struct loop *, struct loop *, bool after);
 155 static void slpeel_update_phi_nodes_for_guard1
 156   (edge, struct loop *, bool, basic_block *, bitmap *);
 157 static void slpeel_update_phi_nodes_for_guard2
 158   (edge, struct loop *, bool, basic_block *);
 159 static edge slpeel_add_loop_guard (basic_block, tree, basic_block, basic_block);
 160
 161 static void rename_use_op (use_operand_p);
 162 static void rename_variables_in_bb (basic_block);
 163 static void rename_variables_in_loop (struct loop *);
 164
 165 /*************************************************************************
 166   General Vectorization Utilities
 167  *************************************************************************/
 168 static void vect_set_dump_settings (void);
 169
 170 /* vect_dump will be set to stderr or dump_file if exist.  */
 171 FILE *vect_dump;
 172
 173 /* vect_verbosity_level set to an invalid value
 174    to mark that it's uninitialized.  */
 175 enum verbosity_levels vect_verbosity_level = MAX_VERBOSITY_LEVEL;
 176
 177 /* Number of loops, at the beginning of vectorization.  */
 178 unsigned int vect_loops_num;
 179 \f
 180 /*************************************************************************
 181   Simple Loop Peeling Utilities
 182
 183   Utilities to support loop peeling for vectorization purposes.
 184  *************************************************************************/
 185
 186
 187 /* Renames the use *OP_P.  */
 188
 189 static void
 190 rename_use_op (use_operand_p op_p)
 191 {
 192   tree new_name;
 193
 194   if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
 195     return;
 196
 197   new_name = get_current_def (USE_FROM_PTR (op_p));
 198
 199   /* Something defined outside of the loop.  */
 200   if (!new_name)
 201     return;
 202
 203   /* An ordinary ssa name defined in the loop.  */
 204
 205   SET_USE (op_p, new_name);
 206 }
 207
 208
 209 /* Renames the variables in basic block BB.  */
 210
 211 static void
 212 rename_variables_in_bb (basic_block bb)
 213 {
 214   tree phi;
 215   block_stmt_iterator bsi;
 216   tree stmt;
 217   use_operand_p use_p;
 218   ssa_op_iter iter;
 219   edge e;
 220   edge_iterator ei;
 221   struct loop *loop = bb->loop_father;
 222
 223   for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
 224     {
 225       stmt = bsi_stmt (bsi);
 226       FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter,
 227                                  (SSA_OP_ALL_USES | SSA_OP_ALL_KILLS))
 228         rename_use_op (use_p);
 229     }
 230
 231   FOR_EACH_EDGE (e, ei, bb->succs)
 232     {
 233       if (!flow_bb_inside_loop_p (loop, e->dest))
 234         continue;
 235       for (phi = phi_nodes (e->dest); phi; phi = PHI_CHAIN (phi))
 236         rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e));
 237     }
 238 }
 239
 240
 241 /* Renames variables in new generated LOOP.  */
 242
 243 static void
 244 rename_variables_in_loop (struct loop *loop)
 245 {
 246   unsigned i;
 247   basic_block *bbs;
 248
 249   bbs = get_loop_body (loop);
 250
 251   for (i = 0; i < loop->num_nodes; i++)
 252     rename_variables_in_bb (bbs[i]);
 253
 254   free (bbs);
 255 }
 256
 257
 258 /* Update the PHI nodes of NEW_LOOP.
 259
 260    NEW_LOOP is a duplicate of ORIG_LOOP.
 261    AFTER indicates whether NEW_LOOP executes before or after ORIG_LOOP:
 262    AFTER is true if NEW_LOOP executes after ORIG_LOOP, and false if it
 263    executes before it.  */
 264
 265 static void
 266 slpeel_update_phis_for_duplicate_loop (struct loop *orig_loop,
 267                                        struct loop *new_loop, bool after)
 268 {
 269   tree new_ssa_name;
 270   tree phi_new, phi_orig;
 271   tree def;
 272   edge orig_loop_latch = loop_latch_edge (orig_loop);
 273   edge orig_entry_e = loop_preheader_edge (orig_loop);
 274   edge new_loop_exit_e = new_loop->single_exit;
 275   edge new_loop_entry_e = loop_preheader_edge (new_loop);
 276   edge entry_arg_e = (after ? orig_loop_latch : orig_entry_e);
 277
 278   /*
 279      step 1. For each loop-header-phi:
 280              Add the first phi argument for the phi in NEW_LOOP
 281             (the one associated with the entry of NEW_LOOP)
 282
 283      step 2. For each loop-header-phi:
 284              Add the second phi argument for the phi in NEW_LOOP
 285             (the one associated with the latch of NEW_LOOP)
 286
 287      step 3. Update the phis in the successor block of NEW_LOOP.
 288
 289         case 1: NEW_LOOP was placed before ORIG_LOOP:
 290                 The successor block of NEW_LOOP is the header of ORIG_LOOP.
 291                 Updating the phis in the successor block can therefore be done
 292                 along with the scanning of the loop header phis, because the
 293                 header blocks of ORIG_LOOP and NEW_LOOP have exactly the same
 294                 phi nodes, organized in the same order.
 295
 296         case 2: NEW_LOOP was placed after ORIG_LOOP:
 297                 The successor block of NEW_LOOP is the original exit block of
 298                 ORIG_LOOP - the phis to be updated are the loop-closed-ssa phis.
 299                 We postpone updating these phis to a later stage (when
 300                 loop guards are added).
 301    */
 302
 303
 304   /* Scan the phis in the headers of the old and new loops
 305      (they are organized in exactly the same order).  */
 306
 307   for (phi_new = phi_nodes (new_loop->header),
 308        phi_orig = phi_nodes (orig_loop->header);
 309        phi_new && phi_orig;
 310        phi_new = PHI_CHAIN (phi_new), phi_orig = PHI_CHAIN (phi_orig))
 311     {
 312       /* step 1.  */
 313       def = PHI_ARG_DEF_FROM_EDGE (phi_orig, entry_arg_e);
 314       add_phi_arg (phi_new, def, new_loop_entry_e);
 315
 316       /* step 2.  */
 317       def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_loop_latch);
 318       if (TREE_CODE (def) != SSA_NAME)
 319         continue;
 320
 321       new_ssa_name = get_current_def (def);
 322       if (!new_ssa_name)
 323         /* Something defined outside of the loop.  */
 324         continue;
 325
 326       /* An ordinary ssa name defined in the loop.  */
 327       add_phi_arg (phi_new, new_ssa_name, loop_latch_edge (new_loop));
 328
 329       /* step 3 (case 1).  */
 330       if (!after)
 331         {
 332           gcc_assert (new_loop_exit_e == orig_entry_e);
 333           SET_PHI_ARG_DEF (phi_orig,
 334                            new_loop_exit_e->dest_idx,
 335                            new_ssa_name);
 336         }
 337     }
 338 }
 339
 340
 341 /* Update PHI nodes for a guard of the LOOP.
 342
 343    Input:
 344    - LOOP, GUARD_EDGE: LOOP is a loop for which we added guard code that
 345         controls whether LOOP is to be executed.  GUARD_EDGE is the edge that
 346         originates from the guard-bb, skips LOOP and reaches the (unique) exit
 347         bb of LOOP.  This loop-exit-bb is an empty bb with one successor.
 348         We denote this bb NEW_MERGE_BB because before the guard code was added
 349         it had a single predecessor (the LOOP header), and now it became a merge
 350         point of two paths - the path that ends with the LOOP exit-edge, and
 351         the path that ends with GUARD_EDGE.
 352    - NEW_EXIT_BB: New basic block that is added by this function between LOOP
 353         and NEW_MERGE_BB. It is used to place loop-closed-ssa-form exit-phis.
 354
 355    ===> The CFG before the guard-code was added:
 356         LOOP_header_bb:
 357           loop_body
 358           if (exit_loop) goto update_bb
 359           else           goto LOOP_header_bb
 360         update_bb:
 361
 362    ==> The CFG after the guard-code was added:
 363         guard_bb:
 364           if (LOOP_guard_condition) goto new_merge_bb
 365           else                      goto LOOP_header_bb
 366         LOOP_header_bb:
 367           loop_body
 368           if (exit_loop_condition) goto new_merge_bb
 369           else                     goto LOOP_header_bb
 370         new_merge_bb:
 371           goto update_bb
 372         update_bb:
 373
 374    ==> The CFG after this function:
 375         guard_bb:
 376           if (LOOP_guard_condition) goto new_merge_bb
 377           else                      goto LOOP_header_bb
 378         LOOP_header_bb:
 379           loop_body
 380           if (exit_loop_condition) goto new_exit_bb
 381           else                     goto LOOP_header_bb
 382         new_exit_bb:
 383         new_merge_bb:
 384           goto update_bb
 385         update_bb:
 386
 387    This function:
 388    1. creates and updates the relevant phi nodes to account for the new
 389       incoming edge (GUARD_EDGE) into NEW_MERGE_BB. This involves:
 390       1.1. Create phi nodes at NEW_MERGE_BB.
 391       1.2. Update the phi nodes at the successor of NEW_MERGE_BB (denoted
 392            UPDATE_BB).  UPDATE_BB was the exit-bb of LOOP before NEW_MERGE_BB
 393    2. preserves loop-closed-ssa-form by creating the required phi nodes
 394       at the exit of LOOP (i.e, in NEW_EXIT_BB).
 395
 396    There are two flavors to this function:
 397
 398    slpeel_update_phi_nodes_for_guard1:
 399      Here the guard controls whether we enter or skip LOOP, where LOOP is a
 400      prolog_loop (loop1 below), and the new phis created in NEW_MERGE_BB are
 401      for variables that have phis in the loop header.
 402
 403    slpeel_update_phi_nodes_for_guard2:
 404      Here the guard controls whether we enter or skip LOOP, where LOOP is an
 405      epilog_loop (loop2 below), and the new phis created in NEW_MERGE_BB are
 406      for variables that have phis in the loop exit.
 407
 408    I.E., the overall structure is:
 409
 410         loop1_preheader_bb:
 411                 guard1 (goto loop1/merg1_bb)
 412         loop1
 413         loop1_exit_bb:
 414                 guard2 (goto merge1_bb/merge2_bb)
 415         merge1_bb
 416         loop2
 417         loop2_exit_bb
 418         merge2_bb
 419         next_bb
 420
 421    slpeel_update_phi_nodes_for_guard1 takes care of creating phis in
 422    loop1_exit_bb and merge1_bb. These are entry phis (phis for the vars
 423    that have phis in loop1->header).
 424
 425    slpeel_update_phi_nodes_for_guard2 takes care of creating phis in
 426    loop2_exit_bb and merge2_bb. These are exit phis (phis for the vars
 427    that have phis in next_bb). It also adds some of these phis to
 428    loop1_exit_bb.
 429
 430    slpeel_update_phi_nodes_for_guard1 is always called before
 431    slpeel_update_phi_nodes_for_guard2. They are both needed in order
 432    to create correct data-flow and loop-closed-ssa-form.
 433
 434    Generally slpeel_update_phi_nodes_for_guard1 creates phis for variables
 435    that change between iterations of a loop (and therefore have a phi-node
 436    at the loop entry), whereas slpeel_update_phi_nodes_for_guard2 creates
 437    phis for variables that are used out of the loop (and therefore have
 438    loop-closed exit phis). Some variables may be both updated between
 439    iterations and used after the loop. This is why in loop1_exit_bb we
 440    may need both entry_phis (created by slpeel_update_phi_nodes_for_guard1)
 441    and exit phis (created by slpeel_update_phi_nodes_for_guard2).
 442
 443    - IS_NEW_LOOP: if IS_NEW_LOOP is true, then LOOP is a newly created copy of
 444      an original loop. i.e., we have:
 445
 446            orig_loop
 447            guard_bb (goto LOOP/new_merge)
 448            new_loop <-- LOOP
 449            new_exit
 450            new_merge
 451            next_bb
 452
 453      If IS_NEW_LOOP is false, then LOOP is an original loop, in which case we
 454      have:
 455
 456            new_loop
 457            guard_bb (goto LOOP/new_merge)
 458            orig_loop <-- LOOP
 459            new_exit
 460            new_merge
 461            next_bb
 462
 463      The SSA names defined in the original loop have a current
 464      reaching definition that that records the corresponding new
 465      ssa-name used in the new duplicated loop copy.
 466   */
 467
 468 /* Function slpeel_update_phi_nodes_for_guard1
 469
 470    Input:
 471    - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
 472    - DEFS - a bitmap of ssa names to mark new names for which we recorded
 473             information.
 474
 475    In the context of the overall structure, we have:
 476
 477         loop1_preheader_bb:
 478                 guard1 (goto loop1/merg1_bb)
 479 LOOP->  loop1
 480         loop1_exit_bb:
 481                 guard2 (goto merge1_bb/merge2_bb)
 482         merge1_bb
 483         loop2
 484         loop2_exit_bb
 485         merge2_bb
 486         next_bb
 487
 488    For each name updated between loop iterations (i.e - for each name that has
 489    an entry (loop-header) phi in LOOP) we create a new phi in:
 490    1. merge1_bb (to account for the edge from guard1)
 491    2. loop1_exit_bb (an exit-phi to keep LOOP in loop-closed form)
 492 */
 493
 494 static void
 495 slpeel_update_phi_nodes_for_guard1 (edge guard_edge, struct loop *loop,
 496                                     bool is_new_loop, basic_block *new_exit_bb,
 497                                     bitmap *defs)
 498 {
 499   tree orig_phi, new_phi;
 500   tree update_phi, update_phi2;
 501   tree guard_arg, loop_arg;
 502   basic_block new_merge_bb = guard_edge->dest;
 503   edge e = EDGE_SUCC (new_merge_bb, 0);
 504   basic_block update_bb = e->dest;
 505   basic_block orig_bb = loop->header;
 506   edge new_exit_e;
 507   tree current_new_name;
 508
 509   /* Create new bb between loop and new_merge_bb.  */
 510   *new_exit_bb = split_edge (loop->single_exit);
 511   add_bb_to_loop (*new_exit_bb, loop->outer);
 512
 513   new_exit_e = EDGE_SUCC (*new_exit_bb, 0);
 514
 515   for (orig_phi = phi_nodes (orig_bb), update_phi = phi_nodes (update_bb);
 516        orig_phi && update_phi;
 517        orig_phi = PHI_CHAIN (orig_phi), update_phi = PHI_CHAIN (update_phi))
 518     {
 519       /** 1. Handle new-merge-point phis  **/
 520
 521       /* 1.1. Generate new phi node in NEW_MERGE_BB:  */
 522       new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
 523                                  new_merge_bb);
 524
 525       /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
 526             of LOOP. Set the two phi args in NEW_PHI for these edges:  */
 527       loop_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, EDGE_SUCC (loop->latch, 0));
 528       guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, loop_preheader_edge (loop));
 529
 530       add_phi_arg (new_phi, loop_arg, new_exit_e);
 531       add_phi_arg (new_phi, guard_arg, guard_edge);
 532
 533       /* 1.3. Update phi in successor block.  */
 534       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == loop_arg
 535                   || PHI_ARG_DEF_FROM_EDGE (update_phi, e) == guard_arg);
 536       SET_PHI_ARG_DEF (update_phi, e->dest_idx, PHI_RESULT (new_phi));
 537       update_phi2 = new_phi;
 538
 539
 540       /** 2. Handle loop-closed-ssa-form phis  **/
 541
 542       /* 2.1. Generate new phi node in NEW_EXIT_BB:  */
 543       new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
 544                                  *new_exit_bb);
 545
 546       /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop.  */
 547       add_phi_arg (new_phi, loop_arg, loop->single_exit);
 548
 549       /* 2.3. Update phi in successor of NEW_EXIT_BB:  */
 550       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
 551       SET_PHI_ARG_DEF (update_phi2, new_exit_e->dest_idx, PHI_RESULT (new_phi));
 552
 553       /* 2.4. Record the newly created name with set_current_def.
 554          We want to find a name such that
 555                 name = get_current_def (orig_loop_name)
 556          and to set its current definition as follows:
 557                 set_current_def (name, new_phi_name)
 558
 559          If LOOP is a new loop then loop_arg is already the name we're
 560          looking for. If LOOP is the original loop, then loop_arg is
 561          the orig_loop_name and the relevant name is recorded in its
 562          current reaching definition.  */
 563       if (is_new_loop)
 564         current_new_name = loop_arg;
 565       else
 566         {
 567           current_new_name = get_current_def (loop_arg);
 568           gcc_assert (current_new_name);
 569         }
 570 #ifdef ENABLE_CHECKING
 571       gcc_assert (get_current_def (current_new_name) == NULL_TREE);
 572 #endif
 573
 574       set_current_def (current_new_name, PHI_RESULT (new_phi));
 575       bitmap_set_bit (*defs, SSA_NAME_VERSION (current_new_name));
 576     }
 577
 578   set_phi_nodes (new_merge_bb, phi_reverse (phi_nodes (new_merge_bb)));
 579 }
 580
 581
 582 /* Function slpeel_update_phi_nodes_for_guard2
 583
 584    Input:
 585    - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
 586
 587    In the context of the overall structure, we have:
 588
 589         loop1_preheader_bb:
 590                 guard1 (goto loop1/merg1_bb)
 591         loop1
 592         loop1_exit_bb:
 593                 guard2 (goto merge1_bb/merge2_bb)
 594         merge1_bb
 595 LOOP->  loop2
 596         loop2_exit_bb
 597         merge2_bb
 598         next_bb
 599
 600    For each name used out side the loop (i.e - for each name that has an exit
 601    phi in next_bb) we create a new phi in:
 602    1. merge2_bb (to account for the edge from guard_bb)
 603    2. loop2_exit_bb (an exit-phi to keep LOOP in loop-closed form)
 604    3. guard2 bb (an exit phi to keep the preceding loop in loop-closed form),
 605       if needed (if it wasn't handled by slpeel_update_phis_nodes_for_phi1).
 606 */
 607
 608 static void
 609 slpeel_update_phi_nodes_for_guard2 (edge guard_edge, struct loop *loop,
 610                                     bool is_new_loop, basic_block *new_exit_bb)
 611 {
 612   tree orig_phi, new_phi;
 613   tree update_phi, update_phi2;
 614   tree guard_arg, loop_arg;
 615   basic_block new_merge_bb = guard_edge->dest;
 616   edge e = EDGE_SUCC (new_merge_bb, 0);
 617   basic_block update_bb = e->dest;
 618   edge new_exit_e;
 619   tree orig_def, orig_def_new_name;
 620   tree new_name, new_name2;
 621   tree arg;
 622
 623   /* Create new bb between loop and new_merge_bb.  */
 624   *new_exit_bb = split_edge (loop->single_exit);
 625   add_bb_to_loop (*new_exit_bb, loop->outer);
 626
 627   new_exit_e = EDGE_SUCC (*new_exit_bb, 0);
 628
 629   for (update_phi = phi_nodes (update_bb); update_phi;
 630        update_phi = PHI_CHAIN (update_phi))
 631     {
 632       orig_phi = update_phi;
 633       orig_def = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
 634       orig_def_new_name = get_current_def (orig_def);
 635       arg = NULL_TREE;
 636
 637       /** 1. Handle new-merge-point phis  **/
 638
 639       /* 1.1. Generate new phi node in NEW_MERGE_BB:  */
 640       new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
 641                                  new_merge_bb);
 642
 643       /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
 644             of LOOP. Set the two PHI args in NEW_PHI for these edges:  */
 645       new_name = orig_def;
 646       new_name2 = NULL_TREE;
 647       if (orig_def_new_name)
 648         {
 649           new_name = orig_def_new_name;
 650           /* Some variables have both loop-entry-phis and loop-exit-phis.
 651              Such variables were given yet newer names by phis placed in
 652              guard_bb by slpeel_update_phi_nodes_for_guard1. I.e:
 653              new_name2 = get_current_def (get_current_def (orig_name)).  */
 654           new_name2 = get_current_def (new_name);
 655         }
 656
 657       if (is_new_loop)
 658         {
 659           guard_arg = orig_def;
 660           loop_arg = new_name;
 661         }
 662       else
 663         {
 664           guard_arg = new_name;
 665           loop_arg = orig_def;
 666         }
 667       if (new_name2)
 668         guard_arg = new_name2;
 669
 670       add_phi_arg (new_phi, loop_arg, new_exit_e);
 671       add_phi_arg (new_phi, guard_arg, guard_edge);
 672
 673       /* 1.3. Update phi in successor block.  */
 674       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == orig_def);
 675       SET_PHI_ARG_DEF (update_phi, e->dest_idx, PHI_RESULT (new_phi));
 676       update_phi2 = new_phi;
 677
 678
 679       /** 2. Handle loop-closed-ssa-form phis  **/
 680
 681       /* 2.1. Generate new phi node in NEW_EXIT_BB:  */
 682       new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
 683                                  *new_exit_bb);
 684
 685       /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop.  */
 686       add_phi_arg (new_phi, loop_arg, loop->single_exit);
 687
 688       /* 2.3. Update phi in successor of NEW_EXIT_BB:  */
 689       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
 690       SET_PHI_ARG_DEF (update_phi2, new_exit_e->dest_idx, PHI_RESULT (new_phi));
 691
 692
 693       /** 3. Handle loop-closed-ssa-form phis for first loop  **/
 694
 695       /* 3.1. Find the relevant names that need an exit-phi in
 696          GUARD_BB, i.e. names for which
 697          slpeel_update_phi_nodes_for_guard1 had not already created a
 698          phi node. This is the case for names that are used outside
 699          the loop (and therefore need an exit phi) but are not updated
 700          across loop iterations (and therefore don't have a
 701          loop-header-phi).
 702
 703          slpeel_update_phi_nodes_for_guard1 is responsible for
 704          creating loop-exit phis in GUARD_BB for names that have a
 705          loop-header-phi.  When such a phi is created we also record
 706          the new name in its current definition.  If this new name
 707          exists, then guard_arg was set to this new name (see 1.2
 708          above).  Therefore, if guard_arg is not this new name, this
 709          is an indication that an exit-phi in GUARD_BB was not yet
 710          created, so we take care of it here.  */
 711       if (guard_arg == new_name2)
 712         continue;
 713       arg = guard_arg;
 714
 715       /* 3.2. Generate new phi node in GUARD_BB:  */
 716       new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
 717                                  guard_edge->src);
 718
 719       /* 3.3. GUARD_BB has one incoming edge:  */
 720       gcc_assert (EDGE_COUNT (guard_edge->src->preds) == 1);
 721       add_phi_arg (new_phi, arg, EDGE_PRED (guard_edge->src, 0));
 722
 723       /* 3.4. Update phi in successor of GUARD_BB:  */
 724       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, guard_edge)
 725                                                                 == guard_arg);
 726       SET_PHI_ARG_DEF (update_phi2, guard_edge->dest_idx, PHI_RESULT (new_phi));
 727     }
 728
 729   set_phi_nodes (new_merge_bb, phi_reverse (phi_nodes (new_merge_bb)));
 730 }
 731
 732
 733 /* Make the LOOP iterate NITERS times. This is done by adding a new IV
 734    that starts at zero, increases by one and its limit is NITERS.
 735
 736    Assumption: the exit-condition of LOOP is the last stmt in the loop.  */
 737
 738 void
 739 slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
 740 {
 741   tree indx_before_incr, indx_after_incr, cond_stmt, cond;
 742   tree orig_cond;
 743   edge exit_edge = loop->single_exit;
 744   block_stmt_iterator loop_cond_bsi;
 745   block_stmt_iterator incr_bsi;
 746   bool insert_after;
 747   tree begin_label = tree_block_label (loop->latch);
 748   tree exit_label = tree_block_label (loop->single_exit->dest);
 749   tree init = build_int_cst (TREE_TYPE (niters), 0);
 750   tree step = build_int_cst (TREE_TYPE (niters), 1);
 751   tree then_label;
 752   tree else_label;
 753   LOC loop_loc;
 754
 755   orig_cond = get_loop_exit_condition (loop);
 756 #ifdef ENABLE_CHECKING
 757   gcc_assert (orig_cond);
 758 #endif
 759   loop_cond_bsi = bsi_for_stmt (orig_cond);
 760
 761   standard_iv_increment_position (loop, &incr_bsi, &insert_after);
 762   create_iv (init, step, NULL_TREE, loop,
 763              &incr_bsi, insert_after, &indx_before_incr, &indx_after_incr);
 764
 765   if (exit_edge->flags & EDGE_TRUE_VALUE) /* 'then' edge exits the loop.  */
 766     {
 767       cond = build2 (GE_EXPR, boolean_type_node, indx_after_incr, niters);
 768       then_label = build1 (GOTO_EXPR, void_type_node, exit_label);
 769       else_label = build1 (GOTO_EXPR, void_type_node, begin_label);
 770     }
 771   else /* 'then' edge loops back.  */
 772     {
 773       cond = build2 (LT_EXPR, boolean_type_node, indx_after_incr, niters);
 774       then_label = build1 (GOTO_EXPR, void_type_node, begin_label);
 775       else_label = build1 (GOTO_EXPR, void_type_node, exit_label);
 776     }
 777
 778   cond_stmt = build3 (COND_EXPR, TREE_TYPE (orig_cond), cond,
 779                      then_label, else_label);
 780   bsi_insert_before (&loop_cond_bsi, cond_stmt, BSI_SAME_STMT);
 781
 782   /* Remove old loop exit test:  */
 783   bsi_remove (&loop_cond_bsi);
 784
 785   loop_loc = find_loop_location (loop);
 786   if (dump_file && (dump_flags & TDF_DETAILS))
 787     {
 788       if (loop_loc != UNKNOWN_LOC)
 789         fprintf (dump_file, "\nloop at %s:%d: ",
 790                  LOC_FILE (loop_loc), LOC_LINE (loop_loc));
 791       print_generic_expr (dump_file, cond_stmt, TDF_SLIM);
 792     }
 793
 794   loop->nb_iterations = niters;
 795 }
 796
 797
 798 /* Given LOOP this function generates a new copy of it and puts it
 799    on E which is either the entry or exit of LOOP.  */
 800
 801 static struct loop *
 802 slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, struct loops *loops,
 803                                         edge e)
 804 {
 805   struct loop *new_loop;
 806   basic_block *new_bbs, *bbs;
 807   bool at_exit;
 808   bool was_imm_dom;
 809   basic_block exit_dest;
 810   tree phi, phi_arg;
 811
 812   at_exit = (e == loop->single_exit);
 813   if (!at_exit && e != loop_preheader_edge (loop))
 814     return NULL;
 815
 816   bbs = get_loop_body (loop);
 817
 818   /* Check whether duplication is possible.  */
 819   if (!can_copy_bbs_p (bbs, loop->num_nodes))
 820     {
 821       free (bbs);
 822       return NULL;
 823     }
 824
 825   /* Generate new loop structure.  */
 826   new_loop = duplicate_loop (loops, loop, loop->outer);
 827   if (!new_loop)
 828     {
 829       free (bbs);
 830       return NULL;
 831     }
 832
 833   exit_dest = loop->single_exit->dest;
 834   was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
 835                                           exit_dest) == loop->header ?
 836                  true : false);
 837
 838   new_bbs = xmalloc (sizeof (basic_block) * loop->num_nodes);
 839
 840   copy_bbs (bbs, loop->num_nodes, new_bbs,
 841             &loop->single_exit, 1, &new_loop->single_exit, NULL);
 842
 843   /* Duplicating phi args at exit bbs as coming
 844      also from exit of duplicated loop.  */
 845   for (phi = phi_nodes (exit_dest); phi; phi = PHI_CHAIN (phi))
 846     {
 847       phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, loop->single_exit);
 848       if (phi_arg)
 849         {
 850           edge new_loop_exit_edge;
 851
 852           if (EDGE_SUCC (new_loop->header, 0)->dest == new_loop->latch)
 853             new_loop_exit_edge = EDGE_SUCC (new_loop->header, 1);
 854           else
 855             new_loop_exit_edge = EDGE_SUCC (new_loop->header, 0);
 856
 857           add_phi_arg (phi, phi_arg, new_loop_exit_edge);
 858         }
 859     }
 860
 861   if (at_exit) /* Add the loop copy at exit.  */
 862     {
 863       redirect_edge_and_branch_force (e, new_loop->header);
 864       set_immediate_dominator (CDI_DOMINATORS, new_loop->header, e->src);
 865       if (was_imm_dom)
 866         set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_loop->header);
 867     }
 868   else /* Add the copy at entry.  */
 869     {
 870       edge new_exit_e;
 871       edge entry_e = loop_preheader_edge (loop);
 872       basic_block preheader = entry_e->src;
 873
 874       if (!flow_bb_inside_loop_p (new_loop,
 875                                   EDGE_SUCC (new_loop->header, 0)->dest))
 876         new_exit_e = EDGE_SUCC (new_loop->header, 0);
 877       else
 878         new_exit_e = EDGE_SUCC (new_loop->header, 1);
 879
 880       redirect_edge_and_branch_force (new_exit_e, loop->header);
 881       set_immediate_dominator (CDI_DOMINATORS, loop->header,
 882                                new_exit_e->src);
 883
 884       /* We have to add phi args to the loop->header here as coming
 885          from new_exit_e edge.  */
 886       for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
 887         {
 888           phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, entry_e);
 889           if (phi_arg)
 890             add_phi_arg (phi, phi_arg, new_exit_e);
 891         }
 892
 893       redirect_edge_and_branch_force (entry_e, new_loop->header);
 894       set_immediate_dominator (CDI_DOMINATORS, new_loop->header, preheader);
 895     }
 896
 897   free (new_bbs);
 898   free (bbs);
 899
 900   return new_loop;
 901 }
 902
 903
 904 /* Given the condition statement COND, put it as the last statement
 905    of GUARD_BB; EXIT_BB is the basic block to skip the loop;
 906    Assumes that this is the single exit of the guarded loop.
 907    Returns the skip edge.  */
 908
 909 static edge
 910 slpeel_add_loop_guard (basic_block guard_bb, tree cond, basic_block exit_bb,
 911                         basic_block dom_bb)
 912 {
 913   block_stmt_iterator bsi;
 914   edge new_e, enter_e;
 915   tree cond_stmt, then_label, else_label;
 916
 917   enter_e = EDGE_SUCC (guard_bb, 0);
 918   enter_e->flags &= ~EDGE_FALLTHRU;
 919   enter_e->flags |= EDGE_FALSE_VALUE;
 920   bsi = bsi_last (guard_bb);
 921
 922   then_label = build1 (GOTO_EXPR, void_type_node,
 923                        tree_block_label (exit_bb));
 924   else_label = build1 (GOTO_EXPR, void_type_node,
 925                        tree_block_label (enter_e->dest));
 926   cond_stmt = build3 (COND_EXPR, void_type_node, cond,
 927                      then_label, else_label);
 928   bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT);
 929   /* Add new edge to connect guard block to the merge/loop-exit block.  */
 930   new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);
 931   set_immediate_dominator (CDI_DOMINATORS, exit_bb, dom_bb);
 932   return new_e;
 933 }
 934
 935
 936 /* This function verifies that the following restrictions apply to LOOP:
 937    (1) it is innermost
 938    (2) it consists of exactly 2 basic blocks - header, and an empty latch.
 939    (3) it is single entry, single exit
 940    (4) its exit condition is the last stmt in the header
 941    (5) E is the entry/exit edge of LOOP.
 942  */
 943
 944 bool
 945 slpeel_can_duplicate_loop_p (struct loop *loop, edge e)
 946 {
 947   edge exit_e = loop->single_exit;
 948   edge entry_e = loop_preheader_edge (loop);
 949   tree orig_cond = get_loop_exit_condition (loop);
 950   block_stmt_iterator loop_exit_bsi = bsi_last (exit_e->src);
 951
 952   if (need_ssa_update_p ())
 953     return false;
 954
 955   if (loop->inner
 956       /* All loops have an outer scope; the only case loop->outer is NULL is for
 957          the function itself.  */
 958       || !loop->outer
 959       || loop->num_nodes != 2
 960       || !empty_block_p (loop->latch)
 961       || !loop->single_exit
 962       /* Verify that new loop exit condition can be trivially modified.  */
 963       || (!orig_cond || orig_cond != bsi_stmt (loop_exit_bsi))
 964       || (e != exit_e && e != entry_e))
 965     return false;
 966
 967   return true;
 968 }
 969
 970 #ifdef ENABLE_CHECKING
 971 void
 972 slpeel_verify_cfg_after_peeling (struct loop *first_loop,
 973                                  struct loop *second_loop)
 974 {
 975   basic_block loop1_exit_bb = first_loop->single_exit->dest;
 976   basic_block loop2_entry_bb = loop_preheader_edge (second_loop)->src;
 977   basic_block loop1_entry_bb = loop_preheader_edge (first_loop)->src;
 978
 979   /* A guard that controls whether the second_loop is to be executed or skipped
 980      is placed in first_loop->exit.  first_loopt->exit therefore has two
 981      successors - one is the preheader of second_loop, and the other is a bb
 982      after second_loop.
 983    */
 984   gcc_assert (EDGE_COUNT (loop1_exit_bb->succs) == 2);
 985
 986   /* 1. Verify that one of the successors of first_loopt->exit is the preheader
 987         of second_loop.  */
 988
 989   /* The preheader of new_loop is expected to have two predecessors:
 990      first_loop->exit and the block that precedes first_loop.  */
 991
 992   gcc_assert (EDGE_COUNT (loop2_entry_bb->preds) == 2
 993               && ((EDGE_PRED (loop2_entry_bb, 0)->src == loop1_exit_bb
 994                    && EDGE_PRED (loop2_entry_bb, 1)->src == loop1_entry_bb)
 995                || (EDGE_PRED (loop2_entry_bb, 1)->src ==  loop1_exit_bb
 996                    && EDGE_PRED (loop2_entry_bb, 0)->src == loop1_entry_bb)));
 997
 998   /* Verify that the other successor of first_loopt->exit is after the
 999      second_loop.  */
1000   /* TODO */
1001 }
1002 #endif
1003
1004 /* Function slpeel_tree_peel_loop_to_edge.
1005
1006    Peel the first (last) iterations of LOOP into a new prolog (epilog) loop
1007    that is placed on the entry (exit) edge E of LOOP. After this transformation
1008    we have two loops one after the other - first-loop iterates FIRST_NITERS
1009    times, and second-loop iterates the remainder NITERS - FIRST_NITERS times.
1010
1011    Input:
1012    - LOOP: the loop to be peeled.
1013    - E: the exit or entry edge of LOOP.
1014         If it is the entry edge, we peel the first iterations of LOOP. In this
1015         case first-loop is LOOP, and second-loop is the newly created loop.
1016         If it is the exit edge, we peel the last iterations of LOOP. In this
1017         case, first-loop is the newly created loop, and second-loop is LOOP.
1018    - NITERS: the number of iterations that LOOP iterates.
1019    - FIRST_NITERS: the number of iterations that the first-loop should iterate.
1020    - UPDATE_FIRST_LOOP_COUNT:  specified whether this function is responsible
1021         for updating the loop bound of the first-loop to FIRST_NITERS.  If it
1022         is false, the caller of this function may want to take care of this
1023         (this can be useful if we don't want new stmts added to first-loop).
1024
1025    Output:
1026    The function returns a pointer to the new loop-copy, or NULL if it failed
1027    to perform the transformation.
1028
1029    The function generates two if-then-else guards: one before the first loop,
1030    and the other before the second loop:
1031    The first guard is:
1032      if (FIRST_NITERS == 0) then skip the first loop,
1033      and go directly to the second loop.
1034    The second guard is:
1035      if (FIRST_NITERS == NITERS) then skip the second loop.
1036
1037    FORNOW only simple loops are supported (see slpeel_can_duplicate_loop_p).
1038    FORNOW the resulting code will not be in loop-closed-ssa form.
1039 */
1040
1041 struct loop*
1042 slpeel_tree_peel_loop_to_edge (struct loop *loop, struct loops *loops,
1043                                edge e, tree first_niters,
1044                                tree niters, bool update_first_loop_count)
1045 {
1046   struct loop *new_loop = NULL, *first_loop, *second_loop;
1047   edge skip_e;
1048   tree pre_condition;
1049   bitmap definitions;
1050   basic_block bb_before_second_loop, bb_after_second_loop;
1051   basic_block bb_before_first_loop;
1052   basic_block bb_between_loops;
1053   basic_block new_exit_bb;
1054   edge exit_e = loop->single_exit;
1055   LOC loop_loc;
1056
1057   if (!slpeel_can_duplicate_loop_p (loop, e))
1058     return NULL;
1059
1060   /* We have to initialize cfg_hooks. Then, when calling
1061    cfg_hooks->split_edge, the function tree_split_edge
1062    is actually called and, when calling cfg_hooks->duplicate_block,
1063    the function tree_duplicate_bb is called.  */
1064   tree_register_cfg_hooks ();
1065
1066
1067   /* 1. Generate a copy of LOOP and put it on E (E is the entry/exit of LOOP).
1068         Resulting CFG would be:
1069
1070         first_loop:
1071         do {
1072         } while ...
1073
1074         second_loop:
1075         do {
1076         } while ...
1077
1078         orig_exit_bb:
1079    */
1080
1081   if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, loops, e)))
1082     {
1083       loop_loc = find_loop_location (loop);
1084       if (dump_file && (dump_flags & TDF_DETAILS))
1085         {
1086           if (loop_loc != UNKNOWN_LOC)
1087             fprintf (dump_file, "\n%s:%d: note: ",
1088                      LOC_FILE (loop_loc), LOC_LINE (loop_loc));
1089           fprintf (dump_file, "tree_duplicate_loop_to_edge_cfg failed.\n");
1090         }
1091       return NULL;
1092     }
1093
1094   if (e == exit_e)
1095     {
1096       /* NEW_LOOP was placed after LOOP.  */
1097       first_loop = loop;
1098       second_loop = new_loop;
1099     }
1100   else
1101     {
1102       /* NEW_LOOP was placed before LOOP.  */
1103       first_loop = new_loop;
1104       second_loop = loop;
1105     }
1106
1107   definitions = ssa_names_to_replace ();
1108   slpeel_update_phis_for_duplicate_loop (loop, new_loop, e == exit_e);
1109   rename_variables_in_loop (new_loop);
1110
1111
1112   /* 2. Add the guard that controls whether the first loop is executed.
1113         Resulting CFG would be:
1114
1115         bb_before_first_loop:
1116         if (FIRST_NITERS == 0) GOTO bb_before_second_loop
1117                                GOTO first-loop
1118
1119         first_loop:
1120         do {
1121         } while ...
1122
1123         bb_before_second_loop:
1124
1125         second_loop:
1126         do {
1127         } while ...
1128
1129         orig_exit_bb:
1130    */
1131
1132   bb_before_first_loop = split_edge (loop_preheader_edge (first_loop));
1133   add_bb_to_loop (bb_before_first_loop, first_loop->outer);
1134   bb_before_second_loop = split_edge (first_loop->single_exit);
1135   add_bb_to_loop (bb_before_second_loop, first_loop->outer);
1136
1137   pre_condition =
1138     fold (build2 (LE_EXPR, boolean_type_node, first_niters, integer_zero_node));
1139   skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
1140                                   bb_before_second_loop, bb_before_first_loop);
1141   slpeel_update_phi_nodes_for_guard1 (skip_e, first_loop,
1142                                       first_loop == new_loop,
1143                                       &new_exit_bb, &definitions);
1144
1145
1146   /* 3. Add the guard that controls whether the second loop is executed.
1147         Resulting CFG would be:
1148
1149         bb_before_first_loop:
1150         if (FIRST_NITERS == 0) GOTO bb_before_second_loop (skip first loop)
1151                                GOTO first-loop
1152
1153         first_loop:
1154         do {
1155         } while ...
1156
1157         bb_between_loops:
1158         if (FIRST_NITERS == NITERS) GOTO bb_after_second_loop (skip second loop)
1159                                     GOTO bb_before_second_loop
1160
1161         bb_before_second_loop:
1162
1163         second_loop:
1164         do {
1165         } while ...
1166
1167         bb_after_second_loop:
1168
1169         orig_exit_bb:
1170    */
1171
1172   bb_between_loops = new_exit_bb;
1173   bb_after_second_loop = split_edge (second_loop->single_exit);
1174   add_bb_to_loop (bb_after_second_loop, second_loop->outer);
1175
1176   pre_condition =
1177         fold (build2 (EQ_EXPR, boolean_type_node, first_niters, niters));
1178   skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition,
1179                                   bb_after_second_loop, bb_before_first_loop);
1180   slpeel_update_phi_nodes_for_guard2 (skip_e, second_loop,
1181                                      second_loop == new_loop, &new_exit_bb);
1182
1183   /* 4. Make first-loop iterate FIRST_NITERS times, if requested.
1184    */
1185   if (update_first_loop_count)
1186     slpeel_make_loop_iterate_ntimes (first_loop, first_niters);
1187
1188   BITMAP_FREE (definitions);
1189   delete_update_ssa ();
1190
1191   return new_loop;
1192 }
1193
1194 /* Function vect_get_loop_location.
1195
1196    Extract the location of the loop in the source code.
1197    If the loop is not well formed for vectorization, an estimated
1198    location is calculated.
1199    Return the loop location if succeed and NULL if not.  */
1200
1201 LOC
1202 find_loop_location (struct loop *loop)
1203 {
1204   tree node = NULL_TREE;
1205   basic_block bb;
1206   block_stmt_iterator si;
1207
1208   if (!loop)
1209     return UNKNOWN_LOC;
1210
1211   node = get_loop_exit_condition (loop);
1212
1213   if (node && EXPR_P (node) && EXPR_HAS_LOCATION (node)
1214       && EXPR_FILENAME (node) && EXPR_LINENO (node))
1215     return EXPR_LOC (node);
1216
1217   /* If we got here the loop is probably not "well formed",
1218      try to estimate the loop location */
1219
1220   if (!loop->header)
1221     return UNKNOWN_LOC;
1222
1223   bb = loop->header;
1224
1225   for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1226     {
1227       node = bsi_stmt (si);
1228       if (node && EXPR_P (node) && EXPR_HAS_LOCATION (node))
1229         return EXPR_LOC (node);
1230     }
1231
1232   return UNKNOWN_LOC;
1233 }
1234
1235
1236 /*************************************************************************
1237   Vectorization Debug Information.
1238  *************************************************************************/
1239
1240 /* Function vect_set_verbosity_level.
1241
1242    Called from toplev.c upon detection of the
1243    -ftree-vectorizer-verbose=N option.  */
1244
1245 void
1246 vect_set_verbosity_level (const char *val)
1247 {
1248    unsigned int vl;
1249
1250    vl = atoi (val);
1251    if (vl < MAX_VERBOSITY_LEVEL)
1252      vect_verbosity_level = vl;
1253    else
1254      vect_verbosity_level = MAX_VERBOSITY_LEVEL - 1;
1255 }
1256
1257
1258 /* Function vect_set_dump_settings.
1259
1260    Fix the verbosity level of the vectorizer if the
1261    requested level was not set explicitly using the flag
1262    -ftree-vectorizer-verbose=N.
1263    Decide where to print the debugging information (dump_file/stderr).
1264    If the user defined the verbosity level, but there is no dump file,
1265    print to stderr, otherwise print to the dump file.  */
1266
1267 static void
1268 vect_set_dump_settings (void)
1269 {
1270   vect_dump = dump_file;
1271
1272   /* Check if the verbosity level was defined by the user:  */
1273   if (vect_verbosity_level != MAX_VERBOSITY_LEVEL)
1274     {
1275       /* If there is no dump file, print to stderr.  */
1276       if (!dump_file)
1277         vect_dump = stderr;
1278       return;
1279     }
1280
1281   /* User didn't specify verbosity level:  */
1282   if (dump_file && (dump_flags & TDF_DETAILS))
1283     vect_verbosity_level = REPORT_DETAILS;
1284   else if (dump_file && (dump_flags & TDF_STATS))
1285     vect_verbosity_level = REPORT_UNVECTORIZED_LOOPS;
1286   else
1287     vect_verbosity_level = REPORT_NONE;
1288
1289   gcc_assert (dump_file || vect_verbosity_level == REPORT_NONE);
1290 }
1291
1292
1293 /* Function debug_loop_details.
1294
1295    For vectorization debug dumps.  */
1296
1297 bool
1298 vect_print_dump_info (enum verbosity_levels vl, LOC loc)
1299 {
1300   if (vl > vect_verbosity_level)
1301     return false;
1302
1303   if (loc == UNKNOWN_LOC)
1304     fprintf (vect_dump, "\n%s:%d: note: ",
1305                  DECL_SOURCE_FILE (current_function_decl),
1306                  DECL_SOURCE_LINE (current_function_decl));
1307   else
1308     fprintf (vect_dump, "\n%s:%d: note: ", LOC_FILE (loc), LOC_LINE (loc));
1309
1310
1311   return true;
1312 }
1313
1314
1315 /*************************************************************************
1316   Vectorization Utilities.
1317  *************************************************************************/
1318
1319 /* Function new_stmt_vec_info.
1320
1321    Create and initialize a new stmt_vec_info struct for STMT.  */
1322
1323 stmt_vec_info
1324 new_stmt_vec_info (tree stmt, loop_vec_info loop_vinfo)
1325 {
1326   stmt_vec_info res;
1327   res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
1328
1329   STMT_VINFO_TYPE (res) = undef_vec_info_type;
1330   STMT_VINFO_STMT (res) = stmt;
1331   STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
1332   STMT_VINFO_RELEVANT_P (res) = 0;
1333   STMT_VINFO_VECTYPE (res) = NULL;
1334   STMT_VINFO_VEC_STMT (res) = NULL;
1335   STMT_VINFO_DATA_REF (res) = NULL;
1336   STMT_VINFO_MEMTAG (res) = NULL;
1337   STMT_VINFO_PTR_INFO (res) = NULL;
1338   STMT_VINFO_SUBVARS (res) = NULL;
1339   STMT_VINFO_VECT_DR_BASE_ADDRESS (res) = NULL;
1340   STMT_VINFO_VECT_INIT_OFFSET (res) = NULL_TREE;
1341   STMT_VINFO_VECT_STEP (res) = NULL_TREE;
1342   STMT_VINFO_VECT_BASE_ALIGNED_P (res) = false;
1343   STMT_VINFO_VECT_MISALIGNMENT (res) = NULL_TREE;
1344
1345   return res;
1346 }
1347
1348
1349 /* Function new_loop_vec_info.
1350
1351    Create and initialize a new loop_vec_info struct for LOOP, as well as
1352    stmt_vec_info structs for all the stmts in LOOP.  */
1353
1354 loop_vec_info
1355 new_loop_vec_info (struct loop *loop)
1356 {
1357   loop_vec_info res;
1358   basic_block *bbs;
1359   block_stmt_iterator si;
1360   unsigned int i;
1361
1362   res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));
1363
1364   bbs = get_loop_body (loop);
1365
1366   /* Create stmt_info for all stmts in the loop.  */
1367   for (i = 0; i < loop->num_nodes; i++)
1368     {
1369       basic_block bb = bbs[i];
1370       for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1371         {
1372           tree stmt = bsi_stmt (si);
1373           stmt_ann_t ann;
1374
1375           ann = stmt_ann (stmt);
1376           set_stmt_info (ann, new_stmt_vec_info (stmt, res));
1377         }
1378     }
1379
1380   LOOP_VINFO_LOOP (res) = loop;
1381   LOOP_VINFO_BBS (res) = bbs;
1382   LOOP_VINFO_EXIT_COND (res) = NULL;
1383   LOOP_VINFO_NITERS (res) = NULL;
1384   LOOP_VINFO_VECTORIZABLE_P (res) = 0;
1385   LOOP_PEELING_FOR_ALIGNMENT (res) = 0;
1386   LOOP_VINFO_VECT_FACTOR (res) = 0;
1387   VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_WRITES (res), 20,
1388                            "loop_write_datarefs");
1389   VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_READS (res), 20,
1390                            "loop_read_datarefs");
1391   LOOP_VINFO_UNALIGNED_DR (res) = NULL;
1392   LOOP_VINFO_LOC (res) = UNKNOWN_LOC;
1393
1394   return res;
1395 }
1396
1397
1398 /* Function destroy_loop_vec_info.
1399
1400    Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the
1401    stmts in the loop.  */
1402
1403 void
1404 destroy_loop_vec_info (loop_vec_info loop_vinfo)
1405 {
1406   struct loop *loop;
1407   basic_block *bbs;
1408   int nbbs;
1409   block_stmt_iterator si;
1410   int j;
1411
1412   if (!loop_vinfo)
1413     return;
1414
1415   loop = LOOP_VINFO_LOOP (loop_vinfo);
1416
1417   bbs = LOOP_VINFO_BBS (loop_vinfo);
1418   nbbs = loop->num_nodes;
1419
1420   for (j = 0; j < nbbs; j++)
1421     {
1422       basic_block bb = bbs[j];
1423       for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1424         {
1425           tree stmt = bsi_stmt (si);
1426           stmt_ann_t ann = stmt_ann (stmt);
1427           stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1428           free (stmt_info);
1429           set_stmt_info (ann, NULL);
1430         }
1431     }
1432
1433   free (LOOP_VINFO_BBS (loop_vinfo));
1434   varray_clear (LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
1435   varray_clear (LOOP_VINFO_DATAREF_READS (loop_vinfo));
1436
1437   free (loop_vinfo);
1438 }
1439
1440
1441 /* Function vect_strip_conversions
1442
1443    Strip conversions that don't narrow the mode.  */
1444
1445 tree
1446 vect_strip_conversion (tree expr)
1447 {
1448   tree to, ti, oprnd0;
1449
1450   while (TREE_CODE (expr) == NOP_EXPR || TREE_CODE (expr) == CONVERT_EXPR)
1451     {
1452       to = TREE_TYPE (expr);
1453       oprnd0 = TREE_OPERAND (expr, 0);
1454       ti = TREE_TYPE (oprnd0);
1455
1456       if (!INTEGRAL_TYPE_P (to) || !INTEGRAL_TYPE_P (ti))
1457         return NULL_TREE;
1458       if (GET_MODE_SIZE (TYPE_MODE (to)) < GET_MODE_SIZE (TYPE_MODE (ti)))
1459         return NULL_TREE;
1460
1461       expr = oprnd0;
1462     }
1463   return expr;
1464 }
1465
1466
1467 /* Function vect_force_dr_alignment_p.
1468
1469    Returns whether the alignment of a DECL can be forced to be aligned
1470    on ALIGNMENT bit boundary.  */
1471
1472 bool
1473 vect_can_force_dr_alignment_p (tree decl, unsigned int alignment)
1474 {
1475   if (TREE_CODE (decl) != VAR_DECL)
1476     return false;
1477
1478   if (DECL_EXTERNAL (decl))
1479     return false;
1480
1481   if (TREE_ASM_WRITTEN (decl))
1482     return false;
1483
1484   if (TREE_STATIC (decl))
1485     return (alignment <= MAX_OFILE_ALIGNMENT);
1486   else
1487     /* This is not 100% correct.  The absolute correct stack alignment
1488        is STACK_BOUNDARY.  We're supposed to hope, but not assume, that
1489        PREFERRED_STACK_BOUNDARY is honored by all translation units.
1490        However, until someone implements forced stack alignment, SSE
1491        isn't really usable without this.  */
1492     return (alignment <= PREFERRED_STACK_BOUNDARY);
1493 }
1494
1495
1496 /* Function get_vectype_for_scalar_type.
1497
1498    Returns the vector type corresponding to SCALAR_TYPE as supported
1499    by the target.  */
1500
1501 tree
1502 get_vectype_for_scalar_type (tree scalar_type)
1503 {
1504   enum machine_mode inner_mode = TYPE_MODE (scalar_type);
1505   int nbytes = GET_MODE_SIZE (inner_mode);
1506   int nunits;
1507   tree vectype;
1508
1509   if (nbytes == 0 || nbytes >= UNITS_PER_SIMD_WORD)
1510     return NULL_TREE;
1511
1512   /* FORNOW: Only a single vector size per target (UNITS_PER_SIMD_WORD)
1513      is expected.  */
1514   nunits = UNITS_PER_SIMD_WORD / nbytes;
1515
1516   vectype = build_vector_type (scalar_type, nunits);
1517   if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1518     {
1519       fprintf (vect_dump, "get vectype with %d units of type ", nunits);
1520       print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
1521     }
1522
1523   if (!vectype)
1524     return NULL_TREE;
1525
1526   if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1527     {
1528       fprintf (vect_dump, "vectype: ");
1529       print_generic_expr (vect_dump, vectype, TDF_SLIM);
1530     }
1531
1532   if (!VECTOR_MODE_P (TYPE_MODE (vectype))
1533       && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
1534     {
1535       if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1536         fprintf (vect_dump, "mode not supported by target.");
1537       return NULL_TREE;
1538     }
1539
1540   return vectype;
1541 }
1542
1543
1544 /* Function vect_supportable_dr_alignment
1545
1546    Return whether the data reference DR is supported with respect to its
1547    alignment.  */
1548
1549 enum dr_alignment_support
1550 vect_supportable_dr_alignment (struct data_reference *dr)
1551 {
1552   tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr)));
1553   enum machine_mode mode = (int) TYPE_MODE (vectype);
1554
1555   if (aligned_access_p (dr))
1556     return dr_aligned;
1557
1558   /* Possibly unaligned access.  */
1559
1560   if (DR_IS_READ (dr))
1561     {
1562       if (vec_realign_load_optab->handlers[mode].insn_code != CODE_FOR_nothing
1563           && (!targetm.vectorize.builtin_mask_for_load
1564               || targetm.vectorize.builtin_mask_for_load ()))
1565         return dr_unaligned_software_pipeline;
1566
1567       if (movmisalign_optab->handlers[mode].insn_code != CODE_FOR_nothing)
1568         /* Can't software pipeline the loads, but can at least do them.  */
1569         return dr_unaligned_supported;
1570     }
1571
1572   /* Unsupported.  */
1573   return dr_unaligned_unsupported;
1574 }
1575
1576
1577 /* Function vect_is_simple_use.
1578
1579    Input:
1580    LOOP - the loop that is being vectorized.
1581    OPERAND - operand of a stmt in LOOP.
1582    DEF - the defining stmt in case OPERAND is an SSA_NAME.
1583
1584    Returns whether a stmt with OPERAND can be vectorized.
1585    Supportable operands are constants, loop invariants, and operands that are
1586    defined by the current iteration of the loop. Unsupportable operands are
1587    those that are defined by a previous iteration of the loop (as is the case
1588    in reduction/induction computations).  */
1589
1590 bool
1591 vect_is_simple_use (tree operand, loop_vec_info loop_vinfo, tree *def)
1592 {
1593   tree def_stmt;
1594   basic_block bb;
1595   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1596
1597   if (def)
1598     *def = NULL_TREE;
1599
1600   if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
1601     return true;
1602
1603   if (TREE_CODE (operand) != SSA_NAME)
1604     return false;
1605
1606   def_stmt = SSA_NAME_DEF_STMT (operand);
1607   if (def_stmt == NULL_TREE )
1608     {
1609       if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1610         fprintf (vect_dump, "no def_stmt.");
1611       return false;
1612     }
1613
1614   /* empty stmt is expected only in case of a function argument.
1615      (Otherwise - we expect a phi_node or a modify_expr).  */
1616   if (IS_EMPTY_STMT (def_stmt))
1617     {
1618       tree arg = TREE_OPERAND (def_stmt, 0);
1619       if (TREE_CODE (arg) == INTEGER_CST || TREE_CODE (arg) == REAL_CST)
1620         return true;
1621       if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1622         {
1623           fprintf (vect_dump, "Unexpected empty stmt: ");
1624           print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
1625         }
1626       return false;
1627     }
1628
1629   /* phi_node inside the loop indicates an induction/reduction pattern.
1630      This is not supported yet.  */
1631   bb = bb_for_stmt (def_stmt);
1632   if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb))
1633     {
1634       if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1635         fprintf (vect_dump, "reduction/induction - unsupported.");
1636       return false; /* FORNOW: not supported yet.  */
1637     }
1638
1639   /* Expecting a modify_expr or a phi_node.  */
1640   if (TREE_CODE (def_stmt) == MODIFY_EXPR
1641       || TREE_CODE (def_stmt) == PHI_NODE)
1642     {
1643       if (def)
1644         *def = def_stmt;
1645       return true;
1646     }
1647
1648   return false;
1649 }
1650
1651
1652 /* Function vect_is_simple_iv_evolution.
1653
1654    FORNOW: A simple evolution of an induction variables in the loop is
1655    considered a polynomial evolution with constant step.  */
1656
1657 bool
1658 vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
1659                              tree * step)
1660 {
1661   tree init_expr;
1662   tree step_expr;
1663
1664   tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
1665
1666   /* When there is no evolution in this loop, the evolution function
1667      is not "simple".  */
1668   if (evolution_part == NULL_TREE)
1669     return false;
1670
1671   /* When the evolution is a polynomial of degree >= 2
1672      the evolution function is not "simple".  */
1673   if (tree_is_chrec (evolution_part))
1674     return false;
1675
1676   step_expr = evolution_part;
1677   init_expr = unshare_expr (initial_condition_in_loop_num (access_fn,
1678                                                            loop_nb));
1679
1680   if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1681     {
1682       fprintf (vect_dump, "step: ");
1683       print_generic_expr (vect_dump, step_expr, TDF_SLIM);
1684       fprintf (vect_dump, ",  init: ");
1685       print_generic_expr (vect_dump, init_expr, TDF_SLIM);
1686     }
1687
1688   *init = init_expr;
1689   *step = step_expr;
1690
1691   if (TREE_CODE (step_expr) != INTEGER_CST)
1692     {
1693       if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1694         fprintf (vect_dump, "step unknown.");
1695       return false;
1696     }
1697
1698   return true;
1699 }
1700
1701
1702 /* Function vectorize_loops.
1703
1704    Entry Point to loop vectorization phase.  */
1705
1706 void
1707 vectorize_loops (struct loops *loops)
1708 {
1709   unsigned int i;
1710   unsigned int num_vectorized_loops = 0;
1711
1712   /* Fix the verbosity level if not defined explicitly by the user.  */
1713   vect_set_dump_settings ();
1714
1715   /*  ----------- Analyze loops. -----------  */
1716
1717   /* If some loop was duplicated, it gets bigger number
1718      than all previously defined loops. This fact allows us to run
1719      only over initial loops skipping newly generated ones.  */
1720   vect_loops_num = loops->num;
1721   for (i = 1; i < vect_loops_num; i++)
1722     {
1723       loop_vec_info loop_vinfo;
1724       struct loop *loop = loops->parray[i];
1725
1726       if (!loop)
1727         continue;
1728
1729       loop_vinfo = vect_analyze_loop (loop);
1730       loop->aux = loop_vinfo;
1731
1732       if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
1733         continue;
1734
1735       vect_transform_loop (loop_vinfo, loops);
1736       num_vectorized_loops++;
1737     }
1738
1739   if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS, UNKNOWN_LOC))
1740     fprintf (vect_dump, "vectorized %u loops in function.\n",
1741              num_vectorized_loops);
1742
1743   /*  ----------- Finalize. -----------  */
1744
1745   for (i = 1; i < vect_loops_num; i++)
1746     {
1747       struct loop *loop = loops->parray[i];
1748       loop_vec_info loop_vinfo;
1749
1750       if (!loop)
1751         continue;
1752       loop_vinfo = loop->aux;
1753       destroy_loop_vec_info (loop_vinfo);
1754       loop->aux = NULL;
1755     }
1756 }