gcc/tree-vectorizer.c

   1 /* Loop Vectorization
   2    Copyright (C) 2003, 2004 Free Software Foundation, Inc.
   3    Contributed by Dorit Naishlos <dorit@il.ibm.com>
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 2, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING.  If not, write to the Free
  19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
  20 02111-1307, USA.  */
  21
  22 /* Loop Vectorization Pass.
  23
  24    This pass tries to vectorize loops. This first implementation focuses on
  25    simple inner-most loops, with no conditional control flow, and a set of
  26    simple operations which vector form can be expressed using existing
  27    tree codes (PLUS, MULT etc).
  28
  29    For example, the vectorizer transforms the following simple loop:
  30
  31         short a[N]; short b[N]; short c[N]; int i;
  32
  33         for (i=0; i<N; i++){
  34           a[i] = b[i] + c[i];
  35         }
  36
  37    as if it was manually vectorized by rewriting the source code into:
  38
  39         typedef int __attribute__((mode(V8HI))) v8hi;
  40         short a[N];  short b[N]; short c[N];   int i;
  41         v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
  42         v8hi va, vb, vc;
  43
  44         for (i=0; i<N/8; i++){
  45           vb = pb[i];
  46           vc = pc[i];
  47           va = vb + vc;
  48           pa[i] = va;
  49         }
  50
  51         The main entry to this pass is vectorize_loops(), in which
  52    the vectorizer applies a set of analyses on a given set of loops,
  53    followed by the actual vectorization transformation for the loops that
  54    had successfully passed the analysis phase.
  55
  56         Throughout this pass we make a distinction between two types of
  57    data: scalars (which are represented by SSA_NAMES), and memory references
  58    ("data-refs"). These two types of data require different handling both
  59    during analysis and transformation. The types of data-refs that the
  60    vectorizer currently supports are ARRAY_REFS which base is an array DECL
  61    (not a pointer), and INDIRECT_REFS through pointers; both array and pointer
  62    accesses are required to have a  simple (consecutive) access pattern.
  63
  64    Analysis phase:
  65    ===============
  66         The driver for the analysis phase is vect_analyze_loop_nest().
  67    It applies a set of analyses, some of which rely on the scalar evolution
  68    analyzer (scev) developed by Sebastian Pop.
  69
  70         During the analysis phase the vectorizer records some information
  71    per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
  72    loop, as well as general information about the loop as a whole, which is
  73    recorded in a "loop_vec_info" struct attached to each loop.
  74
  75    Transformation phase:
  76    =====================
  77         The loop transformation phase scans all the stmts in the loop, and
  78    creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
  79    the loop that needs to be vectorized. It insert the vector code sequence
  80    just before the scalar stmt S, and records a pointer to the vector code
  81    in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
  82    attached to S). This pointer will be used for the vectorization of following
  83    stmts which use the def of stmt S. Stmt S is removed if it writes to memory;
  84    otherwise, we rely on dead code elimination for removing it.
  85
  86         For example, say stmt S1 was vectorized into stmt VS1:
  87
  88    VS1: vb = px[i];
  89    S1:  b = x[i];    STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
  90    S2:  a = b;
  91
  92    To vectorize stmt S2, the vectorizer first finds the stmt that defines
  93    the operand 'b' (S1), and gets the relevant vector def 'vb' from the
  94    vector stmt VS1 pointed by STMT_VINFO_VEC_STMT (stmt_info (S1)). The
  95    resulting sequence would be:
  96
  97    VS1: vb = px[i];
  98    S1:  b = x[i];       STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
  99    VS2: va = vb;
 100    S2:  a = b;          STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2
 101
 102         Operands that are not SSA_NAMEs, are data-refs that appear in
 103    load/store operations (like 'x[i]' in S1), and are handled differently.
 104
 105    Target modeling:
 106    =================
 107         Currently the only target specific information that is used is the
 108    size of the vector (in bytes) - "UNITS_PER_SIMD_WORD". Targets that can
 109    support different sizes of vectors, for now will need to specify one value
 110    for "UNITS_PER_SIMD_WORD". More flexibility will be added in the future.
 111
 112         Since we only vectorize operations which vector form can be
 113    expressed using existing tree codes, to verify that an operation is
 114    supported, the vectorizer checks the relevant optab at the relevant
 115    machine_mode (e.g, add_optab->handlers[(int) V8HImode].insn_code). If
 116    the value found is CODE_FOR_nothing, then there's no target support, and
 117    we can't vectorize the stmt.
 118
 119    For additional information on this project see:
 120    http://gcc.gnu.org/projects/tree-ssa/vectorization.html
 121 */
 122
 123 #include "config.h"
 124 #include "system.h"
 125 #include "coretypes.h"
 126 #include "tm.h"
 127 #include "errors.h"
 128 #include "ggc.h"
 129 #include "tree.h"
 130 #include "target.h"
 131
 132 #include "rtl.h"
 133 #include "basic-block.h"
 134 #include "diagnostic.h"
 135 #include "tree-flow.h"
 136 #include "tree-dump.h"
 137 #include "timevar.h"
 138 #include "cfgloop.h"
 139 #include "cfglayout.h"
 140 #include "expr.h"
 141 #include "optabs.h"
 142 #include "toplev.h"
 143 #include "tree-chrec.h"
 144 #include "tree-data-ref.h"
 145 #include "tree-scalar-evolution.h"
 146 #include "tree-vectorizer.h"
 147 #include "tree-pass.h"
 148 #include "langhooks.h"
 149
 150
 151 /*************************************************************************
 152   Simple Loop Peeling Utilities
 153  *************************************************************************/
 154
 155 /* Entry point for peeling of simple loops.
 156    Peel the first/last iterations of a loop.
 157    It can be used outside of the vectorizer for loops that are simple enough
 158    (see function documentation).  In the vectorizer it is used to peel the
 159    last few iterations when the loop bound is unknown or does not evenly
 160    divide by the vectorization factor, and to peel the first few iterations
 161    to force the alignment of data references in the loop.  */
 162 struct loop *slpeel_tree_peel_loop_to_edge
 163   (struct loop *, struct loops *, edge, tree, tree, bool);
 164 static struct loop *slpeel_tree_duplicate_loop_to_edge_cfg
 165   (struct loop *, struct loops *, edge);
 166 static void slpeel_update_phis_for_duplicate_loop
 167   (struct loop *, struct loop *, bool after);
 168 static void slpeel_update_phi_nodes_for_guard (edge, struct loop *, bool, bool);
 169 static void slpeel_make_loop_iterate_ntimes (struct loop *, tree);
 170 static edge slpeel_add_loop_guard (basic_block, tree, basic_block, basic_block);
 171 static bool slpeel_can_duplicate_loop_p (struct loop *, edge);
 172 static void allocate_new_names (bitmap);
 173 static void rename_use_op (use_operand_p);
 174 static void rename_def_op (def_operand_p, tree);
 175 static void rename_variables_in_bb (basic_block);
 176 static void free_new_names (bitmap);
 177 static void rename_variables_in_loop (struct loop *);
 178 #ifdef ENABLE_CHECKING
 179 static void slpeel_verify_cfg_after_peeling (struct loop *, struct loop *);
 180 #endif
 181
 182
 183 /*************************************************************************
 184   Vectorization Utilities.
 185  *************************************************************************/
 186
 187 /* Main analysis functions.  */
 188 static loop_vec_info vect_analyze_loop (struct loop *);
 189 static loop_vec_info vect_analyze_loop_form (struct loop *);
 190 static bool vect_analyze_data_refs (loop_vec_info);
 191 static bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
 192 static bool vect_analyze_scalar_cycles (loop_vec_info);
 193 static bool vect_analyze_data_ref_accesses (loop_vec_info);
 194 static bool vect_analyze_data_refs_alignment (loop_vec_info);
 195 static bool vect_compute_data_refs_alignment (loop_vec_info);
 196 static bool vect_analyze_operations (loop_vec_info);
 197
 198 /* Main code transformation functions.  */
 199 static void vect_transform_loop (loop_vec_info, struct loops *);
 200 static bool vect_transform_stmt (tree, block_stmt_iterator *);
 201 static bool vectorizable_load (tree, block_stmt_iterator *, tree *);
 202 static bool vectorizable_store (tree, block_stmt_iterator *, tree *);
 203 static bool vectorizable_operation (tree, block_stmt_iterator *, tree *);
 204 static bool vectorizable_assignment (tree, block_stmt_iterator *, tree *);
 205 static enum dr_alignment_support vect_supportable_dr_alignment
 206   (struct data_reference *);
 207 static void vect_align_data_ref (tree);
 208 static void vect_enhance_data_refs_alignment (loop_vec_info);
 209
 210 /* Utility functions for the analyses.  */
 211 static bool vect_is_simple_use (tree , struct loop *, tree *);
 212 static bool exist_non_indexing_operands_for_use_p (tree, tree);
 213 static bool vect_is_simple_iv_evolution (unsigned, tree, tree *, tree *, bool);
 214 static void vect_mark_relevant (varray_type *, tree);
 215 static bool vect_stmt_relevant_p (tree, loop_vec_info);
 216 static tree vect_get_loop_niters (struct loop *, tree *);
 217 static bool vect_compute_data_ref_alignment (struct data_reference *);
 218 static bool vect_analyze_data_ref_access (struct data_reference *);
 219 static bool vect_can_force_dr_alignment_p (tree, unsigned int);
 220 static struct data_reference * vect_analyze_pointer_ref_access
 221   (tree, tree, bool);
 222 static bool vect_can_advance_ivs_p (struct loop *);
 223 static tree vect_get_base_and_offset (struct data_reference *, tree, tree,
 224                                       loop_vec_info, tree *, tree *, tree *,
 225                                       bool*);
 226 static struct data_reference * vect_analyze_pointer_ref_access
 227   (tree, tree, bool);
 228 static tree vect_get_ptr_offset (tree, tree, tree *);
 229 static tree vect_get_memtag_and_dr
 230   (tree, tree, bool, loop_vec_info, tree, struct data_reference **);
 231 static bool vect_analyze_offset_expr (tree, struct loop *, tree, tree *,
 232                                       tree *, tree *);
 233
 234 /* Utility functions for the code transformation.  */
 235 static tree vect_create_destination_var (tree, tree);
 236 static tree vect_create_data_ref_ptr
 237   (tree, block_stmt_iterator *, tree, tree *, bool);
 238 static tree vect_create_index_for_vector_ref
 239   (struct loop *, block_stmt_iterator *);
 240 static tree vect_create_addr_base_for_vector_ref (tree, tree *, tree);
 241 static tree get_vectype_for_scalar_type (tree);
 242 static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
 243 static tree vect_get_vec_def_for_operand (tree, tree);
 244 static tree vect_init_vector (tree, tree);
 245 static void vect_finish_stmt_generation
 246   (tree stmt, tree vec_stmt, block_stmt_iterator *bsi);
 247
 248 /* Utility function dealing with loop peeling (not peeling itself).  */
 249 static void vect_generate_tmps_on_preheader
 250   (loop_vec_info, tree *, tree *, tree *);
 251 static tree vect_build_loop_niters (loop_vec_info);
 252 static void vect_update_ivs_after_vectorizer (struct loop *, tree, edge);
 253 static tree vect_gen_niters_for_prolog_loop (loop_vec_info, tree);
 254 static void vect_update_inits_of_dr (struct data_reference *, tree niters);
 255 static void vect_update_inits_of_drs (loop_vec_info, tree);
 256 static void vect_do_peeling_for_alignment (loop_vec_info, struct loops *);
 257 static void vect_do_peeling_for_loop_bound
 258   (loop_vec_info, tree *, struct loops *);
 259
 260 /* Utilities for creation and deletion of vec_info structs.  */
 261 loop_vec_info new_loop_vec_info (struct loop *loop);
 262 void destroy_loop_vec_info (loop_vec_info);
 263 stmt_vec_info new_stmt_vec_info (tree stmt, struct loop *loop);
 264
 265 static bool vect_debug_stats (struct loop *loop);
 266 static bool vect_debug_details (struct loop *loop);
 267
 268 \f
 269 /*************************************************************************
 270   Simple Loop Peeling Utilities
 271
 272   Utilities to support loop peeling for vectorization purposes.
 273  *************************************************************************/
 274
 275
 276 /* For each definition in DEFINITIONS this function allocates
 277    new ssa name.  */
 278
 279 static void
 280 allocate_new_names (bitmap definitions)
 281 {
 282   unsigned ver;
 283   bitmap_iterator bi;
 284
 285   EXECUTE_IF_SET_IN_BITMAP (definitions, 0, ver, bi)
 286     {
 287       tree def = ssa_name (ver);
 288       tree *new_name_ptr = xmalloc (sizeof (tree));
 289
 290       bool abnormal = SSA_NAME_OCCURS_IN_ABNORMAL_PHI (def);
 291
 292       *new_name_ptr = duplicate_ssa_name (def, SSA_NAME_DEF_STMT (def));
 293       SSA_NAME_OCCURS_IN_ABNORMAL_PHI (*new_name_ptr) = abnormal;
 294
 295       SSA_NAME_AUX (def) = new_name_ptr;
 296     }
 297 }
 298
 299
 300 /* Renames the use *OP_P.  */
 301
 302 static void
 303 rename_use_op (use_operand_p op_p)
 304 {
 305   tree *new_name_ptr;
 306
 307   if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
 308     return;
 309
 310   new_name_ptr = SSA_NAME_AUX (USE_FROM_PTR (op_p));
 311
 312   /* Something defined outside of the loop.  */
 313   if (!new_name_ptr)
 314     return;
 315
 316   /* An ordinary ssa name defined in the loop.  */
 317
 318   SET_USE (op_p, *new_name_ptr);
 319 }
 320
 321
 322 /* Renames the def *OP_P in statement STMT.  */
 323
 324 static void
 325 rename_def_op (def_operand_p op_p, tree stmt)
 326 {
 327   tree *new_name_ptr;
 328
 329   if (TREE_CODE (DEF_FROM_PTR (op_p)) != SSA_NAME)
 330     return;
 331
 332   new_name_ptr = SSA_NAME_AUX (DEF_FROM_PTR (op_p));
 333
 334   /* Something defined outside of the loop.  */
 335   if (!new_name_ptr)
 336     return;
 337
 338   /* An ordinary ssa name defined in the loop.  */
 339
 340   SET_DEF (op_p, *new_name_ptr);
 341   SSA_NAME_DEF_STMT (DEF_FROM_PTR (op_p)) = stmt;
 342 }
 343
 344
 345 /* Renames the variables in basic block BB.  */
 346
 347 static void
 348 rename_variables_in_bb (basic_block bb)
 349 {
 350   tree phi;
 351   block_stmt_iterator bsi;
 352   tree stmt;
 353   stmt_ann_t ann;
 354   use_optype uses;
 355   vuse_optype vuses;
 356   def_optype defs;
 357   v_may_def_optype v_may_defs;
 358   v_must_def_optype v_must_defs;
 359   unsigned i;
 360   edge e;
 361   edge_iterator ei;
 362   struct loop *loop = bb->loop_father;
 363
 364   for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
 365     rename_def_op (PHI_RESULT_PTR (phi), phi);
 366
 367   for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
 368     {
 369       stmt = bsi_stmt (bsi);
 370       get_stmt_operands (stmt);
 371       ann = stmt_ann (stmt);
 372
 373       uses = USE_OPS (ann);
 374       for (i = 0; i < NUM_USES (uses); i++)
 375         rename_use_op (USE_OP_PTR (uses, i));
 376
 377       defs = DEF_OPS (ann);
 378       for (i = 0; i < NUM_DEFS (defs); i++)
 379         rename_def_op (DEF_OP_PTR (defs, i), stmt);
 380
 381       vuses = VUSE_OPS (ann);
 382       for (i = 0; i < NUM_VUSES (vuses); i++)
 383         rename_use_op (VUSE_OP_PTR (vuses, i));
 384
 385       v_may_defs = V_MAY_DEF_OPS (ann);
 386       for (i = 0; i < NUM_V_MAY_DEFS (v_may_defs); i++)
 387         {
 388           rename_use_op (V_MAY_DEF_OP_PTR (v_may_defs, i));
 389           rename_def_op (V_MAY_DEF_RESULT_PTR (v_may_defs, i), stmt);
 390         }
 391
 392       v_must_defs = V_MUST_DEF_OPS (ann);
 393       for (i = 0; i < NUM_V_MUST_DEFS (v_must_defs); i++)
 394         {
 395           rename_use_op (V_MUST_DEF_KILL_PTR (v_must_defs, i));
 396           rename_def_op (V_MUST_DEF_RESULT_PTR (v_must_defs, i), stmt);
 397         }
 398     }
 399
 400   FOR_EACH_EDGE (e, ei, bb->succs)
 401     {
 402       if (!flow_bb_inside_loop_p (loop, e->dest))
 403         continue;
 404       for (phi = phi_nodes (e->dest); phi; phi = PHI_CHAIN (phi))
 405         rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e));
 406     }
 407 }
 408
 409
 410 /* Releases the structures holding the new ssa names.  */
 411
 412 static void
 413 free_new_names (bitmap definitions)
 414 {
 415   unsigned ver;
 416   bitmap_iterator bi;
 417
 418   EXECUTE_IF_SET_IN_BITMAP (definitions, 0, ver, bi)
 419     {
 420       tree def = ssa_name (ver);
 421
 422       if (SSA_NAME_AUX (def))
 423         {
 424           free (SSA_NAME_AUX (def));
 425           SSA_NAME_AUX (def) = NULL;
 426         }
 427     }
 428 }
 429
 430
 431 /* Renames variables in new generated LOOP.  */
 432
 433 static void
 434 rename_variables_in_loop (struct loop *loop)
 435 {
 436   unsigned i;
 437   basic_block *bbs;
 438
 439   bbs = get_loop_body (loop);
 440
 441   for (i = 0; i < loop->num_nodes; i++)
 442     rename_variables_in_bb (bbs[i]);
 443
 444   free (bbs);
 445 }
 446
 447
 448 /* Update the PHI nodes of NEW_LOOP.
 449
 450    NEW_LOOP is a duplicate of ORIG_LOOP.
 451    AFTER indicates whether NEW_LOOP executes before or after ORIG_LOOP:
 452    AFTER is true if NEW_LOOP executes after ORIG_LOOP, and false if it
 453    executes before it.  */
 454
 455 static void
 456 slpeel_update_phis_for_duplicate_loop (struct loop *orig_loop,
 457                                        struct loop *new_loop, bool after)
 458 {
 459   tree *new_name_ptr, new_ssa_name;
 460   tree phi_new, phi_orig;
 461   tree def;
 462   edge orig_loop_latch = loop_latch_edge (orig_loop);
 463   edge orig_entry_e = loop_preheader_edge (orig_loop);
 464   edge new_loop_exit_e = new_loop->exit_edges[0];
 465   edge new_loop_entry_e = loop_preheader_edge (new_loop);
 466   edge entry_arg_e = (after ? orig_loop_latch : orig_entry_e);
 467
 468   /*
 469      step 1. For each loop-header-phi:
 470              Add the first phi argument for the phi in NEW_LOOP
 471             (the one associated with the entry of NEW_LOOP)
 472
 473      step 2. For each loop-header-phi:
 474              Add the second phi argument for the phi in NEW_LOOP
 475             (the one associated with the latch of NEW_LOOP)
 476
 477      step 3. Update the phis in the successor block of NEW_LOOP.
 478
 479         case 1: NEW_LOOP was placed before ORIG_LOOP:
 480                 The successor block of NEW_LOOP is the header of ORIG_LOOP.
 481                 Updating the phis in the successor block can therefore be done
 482                 along with the scanning of the loop header phis, because the
 483                 header blocks of ORIG_LOOP and NEW_LOOP have exactly the same
 484                 phi nodes, organized in the same order.
 485
 486         case 2: NEW_LOOP was placed after ORIG_LOOP:
 487                 The successor block of NEW_LOOP is the original exit block of
 488                 ORIG_LOOP - the phis to be updated are the loop-closed-ssa phis.
 489                 We postpone updating these phis to a later stage (when
 490                 loop guards are added).
 491    */
 492
 493
 494   /* Scan the phis in the headers of the old and new loops
 495      (they are organized in exactly the same order).  */
 496
 497   for (phi_new = phi_nodes (new_loop->header),
 498        phi_orig = phi_nodes (orig_loop->header);
 499        phi_new && phi_orig;
 500        phi_new = PHI_CHAIN (phi_new), phi_orig = PHI_CHAIN (phi_orig))
 501     {
 502       /* step 1.  */
 503       def = PHI_ARG_DEF_FROM_EDGE (phi_orig, entry_arg_e);
 504       add_phi_arg (phi_new, def, new_loop_entry_e);
 505
 506       /* step 2.  */
 507       def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_loop_latch);
 508       if (TREE_CODE (def) != SSA_NAME)
 509         continue;
 510
 511       new_name_ptr = SSA_NAME_AUX (def);
 512       if (!new_name_ptr)
 513         /* Something defined outside of the loop.  */
 514         continue;
 515
 516       /* An ordinary ssa name defined in the loop.  */
 517       new_ssa_name = *new_name_ptr;
 518       add_phi_arg (phi_new, new_ssa_name, loop_latch_edge (new_loop));
 519
 520       /* step 3 (case 1).  */
 521       if (!after)
 522         {
 523           gcc_assert (new_loop_exit_e == orig_entry_e);
 524           SET_PHI_ARG_DEF (phi_orig,
 525                            phi_arg_from_edge (phi_orig, new_loop_exit_e),
 526                            new_ssa_name);
 527         }
 528     }
 529 }
 530
 531
 532 /* Update PHI nodes for a guard of the LOOP.
 533
 534    Input:
 535    - LOOP, GUARD_EDGE: LOOP is a loop for which we added guard code that
 536         controls whether LOOP is to be executed.  GUARD_EDGE is the edge that
 537         originates from the guard-bb, skips LOOP and reaches the (unique) exit
 538         bb of LOOP.  This loop-exit-bb is an empty bb with one successor.
 539         We denote this bb NEW_MERGE_BB because it had a single predecessor (the
 540         LOOP header) before the guard code was added, and now it became a merge
 541         point of two paths - the path that ends with the LOOP exit-edge, and
 542         the path that ends with GUARD_EDGE.
 543
 544         This function creates and updates the relevant phi nodes to account for
 545         the new incoming edge (GUARD_EDGE) into NEW_MERGE_BB:
 546         1. Create phi nodes at NEW_MERGE_BB.
 547         2. Update the phi nodes at the successor of NEW_MERGE_BB (denoted
 548            UPDATE_BB).  UPDATE_BB was the exit-bb of LOOP before NEW_MERGE_BB
 549            was added:
 550
 551         ===> The CFG before the guard-code was added:
 552         LOOP_header_bb:
 553           if (exit_loop) goto update_bb : LOOP_header_bb
 554         update_bb:
 555
 556         ==> The CFG after the guard-code was added:
 557         guard_bb:
 558           if (LOOP_guard_condition) goto new_merge_bb : LOOP_header_bb
 559         LOOP_header_bb:
 560           if (exit_loop_condition) goto new_merge_bb : LOOP_header_bb
 561         new_merge_bb:
 562           goto update_bb
 563         update_bb:
 564
 565    - ENTRY_PHIS: If ENTRY_PHIS is TRUE, this indicates that the phis in
 566         UPDATE_BB are loop entry phis, like the phis in the LOOP header,
 567         organized in the same order.
 568         If ENTRY_PHIs is FALSE, this indicates that the phis in UPDATE_BB are
 569         loop exit phis.
 570
 571    - IS_NEW_LOOP: TRUE if LOOP is a new loop (a duplicated copy of another
 572         "original" loop).  FALSE if LOOP is an original loop (not a newly
 573         created copy).  The SSA_NAME_AUX fields of the defs in the original
 574         loop are the corresponding new ssa-names used in the new duplicated
 575         loop copy.  IS_NEW_LOOP indicates which of the two args of the phi
 576         nodes in UPDATE_BB takes the original ssa-name, and which takes the
 577         new name: If IS_NEW_LOOP is TRUE, the phi-arg that is associated with
 578         the LOOP-exit-edge takes the new-name, and the phi-arg that is
 579         associated with GUARD_EDGE takes the original name.  If IS_NEW_LOOP is
 580         FALSE, it's the other way around.
 581   */
 582
 583 static void
 584 slpeel_update_phi_nodes_for_guard (edge guard_edge,
 585                                    struct loop *loop,
 586                                    bool entry_phis,
 587                                    bool is_new_loop)
 588 {
 589   tree orig_phi, new_phi, update_phi;
 590   tree guard_arg, loop_arg;
 591   basic_block new_merge_bb = guard_edge->dest;
 592   edge e = EDGE_SUCC (new_merge_bb, 0);
 593   basic_block update_bb = e->dest;
 594   basic_block orig_bb = (entry_phis ? loop->header : update_bb);
 595
 596   for (orig_phi = phi_nodes (orig_bb), update_phi = phi_nodes (update_bb);
 597        orig_phi && update_phi;
 598        orig_phi = PHI_CHAIN (orig_phi), update_phi = PHI_CHAIN (update_phi))
 599     {
 600       /* 1. Generate new phi node in NEW_MERGE_BB:  */
 601       new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
 602                                  new_merge_bb);
 603
 604       /* 2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
 605             of LOOP. Set the two phi args in NEW_PHI for these edges:  */
 606       if (entry_phis)
 607         {
 608           loop_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi,
 609                                             EDGE_SUCC (loop->latch, 0));
 610           guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, loop->entry_edges[0]);
 611         }
 612       else /* exit phis */
 613         {
 614           tree orig_def = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
 615           tree *new_name_ptr = SSA_NAME_AUX (orig_def);
 616           tree new_name;
 617
 618           if (new_name_ptr)
 619             new_name = *new_name_ptr;
 620           else
 621             /* Something defined outside of the loop  */
 622             new_name = orig_def;
 623
 624           if (is_new_loop)
 625             {
 626               guard_arg = orig_def;
 627               loop_arg = new_name;
 628             }
 629           else
 630             {
 631               guard_arg = new_name;
 632               loop_arg = orig_def;
 633             }
 634         }
 635       add_phi_arg (new_phi, loop_arg, loop->exit_edges[0]);
 636       add_phi_arg (new_phi, guard_arg, guard_edge);
 637
 638       /* 3. Update phi in successor block.  */
 639       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == loop_arg
 640                   || PHI_ARG_DEF_FROM_EDGE (update_phi, e) == guard_arg);
 641       SET_PHI_ARG_DEF (update_phi, phi_arg_from_edge (update_phi, e),
 642                        PHI_RESULT (new_phi));
 643     }
 644
 645   set_phi_nodes (new_merge_bb, phi_reverse (phi_nodes (new_merge_bb)));
 646 }
 647
 648
 649 /* Make the LOOP iterate NITERS times. This is done by adding a new IV
 650    that starts at zero, increases by one and its limit is NITERS.
 651
 652    Assumption: the exit-condition of LOOP is the last stmt in the loop.  */
 653
 654 static void
 655 slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
 656 {
 657   tree indx_before_incr, indx_after_incr, cond_stmt, cond;
 658   tree orig_cond;
 659   edge exit_edge = loop->exit_edges[0];
 660   block_stmt_iterator loop_exit_bsi = bsi_last (exit_edge->src);
 661   tree begin_label = tree_block_label (loop->latch);
 662   tree exit_label = tree_block_label (loop->single_exit->dest);
 663   tree init = build_int_cst (TREE_TYPE (niters), 0);
 664   tree step = build_int_cst (TREE_TYPE (niters), 1);
 665   tree then_label;
 666   tree else_label;
 667
 668   orig_cond = get_loop_exit_condition (loop);
 669   gcc_assert (orig_cond);
 670   create_iv (init, step, NULL_TREE, loop,
 671              &loop_exit_bsi, false, &indx_before_incr, &indx_after_incr);
 672
 673   /* CREATE_IV uses BSI_INSERT with TSI_NEW_STMT, so we want to get
 674      back to the exit condition statement.  */
 675   bsi_next (&loop_exit_bsi);
 676   gcc_assert (bsi_stmt (loop_exit_bsi) == orig_cond);
 677
 678   if (exit_edge->flags & EDGE_TRUE_VALUE) /* 'then' edge exits the loop.  */
 679     {
 680       cond = build2 (GE_EXPR, boolean_type_node, indx_after_incr, niters);
 681       then_label = build1 (GOTO_EXPR, void_type_node, exit_label);
 682       else_label = build1 (GOTO_EXPR, void_type_node, begin_label);
 683     }
 684   else /* 'then' edge loops back.  */
 685     {
 686       cond = build2 (LT_EXPR, boolean_type_node, indx_after_incr, niters);
 687       then_label = build1 (GOTO_EXPR, void_type_node, begin_label);
 688       else_label = build1 (GOTO_EXPR, void_type_node, exit_label);
 689     }
 690
 691   cond_stmt = build3 (COND_EXPR, TREE_TYPE (orig_cond), cond,
 692                      then_label, else_label);
 693   bsi_insert_before (&loop_exit_bsi, cond_stmt, BSI_SAME_STMT);
 694
 695   /* Remove old loop exit test:  */
 696   bsi_remove (&loop_exit_bsi);
 697
 698   if (vect_debug_stats (loop) || vect_debug_details (loop))
 699     print_generic_expr (dump_file, cond_stmt, TDF_SLIM);
 700
 701   loop->nb_iterations = niters;
 702 }
 703
 704
 705 /* Given LOOP this function generates a new copy of it and puts it
 706    on E which is either the entry or exit of LOOP.  */
 707
 708 static struct loop *
 709 slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, struct loops *loops,
 710                                         edge e)
 711 {
 712   struct loop *new_loop;
 713   basic_block *new_bbs, *bbs;
 714   bool at_exit;
 715   bool was_imm_dom;
 716   basic_block exit_dest;
 717   tree phi, phi_arg;
 718
 719   at_exit = (e == loop->exit_edges[0]);
 720   if (!at_exit && e != loop_preheader_edge (loop))
 721     {
 722       if (dump_file && (dump_flags & TDF_DETAILS))
 723           fprintf (dump_file, "Edge is not an entry nor an exit edge.\n");
 724       return NULL;
 725     }
 726
 727   bbs = get_loop_body (loop);
 728
 729   /* Check whether duplication is possible.  */
 730   if (!can_copy_bbs_p (bbs, loop->num_nodes))
 731     {
 732       if (vect_debug_stats (loop) || vect_debug_details (loop))
 733           fprintf (dump_file, "Cannot copy basic blocks.\n");
 734       free (bbs);
 735       return NULL;
 736     }
 737
 738   /* Generate new loop structure.  */
 739   new_loop = duplicate_loop (loops, loop, loop->outer);
 740   if (!new_loop)
 741     {
 742       if (vect_debug_stats (loop) || vect_debug_details (loop))
 743           fprintf (dump_file, "duplicate_loop returns NULL.\n");
 744       free (bbs);
 745       return NULL;
 746     }
 747
 748   exit_dest = loop->exit_edges[0]->dest;
 749   was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
 750                                           exit_dest) == loop->header ?
 751                  true : false);
 752
 753   new_bbs = xmalloc (sizeof (basic_block) * loop->num_nodes);
 754
 755   copy_bbs (bbs, loop->num_nodes, new_bbs, NULL, 0, NULL, NULL);
 756
 757   /* Duplicating phi args at exit bbs as coming
 758      also from exit of duplicated loop.  */
 759   for (phi = phi_nodes (exit_dest); phi; phi = PHI_CHAIN (phi))
 760     {
 761       phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, loop->exit_edges[0]);
 762       if (phi_arg)
 763         {
 764           edge new_loop_exit_edge;
 765
 766           if (EDGE_SUCC (new_loop->header, 0)->dest == new_loop->latch)
 767             new_loop_exit_edge = EDGE_SUCC (new_loop->header, 1);
 768           else
 769             new_loop_exit_edge = EDGE_SUCC (new_loop->header, 0);
 770
 771           add_phi_arg (phi, phi_arg, new_loop_exit_edge);
 772         }
 773     }
 774
 775   if (at_exit) /* Add the loop copy at exit.  */
 776     {
 777       redirect_edge_and_branch_force (e, new_loop->header);
 778       set_immediate_dominator (CDI_DOMINATORS, new_loop->header, e->src);
 779       if (was_imm_dom)
 780         set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_loop->header);
 781     }
 782   else /* Add the copy at entry.  */
 783     {
 784       edge new_exit_e;
 785       edge entry_e = loop_preheader_edge (loop);
 786       basic_block preheader = entry_e->src;
 787
 788       if (!flow_bb_inside_loop_p (new_loop,
 789                                   EDGE_SUCC (new_loop->header, 0)->dest))
 790         new_exit_e = EDGE_SUCC (new_loop->header, 0);
 791       else
 792         new_exit_e = EDGE_SUCC (new_loop->header, 1);
 793
 794       redirect_edge_and_branch_force (new_exit_e, loop->header);
 795       set_immediate_dominator (CDI_DOMINATORS, loop->header,
 796                                new_exit_e->src);
 797
 798       /* We have to add phi args to the loop->header here as coming
 799          from new_exit_e edge.  */
 800       for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
 801         {
 802           phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, entry_e);
 803           if (phi_arg)
 804             add_phi_arg (phi, phi_arg, new_exit_e);
 805         }
 806
 807       redirect_edge_and_branch_force (entry_e, new_loop->header);
 808       set_immediate_dominator (CDI_DOMINATORS, new_loop->header, preheader);
 809     }
 810
 811   flow_loop_scan (new_loop, LOOP_ALL);
 812   flow_loop_scan (loop, LOOP_ALL);
 813   free (new_bbs);
 814   free (bbs);
 815
 816   return new_loop;
 817 }
 818
 819
 820 /* Given the condition statement COND, put it as the last statement
 821    of GUARD_BB; EXIT_BB is the basic block to skip the loop;
 822    Assumes that this is the single exit of the guarded loop.
 823    Returns the skip edge.  */
 824
 825 static edge
 826 slpeel_add_loop_guard (basic_block guard_bb, tree cond, basic_block exit_bb,
 827                         basic_block dom_bb)
 828 {
 829   block_stmt_iterator bsi;
 830   edge new_e, enter_e;
 831   tree cond_stmt, then_label, else_label;
 832
 833   enter_e = EDGE_SUCC (guard_bb, 0);
 834   enter_e->flags &= ~EDGE_FALLTHRU;
 835   enter_e->flags |= EDGE_FALSE_VALUE;
 836   bsi = bsi_last (guard_bb);
 837
 838   then_label = build1 (GOTO_EXPR, void_type_node,
 839                        tree_block_label (exit_bb));
 840   else_label = build1 (GOTO_EXPR, void_type_node,
 841                        tree_block_label (enter_e->dest));
 842   cond_stmt = build3 (COND_EXPR, void_type_node, cond,
 843                      then_label, else_label);
 844   bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT);
 845   /* Add new edge to connect entry block to the second loop.  */
 846   new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);
 847   set_immediate_dominator (CDI_DOMINATORS, exit_bb, dom_bb);
 848   return new_e;
 849 }
 850
 851
 852 /* This function verifies that the following restrictions apply to LOOP:
 853    (1) it is innermost
 854    (2) it consists of exactly 2 basic blocks - header, and an empty latch.
 855    (3) it is single entry, single exit
 856    (4) its exit condition is the last stmt in the header
 857    (5) E is the entry/exit edge of LOOP.
 858  */
 859
 860 static bool
 861 slpeel_can_duplicate_loop_p (struct loop *loop, edge e)
 862 {
 863   edge exit_e = loop->exit_edges [0];
 864   edge entry_e = loop_preheader_edge (loop);
 865   tree orig_cond = get_loop_exit_condition (loop);
 866   block_stmt_iterator loop_exit_bsi = bsi_last (exit_e->src);
 867
 868   if (any_marked_for_rewrite_p ())
 869     return false;
 870
 871   if (loop->inner
 872       /* All loops have an outer scope; the only case loop->outer is NULL is for
 873          the function itself.  */
 874       || !loop->outer
 875       || loop->num_nodes != 2
 876       || !empty_block_p (loop->latch)
 877       || loop->num_exits != 1
 878       || loop->num_entries != 1
 879       /* Verify that new loop exit condition can be trivially modified.  */
 880       || (!orig_cond || orig_cond != bsi_stmt (loop_exit_bsi))
 881       || (e != exit_e && e != entry_e))
 882     return false;
 883
 884   return true;
 885 }
 886
 887 #ifdef ENABLE_CHECKING
 888 static void
 889 slpeel_verify_cfg_after_peeling (struct loop *first_loop,
 890                                  struct loop *second_loop)
 891 {
 892   basic_block loop1_exit_bb = first_loop->exit_edges[0]->dest;
 893   basic_block loop2_entry_bb = second_loop->pre_header;
 894   basic_block loop1_entry_bb = loop_preheader_edge (first_loop)->src;
 895
 896   /* A guard that controls whether the second_loop is to be executed or skipped
 897      is placed in first_loop->exit.  first_loopt->exit therefore has two
 898      successors - one is the preheader of second_loop, and the other is a bb
 899      after second_loop.
 900    */
 901   gcc_assert (EDGE_COUNT (loop1_exit_bb->succs) == 2);
 902
 903
 904   /* 1. Verify that one of the successors of first_loopt->exit is the preheader
 905         of second_loop.  */
 906
 907   /* The preheader of new_loop is expected to have two predessors:
 908      first_loop->exit and the block that precedes first_loop.  */
 909
 910   gcc_assert (EDGE_COUNT (loop2_entry_bb->preds) == 2
 911               && ((EDGE_PRED (loop2_entry_bb, 0)->src == loop1_exit_bb
 912                    && EDGE_PRED (loop2_entry_bb, 1)->src == loop1_entry_bb)
 913                || (EDGE_PRED (loop2_entry_bb, 1)->src ==  loop1_exit_bb
 914                    && EDGE_PRED (loop2_entry_bb, 0)->src == loop1_entry_bb)));
 915
 916   /* Verify that the other successor of first_loopt->exit is after the
 917      second_loop.  */
 918   /* TODO */
 919 }
 920 #endif
 921
 922 /* Function slpeel_tree_peel_loop_to_edge.
 923
 924    Peel the first (last) iterations of LOOP into a new prolog (epilog) loop
 925    that is placed on the entry (exit) edge E of LOOP. After this transformation
 926    we have two loops one after the other - first-loop iterates FIRST_NITERS
 927    times, and second-loop iterates the remainder NITERS - FIRST_NITERS times.
 928
 929    Input:
 930    - LOOP: the loop to be peeled.
 931    - E: the exit or entry edge of LOOP.
 932         If it is the entry edge, we peel the first iterations of LOOP. In this
 933         case first-loop is LOOP, and second-loop is the newly created loop.
 934         If it is the exit edge, we peel the last iterations of LOOP. In this
 935         case, first-loop is the newly created loop, and second-loop is LOOP.
 936    - NITERS: the number of iterations that LOOP iterates.
 937    - FIRST_NITERS: the number of iterations that the first-loop should iterate.
 938    - UPDATE_FIRST_LOOP_COUNT:  specified whether this function is responsible
 939         for updating the loop bound of the first-loop to FIRST_NITERS.  If it
 940         is false, the caller of this function may want to take care of this
 941         (this can be useful if we don't want new stmts added to first-loop).
 942
 943    Output:
 944    The function returns a pointer to the new loop-copy, or NULL if it failed
 945    to perform the transformation.
 946
 947    The function generates two if-then-else guards: one before the first loop,
 948    and the other before the second loop:
 949    The first guard is:
 950      if (FIRST_NITERS == 0) then skip the first loop,
 951      and go directly to the second loop.
 952    The second guard is:
 953      if (FIRST_NITERS == NITERS) then skip the second loop.
 954
 955    FORNOW only simple loops are supported (see slpeel_can_duplicate_loop_p).
 956    FORNOW the resulting code will not be in loop-closed-ssa form.
 957 */
 958
 959 struct loop*
 960 slpeel_tree_peel_loop_to_edge (struct loop *loop, struct loops *loops,
 961                                edge e, tree first_niters,
 962                                tree niters, bool update_first_loop_count)
 963 {
 964   struct loop *new_loop = NULL, *first_loop, *second_loop;
 965   edge skip_e;
 966   tree pre_condition;
 967   bitmap definitions;
 968   basic_block bb_before_second_loop, bb_after_second_loop;
 969   basic_block bb_before_first_loop;
 970   basic_block bb_between_loops;
 971   edge exit_e = loop->exit_edges [0];
 972
 973   if (!slpeel_can_duplicate_loop_p (loop, e))
 974     return NULL;
 975
 976   /* We have to initialize cfg_hooks. Then, when calling
 977    cfg_hooks->split_edge, the function tree_split_edge
 978    is actually called and, when calling cfg_hooks->duplicate_block,
 979    the function tree_duplicate_bb is called.  */
 980   tree_register_cfg_hooks ();
 981
 982
 983   /* 1. Generate a copy of LOOP and put it on E (E is the entry/exit of LOOP).
 984         Resulting CFG would be:
 985
 986         first_loop:
 987         do {
 988         } while ...
 989
 990         second_loop:
 991         do {
 992         } while ...
 993
 994         orig_exit_bb:
 995    */
 996
 997   if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, loops, e)))
 998     {
 999       if (vect_debug_stats (loop) || vect_debug_details (loop))
1000         fprintf (dump_file, "tree_duplicate_loop_to_edge_cfg failed.\n");
1001       return NULL;
1002     }
1003
1004   if (e == exit_e)
1005     {
1006       /* NEW_LOOP was placed after LOOP.  */
1007       first_loop = loop;
1008       second_loop = new_loop;
1009     }
1010   else
1011     {
1012       /* NEW_LOOP was placed before LOOP.  */
1013       first_loop = new_loop;
1014       second_loop = loop;
1015     }
1016
1017   definitions = marked_ssa_names ();
1018   allocate_new_names (definitions);
1019   slpeel_update_phis_for_duplicate_loop (loop, new_loop, e == exit_e);
1020   rename_variables_in_loop (new_loop);
1021
1022
1023   /* 2. Add the guard that controls whether the first loop is executed.
1024         Resulting CFG would be:
1025
1026         bb_before_first_loop:
1027         if (FIRST_NITERS == 0) GOTO bb_before_second_loop
1028                                GOTO first-loop
1029
1030         first_loop:
1031         do {
1032         } while ...
1033
1034         bb_before_second_loop:
1035
1036         second_loop:
1037         do {
1038         } while ...
1039
1040         orig_exit_bb:
1041    */
1042
1043   bb_before_first_loop = split_edge (loop_preheader_edge (first_loop));
1044   add_bb_to_loop (bb_before_first_loop, first_loop->outer);
1045   bb_before_second_loop = split_edge (first_loop->exit_edges[0]);
1046   add_bb_to_loop (bb_before_second_loop, first_loop->outer);
1047   flow_loop_scan (first_loop, LOOP_ALL);
1048   flow_loop_scan (second_loop, LOOP_ALL);
1049
1050   pre_condition =
1051         build2 (LE_EXPR, boolean_type_node, first_niters, integer_zero_node);
1052   skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
1053                                   bb_before_second_loop, bb_before_first_loop);
1054   slpeel_update_phi_nodes_for_guard (skip_e, first_loop, true /* entry-phis */,
1055                                      first_loop == new_loop);
1056
1057
1058   /* 3. Add the guard that controls whether the second loop is executed.
1059         Resulting CFG would be:
1060
1061         bb_before_first_loop:
1062         if (FIRST_NITERS == 0) GOTO bb_before_second_loop (skip first loop)
1063                                GOTO first-loop
1064
1065         first_loop:
1066         do {
1067         } while ...
1068
1069         bb_between_loops:
1070         if (FIRST_NITERS == NITERS) GOTO bb_after_second_loop (skip second loop)
1071                                     GOTO bb_before_second_loop
1072
1073         bb_before_second_loop:
1074
1075         second_loop:
1076         do {
1077         } while ...
1078
1079         bb_after_second_loop:
1080
1081         orig_exit_bb:
1082    */
1083
1084   bb_between_loops = split_edge (first_loop->exit_edges[0]);
1085   add_bb_to_loop (bb_between_loops, first_loop->outer);
1086   bb_after_second_loop = split_edge (second_loop->exit_edges[0]);
1087   add_bb_to_loop (bb_after_second_loop, second_loop->outer);
1088   flow_loop_scan (first_loop, LOOP_ALL);
1089   flow_loop_scan (second_loop, LOOP_ALL);
1090
1091   pre_condition = build2 (EQ_EXPR, boolean_type_node, first_niters, niters);
1092   skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition,
1093                                   bb_after_second_loop, bb_before_first_loop);
1094   slpeel_update_phi_nodes_for_guard (skip_e, second_loop, false /* exit-phis */,
1095                                      second_loop == new_loop);
1096
1097   /* Flow loop scan does not update loop->single_exit field.  */
1098   first_loop->single_exit = first_loop->exit_edges[0];
1099   second_loop->single_exit = second_loop->exit_edges[0];
1100
1101   /* 4. Make first-loop iterate FIRST_NITERS times, if requested.
1102    */
1103   if (update_first_loop_count)
1104     slpeel_make_loop_iterate_ntimes (first_loop, first_niters);
1105
1106   free_new_names (definitions);
1107   BITMAP_XFREE (definitions);
1108   unmark_all_for_rewrite ();
1109
1110   return new_loop;
1111 }
1112
1113 \f
1114 /* Here the proper Vectorizer starts.  */
1115
1116 /*************************************************************************
1117   Vectorization Utilities.
1118  *************************************************************************/
1119
1120 /* Function new_stmt_vec_info.
1121
1122    Create and initialize a new stmt_vec_info struct for STMT.  */
1123
1124 stmt_vec_info
1125 new_stmt_vec_info (tree stmt, struct loop *loop)
1126 {
1127   stmt_vec_info res;
1128   res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
1129
1130   STMT_VINFO_TYPE (res) = undef_vec_info_type;
1131   STMT_VINFO_STMT (res) = stmt;
1132   STMT_VINFO_LOOP (res) = loop;
1133   STMT_VINFO_RELEVANT_P (res) = 0;
1134   STMT_VINFO_VECTYPE (res) = NULL;
1135   STMT_VINFO_VEC_STMT (res) = NULL;
1136   STMT_VINFO_DATA_REF (res) = NULL;
1137   STMT_VINFO_MEMTAG (res) = NULL;
1138   STMT_VINFO_VECT_DR_BASE (res) = NULL;
1139   STMT_VINFO_VECT_INIT_OFFSET (res) = NULL_TREE;
1140   STMT_VINFO_VECT_STEP (res) = NULL_TREE;
1141   STMT_VINFO_VECT_BASE_ALIGNED_P (res) = false;
1142   STMT_VINFO_VECT_MISALIGNMENT (res) = NULL_TREE;
1143
1144   return res;
1145 }
1146
1147
1148 /* Function new_loop_vec_info.
1149
1150    Create and initialize a new loop_vec_info struct for LOOP, as well as
1151    stmt_vec_info structs for all the stmts in LOOP.  */
1152
1153 loop_vec_info
1154 new_loop_vec_info (struct loop *loop)
1155 {
1156   loop_vec_info res;
1157   basic_block *bbs;
1158   block_stmt_iterator si;
1159   unsigned int i;
1160
1161   res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));
1162
1163   bbs = get_loop_body (loop);
1164
1165   /* Create stmt_info for all stmts in the loop.  */
1166   for (i = 0; i < loop->num_nodes; i++)
1167     {
1168       basic_block bb = bbs[i];
1169       for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1170         {
1171           tree stmt = bsi_stmt (si);
1172           stmt_ann_t ann;
1173
1174           get_stmt_operands (stmt);
1175           ann = stmt_ann (stmt);
1176           set_stmt_info (ann, new_stmt_vec_info (stmt, loop));
1177         }
1178     }
1179
1180   LOOP_VINFO_LOOP (res) = loop;
1181   LOOP_VINFO_BBS (res) = bbs;
1182   LOOP_VINFO_EXIT_COND (res) = NULL;
1183   LOOP_VINFO_NITERS (res) = NULL;
1184   LOOP_VINFO_VECTORIZABLE_P (res) = 0;
1185   LOOP_DO_PEELING_FOR_ALIGNMENT (res) = false;
1186   LOOP_VINFO_VECT_FACTOR (res) = 0;
1187   VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_WRITES (res), 20,
1188                            "loop_write_datarefs");
1189   VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_READS (res), 20,
1190                            "loop_read_datarefs");
1191   LOOP_VINFO_UNALIGNED_DR (res) = NULL;
1192
1193   return res;
1194 }
1195
1196
1197 /* Function destroy_loop_vec_info.
1198
1199    Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the
1200    stmts in the loop.  */
1201
1202 void
1203 destroy_loop_vec_info (loop_vec_info loop_vinfo)
1204 {
1205   struct loop *loop;
1206   basic_block *bbs;
1207   int nbbs;
1208   block_stmt_iterator si;
1209   int j;
1210
1211   if (!loop_vinfo)
1212     return;
1213
1214   loop = LOOP_VINFO_LOOP (loop_vinfo);
1215
1216   bbs = LOOP_VINFO_BBS (loop_vinfo);
1217   nbbs = loop->num_nodes;
1218
1219   for (j = 0; j < nbbs; j++)
1220     {
1221       basic_block bb = bbs[j];
1222       for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1223         {
1224           tree stmt = bsi_stmt (si);
1225           stmt_ann_t ann = stmt_ann (stmt);
1226           stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1227           free (stmt_info);
1228           set_stmt_info (ann, NULL);
1229         }
1230     }
1231
1232   free (LOOP_VINFO_BBS (loop_vinfo));
1233   varray_clear (LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
1234   varray_clear (LOOP_VINFO_DATAREF_READS (loop_vinfo));
1235
1236   free (loop_vinfo);
1237 }
1238
1239
1240 /* Function debug_loop_stats.
1241
1242    For vectorization statistics dumps.  */
1243
1244 static bool
1245 vect_debug_stats (struct loop *loop)
1246 {
1247   basic_block bb;
1248   block_stmt_iterator si;
1249   tree node = NULL_TREE;
1250
1251   if (!dump_file || !(dump_flags & TDF_STATS))
1252     return false;
1253
1254   if (!loop)
1255     {
1256       fprintf (dump_file, "\n");
1257       return true;
1258     }
1259
1260   if (!loop->header)
1261     return false;
1262
1263   bb = loop->header;
1264
1265   for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1266     {
1267       node = bsi_stmt (si);
1268       if (node && EXPR_P (node) && EXPR_LOCUS (node))
1269         break;
1270     }
1271
1272   if (node && EXPR_P (node) && EXPR_LOCUS (node)
1273       && EXPR_FILENAME (node) && EXPR_LINENO (node))
1274     {
1275       fprintf (dump_file, "\nloop at %s:%d: ",
1276         EXPR_FILENAME (node), EXPR_LINENO (node));
1277       return true;
1278     }
1279
1280   return false;
1281 }
1282
1283
1284 /* Function debug_loop_details.
1285
1286    For vectorization debug dumps.  */
1287
1288 static bool
1289 vect_debug_details (struct loop *loop)
1290 {
1291    basic_block bb;
1292    block_stmt_iterator si;
1293    tree node = NULL_TREE;
1294
1295   if (!dump_file || !(dump_flags & TDF_DETAILS))
1296     return false;
1297
1298   if (!loop)
1299     {
1300       fprintf (dump_file, "\n");
1301       return true;
1302     }
1303
1304   if (!loop->header)
1305     return false;
1306
1307   bb = loop->header;
1308
1309   for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1310     {
1311       node = bsi_stmt (si);
1312       if (node && EXPR_P (node) && EXPR_LOCUS (node))
1313         break;
1314     }
1315
1316   if (node && EXPR_P (node) && EXPR_LOCUS (node)
1317       && EXPR_FILENAME (node) && EXPR_LINENO (node))
1318     {
1319       fprintf (dump_file, "\nloop at %s:%d: ",
1320                EXPR_FILENAME (node), EXPR_LINENO (node));
1321       return true;
1322     }
1323
1324   return false;
1325 }
1326
1327
1328 /* Function vect_get_ptr_offset
1329
1330    Compute the OFFSET modulo vector-type alignment of pointer REF in bits.  */
1331
1332 static tree
1333 vect_get_ptr_offset (tree ref ATTRIBUTE_UNUSED,
1334                      tree vectype ATTRIBUTE_UNUSED,
1335                      tree *offset ATTRIBUTE_UNUSED)
1336 {
1337   /* TODO: Use alignment information.  */
1338   return NULL_TREE;
1339 }
1340
1341
1342 /* Function vect_analyze_offset_expr
1343
1344    Given an offset expression EXPR received from get_inner_reference, analyze
1345    it and create an expression for INITIAL_OFFSET by substituting the variables
1346    of EXPR with initial_condition of the corresponding access_fn in the loop.
1347    E.g.,
1348       for i
1349          for (j = 3; j < N; j++)
1350             a[j].b[i][j] = 0;
1351
1352    For a[j].b[i][j], EXPR will be 'i * C_i + j * C_j + C'. 'i' cannot be
1353    subsituted, since its access_fn in the inner loop is i. 'j' will be
1354    substituted with 3. An INITIAL_OFFSET will be 'i * C_i + C`', where
1355    C` =  3 * C_j + C.
1356
1357    Compute MISALIGN (the misalignment of the data reference initial access from
1358    its base) if possible. Misalignment can be calculated only if all the
1359    variables can be substitued with constants, or if a variable is multiplied
1360    by a multiple of VECTYPE_ALIGNMENT. In the above example, since 'i' cannot
1361    be substituted, MISALIGN will be NULL_TREE in case that C_i is not a multiple
1362    of VECTYPE_ALIGNMENT, and C` otherwise. (We perform MISALIGN modulo
1363    VECTYPE_ALIGNMENT computation in the caller of this function).
1364
1365    STEP is an evolution of the data reference in this loop in bytes.
1366    In the above example, STEP is C_j.
1367
1368    Return FALSE, if the analysis fails, e.g., there is no access_fn for a
1369    variable. In this case, all the outputs (INITIAL_OFFSET, MISALIGN and STEP)
1370    are NULL_TREEs. Otherwise, return TRUE.
1371
1372 */
1373
1374 static bool
1375 vect_analyze_offset_expr (tree expr,
1376                           struct loop *loop,
1377                           tree vectype_alignment,
1378                           tree *initial_offset,
1379                           tree *misalign,
1380                           tree *step)
1381 {
1382   tree oprnd0;
1383   tree oprnd1;
1384   tree left_offset = size_zero_node;
1385   tree right_offset = size_zero_node;
1386   tree left_misalign = size_zero_node;
1387   tree right_misalign = size_zero_node;
1388   tree left_step = size_zero_node;
1389   tree right_step = size_zero_node;
1390   enum tree_code code;
1391   tree init, evolution, def_stmt;
1392
1393   /* Strip conversions that don't narrow the mode.  */
1394   while (TREE_CODE (expr) == NOP_EXPR || TREE_CODE (expr) == CONVERT_EXPR)
1395     {
1396       tree to, ti;
1397
1398       to = TREE_TYPE (expr);
1399       oprnd0 = TREE_OPERAND (expr, 0);
1400       ti = TREE_TYPE (oprnd0);
1401
1402       if (!INTEGRAL_TYPE_P (to) || !INTEGRAL_TYPE_P (ti))
1403         return false;
1404       if (GET_MODE_SIZE (TYPE_MODE (to)) < GET_MODE_SIZE (TYPE_MODE (ti)))
1405         return false;
1406
1407       expr = oprnd0;
1408     }
1409
1410   *step = NULL_TREE;
1411   *misalign = NULL_TREE;
1412   *initial_offset = NULL_TREE;
1413
1414   /* Stop conditions:
1415      1. Constant.  */
1416   if (TREE_CONSTANT (expr))
1417     {
1418       *initial_offset = fold_convert (sizetype, expr);
1419       *misalign = fold_convert (sizetype, expr);
1420       *step = size_zero_node;
1421       return true;
1422     }
1423
1424   /* 2. Variable. Try to substitute with initial_condition of the corresponding
1425      access_fn in the current loop.  */
1426   if (SSA_VAR_P (expr))
1427     {
1428       tree access_fn = analyze_scalar_evolution (loop, expr);
1429
1430       if (access_fn == chrec_dont_know)
1431         /* No access_fn.  */
1432         return false;
1433
1434       init = initial_condition_in_loop_num (access_fn, loop->num);
1435       if (init == expr)
1436         {
1437           def_stmt = SSA_NAME_DEF_STMT (init);
1438           if (def_stmt
1439               && !IS_EMPTY_STMT (def_stmt)
1440               && flow_bb_inside_loop_p (loop, bb_for_stmt (def_stmt)))
1441             /* Not enough information: may be not loop invariant.
1442                E.g., for a[b[i]], we get a[D], where D=b[i]. EXPR is D, its
1443                initial_condition is D, but it depends on i - loop's induction
1444                variable.  */
1445             return false;
1446         }
1447
1448       evolution = evolution_part_in_loop_num (access_fn, loop->num);
1449       if (evolution && TREE_CODE (evolution) != INTEGER_CST)
1450         /* Evolution is not constant.  */
1451         return false;
1452
1453       if (TREE_CONSTANT (init))
1454         *misalign = fold_convert (sizetype, init);
1455       else
1456         /* Not constant, misalignment cannot be calculated.  */
1457         *misalign = NULL_TREE;
1458
1459       *initial_offset = fold_convert (sizetype, init);
1460
1461       *step = evolution ? fold_convert (sizetype, evolution) : size_zero_node;
1462       return true;
1463     }
1464
1465   /* Recursive computation.  */
1466   oprnd0 = TREE_OPERAND (expr, 0);
1467   oprnd1 = TREE_OPERAND (expr, 1);
1468
1469   if (!vect_analyze_offset_expr (oprnd0, loop, vectype_alignment, &left_offset,
1470                                 &left_misalign, &left_step)
1471       || !vect_analyze_offset_expr (oprnd1, loop, vectype_alignment,
1472                                     &right_offset, &right_misalign, &right_step))
1473       return false;
1474
1475   /* The type of the operation: plus, minus or mult.  */
1476   code = TREE_CODE (expr);
1477   switch (code)
1478     {
1479     case MULT_EXPR:
1480       if (!TREE_CONSTANT (right_offset))
1481         /* RIGHT_OFFSET can be not constant. For example, for arrays of variable
1482            sized types.
1483            FORNOW: We don't support such cases.  */
1484         return false;
1485
1486       /* Misalignment computation.  */
1487       if (SSA_VAR_P (left_offset))
1488         {
1489           /* If the left side contains variable that cannot be substituted with
1490              constant, we check if the right side is a multiple of ALIGNMENT.  */
1491           if (integer_zerop (size_binop (TRUNC_MOD_EXPR, right_offset,
1492                                          vectype_alignment)))
1493             *misalign = size_zero_node;
1494           else
1495             /* If the remainder is not zero or the right side isn't constant, we
1496                can't compute  misalignment.  */
1497             *misalign = NULL_TREE;
1498         }
1499       else
1500         {
1501           /* The left operand was successfully substituted with constant.  */
1502           if (left_misalign)
1503             /* In case of EXPR '(i * C1 + j) * C2', LEFT_MISALIGN is
1504                NULL_TREE.  */
1505             *misalign  = size_binop (code, left_misalign, right_misalign);
1506           else
1507             *misalign = NULL_TREE;
1508         }
1509
1510       /* Step calculation.  */
1511       /* Multiply the step by the right operand.  */
1512       *step  = size_binop (MULT_EXPR, left_step, right_offset);
1513       break;
1514
1515     case PLUS_EXPR:
1516     case MINUS_EXPR:
1517       /* Combine the recursive calculations for step and misalignment.  */
1518       *step = size_binop (code, left_step, right_step);
1519
1520       if (left_misalign && right_misalign)
1521         *misalign  = size_binop (code, left_misalign, right_misalign);
1522       else
1523         *misalign = NULL_TREE;
1524
1525       break;
1526
1527     default:
1528       gcc_unreachable ();
1529     }
1530
1531   /* Compute offset.  */
1532   *initial_offset = fold_convert (sizetype,
1533                                   fold (build2 (code, TREE_TYPE (left_offset),
1534                                                 left_offset,
1535                                                 right_offset)));
1536   return true;
1537 }
1538
1539
1540 /* Function vect_get_base_and_offset
1541
1542    Return the BASE of the data reference EXPR.
1543    If VECTYPE is given, also compute the INITIAL_OFFSET from BASE, MISALIGN and
1544    STEP.
1545    E.g., for EXPR a.b[i] + 4B, BASE is a, and OFFSET is the overall offset
1546    'a.b[i] + 4B' from a (can be an expression), MISALIGN is an OFFSET
1547    instantiated with initial_conditions of access_functions of variables,
1548    modulo alignment, and STEP is the evolution of the DR_REF in this loop.
1549
1550    Function get_inner_reference is used for the above in case of ARRAY_REF and
1551    COMPONENT_REF.
1552
1553    Input:
1554    EXPR - the memory reference that is being analyzed
1555    DR - the data_reference struct of the _original_ memory reference
1556         (Note: DR_REF (DR) is not necessarily EXPR)
1557    VECTYPE - the type that defines the alignment (i.e, we compute
1558              alignment relative to TYPE_ALIGN(VECTYPE))
1559
1560    Output:
1561    BASE (returned value) - the base of the data reference EXPR.
1562                            E.g, if EXPR is a.b[k].c[i][j] the returned
1563                            base is a.
1564    INITIAL_OFFSET - initial offset of EXPR from BASE (an expression)
1565    MISALIGN - offset of EXPR from BASE in bytes (a constant) or NULL_TREE if the
1566               computation is impossible
1567    STEP - evolution of the DR_REF in the loop
1568    BASE_ALIGNED_P - indicates if BASE is aligned
1569
1570    If something unexpected is encountered (an unsupported form of data-ref),
1571    then NULL_TREE is returned.  */
1572
1573 static tree
1574 vect_get_base_and_offset (struct data_reference *dr,
1575                           tree expr,
1576                           tree vectype,
1577                           loop_vec_info loop_vinfo,
1578                           tree *initial_offset,
1579                           tree *misalign,
1580                           tree *step,
1581                           bool *base_aligned_p)
1582 {
1583   tree this_offset = size_zero_node;
1584   tree this_misalign = size_zero_node;
1585   tree this_step = size_zero_node;
1586   tree base = NULL_TREE;
1587   tree next_ref;
1588   tree oprnd0, oprnd1;
1589   enum tree_code code = TREE_CODE (expr);
1590   HOST_WIDE_INT pbitsize;
1591   HOST_WIDE_INT pbitpos;
1592   tree poffset;
1593   enum machine_mode pmode;
1594   int punsignedp, pvolatilep;
1595   tree bit_pos_in_bytes;
1596   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1597
1598   *base_aligned_p = false;
1599
1600   switch (code)
1601     {
1602     /* These cases end the recursion:  */
1603     case VAR_DECL:
1604     case PARM_DECL:
1605       *initial_offset = size_zero_node;
1606       *step = size_zero_node;
1607       *misalign = size_zero_node;
1608       if (DECL_ALIGN (expr) >= TYPE_ALIGN (vectype))
1609         *base_aligned_p = true;
1610       return expr;
1611
1612     case SSA_NAME:
1613       if (TREE_CODE (TREE_TYPE (expr)) != POINTER_TYPE)
1614         return NULL_TREE;
1615
1616       if (TYPE_ALIGN (TREE_TYPE (TREE_TYPE (expr))) < TYPE_ALIGN (vectype))
1617         {
1618           base = vect_get_ptr_offset (expr, vectype, misalign);
1619           if (base)
1620             *base_aligned_p = true;
1621         }
1622       else
1623         {
1624           *base_aligned_p = true;
1625           *misalign = size_zero_node;
1626         }
1627       *initial_offset = size_zero_node;
1628       *step = size_zero_node;
1629       return expr;
1630
1631     case INTEGER_CST:
1632       *initial_offset = fold_convert (sizetype, expr);
1633       *misalign = fold_convert (sizetype, expr);
1634       *step = size_zero_node;
1635       return expr;
1636
1637     /* These cases continue the recursion:  */
1638     case ADDR_EXPR:
1639       oprnd0 = TREE_OPERAND (expr, 0);
1640       next_ref = oprnd0;
1641       break;
1642
1643     case INDIRECT_REF:
1644       oprnd0 = TREE_OPERAND (expr, 0);
1645       next_ref = oprnd0;
1646       break;
1647
1648     case PLUS_EXPR:
1649     case MINUS_EXPR:
1650       oprnd0 = TREE_OPERAND (expr, 0);
1651       oprnd1 = TREE_OPERAND (expr, 1);
1652
1653       /* In case we have a PLUS_EXPR of the form
1654          (oprnd0 + oprnd1), we assume that only oprnd0 determines the base.
1655          This is verified in vect_get_memtag_and_dr.  */
1656       base = vect_get_base_and_offset (dr, oprnd1, vectype, loop_vinfo,
1657                                        &this_offset, &this_misalign,
1658                                        &this_step, base_aligned_p);
1659       /* Offset was already computed in vect_analyze_pointer_ref_access.  */
1660       this_offset = size_zero_node;
1661
1662       if (!base)
1663         this_misalign = NULL_TREE;
1664
1665       next_ref = oprnd0;
1666       break;
1667
1668     default:
1669       if (!handled_component_p (expr))
1670         /* Unsupported expression.  */
1671         return NULL_TREE;
1672
1673       /* Find the base and the offset from it.  */
1674       next_ref = get_inner_reference (expr, &pbitsize, &pbitpos, &poffset,
1675                                       &pmode, &punsignedp, &pvolatilep, false);
1676       if (!next_ref)
1677         return NULL_TREE;
1678
1679       if (poffset
1680           && !vect_analyze_offset_expr (poffset, loop, TYPE_SIZE_UNIT (vectype),
1681                                         &this_offset, &this_misalign,
1682                                         &this_step))
1683         {
1684           /* Failed to compute offset or step.  */
1685           *step = NULL_TREE;
1686           *initial_offset = NULL_TREE;
1687           *misalign = NULL_TREE;
1688           return NULL_TREE;
1689         }
1690
1691       /* Add bit position to OFFSET and MISALIGN.  */
1692
1693       bit_pos_in_bytes = size_int (pbitpos/BITS_PER_UNIT);
1694       /* Check that there is no remainder in bits.  */
1695       if (pbitpos%BITS_PER_UNIT)
1696         {
1697           if (vect_debug_details (NULL))
1698             fprintf (dump_file, "bit offset alignment.");
1699           return NULL_TREE;
1700         }
1701       this_offset = fold (size_binop (PLUS_EXPR, bit_pos_in_bytes,
1702                                       fold_convert (sizetype, this_offset)));
1703       if (this_misalign)
1704         this_misalign = size_binop (PLUS_EXPR, this_misalign, bit_pos_in_bytes);
1705
1706       /* Continue the recursion to refine the base (get_inner_reference returns
1707          &a for &a[i], and not a).  */
1708       break;
1709     }
1710
1711   base = vect_get_base_and_offset (dr, next_ref, vectype, loop_vinfo,
1712                                    initial_offset, misalign, step,
1713                                    base_aligned_p);
1714   if (base)
1715     {
1716       /* Combine the results.  */
1717       if (this_misalign && *misalign)
1718         *misalign = size_binop (PLUS_EXPR, *misalign, this_misalign);
1719       else
1720         *misalign = NULL_TREE;
1721
1722       *step = size_binop (PLUS_EXPR, *step, this_step);
1723
1724       *initial_offset = fold (build2 (PLUS_EXPR, TREE_TYPE (*initial_offset),
1725                                       *initial_offset, this_offset));
1726
1727       if (vect_debug_details (NULL))
1728         {
1729           print_generic_expr (dump_file, expr, TDF_SLIM);
1730           fprintf (dump_file, "\n --> total offset for ref: ");
1731           print_generic_expr (dump_file, *initial_offset, TDF_SLIM);
1732           fprintf (dump_file, "\n --> total misalign for ref: ");
1733           print_generic_expr (dump_file, *misalign, TDF_SLIM);
1734           fprintf (dump_file, "\n --> total step for ref: ");
1735           print_generic_expr (dump_file, *step, TDF_SLIM);
1736         }
1737     }
1738   return base;
1739 }
1740
1741
1742 /* Function vect_force_dr_alignment_p.
1743
1744    Returns whether the alignment of a DECL can be forced to be aligned
1745    on ALIGNMENT bit boundary.  */
1746
1747 static bool
1748 vect_can_force_dr_alignment_p (tree decl, unsigned int alignment)
1749 {
1750   if (TREE_CODE (decl) != VAR_DECL)
1751     return false;
1752
1753   if (DECL_EXTERNAL (decl))
1754     return false;
1755
1756   if (TREE_ASM_WRITTEN (decl))
1757     return false;
1758
1759   if (TREE_STATIC (decl))
1760     return (alignment <= MAX_OFILE_ALIGNMENT);
1761   else
1762     /* This is not 100% correct.  The absolute correct stack alignment
1763        is STACK_BOUNDARY.  We're supposed to hope, but not assume, that
1764        PREFERRED_STACK_BOUNDARY is honored by all translation units.
1765        However, until someone implements forced stack alignment, SSE
1766        isn't really usable without this.  */
1767     return (alignment <= PREFERRED_STACK_BOUNDARY);
1768 }
1769
1770
1771 /* Function vect_get_new_vect_var.
1772
1773    Returns a name for a new variable. The current naming scheme appends the
1774    prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
1775    the name of vectorizer generated variables, and appends that to NAME if
1776    provided.  */
1777
1778 static tree
1779 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
1780 {
1781   const char *prefix;
1782   int prefix_len;
1783   tree new_vect_var;
1784
1785   if (var_kind == vect_simple_var)
1786     prefix = "vect_";
1787   else
1788     prefix = "vect_p";
1789
1790   prefix_len = strlen (prefix);
1791
1792   if (name)
1793     new_vect_var = create_tmp_var (type, concat (prefix, name, NULL));
1794   else
1795     new_vect_var = create_tmp_var (type, prefix);
1796
1797   return new_vect_var;
1798 }
1799
1800
1801 /* Function vect_create_index_for_vector_ref.
1802
1803    Create (and return) an index variable, along with it's update chain in the
1804    loop. This variable will be used to access a memory location in a vector
1805    operation.
1806
1807    Input:
1808    LOOP: The loop being vectorized.
1809    BSI: The block_stmt_iterator where STMT is. Any new stmts created by this
1810         function can be added here, or in the loop pre-header.
1811
1812    Output:
1813    Return an index that will be used to index a vector array.  It is expected
1814    that a pointer to the first vector will be used as the base address for the
1815    indexed reference.
1816
1817    FORNOW: we are not trying to be efficient, just creating a new index each
1818    time from scratch.  At this time all vector references could use the same
1819    index.
1820
1821    TODO: create only one index to be used by all vector references.  Record
1822    the index in the LOOP_VINFO the first time this procedure is called and
1823    return it on subsequent calls.  The increment of this index must be placed
1824    just before the conditional expression that ends the single block loop.  */
1825
1826 static tree
1827 vect_create_index_for_vector_ref (struct loop *loop, block_stmt_iterator *bsi)
1828 {
1829   tree init, step;
1830   tree indx_before_incr, indx_after_incr;
1831
1832   /* It is assumed that the base pointer used for vectorized access contains
1833      the address of the first vector.  Therefore the index used for vectorized
1834      access must be initialized to zero and incremented by 1.  */
1835
1836   init = integer_zero_node;
1837   step = integer_one_node;
1838
1839   /* Assuming that bsi_insert is used with BSI_NEW_STMT  */
1840   create_iv (init, step, NULL_TREE, loop, bsi, false,
1841         &indx_before_incr, &indx_after_incr);
1842
1843   return indx_before_incr;
1844 }
1845
1846
1847 /* Function vect_create_addr_base_for_vector_ref.
1848
1849    Create an expression that computes the address of the first memory location
1850    that will be accessed for a data reference.
1851
1852    Input:
1853    STMT: The statement containing the data reference.
1854    NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
1855    OFFSET: Optional. If supplied, it is be added to the initial address.
1856
1857    Output:
1858    1. Return an SSA_NAME whose value is the address of the memory location of
1859       the first vector of the data reference.
1860    2. If new_stmt_list is not NULL_TREE after return then the caller must insert
1861       these statement(s) which define the returned SSA_NAME.
1862
1863    FORNOW: We are only handling array accesses with step 1.  */
1864
1865 static tree
1866 vect_create_addr_base_for_vector_ref (tree stmt,
1867                                       tree *new_stmt_list,
1868                                       tree offset)
1869 {
1870   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1871   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1872   tree data_ref_base = unshare_expr (STMT_VINFO_VECT_DR_BASE (stmt_info));
1873   tree base_name = unshare_expr (DR_BASE_NAME (dr));
1874   tree ref = DR_REF (dr);
1875   tree scalar_type = TREE_TYPE (ref);
1876   tree scalar_ptr_type = build_pointer_type (scalar_type);
1877   tree vec_stmt;
1878   tree new_temp;
1879   tree addr_base, addr_expr;
1880   tree dest, new_stmt;
1881   tree base_offset = unshare_expr (STMT_VINFO_VECT_INIT_OFFSET (stmt_info));
1882
1883   if (TREE_CODE (TREE_TYPE (data_ref_base)) != POINTER_TYPE)
1884     /* After the analysis stage, we expect to get here only with RECORD_TYPE
1885        and ARRAY_TYPE. */
1886     /* Add '&' to ref_base.  */
1887     data_ref_base = build_fold_addr_expr (data_ref_base);
1888   else
1889     {
1890       /* Create '(scalar_type*) base' for pointers.  */
1891       tree dest, new_stmt, new_temp, vec_stmt, tmp_base;
1892       tree scalar_array_type = build_array_type (scalar_type, 0);
1893       tree scalar_array_ptr_type = build_pointer_type (scalar_array_type);
1894       tree array_ptr = create_tmp_var (scalar_array_ptr_type, "array_ptr");
1895       add_referenced_tmp_var (array_ptr);
1896
1897       dest = create_tmp_var (TREE_TYPE (data_ref_base), "dataref");
1898       add_referenced_tmp_var (dest);
1899       tmp_base = force_gimple_operand (data_ref_base, &new_stmt, false, dest);
1900       append_to_statement_list_force (new_stmt,  new_stmt_list);
1901
1902       vec_stmt = fold_convert (scalar_array_ptr_type, tmp_base);
1903       vec_stmt = build2 (MODIFY_EXPR, void_type_node, array_ptr, vec_stmt);
1904       new_temp = make_ssa_name (array_ptr, vec_stmt);
1905       TREE_OPERAND (vec_stmt, 0) = new_temp;
1906       append_to_statement_list_force (vec_stmt,  new_stmt_list);
1907       data_ref_base = new_temp;
1908     }
1909
1910   /* Create base_offset */
1911   dest = create_tmp_var (TREE_TYPE (base_offset), "base_off");
1912   add_referenced_tmp_var (dest);
1913   base_offset = force_gimple_operand (base_offset, &new_stmt, false, dest);
1914   append_to_statement_list_force (new_stmt, new_stmt_list);
1915
1916   if (offset)
1917     {
1918       tree tmp = create_tmp_var (TREE_TYPE (base_offset), "offset");
1919       add_referenced_tmp_var (tmp);
1920       offset = fold (build2 (MULT_EXPR, TREE_TYPE (offset), offset,
1921                              STMT_VINFO_VECT_STEP (stmt_info)));
1922       base_offset = fold (build2 (PLUS_EXPR, TREE_TYPE (base_offset), base_offset,
1923                                   offset));
1924       base_offset = force_gimple_operand (base_offset, &new_stmt, false, tmp);
1925       append_to_statement_list_force (new_stmt, new_stmt_list);
1926     }
1927
1928   /* base + base_offset */
1929   addr_base = fold (build2 (PLUS_EXPR, TREE_TYPE (data_ref_base), data_ref_base,
1930                             base_offset));
1931
1932   /* addr_expr = addr_base */
1933   addr_expr = vect_get_new_vect_var (scalar_ptr_type, vect_pointer_var,
1934                                      get_name (base_name));
1935   add_referenced_tmp_var (addr_expr);
1936   vec_stmt = build2 (MODIFY_EXPR, void_type_node, addr_expr, addr_base);
1937   new_temp = make_ssa_name (addr_expr, vec_stmt);
1938   TREE_OPERAND (vec_stmt, 0) = new_temp;
1939   append_to_statement_list_force (vec_stmt, new_stmt_list);
1940
1941   if (vect_debug_details (NULL))
1942     {
1943       fprintf (dump_file, "created ");
1944       print_generic_expr (dump_file, vec_stmt, TDF_SLIM);
1945       fprintf (dump_file, "\n");
1946     }
1947   return new_temp;
1948 }
1949
1950
1951 /* Function get_vectype_for_scalar_type.
1952
1953    Returns the vector type corresponding to SCALAR_TYPE as supported
1954    by the target.  */
1955
1956 static tree
1957 get_vectype_for_scalar_type (tree scalar_type)
1958 {
1959   enum machine_mode inner_mode = TYPE_MODE (scalar_type);
1960   int nbytes = GET_MODE_SIZE (inner_mode);
1961   int nunits;
1962   tree vectype;
1963
1964   if (nbytes == 0)
1965     return NULL_TREE;
1966
1967   /* FORNOW: Only a single vector size per target (UNITS_PER_SIMD_WORD)
1968      is expected.  */
1969   nunits = UNITS_PER_SIMD_WORD / nbytes;
1970
1971   vectype = build_vector_type (scalar_type, nunits);
1972   if (vect_debug_details (NULL))
1973     {
1974       fprintf (dump_file, "get vectype with %d units of type ", nunits);
1975       print_generic_expr (dump_file, scalar_type, TDF_SLIM);
1976     }
1977
1978   if (!vectype)
1979     return NULL_TREE;
1980
1981   if (vect_debug_details (NULL))
1982     {
1983       fprintf (dump_file, "vectype: ");
1984       print_generic_expr (dump_file, vectype, TDF_SLIM);
1985     }
1986
1987   if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
1988     {
1989       /* TODO: tree-complex.c sometimes can parallelize operations
1990          on generic vectors.  We can vectorize the loop in that case,
1991          but then we should re-run the lowering pass.  */
1992       if (vect_debug_details (NULL))
1993         fprintf (dump_file, "mode not supported by target.");
1994       return NULL_TREE;
1995     }
1996
1997   return vectype;
1998 }
1999
2000
2001 /* Function vect_align_data_ref.
2002
2003    Handle mislignment of a memory accesses.
2004
2005    FORNOW: Can't handle misaligned accesses.
2006    Make sure that the dataref is aligned.  */
2007
2008 static void
2009 vect_align_data_ref (tree stmt)
2010 {
2011   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2012   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2013
2014   /* FORNOW: can't handle misaligned accesses;
2015              all accesses expected to be aligned.  */
2016   gcc_assert (aligned_access_p (dr));
2017 }
2018
2019
2020 /* Function vect_create_data_ref_ptr.
2021
2022    Create a memory reference expression for vector access, to be used in a
2023    vector load/store stmt. The reference is based on a new pointer to vector
2024    type (vp).
2025
2026    Input:
2027    1. STMT: a stmt that references memory. Expected to be of the form
2028          MODIFY_EXPR <name, data-ref> or MODIFY_EXPR <data-ref, name>.
2029    2. BSI: block_stmt_iterator where new stmts can be added.
2030    3. OFFSET (optional): an offset to be added to the initial address accessed
2031         by the data-ref in STMT.
2032    4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
2033         pointing to the initial address.
2034
2035    Output:
2036    1. Declare a new ptr to vector_type, and have it point to the base of the
2037       data reference (initial addressed accessed by the data reference).
2038       For example, for vector of type V8HI, the following code is generated:
2039
2040       v8hi *vp;
2041       vp = (v8hi *)initial_address;
2042
2043       if OFFSET is not supplied:
2044          initial_address = &a[init];
2045       if OFFSET is supplied:
2046          initial_address = &a[init + OFFSET];
2047
2048       Return the initial_address in INITIAL_ADDRESS.
2049
2050    2. Create a data-reference in the loop based on the new vector pointer vp,
2051       and using a new index variable 'idx' as follows:
2052
2053       vp' = vp + update
2054
2055       where if ONLY_INIT is true:
2056          update = zero
2057       and otherwise
2058          update = idx + vector_type_size
2059
2060       Return the pointer vp'.
2061
2062
2063    FORNOW: handle only aligned and consecutive accesses.  */
2064
2065 static tree
2066 vect_create_data_ref_ptr (tree stmt, block_stmt_iterator *bsi, tree offset,
2067                           tree *initial_address, bool only_init)
2068 {
2069   tree base_name;
2070   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2071   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2072   struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2073   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2074   tree vect_ptr_type;
2075   tree vect_ptr;
2076   tree tag;
2077   v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
2078   v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
2079   vuse_optype vuses = STMT_VUSE_OPS (stmt);
2080   int nvuses, nv_may_defs, nv_must_defs;
2081   int i;
2082   tree new_temp;
2083   tree vec_stmt;
2084   tree new_stmt_list = NULL_TREE;
2085   tree idx;
2086   edge pe = loop_preheader_edge (loop);
2087   basic_block new_bb;
2088   tree vect_ptr_init;
2089   tree vectype_size;
2090   tree ptr_update;
2091   tree data_ref_ptr;
2092   tree type, tmp, size;
2093
2094   base_name = unshare_expr (DR_BASE_NAME (dr));
2095   if (vect_debug_details (NULL))
2096     {
2097       tree data_ref_base = base_name;
2098       fprintf (dump_file, "create array_ref of type: ");
2099       print_generic_expr (dump_file, vectype, TDF_SLIM);
2100       if (TREE_CODE (data_ref_base) == VAR_DECL)
2101         fprintf (dump_file, "\nvectorizing a one dimensional array ref: ");
2102       else if (TREE_CODE (data_ref_base) == ARRAY_REF)
2103         fprintf (dump_file, "\nvectorizing a multidimensional array ref: ");
2104       else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
2105         fprintf (dump_file, "\nvectorizing a record based array ref: ");
2106       else if (TREE_CODE (data_ref_base) == SSA_NAME)
2107         fprintf (dump_file, "\nvectorizing a pointer ref: ");
2108       print_generic_expr (dump_file, base_name, TDF_SLIM);
2109     }
2110
2111   /** (1) Create the new vector-pointer variable:  **/
2112
2113   vect_ptr_type = build_pointer_type (vectype);
2114   vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
2115                                     get_name (base_name));
2116   add_referenced_tmp_var (vect_ptr);
2117
2118
2119   /** (2) Handle aliasing information of the new vector-pointer:  **/
2120
2121   tag = STMT_VINFO_MEMTAG (stmt_info);
2122   gcc_assert (tag);
2123   get_var_ann (vect_ptr)->type_mem_tag = tag;
2124
2125   /* Mark for renaming all aliased variables
2126      (i.e, the may-aliases of the type-mem-tag).  */
2127   nvuses = NUM_VUSES (vuses);
2128   nv_may_defs = NUM_V_MAY_DEFS (v_may_defs);
2129   nv_must_defs = NUM_V_MUST_DEFS (v_must_defs);
2130   for (i = 0; i < nvuses; i++)
2131     {
2132       tree use = VUSE_OP (vuses, i);
2133       if (TREE_CODE (use) == SSA_NAME)
2134         bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (use))->uid);
2135     }
2136   for (i = 0; i < nv_may_defs; i++)
2137     {
2138       tree def = V_MAY_DEF_RESULT (v_may_defs, i);
2139       if (TREE_CODE (def) == SSA_NAME)
2140         bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (def))->uid);
2141     }
2142   for (i = 0; i < nv_must_defs; i++)
2143     {
2144       tree def = V_MUST_DEF_RESULT (v_must_defs, i);
2145       if (TREE_CODE (def) == SSA_NAME)
2146         bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (def))->uid);
2147     }
2148
2149
2150   /** (3) Calculate the initial address the vector-pointer, and set
2151           the vector-pointer to point to it before the loop:  **/
2152
2153   /* Create: (&(base[init_val+offset]) in the loop preheader.  */
2154   new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
2155                                                    offset);
2156   pe = loop_preheader_edge (loop);
2157   new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list);
2158   gcc_assert (!new_bb);
2159   *initial_address = new_temp;
2160
2161   /* Create: p = (vectype *) initial_base  */
2162   vec_stmt = fold_convert (vect_ptr_type, new_temp);
2163   vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
2164   new_temp = make_ssa_name (vect_ptr, vec_stmt);
2165   TREE_OPERAND (vec_stmt, 0) = new_temp;
2166   new_bb = bsi_insert_on_edge_immediate (pe, vec_stmt);
2167   gcc_assert (!new_bb);
2168   vect_ptr_init = TREE_OPERAND (vec_stmt, 0);
2169
2170
2171   /** (4) Handle the updating of the vector-pointer inside the loop: **/
2172
2173   if (only_init) /* No update in loop is required.  */
2174     return vect_ptr_init;
2175
2176   idx = vect_create_index_for_vector_ref (loop, bsi);
2177
2178   /* Create: update = idx * vectype_size  */
2179   tmp = create_tmp_var (integer_type_node, "update");
2180   add_referenced_tmp_var (tmp);
2181   size = TYPE_SIZE (vect_ptr_type);
2182   type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
2183   ptr_update = create_tmp_var (type, "update");
2184   add_referenced_tmp_var (ptr_update);
2185   vectype_size = TYPE_SIZE_UNIT (vectype);
2186   vec_stmt = build2 (MULT_EXPR, integer_type_node, idx, vectype_size);
2187   vec_stmt = build2 (MODIFY_EXPR, void_type_node, tmp, vec_stmt);
2188   new_temp = make_ssa_name (tmp, vec_stmt);
2189   TREE_OPERAND (vec_stmt, 0) = new_temp;
2190   bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
2191   vec_stmt = fold_convert (type, new_temp);
2192   vec_stmt = build2 (MODIFY_EXPR, void_type_node, ptr_update, vec_stmt);
2193   new_temp = make_ssa_name (ptr_update, vec_stmt);
2194   TREE_OPERAND (vec_stmt, 0) = new_temp;
2195   bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
2196
2197   /* Create: data_ref_ptr = vect_ptr_init + update  */
2198   vec_stmt = build2 (PLUS_EXPR, vect_ptr_type, vect_ptr_init, new_temp);
2199   vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
2200   new_temp = make_ssa_name (vect_ptr, vec_stmt);
2201   TREE_OPERAND (vec_stmt, 0) = new_temp;
2202   bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
2203   data_ref_ptr = TREE_OPERAND (vec_stmt, 0);
2204
2205   return data_ref_ptr;
2206 }
2207
2208
2209 /* Function vect_create_destination_var.
2210
2211    Create a new temporary of type VECTYPE.  */
2212
2213 static tree
2214 vect_create_destination_var (tree scalar_dest, tree vectype)
2215 {
2216   tree vec_dest;
2217   const char *new_name;
2218
2219   gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
2220
2221   new_name = get_name (scalar_dest);
2222   if (!new_name)
2223     new_name = "var_";
2224   vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, new_name);
2225   add_referenced_tmp_var (vec_dest);
2226
2227   return vec_dest;
2228 }
2229
2230
2231 /* Function vect_init_vector.
2232
2233    Insert a new stmt (INIT_STMT) that initializes a new vector variable with
2234    the vector elements of VECTOR_VAR. Return the DEF of INIT_STMT. It will be
2235    used in the vectorization of STMT.  */
2236
2237 static tree
2238 vect_init_vector (tree stmt, tree vector_var)
2239 {
2240   stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2241   struct loop *loop = STMT_VINFO_LOOP (stmt_vinfo);
2242   tree new_var;
2243   tree init_stmt;
2244   tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2245   tree vec_oprnd;
2246   edge pe;
2247   tree new_temp;
2248   basic_block new_bb;
2249
2250   new_var = vect_get_new_vect_var (vectype, vect_simple_var, "cst_");
2251   add_referenced_tmp_var (new_var);
2252
2253   init_stmt = build2 (MODIFY_EXPR, vectype, new_var, vector_var);
2254   new_temp = make_ssa_name (new_var, init_stmt);
2255   TREE_OPERAND (init_stmt, 0) = new_temp;
2256
2257   pe = loop_preheader_edge (loop);
2258   new_bb = bsi_insert_on_edge_immediate (pe, init_stmt);
2259   gcc_assert (!new_bb);
2260
2261   if (vect_debug_details (NULL))
2262     {
2263       fprintf (dump_file, "created new init_stmt: ");
2264       print_generic_expr (dump_file, init_stmt, TDF_SLIM);
2265     }
2266
2267   vec_oprnd = TREE_OPERAND (init_stmt, 0);
2268   return vec_oprnd;
2269 }
2270
2271
2272 /* Function vect_get_vec_def_for_operand.
2273
2274    OP is an operand in STMT. This function returns a (vector) def that will be
2275    used in the vectorized stmt for STMT.
2276
2277    In the case that OP is an SSA_NAME which is defined in the loop, then
2278    STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
2279
2280    In case OP is an invariant or constant, a new stmt that creates a vector def
2281    needs to be introduced.  */
2282
2283 static tree
2284 vect_get_vec_def_for_operand (tree op, tree stmt)
2285 {
2286   tree vec_oprnd;
2287   tree vec_stmt;
2288   tree def_stmt;
2289   stmt_vec_info def_stmt_info = NULL;
2290   stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2291   tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2292   int nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
2293   struct loop *loop = STMT_VINFO_LOOP (stmt_vinfo);
2294   basic_block bb;
2295   tree vec_inv;
2296   tree t = NULL_TREE;
2297   tree def;
2298   int i;
2299
2300   if (vect_debug_details (NULL))
2301     {
2302       fprintf (dump_file, "vect_get_vec_def_for_operand: ");
2303       print_generic_expr (dump_file, op, TDF_SLIM);
2304     }
2305
2306   /** ===> Case 1: operand is a constant.  **/
2307
2308   if (TREE_CODE (op) == INTEGER_CST || TREE_CODE (op) == REAL_CST)
2309     {
2310       /* Create 'vect_cst_ = {cst,cst,...,cst}'  */
2311
2312       tree vec_cst;
2313
2314       /* Build a tree with vector elements.  */
2315       if (vect_debug_details (NULL))
2316         fprintf (dump_file, "Create vector_cst. nunits = %d", nunits);
2317
2318       for (i = nunits - 1; i >= 0; --i)
2319         {
2320           t = tree_cons (NULL_TREE, op, t);
2321         }
2322       vec_cst = build_vector (vectype, t);
2323       return vect_init_vector (stmt, vec_cst);
2324     }
2325
2326   gcc_assert (TREE_CODE (op) == SSA_NAME);
2327
2328   /** ===> Case 2: operand is an SSA_NAME - find the stmt that defines it.  **/
2329
2330   def_stmt = SSA_NAME_DEF_STMT (op);
2331   def_stmt_info = vinfo_for_stmt (def_stmt);
2332
2333   if (vect_debug_details (NULL))
2334     {
2335       fprintf (dump_file, "vect_get_vec_def_for_operand: def_stmt: ");
2336       print_generic_expr (dump_file, def_stmt, TDF_SLIM);
2337     }
2338
2339
2340   /** ==> Case 2.1: operand is defined inside the loop.  **/
2341
2342   if (def_stmt_info)
2343     {
2344       /* Get the def from the vectorized stmt.  */
2345
2346       vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
2347       gcc_assert (vec_stmt);
2348       vec_oprnd = TREE_OPERAND (vec_stmt, 0);
2349       return vec_oprnd;
2350     }
2351
2352
2353   /** ==> Case 2.2: operand is defined by the loop-header phi-node -
2354                     it is a reduction/induction.  **/
2355
2356   bb = bb_for_stmt (def_stmt);
2357   if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb))
2358     {
2359       if (vect_debug_details (NULL))
2360         fprintf (dump_file, "reduction/induction - unsupported.");
2361       internal_error ("no support for reduction/induction"); /* FORNOW */
2362     }
2363
2364
2365   /** ==> Case 2.3: operand is defined outside the loop -
2366                     it is a loop invariant.  */
2367
2368   switch (TREE_CODE (def_stmt))
2369     {
2370     case PHI_NODE:
2371       def = PHI_RESULT (def_stmt);
2372       break;
2373     case MODIFY_EXPR:
2374       def = TREE_OPERAND (def_stmt, 0);
2375       break;
2376     case NOP_EXPR:
2377       def = TREE_OPERAND (def_stmt, 0);
2378       gcc_assert (IS_EMPTY_STMT (def_stmt));
2379       def = op;
2380       break;
2381     default:
2382       if (vect_debug_details (NULL))
2383         {
2384           fprintf (dump_file, "unsupported defining stmt: ");
2385           print_generic_expr (dump_file, def_stmt, TDF_SLIM);
2386         }
2387       internal_error ("unsupported defining stmt");
2388     }
2389
2390   /* Build a tree with vector elements. Create 'vec_inv = {inv,inv,..,inv}'  */
2391
2392   if (vect_debug_details (NULL))
2393     fprintf (dump_file, "Create vector_inv.");
2394
2395   for (i = nunits - 1; i >= 0; --i)
2396     {
2397       t = tree_cons (NULL_TREE, def, t);
2398     }
2399
2400   vec_inv = build_constructor (vectype, t);
2401   return vect_init_vector (stmt, vec_inv);
2402 }
2403
2404
2405 /* Function vect_finish_stmt_generation.
2406
2407    Insert a new stmt.  */
2408
2409 static void
2410 vect_finish_stmt_generation (tree stmt, tree vec_stmt, block_stmt_iterator *bsi)
2411 {
2412   bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
2413
2414   if (vect_debug_details (NULL))
2415     {
2416       fprintf (dump_file, "add new stmt: ");
2417       print_generic_expr (dump_file, vec_stmt, TDF_SLIM);
2418     }
2419
2420   /* Make sure bsi points to the stmt that is being vectorized.  */
2421
2422   /* Assumption: any stmts created for the vectorization of stmt S were
2423      inserted before S. BSI is expected to point to S or some new stmt before S.
2424    */
2425
2426   while (stmt != bsi_stmt (*bsi) && !bsi_end_p (*bsi))
2427     bsi_next (bsi);
2428   gcc_assert (stmt == bsi_stmt (*bsi));
2429 }
2430
2431
2432 /* Function vectorizable_assignment.
2433
2434    Check if STMT performs an assignment (copy) that can be vectorized.
2435    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2436    stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2437    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
2438
2439 static bool
2440 vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2441 {
2442   tree vec_dest;
2443   tree scalar_dest;
2444   tree op;
2445   tree vec_oprnd;
2446   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2447   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2448   struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2449   tree new_temp;
2450
2451   /* Is vectorizable assignment?  */
2452
2453   if (TREE_CODE (stmt) != MODIFY_EXPR)
2454     return false;
2455
2456   scalar_dest = TREE_OPERAND (stmt, 0);
2457   if (TREE_CODE (scalar_dest) != SSA_NAME)
2458     return false;
2459
2460   op = TREE_OPERAND (stmt, 1);
2461   if (!vect_is_simple_use (op, loop, NULL))
2462     {
2463       if (vect_debug_details (NULL))
2464         fprintf (dump_file, "use not simple.");
2465       return false;
2466     }
2467
2468   if (!vec_stmt) /* transformation not required.  */
2469     {
2470       STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
2471       return true;
2472     }
2473
2474   /** Trasform.  **/
2475   if (vect_debug_details (NULL))
2476     fprintf (dump_file, "transform assignment.");
2477
2478   /* Handle def.  */
2479   vec_dest = vect_create_destination_var (scalar_dest, vectype);
2480
2481   /* Handle use.  */
2482   op = TREE_OPERAND (stmt, 1);
2483   vec_oprnd = vect_get_vec_def_for_operand (op, stmt);
2484
2485   /* Arguments are ready. create the new vector stmt.  */
2486   *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_oprnd);
2487   new_temp = make_ssa_name (vec_dest, *vec_stmt);
2488   TREE_OPERAND (*vec_stmt, 0) = new_temp;
2489   vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2490
2491   return true;
2492 }
2493
2494
2495 /* Function vectorizable_operation.
2496
2497    Check if STMT performs a binary or unary operation that can be vectorized.
2498    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2499    stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2500    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
2501
2502 static bool
2503 vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2504 {
2505   tree vec_dest;
2506   tree scalar_dest;
2507   tree operation;
2508   tree op0, op1 = NULL;
2509   tree vec_oprnd0, vec_oprnd1=NULL;
2510   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2511   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2512   struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2513   int i;
2514   enum tree_code code;
2515   enum machine_mode vec_mode;
2516   tree new_temp;
2517   int op_type;
2518   tree op;
2519   optab optab;
2520
2521   /* Is STMT a vectorizable binary/unary operation?   */
2522   if (TREE_CODE (stmt) != MODIFY_EXPR)
2523     return false;
2524
2525   if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
2526     return false;
2527
2528   operation = TREE_OPERAND (stmt, 1);
2529   code = TREE_CODE (operation);
2530   optab = optab_for_tree_code (code, vectype);
2531
2532   /* Support only unary or binary operations.  */
2533   op_type = TREE_CODE_LENGTH (code);
2534   if (op_type != unary_op && op_type != binary_op)
2535     {
2536       if (vect_debug_details (NULL))
2537         fprintf (dump_file, "num. args = %d (not unary/binary op).", op_type);
2538       return false;
2539     }
2540
2541   for (i = 0; i < op_type; i++)
2542     {
2543       op = TREE_OPERAND (operation, i);
2544       if (!vect_is_simple_use (op, loop, NULL))
2545         {
2546           if (vect_debug_details (NULL))
2547             fprintf (dump_file, "use not simple.");
2548           return false;
2549         }
2550     }
2551
2552   /* Supportable by target?  */
2553   if (!optab)
2554     {
2555       if (vect_debug_details (NULL))
2556         fprintf (dump_file, "no optab.");
2557       return false;
2558     }
2559   vec_mode = TYPE_MODE (vectype);
2560   if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
2561     {
2562       if (vect_debug_details (NULL))
2563         fprintf (dump_file, "op not supported by target.");
2564       return false;
2565     }
2566
2567   if (!vec_stmt) /* transformation not required.  */
2568     {
2569       STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
2570       return true;
2571     }
2572
2573   /** Transform.  **/
2574
2575   if (vect_debug_details (NULL))
2576     fprintf (dump_file, "transform binary/unary operation.");
2577
2578   /* Handle def.  */
2579   scalar_dest = TREE_OPERAND (stmt, 0);
2580   vec_dest = vect_create_destination_var (scalar_dest, vectype);
2581
2582   /* Handle uses.  */
2583   op0 = TREE_OPERAND (operation, 0);
2584   vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
2585
2586   if (op_type == binary_op)
2587     {
2588       op1 = TREE_OPERAND (operation, 1);
2589       vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
2590     }
2591
2592   /* Arguments are ready. create the new vector stmt.  */
2593
2594   if (op_type == binary_op)
2595     *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
2596                 build2 (code, vectype, vec_oprnd0, vec_oprnd1));
2597   else
2598     *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
2599                 build1 (code, vectype, vec_oprnd0));
2600   new_temp = make_ssa_name (vec_dest, *vec_stmt);
2601   TREE_OPERAND (*vec_stmt, 0) = new_temp;
2602   vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2603
2604   return true;
2605 }
2606
2607
2608 /* Function vectorizable_store.
2609
2610    Check if STMT defines a non scalar data-ref (array/pointer/structure) that
2611    can be vectorized.
2612    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2613    stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2614    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
2615
2616 static bool
2617 vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2618 {
2619   tree scalar_dest;
2620   tree data_ref;
2621   tree op;
2622   tree vec_oprnd1;
2623   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2624   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2625   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2626   struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2627   enum machine_mode vec_mode;
2628   tree dummy;
2629   enum dr_alignment_support alignment_support_cheme;
2630
2631   /* Is vectorizable store? */
2632
2633   if (TREE_CODE (stmt) != MODIFY_EXPR)
2634     return false;
2635
2636   scalar_dest = TREE_OPERAND (stmt, 0);
2637   if (TREE_CODE (scalar_dest) != ARRAY_REF
2638       && TREE_CODE (scalar_dest) != INDIRECT_REF)
2639     return false;
2640
2641   op = TREE_OPERAND (stmt, 1);
2642   if (!vect_is_simple_use (op, loop, NULL))
2643     {
2644       if (vect_debug_details (NULL))
2645         fprintf (dump_file, "use not simple.");
2646       return false;
2647     }
2648
2649   vec_mode = TYPE_MODE (vectype);
2650   /* FORNOW. In some cases can vectorize even if data-type not supported
2651      (e.g. - array initialization with 0).  */
2652   if (mov_optab->handlers[(int)vec_mode].insn_code == CODE_FOR_nothing)
2653     return false;
2654
2655   if (!STMT_VINFO_DATA_REF (stmt_info))
2656     return false;
2657
2658
2659   if (!vec_stmt) /* transformation not required.  */
2660     {
2661       STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
2662       return true;
2663     }
2664
2665   /** Trasform.  **/
2666
2667   if (vect_debug_details (NULL))
2668     fprintf (dump_file, "transform store");
2669
2670   alignment_support_cheme = vect_supportable_dr_alignment (dr);
2671   gcc_assert (alignment_support_cheme);
2672   gcc_assert (alignment_support_cheme = dr_aligned);  /* FORNOW */
2673
2674   /* Handle use - get the vectorized def from the defining stmt.  */
2675   vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt);
2676
2677   /* Handle def.  */
2678   /* FORNOW: make sure the data reference is aligned.  */
2679   vect_align_data_ref (stmt);
2680   data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
2681   data_ref = build_fold_indirect_ref (data_ref);
2682
2683   /* Arguments are ready. create the new vector stmt.  */
2684   *vec_stmt = build2 (MODIFY_EXPR, vectype, data_ref, vec_oprnd1);
2685   vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2686
2687   return true;
2688 }
2689
2690
2691 /* vectorizable_load.
2692
2693    Check if STMT reads a non scalar data-ref (array/pointer/structure) that
2694    can be vectorized.
2695    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2696    stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2697    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
2698
2699 static bool
2700 vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2701 {
2702   tree scalar_dest;
2703   tree vec_dest = NULL;
2704   tree data_ref = NULL;
2705   tree op;
2706   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2707   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2708   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2709   tree new_temp;
2710   int mode;
2711   tree init_addr;
2712   tree new_stmt;
2713   tree dummy;
2714   basic_block new_bb;
2715   struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2716   edge pe = loop_preheader_edge (loop);
2717   enum dr_alignment_support alignment_support_cheme;
2718
2719   /* Is vectorizable load? */
2720
2721   if (TREE_CODE (stmt) != MODIFY_EXPR)
2722     return false;
2723
2724   scalar_dest = TREE_OPERAND (stmt, 0);
2725   if (TREE_CODE (scalar_dest) != SSA_NAME)
2726     return false;
2727
2728   op = TREE_OPERAND (stmt, 1);
2729   if (TREE_CODE (op) != ARRAY_REF && TREE_CODE (op) != INDIRECT_REF)
2730     return false;
2731
2732   if (!STMT_VINFO_DATA_REF (stmt_info))
2733     return false;
2734
2735   mode = (int) TYPE_MODE (vectype);
2736
2737   /* FORNOW. In some cases can vectorize even if data-type not supported
2738     (e.g. - data copies).  */
2739   if (mov_optab->handlers[mode].insn_code == CODE_FOR_nothing)
2740     {
2741       if (vect_debug_details (loop))
2742         fprintf (dump_file, "Aligned load, but unsupported type.");
2743       return false;
2744     }
2745
2746   if (!vec_stmt) /* transformation not required.  */
2747     {
2748       STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
2749       return true;
2750     }
2751
2752   /** Trasform.  **/
2753
2754   if (vect_debug_details (NULL))
2755     fprintf (dump_file, "transform load.");
2756
2757   alignment_support_cheme = vect_supportable_dr_alignment (dr);
2758   gcc_assert (alignment_support_cheme);
2759
2760   if (alignment_support_cheme == dr_aligned
2761       || alignment_support_cheme == dr_unaligned_supported)
2762     {
2763       /* Create:
2764          p = initial_addr;
2765          indx = 0;
2766          loop {
2767            vec_dest = *(p);
2768            indx = indx + 1;
2769          }
2770       */
2771
2772       vec_dest = vect_create_destination_var (scalar_dest, vectype);
2773       data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
2774       if (aligned_access_p (dr))
2775         data_ref = build_fold_indirect_ref (data_ref);
2776       else
2777         {
2778           int mis = DR_MISALIGNMENT (dr);
2779           tree tmis = (mis == -1 ? size_zero_node : size_int (mis));
2780           tmis = size_binop (MULT_EXPR, tmis, size_int(BITS_PER_UNIT));
2781           data_ref = build2 (MISALIGNED_INDIRECT_REF, vectype, data_ref, tmis);
2782         }
2783       new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
2784       new_temp = make_ssa_name (vec_dest, new_stmt);
2785       TREE_OPERAND (new_stmt, 0) = new_temp;
2786       vect_finish_stmt_generation (stmt, new_stmt, bsi);
2787     }
2788   else if (alignment_support_cheme == dr_unaligned_software_pipeline)
2789     {
2790       /* Create:
2791          p1 = initial_addr;
2792          msq_init = *(floor(p1))
2793          p2 = initial_addr + VS - 1;
2794          magic = have_builtin ? builtin_result : initial_address;
2795          indx = 0;
2796          loop {
2797            p2' = p2 + indx * vectype_size
2798            lsq = *(floor(p2'))
2799            vec_dest = realign_load (msq, lsq, magic)
2800            indx = indx + 1;
2801            msq = lsq;
2802          }
2803       */
2804
2805       tree offset;
2806       tree magic;
2807       tree phi_stmt;
2808       tree msq_init;
2809       tree msq, lsq;
2810       tree dataref_ptr;
2811       tree params;
2812
2813       /* <1> Create msq_init = *(floor(p1)) in the loop preheader  */
2814       vec_dest = vect_create_destination_var (scalar_dest, vectype);
2815       data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE,
2816                                            &init_addr, true);
2817       data_ref = build1 (ALIGN_INDIRECT_REF, vectype, data_ref);
2818       new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
2819       new_temp = make_ssa_name (vec_dest, new_stmt);
2820       TREE_OPERAND (new_stmt, 0) = new_temp;
2821       new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
2822       gcc_assert (!new_bb);
2823       msq_init = TREE_OPERAND (new_stmt, 0);
2824
2825
2826       /* <2> Create lsq = *(floor(p2')) in the loop  */
2827       offset = build_int_cst (integer_type_node,
2828                               GET_MODE_NUNITS (TYPE_MODE (vectype)));
2829       offset = int_const_binop (MINUS_EXPR, offset, integer_one_node, 1);
2830       vec_dest = vect_create_destination_var (scalar_dest, vectype);
2831       dataref_ptr = vect_create_data_ref_ptr (stmt, bsi, offset, &dummy, false);
2832       data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
2833       new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
2834       new_temp = make_ssa_name (vec_dest, new_stmt);
2835       TREE_OPERAND (new_stmt, 0) = new_temp;
2836       vect_finish_stmt_generation (stmt, new_stmt, bsi);
2837       lsq = TREE_OPERAND (new_stmt, 0);
2838
2839
2840       /* <3> */
2841       if (targetm.vectorize.builtin_mask_for_load)
2842         {
2843           /* Create permutation mask, if required, in loop preheader.  */
2844           tree builtin_decl;
2845           params = build_tree_list (NULL_TREE, init_addr);
2846           vec_dest = vect_create_destination_var (scalar_dest, vectype);
2847           builtin_decl = targetm.vectorize.builtin_mask_for_load ();
2848           new_stmt = build_function_call_expr (builtin_decl, params);
2849           new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
2850           new_temp = make_ssa_name (vec_dest, new_stmt);
2851           TREE_OPERAND (new_stmt, 0) = new_temp;
2852           new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
2853           gcc_assert (!new_bb);
2854           magic = TREE_OPERAND (new_stmt, 0);
2855
2856           /* Since we have just created a CALL_EXPR, we may need to
2857              rename call-clobbered variables.  */
2858           mark_call_clobbered_vars_to_rename ();
2859         }
2860       else
2861         {
2862           /* Use current address instead of init_addr for reduced reg pressure.
2863            */
2864           magic = dataref_ptr;
2865         }
2866
2867
2868       /* <4> Create msq = phi <msq_init, lsq> in loop  */
2869       vec_dest = vect_create_destination_var (scalar_dest, vectype);
2870       msq = make_ssa_name (vec_dest, NULL_TREE);
2871       phi_stmt = create_phi_node (msq, loop->header); /* CHECKME */
2872       SSA_NAME_DEF_STMT (msq) = phi_stmt;
2873       add_phi_arg (phi_stmt, msq_init, loop_preheader_edge (loop));
2874       add_phi_arg (phi_stmt, lsq, loop_latch_edge (loop));
2875
2876
2877       /* <5> Create <vec_dest = realign_load (msq, lsq, magic)> in loop  */
2878       vec_dest = vect_create_destination_var (scalar_dest, vectype);
2879       new_stmt = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq, magic);
2880       new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
2881       new_temp = make_ssa_name (vec_dest, new_stmt);
2882       TREE_OPERAND (new_stmt, 0) = new_temp;
2883       vect_finish_stmt_generation (stmt, new_stmt, bsi);
2884     }
2885   else
2886     gcc_unreachable ();
2887
2888   *vec_stmt = new_stmt;
2889   return true;
2890 }
2891
2892
2893 /* Function vect_supportable_dr_alignment
2894
2895    Return whether the data reference DR is supported with respect to its
2896    alignment.  */
2897
2898 static enum dr_alignment_support
2899 vect_supportable_dr_alignment (struct data_reference *dr)
2900 {
2901   tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr)));
2902   enum machine_mode mode = (int) TYPE_MODE (vectype);
2903
2904   if (aligned_access_p (dr))
2905     return dr_aligned;
2906
2907   /* Possibly unaligned access.  */
2908
2909   if (DR_IS_READ (dr))
2910     {
2911       if (vec_realign_load_optab->handlers[mode].insn_code != CODE_FOR_nothing
2912           && (!targetm.vectorize.builtin_mask_for_load
2913               || targetm.vectorize.builtin_mask_for_load ()))
2914         return dr_unaligned_software_pipeline;
2915
2916       if (movmisalign_optab->handlers[mode].insn_code != CODE_FOR_nothing)
2917         /* Can't software pipeline the loads, but can at least do them.  */
2918         return dr_unaligned_supported;
2919     }
2920
2921   /* Unsupported.  */
2922   return dr_unaligned_unsupported;
2923 }
2924
2925
2926 /* Function vect_transform_stmt.
2927
2928    Create a vectorized stmt to replace STMT, and insert it at BSI.  */
2929
2930 static bool
2931 vect_transform_stmt (tree stmt, block_stmt_iterator *bsi)
2932 {
2933   bool is_store = false;
2934   tree vec_stmt = NULL_TREE;
2935   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2936   bool done;
2937
2938   switch (STMT_VINFO_TYPE (stmt_info))
2939     {
2940     case op_vec_info_type:
2941       done = vectorizable_operation (stmt, bsi, &vec_stmt);
2942       gcc_assert (done);
2943       break;
2944
2945     case assignment_vec_info_type:
2946       done = vectorizable_assignment (stmt, bsi, &vec_stmt);
2947       gcc_assert (done);
2948       break;
2949
2950     case load_vec_info_type:
2951       done = vectorizable_load (stmt, bsi, &vec_stmt);
2952       gcc_assert (done);
2953       break;
2954
2955     case store_vec_info_type:
2956       done = vectorizable_store (stmt, bsi, &vec_stmt);
2957       gcc_assert (done);
2958       is_store = true;
2959       break;
2960     default:
2961       if (vect_debug_details (NULL))
2962         fprintf (dump_file, "stmt not supported.");
2963       gcc_unreachable ();
2964     }
2965
2966   STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
2967
2968   return is_store;
2969 }
2970
2971
2972 /* This function builds ni_name = number of iterations loop executes
2973    on the loop preheader.  */
2974
2975 static tree
2976 vect_build_loop_niters (loop_vec_info loop_vinfo)
2977 {
2978   tree ni_name, stmt, var;
2979   edge pe;
2980   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2981   tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
2982
2983   var = create_tmp_var (TREE_TYPE (ni), "niters");
2984   add_referenced_tmp_var (var);
2985   ni_name = force_gimple_operand (ni, &stmt, false, var);
2986
2987   pe = loop_preheader_edge (loop);
2988   if (stmt)
2989     {
2990       basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2991       gcc_assert (!new_bb);
2992     }
2993
2994   return ni_name;
2995 }
2996
2997
2998 /* This function generates the following statements:
2999
3000  ni_name = number of iterations loop executes
3001  ratio = ni_name / vf
3002  ratio_mult_vf_name = ratio * vf
3003
3004  and places them at the loop preheader edge.  */
3005
3006 static void
3007 vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
3008                                  tree *ni_name_ptr,
3009                                  tree *ratio_mult_vf_name_ptr,
3010                                  tree *ratio_name_ptr)
3011 {
3012
3013   edge pe;
3014   basic_block new_bb;
3015   tree stmt, ni_name;
3016   tree var;
3017   tree ratio_name;
3018   tree ratio_mult_vf_name;
3019   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3020   tree ni = LOOP_VINFO_NITERS (loop_vinfo);
3021   int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3022   tree log_vf = build_int_cst (unsigned_type_node, exact_log2 (vf));
3023
3024   pe = loop_preheader_edge (loop);
3025
3026   /* Generate temporary variable that contains
3027      number of iterations loop executes.  */
3028
3029   ni_name = vect_build_loop_niters (loop_vinfo);
3030
3031   /* Create: ratio = ni >> log2(vf) */
3032
3033   var = create_tmp_var (TREE_TYPE (ni), "bnd");
3034   add_referenced_tmp_var (var);
3035   ratio_name = make_ssa_name (var, NULL_TREE);
3036   stmt = build2 (MODIFY_EXPR, void_type_node, ratio_name,
3037            build2 (RSHIFT_EXPR, TREE_TYPE (ni_name), ni_name, log_vf));
3038   SSA_NAME_DEF_STMT (ratio_name) = stmt;
3039
3040   pe = loop_preheader_edge (loop);
3041   new_bb = bsi_insert_on_edge_immediate (pe, stmt);
3042   gcc_assert (!new_bb);
3043
3044   /* Create: ratio_mult_vf = ratio << log2 (vf).  */
3045
3046   var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
3047   add_referenced_tmp_var (var);
3048   ratio_mult_vf_name = make_ssa_name (var, NULL_TREE);
3049   stmt = build2 (MODIFY_EXPR, void_type_node, ratio_mult_vf_name,
3050            build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name), ratio_name, log_vf));
3051   SSA_NAME_DEF_STMT (ratio_mult_vf_name) = stmt;
3052
3053   pe = loop_preheader_edge (loop);
3054   new_bb = bsi_insert_on_edge_immediate (pe, stmt);
3055   gcc_assert (!new_bb);
3056
3057   *ni_name_ptr = ni_name;
3058   *ratio_mult_vf_name_ptr = ratio_mult_vf_name;
3059   *ratio_name_ptr = ratio_name;
3060
3061   return;
3062 }
3063
3064
3065 /*   Function vect_update_ivs_after_vectorizer.
3066
3067      "Advance" the induction variables of LOOP to the value they should take
3068      after the execution of LOOP.  This is currently necessary because the
3069      vectorizer does not handle induction variables that are used after the
3070      loop.  Such a situation occurs when the last iterations of LOOP are
3071      peeled, because:
3072      1. We introduced new uses after LOOP for IVs that were not originally used
3073         after LOOP: the IVs of LOOP are now used by an epilog loop.
3074      2. LOOP is going to be vectorized; this means that it will iterate N/VF
3075         times, whereas the loop IVs should be bumped N times.
3076
3077      Input:
3078      - LOOP - a loop that is going to be vectorized. The last few iterations
3079               of LOOP were peeled.
3080      - NITERS - the number of iterations that LOOP executes (before it is
3081                 vectorized). i.e, the number of times the ivs should be bumped.
3082      - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
3083                   coming out from LOOP on which there are uses of the LOOP ivs
3084                   (this is the path from LOOP->exit to epilog_loop->preheader).
3085
3086                   The new definitions of the ivs are placed in LOOP->exit.
3087                   The phi args associated with the edge UPDATE_E in the bb
3088                   UPDATE_E->dest are updated accordingly.
3089
3090      Assumption 1: Like the rest of the vectorizer, this function assumes
3091      a single loop exit that has a single predecessor.
3092
3093      Assumption 2: The phi nodes in the LOOP header and in update_bb are
3094      organized in the same order.
3095
3096      Assumption 3: The access function of the ivs is simple enough (see
3097      vect_can_advance_ivs_p).  This assumption will be relaxed in the future.
3098
3099      Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
3100      coming out of LOOP on which the ivs of LOOP are used (this is the path
3101      that leads to the epilog loop; other paths skip the epilog loop).  This
3102      path starts with the edge UPDATE_E, and its destination (denoted update_bb)
3103      needs to have its phis updated.
3104  */
3105
3106 static void
3107 vect_update_ivs_after_vectorizer (struct loop *loop, tree niters, edge update_e)
3108 {
3109   basic_block exit_bb = loop->exit_edges[0]->dest;
3110   tree phi, phi1;
3111   basic_block update_bb = update_e->dest;
3112
3113   /* gcc_assert (vect_can_advance_ivs_p (loop)); */
3114
3115   /* Make sure there exists a single-predecessor exit bb:  */
3116   gcc_assert (EDGE_COUNT (exit_bb->preds) == 1);
3117
3118   for (phi = phi_nodes (loop->header), phi1 = phi_nodes (update_bb);
3119        phi && phi1;
3120        phi = PHI_CHAIN (phi), phi1 = PHI_CHAIN (phi1))
3121     {
3122       tree access_fn = NULL;
3123       tree evolution_part;
3124       tree init_expr;
3125       tree step_expr;
3126       tree var, stmt, ni, ni_name;
3127       block_stmt_iterator last_bsi;
3128
3129       /* Skip virtual phi's.  */
3130       if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
3131         {
3132           if (vect_debug_details (NULL))
3133             fprintf (dump_file, "virtual phi. skip.");
3134           continue;
3135         }
3136
3137       access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi));
3138       gcc_assert (access_fn);
3139       evolution_part =
3140          unshare_expr (evolution_part_in_loop_num (access_fn, loop->num));
3141       gcc_assert (evolution_part != NULL_TREE);
3142
3143       /* FORNOW: We do not support IVs whose evolution function is a polynomial
3144          of degree >= 2 or exponential.  */
3145       gcc_assert (!tree_is_chrec (evolution_part));
3146
3147       step_expr = evolution_part;
3148       init_expr = unshare_expr (initial_condition (access_fn));
3149
3150       ni = build2 (PLUS_EXPR, TREE_TYPE (init_expr),
3151                   build2 (MULT_EXPR, TREE_TYPE (niters),
3152                        niters, step_expr), init_expr);
3153
3154       var = create_tmp_var (TREE_TYPE (init_expr), "tmp");
3155       add_referenced_tmp_var (var);
3156
3157       ni_name = force_gimple_operand (ni, &stmt, false, var);
3158
3159       /* Insert stmt into exit_bb.  */
3160       last_bsi = bsi_last (exit_bb);
3161       if (stmt)
3162         bsi_insert_before (&last_bsi, stmt, BSI_SAME_STMT);
3163
3164       /* Fix phi expressions in the successor bb.  */
3165       gcc_assert (PHI_ARG_DEF_FROM_EDGE (phi1, update_e) ==
3166                   PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0)));
3167       SET_PHI_ARG_DEF (phi1, phi_arg_from_edge (phi1, update_e), ni_name);
3168     }
3169 }
3170
3171
3172 /* Function vect_do_peeling_for_loop_bound
3173
3174    Peel the last iterations of the loop represented by LOOP_VINFO.
3175    The peeled iterations form a new epilog loop.  Given that the loop now
3176    iterates NITERS times, the new epilog loop iterates
3177    NITERS % VECTORIZATION_FACTOR times.
3178
3179    The original loop will later be made to iterate
3180    NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO).  */
3181
3182 static void
3183 vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio,
3184                                 struct loops *loops)
3185 {
3186
3187   tree ni_name, ratio_mult_vf_name;
3188   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3189   struct loop *new_loop;
3190   edge update_e;
3191 #ifdef ENABLE_CHECKING
3192   int loop_num;
3193 #endif
3194
3195   if (vect_debug_details (NULL))
3196     fprintf (dump_file, "\n<<vect_transtorm_for_unknown_loop_bound>>\n");
3197
3198   /* Generate the following variables on the preheader of original loop:
3199
3200      ni_name = number of iteration the original loop executes
3201      ratio = ni_name / vf
3202      ratio_mult_vf_name = ratio * vf  */
3203   vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
3204                                    &ratio_mult_vf_name, ratio);
3205
3206   /* Update loop info.  */
3207   loop->pre_header = loop_preheader_edge (loop)->src;
3208   loop->pre_header_edges[0] = loop_preheader_edge (loop);
3209
3210 #ifdef ENABLE_CHECKING
3211   loop_num  = loop->num;
3212 #endif
3213   new_loop = slpeel_tree_peel_loop_to_edge (loop, loops, loop->exit_edges[0],
3214                                             ratio_mult_vf_name, ni_name, false);
3215 #ifdef ENABLE_CHECKING
3216   gcc_assert (new_loop);
3217   gcc_assert (loop_num == loop->num);
3218   slpeel_verify_cfg_after_peeling (loop, new_loop);
3219 #endif
3220
3221   /* A guard that controls whether the new_loop is to be executed or skipped
3222      is placed in LOOP->exit.  LOOP->exit therefore has two successors - one
3223      is the preheader of NEW_LOOP, where the IVs from LOOP are used.  The other
3224      is a bb after NEW_LOOP, where these IVs are not used.  Find the edge that
3225      is on the path where the LOOP IVs are used and need to be updated.  */
3226
3227   if (EDGE_PRED (new_loop->pre_header, 0)->src == loop->exit_edges[0]->dest)
3228     update_e = EDGE_PRED (new_loop->pre_header, 0);
3229   else
3230     update_e = EDGE_PRED (new_loop->pre_header, 1);
3231
3232   /* Update IVs of original loop as if they were advanced
3233      by ratio_mult_vf_name steps.  */
3234   vect_update_ivs_after_vectorizer (loop, ratio_mult_vf_name, update_e);
3235
3236   /* After peeling we have to reset scalar evolution analyzer.  */
3237   scev_reset ();
3238
3239   return;
3240 }
3241
3242
3243 /* Function vect_gen_niters_for_prolog_loop
3244
3245    Set the number of iterations for the loop represented by LOOP_VINFO
3246    to the minimum between LOOP_NITERS (the original iteration count of the loop)
3247    and the misalignment of DR - the first data reference recorded in
3248    LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO).  As a result, after the execution of
3249    this loop, the data reference DR will refer to an aligned location.
3250
3251    The following computation is generated:
3252
3253    compute address misalignment in bytes:
3254    addr_mis = addr & (vectype_size - 1)
3255
3256    prolog_niters = min ( LOOP_NITERS , (VF - addr_mis/elem_size)&(VF-1) )
3257
3258    (elem_size = element type size; an element is the scalar element
3259         whose type is the inner type of the vectype)  */
3260
3261 static tree
3262 vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
3263 {
3264   struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
3265   int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3266   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3267   tree var, stmt;
3268   tree iters, iters_name;
3269   edge pe;
3270   basic_block new_bb;
3271   tree dr_stmt = DR_STMT (dr);
3272   stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
3273   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3274   int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
3275   tree elem_misalign;
3276   tree byte_misalign;
3277   tree new_stmts = NULL_TREE;
3278   tree start_addr =
3279         vect_create_addr_base_for_vector_ref (dr_stmt, &new_stmts, NULL_TREE);
3280   tree ptr_type = TREE_TYPE (start_addr);
3281   tree size = TYPE_SIZE (ptr_type);
3282   tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
3283   tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1);
3284   tree vf_minus_1 = build_int_cst (unsigned_type_node, vf - 1);
3285   tree niters_type = TREE_TYPE (loop_niters);
3286   tree elem_size_log =
3287         build_int_cst (unsigned_type_node, exact_log2 (vectype_align/vf));
3288   tree vf_tree = build_int_cst (unsigned_type_node, vf);
3289
3290   pe = loop_preheader_edge (loop);
3291   new_bb = bsi_insert_on_edge_immediate (pe, new_stmts);
3292   gcc_assert (!new_bb);
3293
3294   /* Create:  byte_misalign = addr & (vectype_size - 1)  */
3295   byte_misalign = build2 (BIT_AND_EXPR, type, start_addr, vectype_size_minus_1);
3296
3297   /* Create:  elem_misalign = byte_misalign / element_size  */
3298   elem_misalign =
3299         build2 (RSHIFT_EXPR, unsigned_type_node, byte_misalign, elem_size_log);
3300
3301   /* Create:  (niters_type) (VF - elem_misalign)&(VF - 1)  */
3302   iters = build2 (MINUS_EXPR, unsigned_type_node, vf_tree, elem_misalign);
3303   iters = build2 (BIT_AND_EXPR, unsigned_type_node, iters, vf_minus_1);
3304   iters = fold_convert (niters_type, iters);
3305
3306   /* Create:  prolog_loop_niters = min (iters, loop_niters) */
3307   /* If the loop bound is known at compile time we already verified that it is
3308      greater than vf; since the misalignment ('iters') is at most vf, there's
3309      no need to generate the MIN_EXPR in this case.  */
3310   if (!TREE_CONSTANT (loop_niters))
3311     iters = build2 (MIN_EXPR, niters_type, iters, loop_niters);
3312
3313   var = create_tmp_var (niters_type, "prolog_loop_niters");
3314   add_referenced_tmp_var (var);
3315   iters_name = force_gimple_operand (iters, &stmt, false, var);
3316
3317   /* Insert stmt on loop preheader edge.  */
3318   pe = loop_preheader_edge (loop);
3319   if (stmt)
3320     {
3321       basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
3322       gcc_assert (!new_bb);
3323     }
3324
3325   return iters_name;
3326 }
3327
3328
3329 /* Function vect_update_inits_of_dr
3330
3331    NITERS iterations were peeled from LOOP.  DR represents a data reference
3332    in LOOP.  This function updates the information recorded in DR to
3333    account for the fact that the first NITERS iterations had already been
3334    executed.  Specifically, it updates the OFFSET field of stmt_info.  */
3335
3336 static void
3337 vect_update_inits_of_dr (struct data_reference *dr, tree niters)
3338 {
3339   stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
3340   tree offset = STMT_VINFO_VECT_INIT_OFFSET (stmt_info);
3341
3342   niters = fold (build2 (MULT_EXPR, TREE_TYPE (niters), niters,
3343                          STMT_VINFO_VECT_STEP (stmt_info)));
3344   offset = fold (build2 (PLUS_EXPR, TREE_TYPE (offset), offset, niters));
3345   STMT_VINFO_VECT_INIT_OFFSET (stmt_info) = offset;
3346 }
3347
3348
3349 /* Function vect_update_inits_of_drs
3350
3351    NITERS iterations were peeled from the loop represented by LOOP_VINFO.
3352    This function updates the information recorded for the data references in
3353    the loop to account for the fact that the first NITERS iterations had
3354    already been executed.  Specifically, it updates the initial_condition of the
3355    access_function of all the data_references in the loop.  */
3356
3357 static void
3358 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
3359 {
3360   unsigned int i;
3361   varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
3362   varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
3363
3364   if (dump_file && (dump_flags & TDF_DETAILS))
3365     fprintf (dump_file, "\n<<vect_update_inits_of_dr>>\n");
3366
3367   for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
3368     {
3369       struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
3370       vect_update_inits_of_dr (dr, niters);
3371     }
3372
3373   for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
3374     {
3375       struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
3376       vect_update_inits_of_dr (dr, niters);
3377     }
3378 }
3379
3380
3381 /* Function vect_do_peeling_for_alignment
3382
3383    Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
3384    'niters' is set to the misalignment of one of the data references in the
3385    loop, thereby forcing it to refer to an aligned location at the beginning
3386    of the execution of this loop.  The data reference for which we are
3387    peeling is recorded in LOOP_VINFO_UNALIGNED_DR.  */
3388
3389 static void
3390 vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, struct loops *loops)
3391 {
3392   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3393   tree niters_of_prolog_loop, ni_name;
3394   tree n_iters;
3395   struct loop *new_loop;
3396
3397   if (vect_debug_details (NULL))
3398     fprintf (dump_file, "\n<<vect_do_peeling_for_alignment>>\n");
3399
3400   ni_name = vect_build_loop_niters (loop_vinfo);
3401   niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name);
3402
3403   /* Peel the prolog loop and iterate it niters_of_prolog_loop.  */
3404   new_loop =
3405         slpeel_tree_peel_loop_to_edge (loop, loops, loop_preheader_edge (loop),
3406                                        niters_of_prolog_loop, ni_name, true);
3407 #ifdef ENABLE_CHECKING
3408   gcc_assert (new_loop);
3409   slpeel_verify_cfg_after_peeling (new_loop, loop);
3410 #endif
3411
3412   /* Update number of times loop executes.  */
3413   n_iters = LOOP_VINFO_NITERS (loop_vinfo);
3414   LOOP_VINFO_NITERS (loop_vinfo) =
3415     build2 (MINUS_EXPR, TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop);
3416
3417   /* Update the init conditions of the access functions of all data refs.  */
3418   vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop);
3419
3420   /* After peeling we have to reset scalar evolution analyzer.  */
3421   scev_reset ();
3422
3423   return;
3424 }
3425
3426
3427 /* Function vect_transform_loop.
3428
3429    The analysis phase has determined that the loop is vectorizable.
3430    Vectorize the loop - created vectorized stmts to replace the scalar
3431    stmts in the loop, and update the loop exit condition.  */
3432
3433 static void
3434 vect_transform_loop (loop_vec_info loop_vinfo,
3435                      struct loops *loops ATTRIBUTE_UNUSED)
3436 {
3437   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3438   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
3439   int nbbs = loop->num_nodes;
3440   block_stmt_iterator si;
3441   int i;
3442   tree ratio = NULL;
3443   int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3444
3445   if (vect_debug_details (NULL))
3446     fprintf (dump_file, "\n<<vec_transform_loop>>\n");
3447
3448
3449   /* Peel the loop if there are data refs with unknown alignment.
3450      Only one data ref with unknown store is allowed.  */
3451
3452   if (LOOP_DO_PEELING_FOR_ALIGNMENT (loop_vinfo))
3453     vect_do_peeling_for_alignment (loop_vinfo, loops);
3454
3455   /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
3456      compile time constant), or it is a constant that doesn't divide by the
3457      vectorization factor, then an epilog loop needs to be created.
3458      We therefore duplicate the loop: the original loop will be vectorized,
3459      and will compute the first (n/VF) iterations. The second copy of the loop
3460      will remain scalar and will compute the remaining (n%VF) iterations.
3461      (VF is the vectorization factor).  */
3462
3463   if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3464       || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3465           && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0))
3466     vect_do_peeling_for_loop_bound (loop_vinfo, &ratio, loops);
3467   else
3468     ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
3469                 LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);
3470
3471   /* 1) Make sure the loop header has exactly two entries
3472      2) Make sure we have a preheader basic block.  */
3473
3474   gcc_assert (EDGE_COUNT (loop->header->preds) == 2);
3475
3476   loop_split_edge_with (loop_preheader_edge (loop), NULL);
3477
3478
3479   /* FORNOW: the vectorizer supports only loops which body consist
3480      of one basic block (header + empty latch). When the vectorizer will
3481      support more involved loop forms, the order by which the BBs are
3482      traversed need to be reconsidered.  */
3483
3484   for (i = 0; i < nbbs; i++)
3485     {
3486       basic_block bb = bbs[i];
3487
3488       for (si = bsi_start (bb); !bsi_end_p (si);)
3489         {
3490           tree stmt = bsi_stmt (si);
3491           stmt_vec_info stmt_info;
3492           bool is_store;
3493
3494           if (vect_debug_details (NULL))
3495             {
3496               fprintf (dump_file, "------>vectorizing statement: ");
3497               print_generic_expr (dump_file, stmt, TDF_SLIM);
3498             }
3499           stmt_info = vinfo_for_stmt (stmt);
3500           gcc_assert (stmt_info);
3501           if (!STMT_VINFO_RELEVANT_P (stmt_info))
3502             {
3503               bsi_next (&si);
3504               continue;
3505             }
3506 #ifdef ENABLE_CHECKING
3507           /* FORNOW: Verify that all stmts operate on the same number of
3508                      units and no inner unrolling is necessary.  */
3509           gcc_assert
3510                 (GET_MODE_NUNITS (TYPE_MODE (STMT_VINFO_VECTYPE (stmt_info)))
3511                  == vectorization_factor);
3512 #endif
3513           /* -------- vectorize statement ------------ */
3514           if (vect_debug_details (NULL))
3515             fprintf (dump_file, "transform statement.");
3516
3517           is_store = vect_transform_stmt (stmt, &si);
3518           if (is_store)
3519             {
3520               /* free the attached stmt_vec_info and remove the stmt.  */
3521               stmt_ann_t ann = stmt_ann (stmt);
3522               free (stmt_info);
3523               set_stmt_info (ann, NULL);
3524               bsi_remove (&si);
3525               continue;
3526             }
3527
3528           bsi_next (&si);
3529         }                       /* stmts in BB */
3530     }                           /* BBs in loop */
3531
3532   slpeel_make_loop_iterate_ntimes (loop, ratio);
3533
3534   if (vect_debug_details (loop))
3535     fprintf (dump_file,"Success! loop vectorized.");
3536   if (vect_debug_stats (loop))
3537     fprintf (dump_file, "LOOP VECTORIZED.");
3538 }
3539
3540
3541 /* Function vect_is_simple_use.
3542
3543    Input:
3544    LOOP - the loop that is being vectorized.
3545    OPERAND - operand of a stmt in LOOP.
3546    DEF - the defining stmt in case OPERAND is an SSA_NAME.
3547
3548    Returns whether a stmt with OPERAND can be vectorized.
3549    Supportable operands are constants, loop invariants, and operands that are
3550    defined by the current iteration of the loop. Unsupportable operands are
3551    those that are defined by a previous iteration of the loop (as is the case
3552    in reduction/induction computations).  */
3553
3554 static bool
3555 vect_is_simple_use (tree operand, struct loop *loop, tree *def)
3556 {
3557   tree def_stmt;
3558   basic_block bb;
3559
3560   if (def)
3561     *def = NULL_TREE;
3562
3563   if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
3564     return true;
3565
3566   if (TREE_CODE (operand) != SSA_NAME)
3567     return false;
3568
3569   def_stmt = SSA_NAME_DEF_STMT (operand);
3570   if (def_stmt == NULL_TREE )
3571     {
3572       if (vect_debug_details (NULL))
3573         fprintf (dump_file, "no def_stmt.");
3574       return false;
3575     }
3576
3577   /* empty stmt is expected only in case of a function argument.
3578      (Otherwise - we expect a phi_node or a modify_expr).  */
3579   if (IS_EMPTY_STMT (def_stmt))
3580     {
3581       tree arg = TREE_OPERAND (def_stmt, 0);
3582       if (TREE_CODE (arg) == INTEGER_CST || TREE_CODE (arg) == REAL_CST)
3583         return true;
3584       if (vect_debug_details (NULL))
3585         {
3586           fprintf (dump_file, "Unexpected empty stmt: ");
3587           print_generic_expr (dump_file, def_stmt, TDF_SLIM);
3588         }
3589       return false;
3590     }
3591
3592   /* phi_node inside the loop indicates an induction/reduction pattern.
3593      This is not supported yet.  */
3594   bb = bb_for_stmt (def_stmt);
3595   if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb))
3596     {
3597       if (vect_debug_details (NULL))
3598         fprintf (dump_file, "reduction/induction - unsupported.");
3599       return false; /* FORNOW: not supported yet.  */
3600     }
3601
3602   /* Expecting a modify_expr or a phi_node.  */
3603   if (TREE_CODE (def_stmt) == MODIFY_EXPR
3604       || TREE_CODE (def_stmt) == PHI_NODE)
3605     {
3606       if (def)
3607         *def = def_stmt;
3608       return true;
3609     }
3610
3611   return false;
3612 }
3613
3614
3615 /* Function vect_analyze_operations.
3616
3617    Scan the loop stmts and make sure they are all vectorizable.  */
3618
3619 static bool
3620 vect_analyze_operations (loop_vec_info loop_vinfo)
3621 {
3622   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3623   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
3624   int nbbs = loop->num_nodes;
3625   block_stmt_iterator si;
3626   unsigned int vectorization_factor = 0;
3627   int i;
3628   bool ok;
3629   tree scalar_type;
3630
3631   if (vect_debug_details (NULL))
3632     fprintf (dump_file, "\n<<vect_analyze_operations>>\n");
3633
3634   for (i = 0; i < nbbs; i++)
3635     {
3636       basic_block bb = bbs[i];
3637
3638       for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
3639         {
3640           tree stmt = bsi_stmt (si);
3641           unsigned int nunits;
3642           stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3643           tree vectype;
3644
3645           if (vect_debug_details (NULL))
3646             {
3647               fprintf (dump_file, "==> examining statement: ");
3648               print_generic_expr (dump_file, stmt, TDF_SLIM);
3649             }
3650
3651           gcc_assert (stmt_info);
3652
3653           /* skip stmts which do not need to be vectorized.
3654              this is expected to include:
3655              - the COND_EXPR which is the loop exit condition
3656              - any LABEL_EXPRs in the loop
3657              - computations that are used only for array indexing or loop
3658              control  */
3659
3660           if (!STMT_VINFO_RELEVANT_P (stmt_info))
3661             {
3662               if (vect_debug_details (NULL))
3663                 fprintf (dump_file, "irrelevant.");
3664               continue;
3665             }
3666
3667           if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
3668             {
3669               if (vect_debug_stats (loop) || vect_debug_details (loop))
3670                 {
3671                   fprintf (dump_file, "not vectorized: vector stmt in loop:");
3672                   print_generic_expr (dump_file, stmt, TDF_SLIM);
3673                 }
3674               return false;
3675             }
3676
3677           if (STMT_VINFO_DATA_REF (stmt_info))
3678             scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info)));
3679           else if (TREE_CODE (stmt) == MODIFY_EXPR)
3680             scalar_type = TREE_TYPE (TREE_OPERAND (stmt, 0));
3681           else
3682             scalar_type = TREE_TYPE (stmt);
3683
3684           if (vect_debug_details (NULL))
3685             {
3686               fprintf (dump_file, "get vectype for scalar type:  ");
3687               print_generic_expr (dump_file, scalar_type, TDF_SLIM);
3688             }
3689
3690           vectype = get_vectype_for_scalar_type (scalar_type);
3691           if (!vectype)
3692             {
3693               if (vect_debug_stats (loop) || vect_debug_details (loop))
3694                 {
3695                   fprintf (dump_file, "not vectorized: unsupported data-type ");
3696                   print_generic_expr (dump_file, scalar_type, TDF_SLIM);
3697                 }
3698               return false;
3699             }
3700
3701           if (vect_debug_details (NULL))
3702             {
3703               fprintf (dump_file, "vectype: ");
3704               print_generic_expr (dump_file, vectype, TDF_SLIM);
3705             }
3706           STMT_VINFO_VECTYPE (stmt_info) = vectype;
3707
3708           ok = (vectorizable_operation (stmt, NULL, NULL)
3709                 || vectorizable_assignment (stmt, NULL, NULL)
3710                 || vectorizable_load (stmt, NULL, NULL)
3711                 || vectorizable_store (stmt, NULL, NULL));
3712
3713           if (!ok)
3714             {
3715               if (vect_debug_stats (loop) || vect_debug_details (loop))
3716                 {
3717                   fprintf (dump_file, "not vectorized: stmt not supported: ");
3718                   print_generic_expr (dump_file, stmt, TDF_SLIM);
3719                 }
3720               return false;
3721             }
3722
3723           nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
3724           if (vect_debug_details (NULL))
3725             fprintf (dump_file, "nunits = %d", nunits);
3726
3727           if (vectorization_factor)
3728             {
3729               /* FORNOW: don't allow mixed units.
3730                  This restriction will be relaxed in the future.  */
3731               if (nunits != vectorization_factor)
3732                 {
3733                   if (vect_debug_stats (loop) || vect_debug_details (loop))
3734                     fprintf (dump_file, "not vectorized: mixed data-types");
3735                   return false;
3736                 }
3737             }
3738           else
3739             vectorization_factor = nunits;
3740
3741 #ifdef ENABLE_CHECKING
3742           gcc_assert (GET_MODE_SIZE (TYPE_MODE (scalar_type))
3743                         * vectorization_factor == UNITS_PER_SIMD_WORD);
3744 #endif
3745         }
3746     }
3747
3748   /* TODO: Analyze cost. Decide if worth while to vectorize.  */
3749
3750   if (vectorization_factor <= 1)
3751     {
3752       if (vect_debug_stats (loop) || vect_debug_details (loop))
3753         fprintf (dump_file, "not vectorized: unsupported data-type");
3754       return false;
3755     }
3756   LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
3757
3758   if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) && vect_debug_details (NULL))
3759     fprintf (dump_file,
3760         "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
3761         vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));
3762
3763   if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3764       && LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor)
3765     {
3766       if (vect_debug_stats (loop) || vect_debug_details (loop))
3767         fprintf (dump_file, "not vectorized: iteration count too small.");
3768       return false;
3769     }
3770
3771   if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3772       || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0)
3773     {
3774       if (vect_debug_stats (loop) || vect_debug_details (loop))
3775         fprintf (dump_file, "epilog loop required.");
3776       if (!vect_can_advance_ivs_p (loop))
3777         {
3778           if (vect_debug_stats (loop) || vect_debug_details (loop))
3779             fprintf (dump_file, "not vectorized: can't create epilog loop 1.");
3780           return false;
3781         }
3782       if (!slpeel_can_duplicate_loop_p (loop, loop->exit_edges[0]))
3783         {
3784           if (vect_debug_stats (loop) || vect_debug_details (loop))
3785             fprintf (dump_file, "not vectorized: can't create epilog loop 2.");
3786           return false;
3787         }
3788     }
3789
3790   return true;
3791 }
3792
3793
3794 /* Function exist_non_indexing_operands_for_use_p
3795
3796    USE is one of the uses attached to STMT. Check if USE is
3797    used in STMT for anything other than indexing an array.  */
3798
3799 static bool
3800 exist_non_indexing_operands_for_use_p (tree use, tree stmt)
3801 {
3802   tree operand;
3803   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3804
3805   /* USE corresponds to some operand in STMT. If there is no data
3806      reference in STMT, then any operand that corresponds to USE
3807      is not indexing an array.  */
3808   if (!STMT_VINFO_DATA_REF (stmt_info))
3809     return true;
3810
3811   /* STMT has a data_ref. FORNOW this means that its of one of
3812      the following forms:
3813      -1- ARRAY_REF = var
3814      -2- var = ARRAY_REF
3815      (This should have been verified in analyze_data_refs).
3816
3817      'var' in the second case corresponds to a def, not a use,
3818      so USE cannot correspond to any operands that are not used
3819      for array indexing.
3820
3821      Therefore, all we need to check is if STMT falls into the
3822      first case, and whether var corresponds to USE.  */
3823
3824   if (TREE_CODE (TREE_OPERAND (stmt, 0)) == SSA_NAME)
3825     return false;
3826
3827   operand = TREE_OPERAND (stmt, 1);
3828
3829   if (TREE_CODE (operand) != SSA_NAME)
3830     return false;
3831
3832   if (operand == use)
3833     return true;
3834
3835   return false;
3836 }
3837
3838
3839 /* Function vect_is_simple_iv_evolution.
3840
3841    FORNOW: A simple evolution of an induction variables in the loop is
3842    considered a polynomial evolution with constant step.  */
3843
3844 static bool
3845 vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
3846                                 tree * step, bool strict)
3847 {
3848   tree init_expr;
3849   tree step_expr;
3850
3851   tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
3852
3853   /* When there is no evolution in this loop, the evolution function
3854      is not "simple".  */
3855   if (evolution_part == NULL_TREE)
3856     return false;
3857
3858   /* When the evolution is a polynomial of degree >= 2
3859      the evolution function is not "simple".  */
3860   if (tree_is_chrec (evolution_part))
3861     return false;
3862
3863   step_expr = evolution_part;
3864   init_expr = unshare_expr (initial_condition (access_fn));
3865
3866   if (vect_debug_details (NULL))
3867     {
3868       fprintf (dump_file, "step: ");
3869       print_generic_expr (dump_file, step_expr, TDF_SLIM);
3870       fprintf (dump_file, ",  init: ");
3871       print_generic_expr (dump_file, init_expr, TDF_SLIM);
3872     }
3873
3874   *init = init_expr;
3875   *step = step_expr;
3876
3877   if (TREE_CODE (step_expr) != INTEGER_CST)
3878     {
3879       if (vect_debug_details (NULL))
3880         fprintf (dump_file, "step unknown.");
3881       return false;
3882     }
3883
3884   if (strict)
3885     if (!integer_onep (step_expr))
3886       {
3887         if (vect_debug_details (NULL))
3888           print_generic_expr (dump_file, step_expr, TDF_SLIM);
3889         return false;
3890       }
3891
3892   return true;
3893 }
3894
3895
3896 /* Function vect_analyze_scalar_cycles.
3897
3898    Examine the cross iteration def-use cycles of scalar variables, by
3899    analyzing the loop (scalar) PHIs; verify that the cross iteration def-use
3900    cycles that they represent do not impede vectorization.
3901
3902    FORNOW: Reduction as in the following loop, is not supported yet:
3903               loop1:
3904               for (i=0; i<N; i++)
3905                  sum += a[i];
3906            The cross-iteration cycle corresponding to variable 'sum' will be
3907            considered too complicated and will impede vectorization.
3908
3909    FORNOW: Induction as in the following loop, is not supported yet:
3910               loop2:
3911               for (i=0; i<N; i++)
3912                  a[i] = i;
3913
3914            However, the following loop *is* vectorizable:
3915               loop3:
3916               for (i=0; i<N; i++)
3917                  a[i] = b[i];
3918
3919            In both loops there exists a def-use cycle for the variable i:
3920               loop: i_2 = PHI (i_0, i_1)
3921                     a[i_2] = ...;
3922                     i_1 = i_2 + 1;
3923                     GOTO loop;
3924
3925            The evolution of the above cycle is considered simple enough,
3926            however, we also check that the cycle does not need to be
3927            vectorized, i.e - we check that the variable that this cycle
3928            defines is only used for array indexing or in stmts that do not
3929            need to be vectorized. This is not the case in loop2, but it
3930            *is* the case in loop3.  */
3931
3932 static bool
3933 vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
3934 {
3935   tree phi;
3936   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3937   basic_block bb = loop->header;
3938   tree dummy;
3939
3940   if (vect_debug_details (NULL))
3941     fprintf (dump_file, "\n<<vect_analyze_scalar_cycles>>\n");
3942
3943   for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
3944     {
3945       tree access_fn = NULL;
3946
3947       if (vect_debug_details (NULL))
3948         {
3949           fprintf (dump_file, "Analyze phi: ");
3950           print_generic_expr (dump_file, phi, TDF_SLIM);
3951         }
3952
3953       /* Skip virtual phi's. The data dependences that are associated with
3954          virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */
3955
3956       if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
3957         {
3958           if (vect_debug_details (NULL))
3959             fprintf (dump_file, "virtual phi. skip.");
3960           continue;
3961         }
3962
3963       /* Analyze the evolution function.  */
3964
3965       /* FORNOW: The only scalar cross-iteration cycles that we allow are
3966          those of loop induction variables; This property is verified here.
3967
3968          Furthermore, if that induction variable is used in an operation
3969          that needs to be vectorized (i.e, is not solely used to index
3970          arrays and check the exit condition) - we do not support its
3971          vectorization yet. This property is verified in vect_is_simple_use,
3972          during vect_analyze_operations.  */
3973
3974       access_fn = /* instantiate_parameters
3975                      (loop,*/
3976          analyze_scalar_evolution (loop, PHI_RESULT (phi));
3977
3978       if (!access_fn)
3979         {
3980           if (vect_debug_stats (loop) || vect_debug_details (loop))
3981             fprintf (dump_file, "not vectorized: unsupported scalar cycle.");
3982           return false;
3983         }
3984
3985       if (vect_debug_details (NULL))
3986         {
3987            fprintf (dump_file, "Access function of PHI: ");
3988            print_generic_expr (dump_file, access_fn, TDF_SLIM);
3989         }
3990
3991       if (!vect_is_simple_iv_evolution (loop->num, access_fn, &dummy,
3992                                         &dummy, false))
3993         {
3994           if (vect_debug_stats (loop) || vect_debug_details (loop))
3995             fprintf (dump_file, "not vectorized: unsupported scalar cycle.");
3996           return false;
3997         }
3998     }
3999
4000   return true;
4001 }
4002
4003
4004 /* Function vect_analyze_data_ref_dependence.
4005
4006    Return TRUE if there (might) exist a dependence between a memory-reference
4007    DRA and a memory-reference DRB.  */
4008
4009 static bool
4010 vect_analyze_data_ref_dependence (struct data_reference *dra,
4011                                   struct data_reference *drb,
4012                                   struct loop *loop)
4013 {
4014   bool differ_p;
4015   struct data_dependence_relation *ddr;
4016
4017   if (!array_base_name_differ_p (dra, drb, &differ_p))
4018     {
4019       if (vect_debug_stats (loop) || vect_debug_details (loop))
4020         {
4021           fprintf (dump_file,
4022                 "not vectorized: can't determine dependence between: ");
4023           print_generic_expr (dump_file, DR_REF (dra), TDF_SLIM);
4024           fprintf (dump_file, " and ");
4025           print_generic_expr (dump_file, DR_REF (drb), TDF_SLIM);
4026         }
4027       return true;
4028     }
4029
4030   if (differ_p)
4031     return false;
4032
4033   ddr = initialize_data_dependence_relation (dra, drb);
4034   compute_affine_dependence (ddr);
4035
4036   if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4037     return false;
4038
4039   if (vect_debug_stats (loop) || vect_debug_details (loop))
4040     {
4041       fprintf (dump_file,
4042         "not vectorized: possible dependence between data-refs ");
4043       print_generic_expr (dump_file, DR_REF (dra), TDF_SLIM);
4044       fprintf (dump_file, " and ");
4045       print_generic_expr (dump_file, DR_REF (drb), TDF_SLIM);
4046     }
4047
4048   return true;
4049 }
4050
4051
4052 /* Function vect_analyze_data_ref_dependences.
4053
4054    Examine all the data references in the loop, and make sure there do not
4055    exist any data dependences between them.
4056
4057    TODO: dependences which distance is greater than the vectorization factor
4058          can be ignored.  */
4059
4060 static bool
4061 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
4062 {
4063   unsigned int i, j;
4064   varray_type loop_write_refs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4065   varray_type loop_read_refs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4066   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4067
4068   /* Examine store-store (output) dependences.  */
4069
4070   if (vect_debug_details (NULL))
4071     fprintf (dump_file, "\n<<vect_analyze_dependences>>\n");
4072
4073   if (vect_debug_details (NULL))
4074     fprintf (dump_file, "compare all store-store pairs.");
4075
4076   for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_refs); i++)
4077     {
4078       for (j = i + 1; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
4079         {
4080           struct data_reference *dra =
4081             VARRAY_GENERIC_PTR (loop_write_refs, i);
4082           struct data_reference *drb =
4083             VARRAY_GENERIC_PTR (loop_write_refs, j);
4084           if (vect_analyze_data_ref_dependence (dra, drb, loop))
4085             return false;
4086         }
4087     }
4088
4089   /* Examine load-store (true/anti) dependences.  */
4090
4091   if (vect_debug_details (NULL))
4092     fprintf (dump_file, "compare all load-store pairs.");
4093
4094   for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_refs); i++)
4095     {
4096       for (j = 0; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
4097         {
4098           struct data_reference *dra = VARRAY_GENERIC_PTR (loop_read_refs, i);
4099           struct data_reference *drb =
4100             VARRAY_GENERIC_PTR (loop_write_refs, j);
4101           if (vect_analyze_data_ref_dependence (dra, drb, loop))
4102             return false;
4103         }
4104     }
4105
4106   return true;
4107 }
4108
4109
4110 /* Function vect_compute_data_ref_alignment
4111
4112    Compute the misalignment of the data reference DR.
4113
4114    Output:
4115    1. If during the misalignment computation it is found that the data reference
4116       cannot be vectorized then false is returned.
4117    2. DR_MISALIGNMENT (DR) is defined.
4118
4119    FOR NOW: No analysis is actually performed. Misalignment is calculated
4120    only for trivial cases. TODO.  */
4121
4122 static bool
4123 vect_compute_data_ref_alignment (struct data_reference *dr)
4124 {
4125   tree stmt = DR_STMT (dr);
4126   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4127   tree ref = DR_REF (dr);
4128   tree vectype;
4129   tree base, alignment;
4130   bool base_aligned_p;
4131   tree misalign;
4132
4133   if (vect_debug_details (NULL))
4134     fprintf (dump_file, "vect_compute_data_ref_alignment:");
4135
4136   /* Initialize misalignment to unknown.  */
4137   DR_MISALIGNMENT (dr) = -1;
4138
4139   misalign = STMT_VINFO_VECT_MISALIGNMENT (stmt_info);
4140   base_aligned_p = STMT_VINFO_VECT_BASE_ALIGNED_P (stmt_info);
4141   base = STMT_VINFO_VECT_DR_BASE (stmt_info);
4142   vectype = STMT_VINFO_VECTYPE (stmt_info);
4143
4144   if (!misalign)
4145     {
4146       if (vect_debug_details (NULL))
4147         {
4148           fprintf (dump_file, "Unknown alignment for access: ");
4149           print_generic_expr (dump_file, base, TDF_SLIM);
4150         }
4151       return true;
4152     }
4153
4154   if (!base_aligned_p)
4155     {
4156       if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype)))
4157         {
4158           if (vect_debug_details (NULL))
4159             {
4160               fprintf (dump_file, "can't force alignment of ref: ");
4161               print_generic_expr (dump_file, ref, TDF_SLIM);
4162             }
4163           return true;
4164         }
4165
4166       /* Force the alignment of the decl.
4167          NOTE: This is the only change to the code we make during
4168          the analysis phase, before deciding to vectorize the loop.  */
4169       if (vect_debug_details (NULL))
4170         fprintf (dump_file, "force alignment");
4171       DECL_ALIGN (base) = TYPE_ALIGN (vectype);
4172       DECL_USER_ALIGN (base) = 1;
4173     }
4174
4175   /* At this point we assume that the base is aligned.  */
4176   gcc_assert (base_aligned_p
4177               || (TREE_CODE (base) == VAR_DECL
4178                   && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
4179
4180   /* Alignment required, in bytes:  */
4181   alignment = size_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);
4182
4183   /* Modulo alignment.  */
4184   misalign = size_binop (TRUNC_MOD_EXPR, misalign, alignment);
4185   if (tree_int_cst_sgn (misalign) < 0)
4186     {
4187       /* Negative misalignment value.  */
4188       if (vect_debug_details (NULL))
4189         fprintf (dump_file, "unexpected misalign value");
4190       return false;
4191     }
4192
4193   DR_MISALIGNMENT (dr) = tree_low_cst (misalign, 1);
4194
4195   if (vect_debug_details (NULL))
4196     fprintf (dump_file, "misalign = %d", DR_MISALIGNMENT (dr));
4197
4198   return true;
4199 }
4200
4201
4202 /* Function vect_compute_data_refs_alignment
4203
4204    Compute the misalignment of data references in the loop.
4205    This pass may take place at function granularity instead of at loop
4206    granularity.
4207
4208    FOR NOW: No analysis is actually performed. Misalignment is calculated
4209    only for trivial cases. TODO.  */
4210
4211 static bool
4212 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
4213 {
4214   varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4215   varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4216   unsigned int i;
4217
4218   for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4219     {
4220       struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4221       if (!vect_compute_data_ref_alignment (dr))
4222         return false;
4223     }
4224
4225   for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4226     {
4227       struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4228       if (!vect_compute_data_ref_alignment (dr))
4229         return false;
4230     }
4231
4232   return true;
4233 }
4234
4235
4236 /* Function vect_enhance_data_refs_alignment
4237
4238    This pass will use loop versioning and loop peeling in order to enhance
4239    the alignment of data references in the loop.
4240
4241    FOR NOW: we assume that whatever versioning/peeling takes place, only the
4242    original loop is to be vectorized; Any other loops that are created by
4243    the transformations performed in this pass - are not supposed to be
4244    vectorized. This restriction will be relaxed.  */
4245
4246 static void
4247 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
4248 {
4249   varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4250   varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4251   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4252   unsigned int i;
4253
4254   /*
4255      This pass will require a cost model to guide it whether to apply peeling
4256      or versioning or a combination of the two. For example, the scheme that
4257      intel uses when given a loop with several memory accesses, is as follows:
4258      choose one memory access ('p') which alignment you want to force by doing
4259      peeling. Then, either (1) generate a loop in which 'p' is aligned and all
4260      other accesses are not necessarily aligned, or (2) use loop versioning to
4261      generate one loop in which all accesses are aligned, and another loop in
4262      which only 'p' is necessarily aligned.
4263
4264      ("Automatic Intra-Register Vectorization for the Intel Architecture",
4265       Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
4266       Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
4267
4268      Devising a cost model is the most critical aspect of this work. It will
4269      guide us on which access to peel for, whether to use loop versioning, how
4270      many versions to create, etc. The cost model will probably consist of
4271      generic considerations as well as target specific considerations (on
4272      powerpc for example, misaligned stores are more painful than misaligned
4273      loads).
4274
4275      Here is the general steps involved in alignment enhancements:
4276
4277      -- original loop, before alignment analysis:
4278         for (i=0; i<N; i++){
4279           x = q[i];                     # DR_MISALIGNMENT(q) = unknown
4280           p[i] = y;                     # DR_MISALIGNMENT(p) = unknown
4281         }
4282
4283      -- After vect_compute_data_refs_alignment:
4284         for (i=0; i<N; i++){
4285           x = q[i];                     # DR_MISALIGNMENT(q) = 3
4286           p[i] = y;                     # DR_MISALIGNMENT(p) = unknown
4287         }
4288
4289      -- Possibility 1: we do loop versioning:
4290      if (p is aligned) {
4291         for (i=0; i<N; i++){    # loop 1A
4292           x = q[i];                     # DR_MISALIGNMENT(q) = 3
4293           p[i] = y;                     # DR_MISALIGNMENT(p) = 0
4294         }
4295      }
4296      else {
4297         for (i=0; i<N; i++){    # loop 1B
4298           x = q[i];                     # DR_MISALIGNMENT(q) = 3
4299           p[i] = y;                     # DR_MISALIGNMENT(p) = unaligned
4300         }
4301      }
4302
4303      -- Possibility 2: we do loop peeling:
4304      for (i = 0; i < 3; i++){   # (scalar loop, not to be vectorized).
4305         x = q[i];
4306         p[i] = y;
4307      }
4308      for (i = 3; i < N; i++){   # loop 2A
4309         x = q[i];                       # DR_MISALIGNMENT(q) = 0
4310         p[i] = y;                       # DR_MISALIGNMENT(p) = unknown
4311      }
4312
4313      -- Possibility 3: combination of loop peeling and versioning:
4314      for (i = 0; i < 3; i++){   # (scalar loop, not to be vectorized).
4315         x = q[i];
4316         p[i] = y;
4317      }
4318      if (p is aligned) {
4319         for (i = 3; i<N; i++){  # loop 3A
4320           x = q[i];                     # DR_MISALIGNMENT(q) = 0
4321           p[i] = y;                     # DR_MISALIGNMENT(p) = 0
4322         }
4323      }
4324      else {
4325         for (i = 3; i<N; i++){  # loop 3B
4326           x = q[i];                     # DR_MISALIGNMENT(q) = 0
4327           p[i] = y;                     # DR_MISALIGNMENT(p) = unaligned
4328         }
4329      }
4330
4331      These loops are later passed to loop_transform to be vectorized. The
4332      vectorizer will use the alignment information to guide the transformation
4333      (whether to generate regular loads/stores, or with special handling for
4334      misalignment).
4335    */
4336
4337   /* (1) Peeling to force alignment.  */
4338
4339   /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
4340      Considerations:
4341      + How many accesses will become aligned due to the peeling
4342      - How many accesses will become unaligned due to the peeling,
4343        and the cost of misaligned accesses.
4344      - The cost of peeling (the extra runtime checks, the increase
4345        in code size).
4346
4347      The scheme we use FORNOW: peel to force the alignment of the first
4348      misaligned store in the loop.
4349      Rationale: misaligned stores are not yet supported.
4350
4351      TODO: Use a better cost model.  */
4352
4353   for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4354     {
4355       struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4356       if (!aligned_access_p (dr))
4357         {
4358           LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr;
4359           LOOP_DO_PEELING_FOR_ALIGNMENT (loop_vinfo) = true;
4360           break;
4361         }
4362     }
4363
4364   if (!LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
4365     {
4366       if (vect_debug_details (loop))
4367         fprintf (dump_file, "Peeling for alignment will not be applied.");
4368       return;
4369     }
4370   else
4371     if (vect_debug_details (loop))
4372       fprintf (dump_file, "Peeling for alignment will be applied.");
4373
4374
4375   /* (1.2) Update the alignment info according to the peeling factor.
4376            If the misalignment of the DR we peel for is M, then the
4377            peeling factor is VF - M, and the misalignment of each access DR_i
4378            in the loop is DR_MISALIGNMENT (DR_i) + VF - M.
4379            If the misalignment of the DR we peel for is unknown, then the
4380            misalignment of each access DR_i in the loop is also unknown.
4381
4382            FORNOW: set the misalignment of the accesses to unknown even
4383                    if the peeling factor is known at compile time.
4384
4385            TODO: - if the peeling factor is known at compile time, use that
4386                    when updating the misalignment info of the loop DRs.
4387                  - consider accesses that are known to have the same
4388                    alignment, even if that alignment is unknown.  */
4389
4390   for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4391     {
4392       struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4393       if (dr == LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
4394         DR_MISALIGNMENT (dr) = 0;
4395       else
4396         DR_MISALIGNMENT (dr) = -1;
4397     }
4398   for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4399     {
4400       struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4401       if (dr == LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
4402         DR_MISALIGNMENT (dr) = 0;
4403       else
4404         DR_MISALIGNMENT (dr) = -1;
4405     }
4406 }
4407
4408
4409 /* Function vect_analyze_data_refs_alignment
4410
4411    Analyze the alignment of the data-references in the loop.
4412    FOR NOW: Until support for misliagned accesses is in place, only if all
4413    accesses are aligned can the loop be vectorized. This restriction will be
4414    relaxed.  */
4415
4416 static bool
4417 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
4418 {
4419   varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4420   varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4421   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4422   enum dr_alignment_support supportable_dr_alignment;
4423   unsigned int i;
4424
4425   if (vect_debug_details (NULL))
4426     fprintf (dump_file, "\n<<vect_analyze_data_refs_alignment>>\n");
4427
4428
4429   /* This pass may take place at function granularity instead of at loop
4430      granularity.  */
4431
4432   if (!vect_compute_data_refs_alignment (loop_vinfo))
4433     {
4434       if (vect_debug_details (loop) || vect_debug_stats (loop))
4435         fprintf (dump_file,
4436                  "not vectorized: can't calculate alignment for data ref.");
4437       return false;
4438     }
4439
4440
4441   /* This pass will decide on using loop versioning and/or loop peeling in
4442      order to enhance the alignment of data references in the loop.  */
4443
4444   vect_enhance_data_refs_alignment (loop_vinfo);
4445
4446
4447   /* Finally, check that all the data references in the loop can be
4448      handled with respect to their alignment.  */
4449
4450   for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4451     {
4452       struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4453       supportable_dr_alignment = vect_supportable_dr_alignment (dr);
4454       if (!supportable_dr_alignment)
4455         {
4456           if (vect_debug_details (loop) || vect_debug_stats (loop))
4457             fprintf (dump_file, "not vectorized: unsupported unaligned load.");
4458           return false;
4459         }
4460     }
4461   for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4462     {
4463       struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4464       supportable_dr_alignment = vect_supportable_dr_alignment (dr);
4465       if (!supportable_dr_alignment)
4466         {
4467           if (vect_debug_details (loop) || vect_debug_stats (loop))
4468             fprintf (dump_file, "not vectorized: unsupported unaligned store.");
4469           return false;
4470         }
4471     }
4472
4473   return true;
4474 }
4475
4476
4477 /* Function vect_analyze_data_ref_access.
4478
4479    Analyze the access pattern of the data-reference DR. For now, a data access
4480    has to consecutive to be considered vectorizable.  */
4481
4482 static bool
4483 vect_analyze_data_ref_access (struct data_reference *dr)
4484 {
4485   tree stmt = DR_STMT (dr);
4486   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4487   tree step = STMT_VINFO_VECT_STEP (stmt_info);
4488   tree scalar_type = TREE_TYPE (DR_REF (dr));
4489
4490   if (!step || tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
4491     {
4492       if (vect_debug_details (NULL))
4493         fprintf (dump_file, "not consecutive access");
4494       return false;
4495     }
4496   return true;
4497 }
4498
4499
4500 /* Function vect_analyze_data_ref_accesses.
4501
4502    Analyze the access pattern of all the data references in the loop.
4503
4504    FORNOW: the only access pattern that is considered vectorizable is a
4505            simple step 1 (consecutive) access.
4506
4507    FORNOW: handle only arrays and pointer accesses.  */
4508
4509 static bool
4510 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
4511 {
4512   unsigned int i;
4513   varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4514   varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4515
4516   if (vect_debug_details (NULL))
4517     fprintf (dump_file, "\n<<vect_analyze_data_ref_accesses>>\n");
4518
4519   for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4520     {
4521       struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4522       bool ok = vect_analyze_data_ref_access (dr);
4523       if (!ok)
4524         {
4525           if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo))
4526               || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo)))
4527             fprintf (dump_file, "not vectorized: complicated access pattern.");
4528           return false;
4529         }
4530     }
4531
4532   for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4533     {
4534       struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4535       bool ok = vect_analyze_data_ref_access (dr);
4536       if (!ok)
4537         {
4538           if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo))
4539               || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo)))
4540             fprintf (dump_file, "not vectorized: complicated access pattern.");
4541           return false;
4542         }
4543     }
4544
4545   return true;
4546 }
4547
4548
4549 /* Function vect_analyze_pointer_ref_access.
4550
4551    Input:
4552    STMT - a stmt that contains a data-ref
4553    MEMREF - a data-ref in STMT, which is an INDIRECT_REF.
4554
4555    If the data-ref access is vectorizable, return a data_reference structure
4556    that represents it (DR). Otherwise - return NULL.  */
4557
4558 static struct data_reference *
4559 vect_analyze_pointer_ref_access (tree memref, tree stmt, bool is_read)
4560 {
4561   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4562   struct loop *loop = STMT_VINFO_LOOP (stmt_info);
4563   tree access_fn = analyze_scalar_evolution (loop, TREE_OPERAND (memref, 0));
4564   tree init, step;
4565   tree reftype, innertype;
4566   tree indx_access_fn;
4567   int loopnum = loop->num;
4568   struct data_reference *dr;
4569
4570   if (!access_fn)
4571     {
4572       if (vect_debug_stats (loop) || vect_debug_details (loop))
4573         fprintf (dump_file, "not vectorized: complicated pointer access.");
4574       return NULL;
4575     }
4576
4577   if (vect_debug_details (NULL))
4578     {
4579       fprintf (dump_file, "Access function of ptr: ");
4580       print_generic_expr (dump_file, access_fn, TDF_SLIM);
4581     }
4582
4583   if (!vect_is_simple_iv_evolution (loopnum, access_fn, &init, &step, false))
4584     {
4585       if (vect_debug_stats (loop) || vect_debug_details (loop))
4586         fprintf (dump_file, "not vectorized: pointer access is not simple.");
4587       return NULL;
4588     }
4589
4590   STRIP_NOPS (init);
4591
4592   if (!TREE_CONSTANT (step))
4593     {
4594       if (vect_debug_stats (loop) || vect_debug_details (loop))
4595         fprintf (dump_file,
4596                 "not vectorized: non constant step for pointer access.");
4597       return NULL;
4598     }
4599
4600   reftype = TREE_TYPE (TREE_OPERAND (memref, 0));
4601   if (TREE_CODE (reftype) != POINTER_TYPE)
4602     {
4603       if (vect_debug_stats (loop) || vect_debug_details (loop))
4604         fprintf (dump_file, "not vectorized: unexpected pointer access form.");
4605       return NULL;
4606     }
4607
4608   reftype = TREE_TYPE (init);
4609   if (TREE_CODE (reftype) != POINTER_TYPE)
4610     {
4611       if (vect_debug_stats (loop) || vect_debug_details (loop))
4612         fprintf (dump_file, "not vectorized: unexpected pointer access form.");
4613       return NULL;
4614     }
4615
4616   innertype = TREE_TYPE (reftype);
4617   if (tree_int_cst_compare (TYPE_SIZE_UNIT (innertype), step))
4618     {
4619       /* FORNOW: support only consecutive access */
4620       if (vect_debug_stats (loop) || vect_debug_details (loop))
4621         fprintf (dump_file, "not vectorized: non consecutive access.");
4622       return NULL;
4623     }
4624
4625   STMT_VINFO_VECT_STEP (stmt_info) = fold_convert (sizetype, step);
4626   if (TREE_CODE (init) == PLUS_EXPR
4627       || TREE_CODE (init) == MINUS_EXPR)
4628     STMT_VINFO_VECT_INIT_OFFSET (stmt_info) =
4629       fold (size_binop (TREE_CODE (init), size_zero_node,
4630                         fold_convert (sizetype, TREE_OPERAND (init, 1))));
4631   else
4632     STMT_VINFO_VECT_INIT_OFFSET (stmt_info) = size_zero_node;
4633
4634   indx_access_fn =
4635         build_polynomial_chrec (loopnum, integer_zero_node, integer_one_node);
4636   if (vect_debug_details (NULL))
4637     {
4638       fprintf (dump_file, "Access function of ptr indx: ");
4639       print_generic_expr (dump_file, indx_access_fn, TDF_SLIM);
4640     }
4641   dr = init_data_ref (stmt, memref, init, indx_access_fn, is_read);
4642   return dr;
4643 }
4644
4645
4646 /* Function vect_get_memtag_and_dr.
4647
4648    The function returns the relevant variable for memory tag (for aliasing
4649    purposes). Also data reference structure DR is created.
4650
4651    This function handles three kinds of MEMREF:
4652
4653    It is called from vect_analyze_data_refs with a MEMREF that is either an
4654    ARRAY_REF or an INDIRECT_REF (this is category 1 - "recursion begins").
4655    It builds a DR for them using vect_get_base_and_offset, and calls itself
4656    recursively to retrieve the relevant memtag for the MEMREF, "peeling" the
4657    MEMREF along the way. During the recursive calls, the function may be called
4658    with a MEMREF for which the recursion has to continue - PLUS_EXPR,
4659    MINUS_EXPR, INDIRECT_REF (category 2 - "recursion continues"),
4660    and/or with a MEMREF for which a memtag can be trivially obtained - VAR_DECL
4661    and SSA_NAME (this is category 3 - "recursion stop condition").
4662
4663    When the MEMREF falls into category 1 there is still no data reference struct
4664    (DR) available. It is created by this function, and then, along the recursion,
4665    MEMREF will fall into category 2 or 3, in which case a DR will have already
4666    been created, but the analysis continues to retrieve the MEMTAG.
4667
4668    Input:
4669    MEMREF - data reference in STMT
4670    IS_READ - TRUE if STMT reads from MEMREF, FALSE if writes to MEMREF
4671
4672    Output:
4673    DR - data_reference struct for MEMREF
4674    return value - the relevant variable for memory tag (for aliasing purposes).
4675
4676 */
4677
4678 static tree
4679 vect_get_memtag_and_dr (tree memref, tree stmt, bool is_read,
4680                         loop_vec_info loop_vinfo,
4681                         tree vectype, struct data_reference **dr)
4682 {
4683   tree symbl, oprnd0, oprnd1;
4684   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4685   tree offset, misalign, step;
4686   tree ref_to_be_analyzed, tag, dr_base;
4687   struct data_reference *new_dr;
4688   bool base_aligned_p;
4689
4690   if (*dr)
4691     {
4692       /* Category 3: recursion stop condition.  */
4693       /* (1) A DR already exists. We only need to get the relevant memtag for
4694          MEMREF, the rest of the data was already initialized.  */
4695
4696       switch (TREE_CODE (memref))
4697         {
4698           /* (1.1) Stop condition: find the relevant memtag and return.  */
4699         case SSA_NAME:
4700           symbl = SSA_NAME_VAR (memref);
4701           tag = get_var_ann (symbl)->type_mem_tag;
4702           if (!tag)
4703             {
4704               tree ptr = TREE_OPERAND (DR_REF ((*dr)), 0);
4705               if (TREE_CODE (ptr) == SSA_NAME)
4706                 tag = get_var_ann (SSA_NAME_VAR (ptr))->type_mem_tag;
4707             }
4708           if (!tag)
4709             {
4710               if (vect_debug_details (NULL))
4711                 fprintf (dump_file, "not vectorized: no memtag for ref.");
4712               return NULL_TREE;
4713             }
4714           return tag;
4715
4716         case VAR_DECL:
4717         case PARM_DECL:
4718           return memref;
4719
4720           /* Category 2: recursion continues.  */
4721           /* (1.2) A recursive call to find the relevant memtag is required.  */
4722         case INDIRECT_REF:
4723           symbl = TREE_OPERAND (memref, 0);
4724           break; /* For recursive call.  */
4725
4726         case COMPONENT_REF:
4727           /* Could have recorded more accurate information -
4728              i.e, the actual FIELD_DECL that is being referenced -
4729              but later passes expect VAR_DECL as the nmt.  */
4730           /* Fall through.  */
4731
4732         case ADDR_EXPR:
4733           symbl = STMT_VINFO_VECT_DR_BASE (stmt_info);
4734           break; /* For recursive call.  */
4735
4736         case PLUS_EXPR:
4737         case MINUS_EXPR:
4738           /* Although DR exists, we have to call the function recursively to
4739              build MEMTAG for such expression. This is handled below.  */
4740           oprnd0 = TREE_OPERAND (memref, 0);
4741           oprnd1 = TREE_OPERAND (memref, 1);
4742
4743           STRIP_NOPS (oprnd1);
4744            /* Supported plus/minus expressions are of the form
4745              {address_base + offset}, such that address_base is of type
4746              POINTER/ARRAY, and offset is either an INTEGER_CST of type POINTER,
4747              or it's not of type POINTER/ARRAY.
4748              TODO: swap operands if {offset + address_base}.  */
4749           if ((TREE_CODE (TREE_TYPE (oprnd1)) == POINTER_TYPE
4750                && TREE_CODE (oprnd1) != INTEGER_CST)
4751               || TREE_CODE (TREE_TYPE (oprnd1)) == ARRAY_TYPE)
4752             return NULL_TREE;
4753
4754           symbl = oprnd0;
4755           break; /* For recursive call.  */
4756
4757         default:
4758           return NULL_TREE;
4759         }
4760     }
4761   else
4762     {
4763       /* Category 1: recursion begins.  */
4764       /* (2) A DR does not exist yet and must be built, followed by a
4765          recursive call to get the relevant memtag for MEMREF.  */
4766
4767       switch (TREE_CODE (memref))
4768         {
4769         case INDIRECT_REF:
4770           new_dr = vect_analyze_pointer_ref_access (memref, stmt, is_read);
4771           if (!new_dr)
4772             return NULL_TREE;
4773           *dr = new_dr;
4774           symbl = DR_BASE_NAME (new_dr);
4775           ref_to_be_analyzed = DR_BASE_NAME (new_dr);
4776           break;
4777
4778         case ARRAY_REF:
4779           new_dr = analyze_array (stmt, memref, is_read);
4780           *dr = new_dr;
4781           symbl = DR_BASE_NAME (new_dr);
4782           ref_to_be_analyzed = memref;
4783           break;
4784
4785         default:
4786           /* TODO: Support data-refs of form a[i].p for unions and single
4787              field structures.  */
4788           return NULL_TREE;
4789         }
4790
4791       offset = size_zero_node;
4792       misalign = size_zero_node;
4793       step = size_zero_node;
4794
4795       /* Analyze data-ref, find its base, initial offset from the base, step,
4796          and alignment.  */
4797       dr_base = vect_get_base_and_offset (new_dr, ref_to_be_analyzed,
4798                                           vectype, loop_vinfo, &offset,
4799                                           &misalign, &step, &base_aligned_p);
4800       if (!dr_base)
4801         return NULL_TREE;
4802
4803       /* Initialize information according to above analysis.  */
4804       /* Since offset and step of a pointer can be also set in
4805          vect_analyze_pointer_ref_access, we combine the values here. */
4806       if (STMT_VINFO_VECT_INIT_OFFSET (stmt_info))
4807         STMT_VINFO_VECT_INIT_OFFSET (stmt_info) =
4808           fold (build2 (PLUS_EXPR, TREE_TYPE (offset), offset,
4809                         STMT_VINFO_VECT_INIT_OFFSET (stmt_info)));
4810       else
4811         STMT_VINFO_VECT_INIT_OFFSET (stmt_info) = offset;
4812
4813       if (step && STMT_VINFO_VECT_STEP (stmt_info))
4814         STMT_VINFO_VECT_STEP (stmt_info) =
4815           size_binop (PLUS_EXPR, step, STMT_VINFO_VECT_STEP (stmt_info));
4816       else
4817         STMT_VINFO_VECT_STEP (stmt_info) = step;
4818
4819       STMT_VINFO_VECT_BASE_ALIGNED_P (stmt_info) = base_aligned_p;
4820       STMT_VINFO_VECT_MISALIGNMENT (stmt_info) = misalign;
4821       STMT_VINFO_VECT_DR_BASE (stmt_info) = dr_base;
4822     }
4823
4824   if (!symbl)
4825     return NULL_TREE;
4826   /* Recursive call to retrieve the relevant memtag.  */
4827   tag = vect_get_memtag_and_dr (symbl, stmt, is_read, loop_vinfo, vectype, dr);
4828   return tag;
4829 }
4830
4831
4832
4833 /* Function vect_analyze_data_refs.
4834
4835    Find all the data references in the loop.
4836
4837    The general structure of the analysis of data refs in the vectorizer is as
4838    follows:
4839    1- vect_analyze_data_refs(loop):
4840       Find and analyze all data-refs in the loop:
4841           foreach ref
4842              ref_stmt.memtag =  vect_get_memtag_and_dr (ref)
4843    1.1- vect_get_memtag_and_dr(ref):
4844       Analyze ref, and build a DR (data_referece struct) for it;
4845       call vect_get_base_and_offset to compute base, initial_offset,
4846       step and alignment. Set ref_stmt.base, ref_stmt.initial_offset,
4847       ref_stmt.alignment, and ref_stmt.step accordingly.
4848    1.1.1- vect_get_base_and_offset():
4849       Calculate base, initial_offset, step and alignment.
4850       For ARRAY_REFs and COMPONENT_REFs use call get_inner_reference.
4851    2- vect_analyze_dependences(): apply dependece testing using ref_stmt.DR
4852    3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
4853    4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
4854
4855    FORNOW: Handle aligned INDIRECT_REFs and ARRAY_REFs
4856            which base is really an array (not a pointer) and which alignment
4857            can be forced. This restriction will be relaxed.  */
4858
4859 static bool
4860 vect_analyze_data_refs (loop_vec_info loop_vinfo)
4861 {
4862   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4863   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
4864   int nbbs = loop->num_nodes;
4865   block_stmt_iterator si;
4866   int j;
4867   struct data_reference *dr;
4868
4869   if (vect_debug_details (NULL))
4870     fprintf (dump_file, "\n<<vect_analyze_data_refs>>\n");
4871
4872   for (j = 0; j < nbbs; j++)
4873     {
4874       basic_block bb = bbs[j];
4875       for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
4876         {
4877           bool is_read = false;
4878           tree stmt = bsi_stmt (si);
4879           stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4880           v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
4881           v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
4882           vuse_optype vuses = STMT_VUSE_OPS (stmt);
4883           varray_type *datarefs = NULL;
4884           int nvuses, nv_may_defs, nv_must_defs;
4885           tree memref = NULL;
4886           tree symbl;
4887           tree scalar_type, vectype;
4888
4889           /* Assumption: there exists a data-ref in stmt, if and only if
4890              it has vuses/vdefs.  */
4891
4892           if (!vuses && !v_may_defs && !v_must_defs)
4893             continue;
4894
4895           nvuses = NUM_VUSES (vuses);
4896           nv_may_defs = NUM_V_MAY_DEFS (v_may_defs);
4897           nv_must_defs = NUM_V_MUST_DEFS (v_must_defs);
4898
4899           if (nvuses && (nv_may_defs || nv_must_defs))
4900             {
4901               if (vect_debug_details (NULL))
4902                 {
4903                   fprintf (dump_file, "unexpected vdefs and vuses in stmt: ");
4904                   print_generic_expr (dump_file, stmt, TDF_SLIM);
4905                 }
4906               return false;
4907             }
4908
4909           if (TREE_CODE (stmt) != MODIFY_EXPR)
4910             {
4911               if (vect_debug_details (NULL))
4912                 {
4913                   fprintf (dump_file, "unexpected vops in stmt: ");
4914                   print_generic_expr (dump_file, stmt, TDF_SLIM);
4915                 }
4916               return false;
4917             }
4918
4919           if (vuses)
4920             {
4921               memref = TREE_OPERAND (stmt, 1);
4922               datarefs = &(LOOP_VINFO_DATAREF_READS (loop_vinfo));
4923               is_read = true;
4924             }
4925           else /* vdefs */
4926             {
4927               memref = TREE_OPERAND (stmt, 0);
4928               datarefs = &(LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
4929               is_read = false;
4930             }
4931
4932           scalar_type = TREE_TYPE (memref);
4933           vectype = get_vectype_for_scalar_type (scalar_type);
4934           if (!vectype)
4935             {
4936               if (vect_debug_details (NULL))
4937                 {
4938                   fprintf (dump_file, "no vectype for stmt: ");
4939                   print_generic_expr (dump_file, stmt, TDF_SLIM);
4940                   fprintf (dump_file, " scalar_type: ");
4941                   print_generic_expr (dump_file, scalar_type, TDF_DETAILS);
4942                 }
4943               /* It is not possible to vectorize this data reference.  */
4944               return false;
4945             }
4946           /* Analyze MEMREF. If it is of a supported form, build data_reference
4947              struct for it (DR) and find memtag for aliasing purposes.  */
4948           dr = NULL;
4949           symbl = vect_get_memtag_and_dr (memref, stmt, is_read, loop_vinfo,
4950                                           vectype, &dr);
4951           if (!symbl)
4952             {
4953               if (vect_debug_stats (loop) || vect_debug_details (loop))
4954                 {
4955                   fprintf (dump_file, "not vectorized: unhandled data ref: ");
4956                   print_generic_expr (dump_file, stmt, TDF_SLIM);
4957                 }
4958               return false;
4959             }
4960           STMT_VINFO_MEMTAG (stmt_info) = symbl;
4961           STMT_VINFO_VECTYPE (stmt_info) = vectype;
4962           VARRAY_PUSH_GENERIC_PTR (*datarefs, dr);
4963           STMT_VINFO_DATA_REF (stmt_info) = dr;
4964         }
4965     }
4966
4967   return true;
4968 }
4969
4970
4971 /* Utility functions used by vect_mark_stmts_to_be_vectorized.  */
4972
4973 /* Function vect_mark_relevant.
4974
4975    Mark STMT as "relevant for vectorization" and add it to WORKLIST.  */
4976
4977 static void
4978 vect_mark_relevant (varray_type *worklist, tree stmt)
4979 {
4980   stmt_vec_info stmt_info;
4981
4982   if (vect_debug_details (NULL))
4983     fprintf (dump_file, "mark relevant.");
4984
4985   if (TREE_CODE (stmt) == PHI_NODE)
4986     {
4987       VARRAY_PUSH_TREE (*worklist, stmt);
4988       return;
4989     }
4990
4991   stmt_info = vinfo_for_stmt (stmt);
4992
4993   if (!stmt_info)
4994     {
4995       if (vect_debug_details (NULL))
4996         {
4997           fprintf (dump_file, "mark relevant: no stmt info!!.");
4998           print_generic_expr (dump_file, stmt, TDF_SLIM);
4999         }
5000       return;
5001     }
5002
5003   if (STMT_VINFO_RELEVANT_P (stmt_info))
5004     {
5005       if (vect_debug_details (NULL))
5006         fprintf (dump_file, "already marked relevant.");
5007       return;
5008     }
5009
5010   STMT_VINFO_RELEVANT_P (stmt_info) = 1;
5011   VARRAY_PUSH_TREE (*worklist, stmt);
5012 }
5013
5014
5015 /* Function vect_stmt_relevant_p.
5016
5017    Return true if STMT in loop that is represented by LOOP_VINFO is
5018    "relevant for vectorization".
5019
5020    A stmt is considered "relevant for vectorization" if:
5021    - it has uses outside the loop.
5022    - it has vdefs (it alters memory).
5023    - control stmts in the loop (except for the exit condition).
5024
5025    CHECKME: what other side effects would the vectorizer allow?  */
5026
5027 static bool
5028 vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo)
5029 {
5030   v_may_def_optype v_may_defs;
5031   v_must_def_optype v_must_defs;
5032   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5033   int i;
5034   dataflow_t df;
5035   int num_uses;
5036
5037   /* cond stmt other than loop exit cond.  */
5038   if (is_ctrl_stmt (stmt) && (stmt != LOOP_VINFO_EXIT_COND (loop_vinfo)))
5039     return true;
5040
5041   /* changing memory.  */
5042   v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
5043   v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
5044   if (v_may_defs || v_must_defs)
5045     {
5046       if (vect_debug_details (NULL))
5047         fprintf (dump_file, "vec_stmt_relevant_p: stmt has vdefs.");
5048       return true;
5049     }
5050
5051   /* uses outside the loop.  */
5052   df = get_immediate_uses (stmt);
5053   num_uses = num_immediate_uses (df);
5054   for (i = 0; i < num_uses; i++)
5055     {
5056       tree use = immediate_use (df, i);
5057       basic_block bb = bb_for_stmt (use);
5058       if (!flow_bb_inside_loop_p (loop, bb))
5059         {
5060           if (vect_debug_details (NULL))
5061             fprintf (dump_file, "vec_stmt_relevant_p: used out of loop.");
5062           return true;
5063         }
5064     }
5065
5066   return false;
5067 }
5068
5069
5070 /* Function vect_mark_stmts_to_be_vectorized.
5071
5072    Not all stmts in the loop need to be vectorized. For example:
5073
5074      for i...
5075        for j...
5076    1.    T0 = i + j
5077    2.    T1 = a[T0]
5078
5079    3.    j = j + 1
5080
5081    Stmt 1 and 3 do not need to be vectorized, because loop control and
5082    addressing of vectorized data-refs are handled differently.
5083
5084    This pass detects such stmts.  */
5085
5086 static bool
5087 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
5088 {
5089   varray_type worklist;
5090   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5091   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
5092   unsigned int nbbs = loop->num_nodes;
5093   block_stmt_iterator si;
5094   tree stmt;
5095   stmt_ann_t ann;
5096   unsigned int i;
5097   int j;
5098   use_optype use_ops;
5099   stmt_vec_info stmt_info;
5100
5101   if (vect_debug_details (NULL))
5102     fprintf (dump_file, "\n<<vect_mark_stmts_to_be_vectorized>>\n");
5103
5104   VARRAY_TREE_INIT (worklist, 64, "work list");
5105
5106   /* 1. Init worklist.  */
5107
5108   for (i = 0; i < nbbs; i++)
5109     {
5110       basic_block bb = bbs[i];
5111       for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
5112         {
5113           stmt = bsi_stmt (si);
5114
5115           if (vect_debug_details (NULL))
5116             {
5117               fprintf (dump_file, "init: stmt relevant? ");
5118               print_generic_expr (dump_file, stmt, TDF_SLIM);
5119             }
5120
5121           stmt_info = vinfo_for_stmt (stmt);
5122           STMT_VINFO_RELEVANT_P (stmt_info) = 0;
5123
5124           if (vect_stmt_relevant_p (stmt, loop_vinfo))
5125             vect_mark_relevant (&worklist, stmt);
5126         }
5127     }
5128
5129
5130   /* 2. Process_worklist */
5131
5132   while (VARRAY_ACTIVE_SIZE (worklist) > 0)
5133     {
5134       stmt = VARRAY_TOP_TREE (worklist);
5135       VARRAY_POP (worklist);
5136
5137       if (vect_debug_details (NULL))
5138         {
5139           fprintf (dump_file, "worklist: examine stmt: ");
5140           print_generic_expr (dump_file, stmt, TDF_SLIM);
5141         }
5142
5143       /* Examine the USES in this statement. Mark all the statements which
5144          feed this statement's uses as "relevant", unless the USE is used as
5145          an array index.  */
5146
5147       if (TREE_CODE (stmt) == PHI_NODE)
5148         {
5149           /* follow the def-use chain inside the loop.  */
5150           for (j = 0; j < PHI_NUM_ARGS (stmt); j++)
5151             {
5152               tree arg = PHI_ARG_DEF (stmt, j);
5153               tree def_stmt = NULL_TREE;
5154               basic_block bb;
5155               if (!vect_is_simple_use (arg, loop, &def_stmt))
5156                 {
5157                   if (vect_debug_details (NULL))
5158                     fprintf (dump_file, "worklist: unsupported use.");
5159                   varray_clear (worklist);
5160                   return false;
5161                 }
5162               if (!def_stmt)
5163                 continue;
5164
5165               if (vect_debug_details (NULL))
5166                 {
5167                   fprintf (dump_file, "worklist: def_stmt: ");
5168                   print_generic_expr (dump_file, def_stmt, TDF_SLIM);
5169                 }
5170
5171               bb = bb_for_stmt (def_stmt);
5172               if (flow_bb_inside_loop_p (loop, bb))
5173                 vect_mark_relevant (&worklist, def_stmt);
5174             }
5175         }
5176
5177       ann = stmt_ann (stmt);
5178       use_ops = USE_OPS (ann);
5179
5180       for (i = 0; i < NUM_USES (use_ops); i++)
5181         {
5182           tree use = USE_OP (use_ops, i);
5183
5184           /* We are only interested in uses that need to be vectorized. Uses
5185              that are used for address computation are not considered relevant.
5186            */
5187           if (exist_non_indexing_operands_for_use_p (use, stmt))
5188             {
5189               tree def_stmt = NULL_TREE;
5190               basic_block bb;
5191               if (!vect_is_simple_use (use, loop, &def_stmt))
5192                 {
5193                   if (vect_debug_details (NULL))
5194                     fprintf (dump_file, "worklist: unsupported use.");
5195                   varray_clear (worklist);
5196                   return false;
5197                 }
5198
5199               if (!def_stmt)
5200                 continue;
5201
5202               if (vect_debug_details (NULL))
5203                 {
5204                   fprintf (dump_file, "worklist: examine use %d: ", i);
5205                   print_generic_expr (dump_file, use, TDF_SLIM);
5206                 }
5207
5208               bb = bb_for_stmt (def_stmt);
5209               if (flow_bb_inside_loop_p (loop, bb))
5210                 vect_mark_relevant (&worklist, def_stmt);
5211             }
5212         }
5213     }                           /* while worklist */
5214
5215   varray_clear (worklist);
5216   return true;
5217 }
5218
5219
5220 /* Function vect_can_advance_ivs_p
5221
5222    In case the number of iterations that LOOP iterates in unknown at compile
5223    time, an epilog loop will be generated, and the loop induction variables
5224    (IVs) will be "advanced" to the value they are supposed to take just before
5225    the epilog loop.  Here we check that the access function of the loop IVs
5226    and the expression that represents the loop bound are simple enough.
5227    These restrictions will be relaxed in the future.  */
5228
5229 static bool
5230 vect_can_advance_ivs_p (struct loop *loop)
5231 {
5232   basic_block bb = loop->header;
5233   tree phi;
5234
5235   /* Analyze phi functions of the loop header.  */
5236
5237   for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
5238     {
5239       tree access_fn = NULL;
5240       tree evolution_part;
5241
5242       if (vect_debug_details (NULL))
5243         {
5244           fprintf (dump_file, "Analyze phi: ");
5245           print_generic_expr (dump_file, phi, TDF_SLIM);
5246         }
5247
5248       /* Skip virtual phi's. The data dependences that are associated with
5249          virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */
5250
5251       if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
5252         {
5253           if (vect_debug_details (NULL))
5254             fprintf (dump_file, "virtual phi. skip.");
5255           continue;
5256         }
5257
5258       /* Analyze the evolution function.  */
5259
5260       access_fn = instantiate_parameters
5261         (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
5262
5263       if (!access_fn)
5264         {
5265           if (vect_debug_details (NULL))
5266             fprintf (dump_file, "No Access function.");
5267           return false;
5268         }
5269
5270       if (vect_debug_details (NULL))
5271         {
5272           fprintf (dump_file, "Access function of PHI: ");
5273           print_generic_expr (dump_file, access_fn, TDF_SLIM);
5274         }
5275
5276       evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
5277
5278       if (evolution_part == NULL_TREE)
5279         return false;
5280
5281       /* FORNOW: We do not transform initial conditions of IVs
5282          which evolution functions are a polynomial of degree >= 2.  */
5283
5284       if (tree_is_chrec (evolution_part))
5285         return false;
5286     }
5287
5288   return true;
5289 }
5290
5291
5292 /* Function vect_get_loop_niters.
5293
5294    Determine how many iterations the loop is executed.
5295    If an expression that represents the number of iterations
5296    can be constructed, place it in NUMBER_OF_ITERATIONS.
5297    Return the loop exit condition.  */
5298
5299 static tree
5300 vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
5301 {
5302   tree niters;
5303
5304   if (vect_debug_details (NULL))
5305     fprintf (dump_file, "\n<<get_loop_niters>>\n");
5306
5307   niters = number_of_iterations_in_loop (loop);
5308
5309   if (niters != NULL_TREE
5310       && niters != chrec_dont_know)
5311     {
5312       *number_of_iterations = niters;
5313
5314       if (vect_debug_details (NULL))
5315         {
5316           fprintf (dump_file, "==> get_loop_niters:" );
5317           print_generic_expr (dump_file, *number_of_iterations, TDF_SLIM);
5318         }
5319     }
5320
5321   return get_loop_exit_condition (loop);
5322 }
5323
5324
5325 /* Function vect_analyze_loop_form.
5326
5327    Verify the following restrictions (some may be relaxed in the future):
5328    - it's an inner-most loop
5329    - number of BBs = 2 (which are the loop header and the latch)
5330    - the loop has a pre-header
5331    - the loop has a single entry and exit
5332    - the loop exit condition is simple enough, and the number of iterations
5333      can be analyzed (a countable loop).  */
5334
5335 static loop_vec_info
5336 vect_analyze_loop_form (struct loop *loop)
5337 {
5338   loop_vec_info loop_vinfo;
5339   tree loop_cond;
5340   tree number_of_iterations = NULL;
5341   bool rescan = false;
5342
5343   if (vect_debug_details (loop))
5344     fprintf (dump_file, "\n<<vect_analyze_loop_form>>\n");
5345
5346   if (loop->inner
5347       || !loop->single_exit
5348       || loop->num_nodes != 2
5349       || EDGE_COUNT (loop->header->preds) != 2
5350       || loop->num_entries != 1)
5351     {
5352       if (vect_debug_stats (loop) || vect_debug_details (loop))
5353         {
5354           fprintf (dump_file, "not vectorized: bad loop form. ");
5355           if (loop->inner)
5356             fprintf (dump_file, "nested loop.");
5357           else if (!loop->single_exit)
5358             fprintf (dump_file, "multiple exits.");
5359           else if (loop->num_nodes != 2)
5360             fprintf (dump_file, "too many BBs in loop.");
5361           else if (EDGE_COUNT (loop->header->preds) != 2)
5362             fprintf (dump_file, "too many incoming edges.");
5363           else if (loop->num_entries != 1)
5364             fprintf (dump_file, "too many entries.");
5365         }
5366
5367       return NULL;
5368     }
5369
5370   /* We assume that the loop exit condition is at the end of the loop. i.e,
5371      that the loop is represented as a do-while (with a proper if-guard
5372      before the loop if needed), where the loop header contains all the
5373      executable statements, and the latch is empty.  */
5374   if (!empty_block_p (loop->latch))
5375     {
5376       if (vect_debug_stats (loop) || vect_debug_details (loop))
5377         fprintf (dump_file, "not vectorized: unexpectd loop form.");
5378       return NULL;
5379     }
5380
5381   /* Make sure we have a preheader basic block.  */
5382   if (!loop->pre_header)
5383     {
5384       rescan = true;
5385       loop_split_edge_with (loop_preheader_edge (loop), NULL);
5386     }
5387
5388   /* Make sure there exists a single-predecessor exit bb:  */
5389   if (EDGE_COUNT (loop->exit_edges[0]->dest->preds) != 1)
5390     {
5391       rescan = true;
5392       loop_split_edge_with (loop->exit_edges[0], NULL);
5393     }
5394
5395   if (rescan)
5396     {
5397       flow_loop_scan (loop, LOOP_ALL);
5398       /* Flow loop scan does not update loop->single_exit field.  */
5399       loop->single_exit = loop->exit_edges[0];
5400     }
5401
5402   if (empty_block_p (loop->header))
5403     {
5404       if (vect_debug_stats (loop) || vect_debug_details (loop))
5405         fprintf (dump_file, "not vectorized: empty loop.");
5406       return NULL;
5407     }
5408
5409   loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
5410   if (!loop_cond)
5411     {
5412       if (vect_debug_stats (loop) || vect_debug_details (loop))
5413         fprintf (dump_file, "not vectorized: complicated exit condition.");
5414       return NULL;
5415     }
5416
5417   if (!number_of_iterations)
5418     {
5419       if (vect_debug_stats (loop) || vect_debug_details (loop))
5420         fprintf (dump_file,
5421                  "not vectorized: number of iterations cannot be computed.");
5422       return NULL;
5423     }
5424
5425   if (chrec_contains_undetermined (number_of_iterations))
5426     {
5427       if (vect_debug_details (NULL))
5428         fprintf (dump_file, "Infinite number of iterations.");
5429       return false;
5430     }
5431
5432   loop_vinfo = new_loop_vec_info (loop);
5433   LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
5434
5435   if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
5436     {
5437       if (vect_debug_details (loop))
5438         {
5439           fprintf (dump_file, "loop bound unknown.\n");
5440           fprintf (dump_file, "Symbolic number of iterations is ");
5441           print_generic_expr (dump_file, number_of_iterations, TDF_DETAILS);
5442         }
5443     }
5444   else
5445   if (LOOP_VINFO_INT_NITERS (loop_vinfo) == 0)
5446     {
5447       if (vect_debug_stats (loop) || vect_debug_details (loop))
5448         fprintf (dump_file, "not vectorized: number of iterations = 0.");
5449       return NULL;
5450     }
5451
5452   LOOP_VINFO_EXIT_COND (loop_vinfo) = loop_cond;
5453
5454   return loop_vinfo;
5455 }
5456
5457
5458 /* Function vect_analyze_loop.
5459
5460    Apply a set of analyses on LOOP, and create a loop_vec_info struct
5461    for it. The different analyses will record information in the
5462    loop_vec_info struct.  */
5463
5464 static loop_vec_info
5465 vect_analyze_loop (struct loop *loop)
5466 {
5467   bool ok;
5468   loop_vec_info loop_vinfo;
5469
5470   if (vect_debug_details (NULL))
5471     fprintf (dump_file, "\n<<<<<<< analyze_loop_nest >>>>>>>\n");
5472
5473   /* Check the CFG characteristics of the loop (nesting, entry/exit, etc.  */
5474
5475   loop_vinfo = vect_analyze_loop_form (loop);
5476   if (!loop_vinfo)
5477     {
5478       if (vect_debug_details (loop))
5479         fprintf (dump_file, "bad loop form.");
5480       return NULL;
5481     }
5482
5483   /* Find all data references in the loop (which correspond to vdefs/vuses)
5484      and analyze their evolution in the loop.
5485
5486      FORNOW: Handle only simple, array references, which
5487      alignment can be forced, and aligned pointer-references.  */
5488
5489   ok = vect_analyze_data_refs (loop_vinfo);
5490   if (!ok)
5491     {
5492       if (vect_debug_details (loop))
5493         fprintf (dump_file, "bad data references.");
5494       destroy_loop_vec_info (loop_vinfo);
5495       return NULL;
5496     }
5497
5498   /* Data-flow analysis to detect stmts that do not need to be vectorized.  */
5499
5500   ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
5501   if (!ok)
5502     {
5503       if (vect_debug_details (loop))
5504         fprintf (dump_file, "unexpected pattern.");
5505       if (vect_debug_details (loop))
5506         fprintf (dump_file, "not vectorized: unexpected pattern.");
5507       destroy_loop_vec_info (loop_vinfo);
5508       return NULL;
5509     }
5510
5511   /* Check that all cross-iteration scalar data-flow cycles are OK.
5512      Cross-iteration cycles caused by virtual phis are analyzed separately.  */
5513
5514   ok = vect_analyze_scalar_cycles (loop_vinfo);
5515   if (!ok)
5516     {
5517       if (vect_debug_details (loop))
5518         fprintf (dump_file, "bad scalar cycle.");
5519       destroy_loop_vec_info (loop_vinfo);
5520       return NULL;
5521     }
5522
5523   /* Analyze data dependences between the data-refs in the loop.
5524      FORNOW: fail at the first data dependence that we encounter.  */
5525
5526   ok = vect_analyze_data_ref_dependences (loop_vinfo);
5527   if (!ok)
5528     {
5529       if (vect_debug_details (loop))
5530         fprintf (dump_file, "bad data dependence.");
5531       destroy_loop_vec_info (loop_vinfo);
5532       return NULL;
5533     }
5534
5535   /* Analyze the access patterns of the data-refs in the loop (consecutive,
5536      complex, etc.). FORNOW: Only handle consecutive access pattern.  */
5537
5538   ok = vect_analyze_data_ref_accesses (loop_vinfo);
5539   if (!ok)
5540     {
5541       if (vect_debug_details (loop))
5542         fprintf (dump_file, "bad data access.");
5543       destroy_loop_vec_info (loop_vinfo);
5544       return NULL;
5545     }
5546
5547   /* Analyze the alignment of the data-refs in the loop.
5548      FORNOW: Only aligned accesses are handled.  */
5549
5550   ok = vect_analyze_data_refs_alignment (loop_vinfo);
5551   if (!ok)
5552     {
5553       if (vect_debug_details (loop))
5554         fprintf (dump_file, "bad data alignment.");
5555       destroy_loop_vec_info (loop_vinfo);
5556       return NULL;
5557     }
5558
5559   /* Scan all the operations in the loop and make sure they are
5560      vectorizable.  */
5561
5562   ok = vect_analyze_operations (loop_vinfo);
5563   if (!ok)
5564     {
5565       if (vect_debug_details (loop))
5566         fprintf (dump_file, "bad operation or unsupported loop bound.");
5567       destroy_loop_vec_info (loop_vinfo);
5568       return NULL;
5569     }
5570
5571   LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
5572
5573   return loop_vinfo;
5574 }
5575
5576
5577 /* Function need_imm_uses_for.
5578
5579    Return whether we ought to include information for 'var'
5580    when calculating immediate uses.  For this pass we only want use
5581    information for non-virtual variables.  */
5582
5583 static bool
5584 need_imm_uses_for (tree var)
5585 {
5586   return is_gimple_reg (var);
5587 }
5588
5589
5590 /* Function vectorize_loops.
5591
5592    Entry Point to loop vectorization phase.  */
5593
5594 void
5595 vectorize_loops (struct loops *loops)
5596 {
5597   unsigned int i, loops_num;
5598   unsigned int num_vectorized_loops = 0;
5599
5600   /* Does the target support SIMD?  */
5601   /* FORNOW: until more sophisticated machine modelling is in place.  */
5602   if (!UNITS_PER_SIMD_WORD)
5603     {
5604       if (vect_debug_details (NULL))
5605         fprintf (dump_file, "vectorizer: target vector size is not defined.");
5606       return;
5607     }
5608
5609 #ifdef ENABLE_CHECKING
5610   verify_loop_closed_ssa ();
5611 #endif
5612
5613   compute_immediate_uses (TDFA_USE_OPS, need_imm_uses_for);
5614
5615   /*  ----------- Analyze loops. -----------  */
5616
5617   /* If some loop was duplicated, it gets bigger number
5618      than all previously defined loops. This fact allows us to run
5619      only over initial loops skipping newly generated ones.  */
5620   loops_num = loops->num;
5621   for (i = 1; i < loops_num; i++)
5622     {
5623       loop_vec_info loop_vinfo;
5624       struct loop *loop = loops->parray[i];
5625
5626       if (!loop)
5627         continue;
5628
5629       loop_vinfo = vect_analyze_loop (loop);
5630       loop->aux = loop_vinfo;
5631
5632       if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
5633         continue;
5634
5635       vect_transform_loop (loop_vinfo, loops);
5636       num_vectorized_loops++;
5637     }
5638
5639   if (vect_debug_stats (NULL) || vect_debug_details (NULL))
5640     fprintf (dump_file, "\nvectorized %u loops in function.\n",
5641              num_vectorized_loops);
5642
5643   /*  ----------- Finalize. -----------  */
5644
5645   free_df ();
5646   for (i = 1; i < loops_num; i++)
5647     {
5648       struct loop *loop = loops->parray[i];
5649       loop_vec_info loop_vinfo;
5650
5651       if (!loop)
5652         continue;
5653       loop_vinfo = loop->aux;
5654       destroy_loop_vec_info (loop_vinfo);
5655       loop->aux = NULL;
5656     }
5657
5658   rewrite_into_ssa (false);
5659   rewrite_into_loop_closed_ssa (); /* FORNOW */
5660   bitmap_clear (vars_to_rename);
5661 }