gcc/tree-vectorizer.c

   1 /* Loop Vectorization
   2    Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
   3    Contributed by Dorit Naishlos <dorit@il.ibm.com>
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 2, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING.  If not, write to the Free
  19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
  20 02111-1307, USA.  */
  21
  22 /* Loop Vectorization Pass.
  23
  24    This pass tries to vectorize loops. This first implementation focuses on
  25    simple inner-most loops, with no conditional control flow, and a set of
  26    simple operations which vector form can be expressed using existing
  27    tree codes (PLUS, MULT etc).
  28
  29    For example, the vectorizer transforms the following simple loop:
  30
  31         short a[N]; short b[N]; short c[N]; int i;
  32
  33         for (i=0; i<N; i++){
  34           a[i] = b[i] + c[i];
  35         }
  36
  37    as if it was manually vectorized by rewriting the source code into:
  38
  39         typedef int __attribute__((mode(V8HI))) v8hi;
  40         short a[N];  short b[N]; short c[N];   int i;
  41         v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
  42         v8hi va, vb, vc;
  43
  44         for (i=0; i<N/8; i++){
  45           vb = pb[i];
  46           vc = pc[i];
  47           va = vb + vc;
  48           pa[i] = va;
  49         }
  50
  51         The main entry to this pass is vectorize_loops(), in which
  52    the vectorizer applies a set of analyses on a given set of loops,
  53    followed by the actual vectorization transformation for the loops that
  54    had successfully passed the analysis phase.
  55
  56         Throughout this pass we make a distinction between two types of
  57    data: scalars (which are represented by SSA_NAMES), and memory references
  58    ("data-refs"). These two types of data require different handling both
  59    during analysis and transformation. The types of data-refs that the
  60    vectorizer currently supports are ARRAY_REFS which base is an array DECL
  61    (not a pointer), and INDIRECT_REFS through pointers; both array and pointer
  62    accesses are required to have a  simple (consecutive) access pattern.
  63
  64    Analysis phase:
  65    ===============
  66         The driver for the analysis phase is vect_analyze_loop_nest().
  67    It applies a set of analyses, some of which rely on the scalar evolution
  68    analyzer (scev) developed by Sebastian Pop.
  69
  70         During the analysis phase the vectorizer records some information
  71    per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
  72    loop, as well as general information about the loop as a whole, which is
  73    recorded in a "loop_vec_info" struct attached to each loop.
  74
  75    Transformation phase:
  76    =====================
  77         The loop transformation phase scans all the stmts in the loop, and
  78    creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
  79    the loop that needs to be vectorized. It insert the vector code sequence
  80    just before the scalar stmt S, and records a pointer to the vector code
  81    in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
  82    attached to S). This pointer will be used for the vectorization of following
  83    stmts which use the def of stmt S. Stmt S is removed if it writes to memory;
  84    otherwise, we rely on dead code elimination for removing it.
  85
  86         For example, say stmt S1 was vectorized into stmt VS1:
  87
  88    VS1: vb = px[i];
  89    S1:  b = x[i];    STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
  90    S2:  a = b;
  91
  92    To vectorize stmt S2, the vectorizer first finds the stmt that defines
  93    the operand 'b' (S1), and gets the relevant vector def 'vb' from the
  94    vector stmt VS1 pointed by STMT_VINFO_VEC_STMT (stmt_info (S1)). The
  95    resulting sequence would be:
  96
  97    VS1: vb = px[i];
  98    S1:  b = x[i];       STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
  99    VS2: va = vb;
 100    S2:  a = b;          STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2
 101
 102         Operands that are not SSA_NAMEs, are data-refs that appear in
 103    load/store operations (like 'x[i]' in S1), and are handled differently.
 104
 105    Target modeling:
 106    =================
 107         Currently the only target specific information that is used is the
 108    size of the vector (in bytes) - "UNITS_PER_SIMD_WORD". Targets that can
 109    support different sizes of vectors, for now will need to specify one value
 110    for "UNITS_PER_SIMD_WORD". More flexibility will be added in the future.
 111
 112         Since we only vectorize operations which vector form can be
 113    expressed using existing tree codes, to verify that an operation is
 114    supported, the vectorizer checks the relevant optab at the relevant
 115    machine_mode (e.g, add_optab->handlers[(int) V8HImode].insn_code). If
 116    the value found is CODE_FOR_nothing, then there's no target support, and
 117    we can't vectorize the stmt.
 118
 119    For additional information on this project see:
 120    http://gcc.gnu.org/projects/tree-ssa/vectorization.html
 121 */
 122
 123 #include "config.h"
 124 #include "system.h"
 125 #include "coretypes.h"
 126 #include "tm.h"
 127 #include "errors.h"
 128 #include "ggc.h"
 129 #include "tree.h"
 130 #include "target.h"
 131
 132 #include "rtl.h"
 133 #include "basic-block.h"
 134 #include "diagnostic.h"
 135 #include "tree-flow.h"
 136 #include "tree-dump.h"
 137 #include "timevar.h"
 138 #include "cfgloop.h"
 139 #include "cfglayout.h"
 140 #include "expr.h"
 141 #include "optabs.h"
 142 #include "toplev.h"
 143 #include "tree-chrec.h"
 144 #include "tree-data-ref.h"
 145 #include "tree-scalar-evolution.h"
 146 #include "tree-vectorizer.h"
 147 #include "tree-pass.h"
 148 #include "langhooks.h"
 149
 150
 151 /*************************************************************************
 152   Simple Loop Peeling Utilities
 153  *************************************************************************/
 154
 155 /* Entry point for peeling of simple loops.
 156    Peel the first/last iterations of a loop.
 157    It can be used outside of the vectorizer for loops that are simple enough
 158    (see function documentation).  In the vectorizer it is used to peel the
 159    last few iterations when the loop bound is unknown or does not evenly
 160    divide by the vectorization factor, and to peel the first few iterations
 161    to force the alignment of data references in the loop.  */
 162 struct loop *slpeel_tree_peel_loop_to_edge
 163   (struct loop *, struct loops *, edge, tree, tree, bool);
 164 static struct loop *slpeel_tree_duplicate_loop_to_edge_cfg
 165   (struct loop *, struct loops *, edge);
 166 static void slpeel_update_phis_for_duplicate_loop
 167   (struct loop *, struct loop *, bool after);
 168 static void slpeel_update_phi_nodes_for_guard (edge, struct loop *, bool, bool);
 169 static void slpeel_make_loop_iterate_ntimes (struct loop *, tree);
 170 static edge slpeel_add_loop_guard (basic_block, tree, basic_block, basic_block);
 171 static bool slpeel_can_duplicate_loop_p (struct loop *, edge);
 172 static void allocate_new_names (bitmap);
 173 static void rename_use_op (use_operand_p);
 174 static void rename_def_op (def_operand_p, tree);
 175 static void rename_variables_in_bb (basic_block);
 176 static void free_new_names (bitmap);
 177 static void rename_variables_in_loop (struct loop *);
 178 #ifdef ENABLE_CHECKING
 179 static void slpeel_verify_cfg_after_peeling (struct loop *, struct loop *);
 180 #endif
 181
 182
 183 /*************************************************************************
 184   Vectorization Utilities.
 185  *************************************************************************/
 186
 187 /* Main analysis functions.  */
 188 static loop_vec_info vect_analyze_loop (struct loop *);
 189 static loop_vec_info vect_analyze_loop_form (struct loop *);
 190 static bool vect_analyze_data_refs (loop_vec_info);
 191 static bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
 192 static bool vect_analyze_scalar_cycles (loop_vec_info);
 193 static bool vect_analyze_data_ref_accesses (loop_vec_info);
 194 static bool vect_analyze_data_refs_alignment (loop_vec_info);
 195 static bool vect_compute_data_refs_alignment (loop_vec_info);
 196 static bool vect_analyze_operations (loop_vec_info);
 197
 198 /* Main code transformation functions.  */
 199 static void vect_transform_loop (loop_vec_info, struct loops *);
 200 static bool vect_transform_stmt (tree, block_stmt_iterator *);
 201 static bool vectorizable_load (tree, block_stmt_iterator *, tree *);
 202 static bool vectorizable_store (tree, block_stmt_iterator *, tree *);
 203 static bool vectorizable_operation (tree, block_stmt_iterator *, tree *);
 204 static bool vectorizable_assignment (tree, block_stmt_iterator *, tree *);
 205 static enum dr_alignment_support vect_supportable_dr_alignment
 206   (struct data_reference *);
 207 static void vect_align_data_ref (tree);
 208 static void vect_enhance_data_refs_alignment (loop_vec_info);
 209
 210 /* Utility functions for the analyses.  */
 211 static bool vect_is_simple_use (tree , struct loop *, tree *);
 212 static bool exist_non_indexing_operands_for_use_p (tree, tree);
 213 static bool vect_is_simple_iv_evolution (unsigned, tree, tree *, tree *, bool);
 214 static void vect_mark_relevant (varray_type *, tree);
 215 static bool vect_stmt_relevant_p (tree, loop_vec_info);
 216 static tree vect_get_loop_niters (struct loop *, tree *);
 217 static bool vect_compute_data_ref_alignment (struct data_reference *);
 218 static bool vect_analyze_data_ref_access (struct data_reference *);
 219 static bool vect_can_force_dr_alignment_p (tree, unsigned int);
 220 static struct data_reference * vect_analyze_pointer_ref_access
 221   (tree, tree, bool);
 222 static bool vect_can_advance_ivs_p (struct loop *);
 223 static tree vect_get_base_and_offset (struct data_reference *, tree, tree,
 224                                       loop_vec_info, tree *, tree *, tree *,
 225                                       bool*);
 226 static struct data_reference * vect_analyze_pointer_ref_access
 227   (tree, tree, bool);
 228 static tree vect_get_ptr_offset (tree, tree, tree *);
 229 static tree vect_get_memtag_and_dr
 230   (tree, tree, bool, loop_vec_info, tree, struct data_reference **);
 231 static bool vect_analyze_offset_expr (tree, struct loop *, tree, tree *,
 232                                       tree *, tree *);
 233 static tree vect_strip_conversion (tree);
 234
 235 /* Utility functions for the code transformation.  */
 236 static tree vect_create_destination_var (tree, tree);
 237 static tree vect_create_data_ref_ptr
 238   (tree, block_stmt_iterator *, tree, tree *, bool);
 239 static tree vect_create_index_for_vector_ref
 240   (struct loop *, block_stmt_iterator *);
 241 static tree vect_create_addr_base_for_vector_ref (tree, tree *, tree);
 242 static tree get_vectype_for_scalar_type (tree);
 243 static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
 244 static tree vect_get_vec_def_for_operand (tree, tree);
 245 static tree vect_init_vector (tree, tree);
 246 static void vect_finish_stmt_generation
 247   (tree stmt, tree vec_stmt, block_stmt_iterator *bsi);
 248
 249 /* Utility function dealing with loop peeling (not peeling itself).  */
 250 static void vect_generate_tmps_on_preheader
 251   (loop_vec_info, tree *, tree *, tree *);
 252 static tree vect_build_loop_niters (loop_vec_info);
 253 static void vect_update_ivs_after_vectorizer (struct loop *, tree, edge);
 254 static tree vect_gen_niters_for_prolog_loop (loop_vec_info, tree);
 255 static void vect_update_inits_of_dr (struct data_reference *, tree niters);
 256 static void vect_update_inits_of_drs (loop_vec_info, tree);
 257 static void vect_do_peeling_for_alignment (loop_vec_info, struct loops *);
 258 static void vect_do_peeling_for_loop_bound
 259   (loop_vec_info, tree *, struct loops *);
 260
 261 /* Utilities for creation and deletion of vec_info structs.  */
 262 loop_vec_info new_loop_vec_info (struct loop *loop);
 263 void destroy_loop_vec_info (loop_vec_info);
 264 stmt_vec_info new_stmt_vec_info (tree stmt, struct loop *loop);
 265
 266 static bool vect_debug_stats (struct loop *loop);
 267 static bool vect_debug_details (struct loop *loop);
 268
 269 \f
 270 /*************************************************************************
 271   Simple Loop Peeling Utilities
 272
 273   Utilities to support loop peeling for vectorization purposes.
 274  *************************************************************************/
 275
 276
 277 /* For each definition in DEFINITIONS this function allocates
 278    new ssa name.  */
 279
 280 static void
 281 allocate_new_names (bitmap definitions)
 282 {
 283   unsigned ver;
 284   bitmap_iterator bi;
 285
 286   EXECUTE_IF_SET_IN_BITMAP (definitions, 0, ver, bi)
 287     {
 288       tree def = ssa_name (ver);
 289       tree *new_name_ptr = xmalloc (sizeof (tree));
 290
 291       bool abnormal = SSA_NAME_OCCURS_IN_ABNORMAL_PHI (def);
 292
 293       *new_name_ptr = duplicate_ssa_name (def, SSA_NAME_DEF_STMT (def));
 294       SSA_NAME_OCCURS_IN_ABNORMAL_PHI (*new_name_ptr) = abnormal;
 295
 296       SSA_NAME_AUX (def) = new_name_ptr;
 297     }
 298 }
 299
 300
 301 /* Renames the use *OP_P.  */
 302
 303 static void
 304 rename_use_op (use_operand_p op_p)
 305 {
 306   tree *new_name_ptr;
 307
 308   if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
 309     return;
 310
 311   new_name_ptr = SSA_NAME_AUX (USE_FROM_PTR (op_p));
 312
 313   /* Something defined outside of the loop.  */
 314   if (!new_name_ptr)
 315     return;
 316
 317   /* An ordinary ssa name defined in the loop.  */
 318
 319   SET_USE (op_p, *new_name_ptr);
 320 }
 321
 322
 323 /* Renames the def *OP_P in statement STMT.  */
 324
 325 static void
 326 rename_def_op (def_operand_p op_p, tree stmt)
 327 {
 328   tree *new_name_ptr;
 329
 330   if (TREE_CODE (DEF_FROM_PTR (op_p)) != SSA_NAME)
 331     return;
 332
 333   new_name_ptr = SSA_NAME_AUX (DEF_FROM_PTR (op_p));
 334
 335   /* Something defined outside of the loop.  */
 336   if (!new_name_ptr)
 337     return;
 338
 339   /* An ordinary ssa name defined in the loop.  */
 340
 341   SET_DEF (op_p, *new_name_ptr);
 342   SSA_NAME_DEF_STMT (DEF_FROM_PTR (op_p)) = stmt;
 343 }
 344
 345
 346 /* Renames the variables in basic block BB.  */
 347
 348 static void
 349 rename_variables_in_bb (basic_block bb)
 350 {
 351   tree phi;
 352   block_stmt_iterator bsi;
 353   tree stmt;
 354   stmt_ann_t ann;
 355   use_optype uses;
 356   vuse_optype vuses;
 357   def_optype defs;
 358   v_may_def_optype v_may_defs;
 359   v_must_def_optype v_must_defs;
 360   unsigned i;
 361   edge e;
 362   edge_iterator ei;
 363   struct loop *loop = bb->loop_father;
 364
 365   for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
 366     rename_def_op (PHI_RESULT_PTR (phi), phi);
 367
 368   for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
 369     {
 370       stmt = bsi_stmt (bsi);
 371       get_stmt_operands (stmt);
 372       ann = stmt_ann (stmt);
 373
 374       uses = USE_OPS (ann);
 375       for (i = 0; i < NUM_USES (uses); i++)
 376         rename_use_op (USE_OP_PTR (uses, i));
 377
 378       defs = DEF_OPS (ann);
 379       for (i = 0; i < NUM_DEFS (defs); i++)
 380         rename_def_op (DEF_OP_PTR (defs, i), stmt);
 381
 382       vuses = VUSE_OPS (ann);
 383       for (i = 0; i < NUM_VUSES (vuses); i++)
 384         rename_use_op (VUSE_OP_PTR (vuses, i));
 385
 386       v_may_defs = V_MAY_DEF_OPS (ann);
 387       for (i = 0; i < NUM_V_MAY_DEFS (v_may_defs); i++)
 388         {
 389           rename_use_op (V_MAY_DEF_OP_PTR (v_may_defs, i));
 390           rename_def_op (V_MAY_DEF_RESULT_PTR (v_may_defs, i), stmt);
 391         }
 392
 393       v_must_defs = V_MUST_DEF_OPS (ann);
 394       for (i = 0; i < NUM_V_MUST_DEFS (v_must_defs); i++)
 395         {
 396           rename_use_op (V_MUST_DEF_KILL_PTR (v_must_defs, i));
 397           rename_def_op (V_MUST_DEF_RESULT_PTR (v_must_defs, i), stmt);
 398         }
 399     }
 400
 401   FOR_EACH_EDGE (e, ei, bb->succs)
 402     {
 403       if (!flow_bb_inside_loop_p (loop, e->dest))
 404         continue;
 405       for (phi = phi_nodes (e->dest); phi; phi = PHI_CHAIN (phi))
 406         rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e));
 407     }
 408 }
 409
 410
 411 /* Releases the structures holding the new ssa names.  */
 412
 413 static void
 414 free_new_names (bitmap definitions)
 415 {
 416   unsigned ver;
 417   bitmap_iterator bi;
 418
 419   EXECUTE_IF_SET_IN_BITMAP (definitions, 0, ver, bi)
 420     {
 421       tree def = ssa_name (ver);
 422
 423       if (SSA_NAME_AUX (def))
 424         {
 425           free (SSA_NAME_AUX (def));
 426           SSA_NAME_AUX (def) = NULL;
 427         }
 428     }
 429 }
 430
 431
 432 /* Renames variables in new generated LOOP.  */
 433
 434 static void
 435 rename_variables_in_loop (struct loop *loop)
 436 {
 437   unsigned i;
 438   basic_block *bbs;
 439
 440   bbs = get_loop_body (loop);
 441
 442   for (i = 0; i < loop->num_nodes; i++)
 443     rename_variables_in_bb (bbs[i]);
 444
 445   free (bbs);
 446 }
 447
 448
 449 /* Update the PHI nodes of NEW_LOOP.
 450
 451    NEW_LOOP is a duplicate of ORIG_LOOP.
 452    AFTER indicates whether NEW_LOOP executes before or after ORIG_LOOP:
 453    AFTER is true if NEW_LOOP executes after ORIG_LOOP, and false if it
 454    executes before it.  */
 455
 456 static void
 457 slpeel_update_phis_for_duplicate_loop (struct loop *orig_loop,
 458                                        struct loop *new_loop, bool after)
 459 {
 460   tree *new_name_ptr, new_ssa_name;
 461   tree phi_new, phi_orig;
 462   tree def;
 463   edge orig_loop_latch = loop_latch_edge (orig_loop);
 464   edge orig_entry_e = loop_preheader_edge (orig_loop);
 465   edge new_loop_exit_e = new_loop->exit_edges[0];
 466   edge new_loop_entry_e = loop_preheader_edge (new_loop);
 467   edge entry_arg_e = (after ? orig_loop_latch : orig_entry_e);
 468
 469   /*
 470      step 1. For each loop-header-phi:
 471              Add the first phi argument for the phi in NEW_LOOP
 472             (the one associated with the entry of NEW_LOOP)
 473
 474      step 2. For each loop-header-phi:
 475              Add the second phi argument for the phi in NEW_LOOP
 476             (the one associated with the latch of NEW_LOOP)
 477
 478      step 3. Update the phis in the successor block of NEW_LOOP.
 479
 480         case 1: NEW_LOOP was placed before ORIG_LOOP:
 481                 The successor block of NEW_LOOP is the header of ORIG_LOOP.
 482                 Updating the phis in the successor block can therefore be done
 483                 along with the scanning of the loop header phis, because the
 484                 header blocks of ORIG_LOOP and NEW_LOOP have exactly the same
 485                 phi nodes, organized in the same order.
 486
 487         case 2: NEW_LOOP was placed after ORIG_LOOP:
 488                 The successor block of NEW_LOOP is the original exit block of
 489                 ORIG_LOOP - the phis to be updated are the loop-closed-ssa phis.
 490                 We postpone updating these phis to a later stage (when
 491                 loop guards are added).
 492    */
 493
 494
 495   /* Scan the phis in the headers of the old and new loops
 496      (they are organized in exactly the same order).  */
 497
 498   for (phi_new = phi_nodes (new_loop->header),
 499        phi_orig = phi_nodes (orig_loop->header);
 500        phi_new && phi_orig;
 501        phi_new = PHI_CHAIN (phi_new), phi_orig = PHI_CHAIN (phi_orig))
 502     {
 503       /* step 1.  */
 504       def = PHI_ARG_DEF_FROM_EDGE (phi_orig, entry_arg_e);
 505       add_phi_arg (phi_new, def, new_loop_entry_e);
 506
 507       /* step 2.  */
 508       def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_loop_latch);
 509       if (TREE_CODE (def) != SSA_NAME)
 510         continue;
 511
 512       new_name_ptr = SSA_NAME_AUX (def);
 513       if (!new_name_ptr)
 514         /* Something defined outside of the loop.  */
 515         continue;
 516
 517       /* An ordinary ssa name defined in the loop.  */
 518       new_ssa_name = *new_name_ptr;
 519       add_phi_arg (phi_new, new_ssa_name, loop_latch_edge (new_loop));
 520
 521       /* step 3 (case 1).  */
 522       if (!after)
 523         {
 524           gcc_assert (new_loop_exit_e == orig_entry_e);
 525           SET_PHI_ARG_DEF (phi_orig,
 526                            new_loop_exit_e->dest_idx,
 527                            new_ssa_name);
 528         }
 529     }
 530 }
 531
 532
 533 /* Update PHI nodes for a guard of the LOOP.
 534
 535    Input:
 536    - LOOP, GUARD_EDGE: LOOP is a loop for which we added guard code that
 537         controls whether LOOP is to be executed.  GUARD_EDGE is the edge that
 538         originates from the guard-bb, skips LOOP and reaches the (unique) exit
 539         bb of LOOP.  This loop-exit-bb is an empty bb with one successor.
 540         We denote this bb NEW_MERGE_BB because it had a single predecessor (the
 541         LOOP header) before the guard code was added, and now it became a merge
 542         point of two paths - the path that ends with the LOOP exit-edge, and
 543         the path that ends with GUARD_EDGE.
 544
 545         This function creates and updates the relevant phi nodes to account for
 546         the new incoming edge (GUARD_EDGE) into NEW_MERGE_BB:
 547         1. Create phi nodes at NEW_MERGE_BB.
 548         2. Update the phi nodes at the successor of NEW_MERGE_BB (denoted
 549            UPDATE_BB).  UPDATE_BB was the exit-bb of LOOP before NEW_MERGE_BB
 550            was added:
 551
 552         ===> The CFG before the guard-code was added:
 553         LOOP_header_bb:
 554           if (exit_loop) goto update_bb : LOOP_header_bb
 555         update_bb:
 556
 557         ==> The CFG after the guard-code was added:
 558         guard_bb:
 559           if (LOOP_guard_condition) goto new_merge_bb : LOOP_header_bb
 560         LOOP_header_bb:
 561           if (exit_loop_condition) goto new_merge_bb : LOOP_header_bb
 562         new_merge_bb:
 563           goto update_bb
 564         update_bb:
 565
 566    - ENTRY_PHIS: If ENTRY_PHIS is TRUE, this indicates that the phis in
 567         UPDATE_BB are loop entry phis, like the phis in the LOOP header,
 568         organized in the same order.
 569         If ENTRY_PHIs is FALSE, this indicates that the phis in UPDATE_BB are
 570         loop exit phis.
 571
 572    - IS_NEW_LOOP: TRUE if LOOP is a new loop (a duplicated copy of another
 573         "original" loop).  FALSE if LOOP is an original loop (not a newly
 574         created copy).  The SSA_NAME_AUX fields of the defs in the original
 575         loop are the corresponding new ssa-names used in the new duplicated
 576         loop copy.  IS_NEW_LOOP indicates which of the two args of the phi
 577         nodes in UPDATE_BB takes the original ssa-name, and which takes the
 578         new name: If IS_NEW_LOOP is TRUE, the phi-arg that is associated with
 579         the LOOP-exit-edge takes the new-name, and the phi-arg that is
 580         associated with GUARD_EDGE takes the original name.  If IS_NEW_LOOP is
 581         FALSE, it's the other way around.
 582   */
 583
 584 static void
 585 slpeel_update_phi_nodes_for_guard (edge guard_edge,
 586                                    struct loop *loop,
 587                                    bool entry_phis,
 588                                    bool is_new_loop)
 589 {
 590   tree orig_phi, new_phi, update_phi;
 591   tree guard_arg, loop_arg;
 592   basic_block new_merge_bb = guard_edge->dest;
 593   edge e = EDGE_SUCC (new_merge_bb, 0);
 594   basic_block update_bb = e->dest;
 595   basic_block orig_bb = (entry_phis ? loop->header : update_bb);
 596
 597   for (orig_phi = phi_nodes (orig_bb), update_phi = phi_nodes (update_bb);
 598        orig_phi && update_phi;
 599        orig_phi = PHI_CHAIN (orig_phi), update_phi = PHI_CHAIN (update_phi))
 600     {
 601       /* 1. Generate new phi node in NEW_MERGE_BB:  */
 602       new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
 603                                  new_merge_bb);
 604
 605       /* 2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
 606             of LOOP. Set the two phi args in NEW_PHI for these edges:  */
 607       if (entry_phis)
 608         {
 609           loop_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi,
 610                                             EDGE_SUCC (loop->latch, 0));
 611           guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, loop->entry_edges[0]);
 612         }
 613       else /* exit phis */
 614         {
 615           tree orig_def = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
 616           tree *new_name_ptr = SSA_NAME_AUX (orig_def);
 617           tree new_name;
 618
 619           if (new_name_ptr)
 620             new_name = *new_name_ptr;
 621           else
 622             /* Something defined outside of the loop  */
 623             new_name = orig_def;
 624
 625           if (is_new_loop)
 626             {
 627               guard_arg = orig_def;
 628               loop_arg = new_name;
 629             }
 630           else
 631             {
 632               guard_arg = new_name;
 633               loop_arg = orig_def;
 634             }
 635         }
 636       add_phi_arg (new_phi, loop_arg, loop->exit_edges[0]);
 637       add_phi_arg (new_phi, guard_arg, guard_edge);
 638
 639       /* 3. Update phi in successor block.  */
 640       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == loop_arg
 641                   || PHI_ARG_DEF_FROM_EDGE (update_phi, e) == guard_arg);
 642       SET_PHI_ARG_DEF (update_phi, e->dest_idx, PHI_RESULT (new_phi));
 643     }
 644
 645   set_phi_nodes (new_merge_bb, phi_reverse (phi_nodes (new_merge_bb)));
 646 }
 647
 648
 649 /* Make the LOOP iterate NITERS times. This is done by adding a new IV
 650    that starts at zero, increases by one and its limit is NITERS.
 651
 652    Assumption: the exit-condition of LOOP is the last stmt in the loop.  */
 653
 654 static void
 655 slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
 656 {
 657   tree indx_before_incr, indx_after_incr, cond_stmt, cond;
 658   tree orig_cond;
 659   edge exit_edge = loop->exit_edges[0];
 660   block_stmt_iterator loop_exit_bsi = bsi_last (exit_edge->src);
 661   tree begin_label = tree_block_label (loop->latch);
 662   tree exit_label = tree_block_label (loop->single_exit->dest);
 663   tree init = build_int_cst (TREE_TYPE (niters), 0);
 664   tree step = build_int_cst (TREE_TYPE (niters), 1);
 665   tree then_label;
 666   tree else_label;
 667
 668   orig_cond = get_loop_exit_condition (loop);
 669   gcc_assert (orig_cond);
 670   create_iv (init, step, NULL_TREE, loop,
 671              &loop_exit_bsi, false, &indx_before_incr, &indx_after_incr);
 672
 673   /* CREATE_IV uses BSI_INSERT with TSI_NEW_STMT, so we want to get
 674      back to the exit condition statement.  */
 675   bsi_next (&loop_exit_bsi);
 676   gcc_assert (bsi_stmt (loop_exit_bsi) == orig_cond);
 677
 678   if (exit_edge->flags & EDGE_TRUE_VALUE) /* 'then' edge exits the loop.  */
 679     {
 680       cond = build2 (GE_EXPR, boolean_type_node, indx_after_incr, niters);
 681       then_label = build1 (GOTO_EXPR, void_type_node, exit_label);
 682       else_label = build1 (GOTO_EXPR, void_type_node, begin_label);
 683     }
 684   else /* 'then' edge loops back.  */
 685     {
 686       cond = build2 (LT_EXPR, boolean_type_node, indx_after_incr, niters);
 687       then_label = build1 (GOTO_EXPR, void_type_node, begin_label);
 688       else_label = build1 (GOTO_EXPR, void_type_node, exit_label);
 689     }
 690
 691   cond_stmt = build3 (COND_EXPR, TREE_TYPE (orig_cond), cond,
 692                      then_label, else_label);
 693   bsi_insert_before (&loop_exit_bsi, cond_stmt, BSI_SAME_STMT);
 694
 695   /* Remove old loop exit test:  */
 696   bsi_remove (&loop_exit_bsi);
 697
 698   if (vect_debug_stats (loop) || vect_debug_details (loop))
 699     print_generic_expr (dump_file, cond_stmt, TDF_SLIM);
 700
 701   loop->nb_iterations = niters;
 702 }
 703
 704
 705 /* Given LOOP this function generates a new copy of it and puts it
 706    on E which is either the entry or exit of LOOP.  */
 707
 708 static struct loop *
 709 slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, struct loops *loops,
 710                                         edge e)
 711 {
 712   struct loop *new_loop;
 713   basic_block *new_bbs, *bbs;
 714   bool at_exit;
 715   bool was_imm_dom;
 716   basic_block exit_dest;
 717   tree phi, phi_arg;
 718
 719   at_exit = (e == loop->exit_edges[0]);
 720   if (!at_exit && e != loop_preheader_edge (loop))
 721     {
 722       if (dump_file && (dump_flags & TDF_DETAILS))
 723           fprintf (dump_file, "Edge is not an entry nor an exit edge.\n");
 724       return NULL;
 725     }
 726
 727   bbs = get_loop_body (loop);
 728
 729   /* Check whether duplication is possible.  */
 730   if (!can_copy_bbs_p (bbs, loop->num_nodes))
 731     {
 732       if (vect_debug_stats (loop) || vect_debug_details (loop))
 733           fprintf (dump_file, "Cannot copy basic blocks.\n");
 734       free (bbs);
 735       return NULL;
 736     }
 737
 738   /* Generate new loop structure.  */
 739   new_loop = duplicate_loop (loops, loop, loop->outer);
 740   if (!new_loop)
 741     {
 742       if (vect_debug_stats (loop) || vect_debug_details (loop))
 743           fprintf (dump_file, "duplicate_loop returns NULL.\n");
 744       free (bbs);
 745       return NULL;
 746     }
 747
 748   exit_dest = loop->exit_edges[0]->dest;
 749   was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
 750                                           exit_dest) == loop->header ?
 751                  true : false);
 752
 753   new_bbs = xmalloc (sizeof (basic_block) * loop->num_nodes);
 754
 755   copy_bbs (bbs, loop->num_nodes, new_bbs, NULL, 0, NULL, NULL);
 756
 757   /* Duplicating phi args at exit bbs as coming
 758      also from exit of duplicated loop.  */
 759   for (phi = phi_nodes (exit_dest); phi; phi = PHI_CHAIN (phi))
 760     {
 761       phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, loop->exit_edges[0]);
 762       if (phi_arg)
 763         {
 764           edge new_loop_exit_edge;
 765
 766           if (EDGE_SUCC (new_loop->header, 0)->dest == new_loop->latch)
 767             new_loop_exit_edge = EDGE_SUCC (new_loop->header, 1);
 768           else
 769             new_loop_exit_edge = EDGE_SUCC (new_loop->header, 0);
 770
 771           add_phi_arg (phi, phi_arg, new_loop_exit_edge);
 772         }
 773     }
 774
 775   if (at_exit) /* Add the loop copy at exit.  */
 776     {
 777       redirect_edge_and_branch_force (e, new_loop->header);
 778       set_immediate_dominator (CDI_DOMINATORS, new_loop->header, e->src);
 779       if (was_imm_dom)
 780         set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_loop->header);
 781     }
 782   else /* Add the copy at entry.  */
 783     {
 784       edge new_exit_e;
 785       edge entry_e = loop_preheader_edge (loop);
 786       basic_block preheader = entry_e->src;
 787
 788       if (!flow_bb_inside_loop_p (new_loop,
 789                                   EDGE_SUCC (new_loop->header, 0)->dest))
 790         new_exit_e = EDGE_SUCC (new_loop->header, 0);
 791       else
 792         new_exit_e = EDGE_SUCC (new_loop->header, 1);
 793
 794       redirect_edge_and_branch_force (new_exit_e, loop->header);
 795       set_immediate_dominator (CDI_DOMINATORS, loop->header,
 796                                new_exit_e->src);
 797
 798       /* We have to add phi args to the loop->header here as coming
 799          from new_exit_e edge.  */
 800       for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
 801         {
 802           phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, entry_e);
 803           if (phi_arg)
 804             add_phi_arg (phi, phi_arg, new_exit_e);
 805         }
 806
 807       redirect_edge_and_branch_force (entry_e, new_loop->header);
 808       set_immediate_dominator (CDI_DOMINATORS, new_loop->header, preheader);
 809     }
 810
 811   flow_loop_scan (new_loop, LOOP_ALL);
 812   flow_loop_scan (loop, LOOP_ALL);
 813   free (new_bbs);
 814   free (bbs);
 815
 816   return new_loop;
 817 }
 818
 819
 820 /* Given the condition statement COND, put it as the last statement
 821    of GUARD_BB; EXIT_BB is the basic block to skip the loop;
 822    Assumes that this is the single exit of the guarded loop.
 823    Returns the skip edge.  */
 824
 825 static edge
 826 slpeel_add_loop_guard (basic_block guard_bb, tree cond, basic_block exit_bb,
 827                         basic_block dom_bb)
 828 {
 829   block_stmt_iterator bsi;
 830   edge new_e, enter_e;
 831   tree cond_stmt, then_label, else_label;
 832
 833   enter_e = EDGE_SUCC (guard_bb, 0);
 834   enter_e->flags &= ~EDGE_FALLTHRU;
 835   enter_e->flags |= EDGE_FALSE_VALUE;
 836   bsi = bsi_last (guard_bb);
 837
 838   then_label = build1 (GOTO_EXPR, void_type_node,
 839                        tree_block_label (exit_bb));
 840   else_label = build1 (GOTO_EXPR, void_type_node,
 841                        tree_block_label (enter_e->dest));
 842   cond_stmt = build3 (COND_EXPR, void_type_node, cond,
 843                      then_label, else_label);
 844   bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT);
 845   /* Add new edge to connect entry block to the second loop.  */
 846   new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);
 847   set_immediate_dominator (CDI_DOMINATORS, exit_bb, dom_bb);
 848   return new_e;
 849 }
 850
 851
 852 /* This function verifies that the following restrictions apply to LOOP:
 853    (1) it is innermost
 854    (2) it consists of exactly 2 basic blocks - header, and an empty latch.
 855    (3) it is single entry, single exit
 856    (4) its exit condition is the last stmt in the header
 857    (5) E is the entry/exit edge of LOOP.
 858  */
 859
 860 static bool
 861 slpeel_can_duplicate_loop_p (struct loop *loop, edge e)
 862 {
 863   edge exit_e = loop->exit_edges [0];
 864   edge entry_e = loop_preheader_edge (loop);
 865   tree orig_cond = get_loop_exit_condition (loop);
 866   block_stmt_iterator loop_exit_bsi = bsi_last (exit_e->src);
 867
 868   if (any_marked_for_rewrite_p ())
 869     return false;
 870
 871   if (loop->inner
 872       /* All loops have an outer scope; the only case loop->outer is NULL is for
 873          the function itself.  */
 874       || !loop->outer
 875       || loop->num_nodes != 2
 876       || !empty_block_p (loop->latch)
 877       || loop->num_exits != 1
 878       || loop->num_entries != 1
 879       /* Verify that new loop exit condition can be trivially modified.  */
 880       || (!orig_cond || orig_cond != bsi_stmt (loop_exit_bsi))
 881       || (e != exit_e && e != entry_e))
 882     return false;
 883
 884   return true;
 885 }
 886
 887 #ifdef ENABLE_CHECKING
 888 static void
 889 slpeel_verify_cfg_after_peeling (struct loop *first_loop,
 890                                  struct loop *second_loop)
 891 {
 892   basic_block loop1_exit_bb = first_loop->exit_edges[0]->dest;
 893   basic_block loop2_entry_bb = second_loop->pre_header;
 894   basic_block loop1_entry_bb = loop_preheader_edge (first_loop)->src;
 895
 896   /* A guard that controls whether the second_loop is to be executed or skipped
 897      is placed in first_loop->exit.  first_loopt->exit therefore has two
 898      successors - one is the preheader of second_loop, and the other is a bb
 899      after second_loop.
 900    */
 901   gcc_assert (EDGE_COUNT (loop1_exit_bb->succs) == 2);
 902
 903
 904   /* 1. Verify that one of the successors of first_loopt->exit is the preheader
 905         of second_loop.  */
 906
 907   /* The preheader of new_loop is expected to have two predessors:
 908      first_loop->exit and the block that precedes first_loop.  */
 909
 910   gcc_assert (EDGE_COUNT (loop2_entry_bb->preds) == 2
 911               && ((EDGE_PRED (loop2_entry_bb, 0)->src == loop1_exit_bb
 912                    && EDGE_PRED (loop2_entry_bb, 1)->src == loop1_entry_bb)
 913                || (EDGE_PRED (loop2_entry_bb, 1)->src ==  loop1_exit_bb
 914                    && EDGE_PRED (loop2_entry_bb, 0)->src == loop1_entry_bb)));
 915
 916   /* Verify that the other successor of first_loopt->exit is after the
 917      second_loop.  */
 918   /* TODO */
 919 }
 920 #endif
 921
 922 /* Function slpeel_tree_peel_loop_to_edge.
 923
 924    Peel the first (last) iterations of LOOP into a new prolog (epilog) loop
 925    that is placed on the entry (exit) edge E of LOOP. After this transformation
 926    we have two loops one after the other - first-loop iterates FIRST_NITERS
 927    times, and second-loop iterates the remainder NITERS - FIRST_NITERS times.
 928
 929    Input:
 930    - LOOP: the loop to be peeled.
 931    - E: the exit or entry edge of LOOP.
 932         If it is the entry edge, we peel the first iterations of LOOP. In this
 933         case first-loop is LOOP, and second-loop is the newly created loop.
 934         If it is the exit edge, we peel the last iterations of LOOP. In this
 935         case, first-loop is the newly created loop, and second-loop is LOOP.
 936    - NITERS: the number of iterations that LOOP iterates.
 937    - FIRST_NITERS: the number of iterations that the first-loop should iterate.
 938    - UPDATE_FIRST_LOOP_COUNT:  specified whether this function is responsible
 939         for updating the loop bound of the first-loop to FIRST_NITERS.  If it
 940         is false, the caller of this function may want to take care of this
 941         (this can be useful if we don't want new stmts added to first-loop).
 942
 943    Output:
 944    The function returns a pointer to the new loop-copy, or NULL if it failed
 945    to perform the transformation.
 946
 947    The function generates two if-then-else guards: one before the first loop,
 948    and the other before the second loop:
 949    The first guard is:
 950      if (FIRST_NITERS == 0) then skip the first loop,
 951      and go directly to the second loop.
 952    The second guard is:
 953      if (FIRST_NITERS == NITERS) then skip the second loop.
 954
 955    FORNOW only simple loops are supported (see slpeel_can_duplicate_loop_p).
 956    FORNOW the resulting code will not be in loop-closed-ssa form.
 957 */
 958
 959 struct loop*
 960 slpeel_tree_peel_loop_to_edge (struct loop *loop, struct loops *loops,
 961                                edge e, tree first_niters,
 962                                tree niters, bool update_first_loop_count)
 963 {
 964   struct loop *new_loop = NULL, *first_loop, *second_loop;
 965   edge skip_e;
 966   tree pre_condition;
 967   bitmap definitions;
 968   basic_block bb_before_second_loop, bb_after_second_loop;
 969   basic_block bb_before_first_loop;
 970   basic_block bb_between_loops;
 971   edge exit_e = loop->exit_edges [0];
 972
 973   if (!slpeel_can_duplicate_loop_p (loop, e))
 974     return NULL;
 975
 976   /* We have to initialize cfg_hooks. Then, when calling
 977    cfg_hooks->split_edge, the function tree_split_edge
 978    is actually called and, when calling cfg_hooks->duplicate_block,
 979    the function tree_duplicate_bb is called.  */
 980   tree_register_cfg_hooks ();
 981
 982
 983   /* 1. Generate a copy of LOOP and put it on E (E is the entry/exit of LOOP).
 984         Resulting CFG would be:
 985
 986         first_loop:
 987         do {
 988         } while ...
 989
 990         second_loop:
 991         do {
 992         } while ...
 993
 994         orig_exit_bb:
 995    */
 996
 997   if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, loops, e)))
 998     {
 999       if (vect_debug_stats (loop) || vect_debug_details (loop))
1000         fprintf (dump_file, "tree_duplicate_loop_to_edge_cfg failed.\n");
1001       return NULL;
1002     }
1003
1004   if (e == exit_e)
1005     {
1006       /* NEW_LOOP was placed after LOOP.  */
1007       first_loop = loop;
1008       second_loop = new_loop;
1009     }
1010   else
1011     {
1012       /* NEW_LOOP was placed before LOOP.  */
1013       first_loop = new_loop;
1014       second_loop = loop;
1015     }
1016
1017   definitions = marked_ssa_names ();
1018   allocate_new_names (definitions);
1019   slpeel_update_phis_for_duplicate_loop (loop, new_loop, e == exit_e);
1020   rename_variables_in_loop (new_loop);
1021
1022
1023   /* 2. Add the guard that controls whether the first loop is executed.
1024         Resulting CFG would be:
1025
1026         bb_before_first_loop:
1027         if (FIRST_NITERS == 0) GOTO bb_before_second_loop
1028                                GOTO first-loop
1029
1030         first_loop:
1031         do {
1032         } while ...
1033
1034         bb_before_second_loop:
1035
1036         second_loop:
1037         do {
1038         } while ...
1039
1040         orig_exit_bb:
1041    */
1042
1043   bb_before_first_loop = split_edge (loop_preheader_edge (first_loop));
1044   add_bb_to_loop (bb_before_first_loop, first_loop->outer);
1045   bb_before_second_loop = split_edge (first_loop->exit_edges[0]);
1046   add_bb_to_loop (bb_before_second_loop, first_loop->outer);
1047   flow_loop_scan (first_loop, LOOP_ALL);
1048   flow_loop_scan (second_loop, LOOP_ALL);
1049
1050   pre_condition =
1051         build2 (LE_EXPR, boolean_type_node, first_niters, integer_zero_node);
1052   skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
1053                                   bb_before_second_loop, bb_before_first_loop);
1054   slpeel_update_phi_nodes_for_guard (skip_e, first_loop, true /* entry-phis */,
1055                                      first_loop == new_loop);
1056
1057
1058   /* 3. Add the guard that controls whether the second loop is executed.
1059         Resulting CFG would be:
1060
1061         bb_before_first_loop:
1062         if (FIRST_NITERS == 0) GOTO bb_before_second_loop (skip first loop)
1063                                GOTO first-loop
1064
1065         first_loop:
1066         do {
1067         } while ...
1068
1069         bb_between_loops:
1070         if (FIRST_NITERS == NITERS) GOTO bb_after_second_loop (skip second loop)
1071                                     GOTO bb_before_second_loop
1072
1073         bb_before_second_loop:
1074
1075         second_loop:
1076         do {
1077         } while ...
1078
1079         bb_after_second_loop:
1080
1081         orig_exit_bb:
1082    */
1083
1084   bb_between_loops = split_edge (first_loop->exit_edges[0]);
1085   add_bb_to_loop (bb_between_loops, first_loop->outer);
1086   bb_after_second_loop = split_edge (second_loop->exit_edges[0]);
1087   add_bb_to_loop (bb_after_second_loop, second_loop->outer);
1088   flow_loop_scan (first_loop, LOOP_ALL);
1089   flow_loop_scan (second_loop, LOOP_ALL);
1090
1091   pre_condition = build2 (EQ_EXPR, boolean_type_node, first_niters, niters);
1092   skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition,
1093                                   bb_after_second_loop, bb_before_first_loop);
1094   slpeel_update_phi_nodes_for_guard (skip_e, second_loop, false /* exit-phis */,
1095                                      second_loop == new_loop);
1096
1097   /* Flow loop scan does not update loop->single_exit field.  */
1098   first_loop->single_exit = first_loop->exit_edges[0];
1099   second_loop->single_exit = second_loop->exit_edges[0];
1100
1101   /* 4. Make first-loop iterate FIRST_NITERS times, if requested.
1102    */
1103   if (update_first_loop_count)
1104     slpeel_make_loop_iterate_ntimes (first_loop, first_niters);
1105
1106   free_new_names (definitions);
1107   BITMAP_XFREE (definitions);
1108   unmark_all_for_rewrite ();
1109
1110   return new_loop;
1111 }
1112
1113 \f
1114 /* Here the proper Vectorizer starts.  */
1115
1116 /*************************************************************************
1117   Vectorization Utilities.
1118  *************************************************************************/
1119
1120 /* Function new_stmt_vec_info.
1121
1122    Create and initialize a new stmt_vec_info struct for STMT.  */
1123
1124 stmt_vec_info
1125 new_stmt_vec_info (tree stmt, struct loop *loop)
1126 {
1127   stmt_vec_info res;
1128   res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
1129
1130   STMT_VINFO_TYPE (res) = undef_vec_info_type;
1131   STMT_VINFO_STMT (res) = stmt;
1132   STMT_VINFO_LOOP (res) = loop;
1133   STMT_VINFO_RELEVANT_P (res) = 0;
1134   STMT_VINFO_VECTYPE (res) = NULL;
1135   STMT_VINFO_VEC_STMT (res) = NULL;
1136   STMT_VINFO_DATA_REF (res) = NULL;
1137   STMT_VINFO_MEMTAG (res) = NULL;
1138   STMT_VINFO_VECT_DR_BASE (res) = NULL;
1139   STMT_VINFO_VECT_INIT_OFFSET (res) = NULL_TREE;
1140   STMT_VINFO_VECT_STEP (res) = NULL_TREE;
1141   STMT_VINFO_VECT_BASE_ALIGNED_P (res) = false;
1142   STMT_VINFO_VECT_MISALIGNMENT (res) = NULL_TREE;
1143
1144   return res;
1145 }
1146
1147
1148 /* Function new_loop_vec_info.
1149
1150    Create and initialize a new loop_vec_info struct for LOOP, as well as
1151    stmt_vec_info structs for all the stmts in LOOP.  */
1152
1153 loop_vec_info
1154 new_loop_vec_info (struct loop *loop)
1155 {
1156   loop_vec_info res;
1157   basic_block *bbs;
1158   block_stmt_iterator si;
1159   unsigned int i;
1160
1161   res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));
1162
1163   bbs = get_loop_body (loop);
1164
1165   /* Create stmt_info for all stmts in the loop.  */
1166   for (i = 0; i < loop->num_nodes; i++)
1167     {
1168       basic_block bb = bbs[i];
1169       for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1170         {
1171           tree stmt = bsi_stmt (si);
1172           stmt_ann_t ann;
1173
1174           get_stmt_operands (stmt);
1175           ann = stmt_ann (stmt);
1176           set_stmt_info (ann, new_stmt_vec_info (stmt, loop));
1177         }
1178     }
1179
1180   LOOP_VINFO_LOOP (res) = loop;
1181   LOOP_VINFO_BBS (res) = bbs;
1182   LOOP_VINFO_EXIT_COND (res) = NULL;
1183   LOOP_VINFO_NITERS (res) = NULL;
1184   LOOP_VINFO_VECTORIZABLE_P (res) = 0;
1185   LOOP_DO_PEELING_FOR_ALIGNMENT (res) = false;
1186   LOOP_VINFO_VECT_FACTOR (res) = 0;
1187   VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_WRITES (res), 20,
1188                            "loop_write_datarefs");
1189   VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_READS (res), 20,
1190                            "loop_read_datarefs");
1191   LOOP_VINFO_UNALIGNED_DR (res) = NULL;
1192
1193   return res;
1194 }
1195
1196
1197 /* Function destroy_loop_vec_info.
1198
1199    Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the
1200    stmts in the loop.  */
1201
1202 void
1203 destroy_loop_vec_info (loop_vec_info loop_vinfo)
1204 {
1205   struct loop *loop;
1206   basic_block *bbs;
1207   int nbbs;
1208   block_stmt_iterator si;
1209   int j;
1210
1211   if (!loop_vinfo)
1212     return;
1213
1214   loop = LOOP_VINFO_LOOP (loop_vinfo);
1215
1216   bbs = LOOP_VINFO_BBS (loop_vinfo);
1217   nbbs = loop->num_nodes;
1218
1219   for (j = 0; j < nbbs; j++)
1220     {
1221       basic_block bb = bbs[j];
1222       for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1223         {
1224           tree stmt = bsi_stmt (si);
1225           stmt_ann_t ann = stmt_ann (stmt);
1226           stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1227           free (stmt_info);
1228           set_stmt_info (ann, NULL);
1229         }
1230     }
1231
1232   free (LOOP_VINFO_BBS (loop_vinfo));
1233   varray_clear (LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
1234   varray_clear (LOOP_VINFO_DATAREF_READS (loop_vinfo));
1235
1236   free (loop_vinfo);
1237 }
1238
1239
1240 /* Function debug_loop_stats.
1241
1242    For vectorization statistics dumps.  */
1243
1244 static bool
1245 vect_debug_stats (struct loop *loop)
1246 {
1247   basic_block bb;
1248   block_stmt_iterator si;
1249   tree node = NULL_TREE;
1250
1251   if (!dump_file || !(dump_flags & TDF_STATS))
1252     return false;
1253
1254   if (!loop)
1255     {
1256       fprintf (dump_file, "\n");
1257       return true;
1258     }
1259
1260   if (!loop->header)
1261     return false;
1262
1263   bb = loop->header;
1264
1265   for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1266     {
1267       node = bsi_stmt (si);
1268       if (node && EXPR_P (node) && EXPR_LOCUS (node))
1269         break;
1270     }
1271
1272   if (node && EXPR_P (node) && EXPR_LOCUS (node)
1273       && EXPR_FILENAME (node) && EXPR_LINENO (node))
1274     {
1275       fprintf (dump_file, "\nloop at %s:%d: ",
1276         EXPR_FILENAME (node), EXPR_LINENO (node));
1277       return true;
1278     }
1279
1280   return false;
1281 }
1282
1283
1284 /* Function debug_loop_details.
1285
1286    For vectorization debug dumps.  */
1287
1288 static bool
1289 vect_debug_details (struct loop *loop)
1290 {
1291    basic_block bb;
1292    block_stmt_iterator si;
1293    tree node = NULL_TREE;
1294
1295   if (!dump_file || !(dump_flags & TDF_DETAILS))
1296     return false;
1297
1298   if (!loop)
1299     {
1300       fprintf (dump_file, "\n");
1301       return true;
1302     }
1303
1304   if (!loop->header)
1305     return false;
1306
1307   bb = loop->header;
1308
1309   for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1310     {
1311       node = bsi_stmt (si);
1312       if (node && EXPR_P (node) && EXPR_LOCUS (node))
1313         break;
1314     }
1315
1316   if (node && EXPR_P (node) && EXPR_LOCUS (node)
1317       && EXPR_FILENAME (node) && EXPR_LINENO (node))
1318     {
1319       fprintf (dump_file, "\nloop at %s:%d: ",
1320                EXPR_FILENAME (node), EXPR_LINENO (node));
1321       return true;
1322     }
1323
1324   return false;
1325 }
1326
1327
1328 /* Function vect_get_ptr_offset
1329
1330    Compute the OFFSET modulo vector-type alignment of pointer REF in bits.  */
1331
1332 static tree
1333 vect_get_ptr_offset (tree ref ATTRIBUTE_UNUSED,
1334                      tree vectype ATTRIBUTE_UNUSED,
1335                      tree *offset ATTRIBUTE_UNUSED)
1336 {
1337   /* TODO: Use alignment information.  */
1338   return NULL_TREE;
1339 }
1340
1341
1342 /* Function vect_strip_conversions
1343
1344    Strip conversions that don't narrow the mode.  */
1345
1346 static tree
1347 vect_strip_conversion (tree expr)
1348 {
1349   tree to, ti, oprnd0;
1350
1351   while (TREE_CODE (expr) == NOP_EXPR || TREE_CODE (expr) == CONVERT_EXPR)
1352     {
1353       to = TREE_TYPE (expr);
1354       oprnd0 = TREE_OPERAND (expr, 0);
1355       ti = TREE_TYPE (oprnd0);
1356
1357       if (!INTEGRAL_TYPE_P (to) || !INTEGRAL_TYPE_P (ti))
1358         return NULL_TREE;
1359       if (GET_MODE_SIZE (TYPE_MODE (to)) < GET_MODE_SIZE (TYPE_MODE (ti)))
1360         return NULL_TREE;
1361
1362       expr = oprnd0;
1363     }
1364   return expr;
1365 }
1366
1367
1368 /* Function vect_analyze_offset_expr
1369
1370    Given an offset expression EXPR received from get_inner_reference, analyze
1371    it and create an expression for INITIAL_OFFSET by substituting the variables
1372    of EXPR with initial_condition of the corresponding access_fn in the loop.
1373    E.g.,
1374       for i
1375          for (j = 3; j < N; j++)
1376             a[j].b[i][j] = 0;
1377
1378    For a[j].b[i][j], EXPR will be 'i * C_i + j * C_j + C'. 'i' cannot be
1379    substituted, since its access_fn in the inner loop is i. 'j' will be
1380    substituted with 3. An INITIAL_OFFSET will be 'i * C_i + C`', where
1381    C` =  3 * C_j + C.
1382
1383    Compute MISALIGN (the misalignment of the data reference initial access from
1384    its base) if possible. Misalignment can be calculated only if all the
1385    variables can be substituted with constants, or if a variable is multiplied
1386    by a multiple of VECTYPE_ALIGNMENT. In the above example, since 'i' cannot
1387    be substituted, MISALIGN will be NULL_TREE in case that C_i is not a multiple
1388    of VECTYPE_ALIGNMENT, and C` otherwise. (We perform MISALIGN modulo
1389    VECTYPE_ALIGNMENT computation in the caller of this function).
1390
1391    STEP is an evolution of the data reference in this loop in bytes.
1392    In the above example, STEP is C_j.
1393
1394    Return FALSE, if the analysis fails, e.g., there is no access_fn for a
1395    variable. In this case, all the outputs (INITIAL_OFFSET, MISALIGN and STEP)
1396    are NULL_TREEs. Otherwise, return TRUE.
1397
1398 */
1399
1400 static bool
1401 vect_analyze_offset_expr (tree expr,
1402                           struct loop *loop,
1403                           tree vectype_alignment,
1404                           tree *initial_offset,
1405                           tree *misalign,
1406                           tree *step)
1407 {
1408   tree oprnd0;
1409   tree oprnd1;
1410   tree left_offset = size_zero_node;
1411   tree right_offset = size_zero_node;
1412   tree left_misalign = size_zero_node;
1413   tree right_misalign = size_zero_node;
1414   tree left_step = size_zero_node;
1415   tree right_step = size_zero_node;
1416   enum tree_code code;
1417   tree init, evolution;
1418
1419   *step = NULL_TREE;
1420   *misalign = NULL_TREE;
1421   *initial_offset = NULL_TREE;
1422
1423   /* Strip conversions that don't narrow the mode.  */
1424   expr = vect_strip_conversion (expr);
1425   if (!expr)
1426     return false;
1427
1428   /* Stop conditions:
1429      1. Constant.  */
1430   if (TREE_CODE (expr) == INTEGER_CST)
1431     {
1432       *initial_offset = fold_convert (sizetype, expr);
1433       *misalign = fold_convert (sizetype, expr);
1434       *step = size_zero_node;
1435       return true;
1436     }
1437
1438   /* 2. Variable. Try to substitute with initial_condition of the corresponding
1439      access_fn in the current loop.  */
1440   if (SSA_VAR_P (expr))
1441     {
1442       tree access_fn = analyze_scalar_evolution (loop, expr);
1443
1444       if (access_fn == chrec_dont_know)
1445         /* No access_fn.  */
1446         return false;
1447
1448       init = initial_condition_in_loop_num (access_fn, loop->num);
1449       if (init == expr && !expr_invariant_in_loop_p (loop, init))
1450         /* Not enough information: may be not loop invariant.
1451            E.g., for a[b[i]], we get a[D], where D=b[i]. EXPR is D, its
1452            initial_condition is D, but it depends on i - loop's induction
1453            variable.  */
1454         return false;
1455
1456       evolution = evolution_part_in_loop_num (access_fn, loop->num);
1457       if (evolution && TREE_CODE (evolution) != INTEGER_CST)
1458         /* Evolution is not constant.  */
1459         return false;
1460
1461       if (TREE_CODE (init) == INTEGER_CST)
1462         *misalign = fold_convert (sizetype, init);
1463       else
1464         /* Not constant, misalignment cannot be calculated.  */
1465         *misalign = NULL_TREE;
1466
1467       *initial_offset = fold_convert (sizetype, init);
1468
1469       *step = evolution ? fold_convert (sizetype, evolution) : size_zero_node;
1470       return true;
1471     }
1472
1473   /* Recursive computation.  */
1474   if (!BINARY_CLASS_P (expr))
1475     {
1476       /* We expect to get binary expressions (PLUS/MINUS and MULT).  */
1477       if (vect_debug_details (NULL))
1478         {
1479           fprintf (dump_file, "Not binary expression ");
1480           print_generic_expr (dump_file, expr, TDF_SLIM);
1481         }
1482       return false;
1483     }
1484   oprnd0 = TREE_OPERAND (expr, 0);
1485   oprnd1 = TREE_OPERAND (expr, 1);
1486
1487   if (!vect_analyze_offset_expr (oprnd0, loop, vectype_alignment, &left_offset,
1488                                 &left_misalign, &left_step)
1489       || !vect_analyze_offset_expr (oprnd1, loop, vectype_alignment,
1490                                     &right_offset, &right_misalign, &right_step))
1491       return false;
1492
1493   /* The type of the operation: plus, minus or mult.  */
1494   code = TREE_CODE (expr);
1495   switch (code)
1496     {
1497     case MULT_EXPR:
1498       if (TREE_CODE (right_offset) != INTEGER_CST)
1499         /* RIGHT_OFFSET can be not constant. For example, for arrays of variable
1500            sized types.
1501            FORNOW: We don't support such cases.  */
1502         return false;
1503
1504       /* Strip conversions that don't narrow the mode.  */
1505       left_offset = vect_strip_conversion (left_offset);
1506       if (!left_offset)
1507         return false;
1508       /* Misalignment computation.  */
1509       if (SSA_VAR_P (left_offset))
1510         {
1511           /* If the left side contains variable that cannot be substituted with
1512              constant, we check if the right side is a multiple of ALIGNMENT.  */
1513           if (integer_zerop (size_binop (TRUNC_MOD_EXPR, right_offset,
1514                                          vectype_alignment)))
1515             *misalign = size_zero_node;
1516           else
1517             /* If the remainder is not zero or the right side isn't constant, we
1518                can't compute  misalignment.  */
1519             *misalign = NULL_TREE;
1520         }
1521       else
1522         {
1523           /* The left operand was successfully substituted with constant.  */
1524           if (left_misalign)
1525             /* In case of EXPR '(i * C1 + j) * C2', LEFT_MISALIGN is
1526                NULL_TREE.  */
1527             *misalign  = size_binop (code, left_misalign, right_misalign);
1528           else
1529             *misalign = NULL_TREE;
1530         }
1531
1532       /* Step calculation.  */
1533       /* Multiply the step by the right operand.  */
1534       *step  = size_binop (MULT_EXPR, left_step, right_offset);
1535       break;
1536
1537     case PLUS_EXPR:
1538     case MINUS_EXPR:
1539       /* Combine the recursive calculations for step and misalignment.  */
1540       *step = size_binop (code, left_step, right_step);
1541
1542       if (left_misalign && right_misalign)
1543         *misalign  = size_binop (code, left_misalign, right_misalign);
1544       else
1545         *misalign = NULL_TREE;
1546
1547       break;
1548
1549     default:
1550       gcc_unreachable ();
1551     }
1552
1553   /* Compute offset.  */
1554   *initial_offset = fold_convert (sizetype,
1555                                   fold (build2 (code, TREE_TYPE (left_offset),
1556                                                 left_offset,
1557                                                 right_offset)));
1558   return true;
1559 }
1560
1561
1562 /* Function vect_get_base_and_offset
1563
1564    Return the BASE of the data reference EXPR.
1565    If VECTYPE is given, also compute the INITIAL_OFFSET from BASE, MISALIGN and
1566    STEP.
1567    E.g., for EXPR a.b[i] + 4B, BASE is a, and OFFSET is the overall offset
1568    'a.b[i] + 4B' from a (can be an expression), MISALIGN is an OFFSET
1569    instantiated with initial_conditions of access_functions of variables,
1570    modulo alignment, and STEP is the evolution of the DR_REF in this loop.
1571
1572    Function get_inner_reference is used for the above in case of ARRAY_REF and
1573    COMPONENT_REF.
1574
1575    Input:
1576    EXPR - the memory reference that is being analyzed
1577    DR - the data_reference struct of the _original_ memory reference
1578         (Note: DR_REF (DR) is not necessarily EXPR)
1579    VECTYPE - the type that defines the alignment (i.e, we compute
1580              alignment relative to TYPE_ALIGN(VECTYPE))
1581
1582    Output:
1583    BASE (returned value) - the base of the data reference EXPR.
1584                            E.g, if EXPR is a.b[k].c[i][j] the returned
1585                            base is a.
1586    INITIAL_OFFSET - initial offset of EXPR from BASE (an expression)
1587    MISALIGN - offset of EXPR from BASE in bytes (a constant) or NULL_TREE if the
1588               computation is impossible
1589    STEP - evolution of the DR_REF in the loop
1590    BASE_ALIGNED_P - indicates if BASE is aligned
1591
1592    If something unexpected is encountered (an unsupported form of data-ref),
1593    then NULL_TREE is returned.  */
1594
1595 static tree
1596 vect_get_base_and_offset (struct data_reference *dr,
1597                           tree expr,
1598                           tree vectype,
1599                           loop_vec_info loop_vinfo,
1600                           tree *initial_offset,
1601                           tree *misalign,
1602                           tree *step,
1603                           bool *base_aligned_p)
1604 {
1605   tree this_offset = size_zero_node;
1606   tree this_misalign = size_zero_node;
1607   tree this_step = size_zero_node;
1608   tree base = NULL_TREE;
1609   tree next_ref;
1610   tree oprnd0, oprnd1;
1611   enum tree_code code = TREE_CODE (expr);
1612   HOST_WIDE_INT pbitsize;
1613   HOST_WIDE_INT pbitpos;
1614   tree poffset;
1615   enum machine_mode pmode;
1616   int punsignedp, pvolatilep;
1617   tree bit_pos_in_bytes;
1618   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1619
1620   *base_aligned_p = false;
1621
1622   switch (code)
1623     {
1624     /* These cases end the recursion:  */
1625     case VAR_DECL:
1626     case PARM_DECL:
1627       *initial_offset = size_zero_node;
1628       *step = size_zero_node;
1629       *misalign = size_zero_node;
1630       if (DECL_ALIGN (expr) >= TYPE_ALIGN (vectype))
1631         *base_aligned_p = true;
1632       return expr;
1633
1634     case SSA_NAME:
1635       if (TREE_CODE (TREE_TYPE (expr)) != POINTER_TYPE)
1636         return NULL_TREE;
1637
1638       if (TYPE_ALIGN (TREE_TYPE (TREE_TYPE (expr))) < TYPE_ALIGN (vectype))
1639         {
1640           base = vect_get_ptr_offset (expr, vectype, misalign);
1641           if (base)
1642             *base_aligned_p = true;
1643         }
1644       else
1645         {
1646           *base_aligned_p = true;
1647           *misalign = size_zero_node;
1648         }
1649       *initial_offset = size_zero_node;
1650       *step = size_zero_node;
1651       return expr;
1652
1653     case INTEGER_CST:
1654       *initial_offset = fold_convert (sizetype, expr);
1655       *misalign = fold_convert (sizetype, expr);
1656       *step = size_zero_node;
1657       return expr;
1658
1659     /* These cases continue the recursion:  */
1660     case ADDR_EXPR:
1661       oprnd0 = TREE_OPERAND (expr, 0);
1662       next_ref = oprnd0;
1663       break;
1664
1665     case INDIRECT_REF:
1666       oprnd0 = TREE_OPERAND (expr, 0);
1667       next_ref = oprnd0;
1668       break;
1669
1670     case PLUS_EXPR:
1671     case MINUS_EXPR:
1672       oprnd0 = TREE_OPERAND (expr, 0);
1673       oprnd1 = TREE_OPERAND (expr, 1);
1674
1675       /* In case we have a PLUS_EXPR of the form
1676          (oprnd0 + oprnd1), we assume that only oprnd0 determines the base.
1677          This is verified in vect_get_memtag_and_dr.  */
1678       base = vect_get_base_and_offset (dr, oprnd1, vectype, loop_vinfo,
1679                                        &this_offset, &this_misalign,
1680                                        &this_step, base_aligned_p);
1681       /* Offset was already computed in vect_analyze_pointer_ref_access.  */
1682       this_offset = size_zero_node;
1683
1684       if (!base)
1685         this_misalign = NULL_TREE;
1686
1687       next_ref = oprnd0;
1688       break;
1689
1690     default:
1691       if (!handled_component_p (expr))
1692         /* Unsupported expression.  */
1693         return NULL_TREE;
1694
1695       /* Find the base and the offset from it.  */
1696       next_ref = get_inner_reference (expr, &pbitsize, &pbitpos, &poffset,
1697                                       &pmode, &punsignedp, &pvolatilep, false);
1698       if (!next_ref)
1699         return NULL_TREE;
1700
1701       if (poffset
1702           && !vect_analyze_offset_expr (poffset, loop, TYPE_SIZE_UNIT (vectype),
1703                                         &this_offset, &this_misalign,
1704                                         &this_step))
1705         {
1706           /* Failed to compute offset or step.  */
1707           *step = NULL_TREE;
1708           *initial_offset = NULL_TREE;
1709           *misalign = NULL_TREE;
1710           return NULL_TREE;
1711         }
1712
1713       /* Add bit position to OFFSET and MISALIGN.  */
1714
1715       bit_pos_in_bytes = size_int (pbitpos/BITS_PER_UNIT);
1716       /* Check that there is no remainder in bits.  */
1717       if (pbitpos%BITS_PER_UNIT)
1718         {
1719           if (vect_debug_details (NULL))
1720             fprintf (dump_file, "bit offset alignment.");
1721           return NULL_TREE;
1722         }
1723       this_offset = fold (size_binop (PLUS_EXPR, bit_pos_in_bytes,
1724                                       fold_convert (sizetype, this_offset)));
1725       if (this_misalign)
1726         this_misalign = size_binop (PLUS_EXPR, this_misalign, bit_pos_in_bytes);
1727
1728       /* Continue the recursion to refine the base (get_inner_reference returns
1729          &a for &a[i], and not a).  */
1730       break;
1731     }
1732
1733   base = vect_get_base_and_offset (dr, next_ref, vectype, loop_vinfo,
1734                                    initial_offset, misalign, step,
1735                                    base_aligned_p);
1736   if (base)
1737     {
1738       /* Combine the results.  */
1739       if (this_misalign && *misalign)
1740         *misalign = size_binop (PLUS_EXPR, *misalign, this_misalign);
1741       else
1742         *misalign = NULL_TREE;
1743
1744       *step = size_binop (PLUS_EXPR, *step, this_step);
1745
1746       *initial_offset = fold (build2 (PLUS_EXPR, TREE_TYPE (*initial_offset),
1747                                       *initial_offset, this_offset));
1748
1749       if (vect_debug_details (NULL))
1750         {
1751           print_generic_expr (dump_file, expr, TDF_SLIM);
1752           fprintf (dump_file, "\n --> total offset for ref: ");
1753           print_generic_expr (dump_file, *initial_offset, TDF_SLIM);
1754           fprintf (dump_file, "\n --> total misalign for ref: ");
1755           print_generic_expr (dump_file, *misalign, TDF_SLIM);
1756           fprintf (dump_file, "\n --> total step for ref: ");
1757           print_generic_expr (dump_file, *step, TDF_SLIM);
1758         }
1759     }
1760   return base;
1761 }
1762
1763
1764 /* Function vect_force_dr_alignment_p.
1765
1766    Returns whether the alignment of a DECL can be forced to be aligned
1767    on ALIGNMENT bit boundary.  */
1768
1769 static bool
1770 vect_can_force_dr_alignment_p (tree decl, unsigned int alignment)
1771 {
1772   if (TREE_CODE (decl) != VAR_DECL)
1773     return false;
1774
1775   if (DECL_EXTERNAL (decl))
1776     return false;
1777
1778   if (TREE_ASM_WRITTEN (decl))
1779     return false;
1780
1781   if (TREE_STATIC (decl))
1782     return (alignment <= MAX_OFILE_ALIGNMENT);
1783   else
1784     /* This is not 100% correct.  The absolute correct stack alignment
1785        is STACK_BOUNDARY.  We're supposed to hope, but not assume, that
1786        PREFERRED_STACK_BOUNDARY is honored by all translation units.
1787        However, until someone implements forced stack alignment, SSE
1788        isn't really usable without this.  */
1789     return (alignment <= PREFERRED_STACK_BOUNDARY);
1790 }
1791
1792
1793 /* Function vect_get_new_vect_var.
1794
1795    Returns a name for a new variable. The current naming scheme appends the
1796    prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
1797    the name of vectorizer generated variables, and appends that to NAME if
1798    provided.  */
1799
1800 static tree
1801 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
1802 {
1803   const char *prefix;
1804   int prefix_len;
1805   tree new_vect_var;
1806
1807   if (var_kind == vect_simple_var)
1808     prefix = "vect_";
1809   else
1810     prefix = "vect_p";
1811
1812   prefix_len = strlen (prefix);
1813
1814   if (name)
1815     new_vect_var = create_tmp_var (type, concat (prefix, name, NULL));
1816   else
1817     new_vect_var = create_tmp_var (type, prefix);
1818
1819   return new_vect_var;
1820 }
1821
1822
1823 /* Function vect_create_index_for_vector_ref.
1824
1825    Create (and return) an index variable, along with it's update chain in the
1826    loop. This variable will be used to access a memory location in a vector
1827    operation.
1828
1829    Input:
1830    LOOP: The loop being vectorized.
1831    BSI: The block_stmt_iterator where STMT is. Any new stmts created by this
1832         function can be added here, or in the loop pre-header.
1833
1834    Output:
1835    Return an index that will be used to index a vector array.  It is expected
1836    that a pointer to the first vector will be used as the base address for the
1837    indexed reference.
1838
1839    FORNOW: we are not trying to be efficient, just creating a new index each
1840    time from scratch.  At this time all vector references could use the same
1841    index.
1842
1843    TODO: create only one index to be used by all vector references.  Record
1844    the index in the LOOP_VINFO the first time this procedure is called and
1845    return it on subsequent calls.  The increment of this index must be placed
1846    just before the conditional expression that ends the single block loop.  */
1847
1848 static tree
1849 vect_create_index_for_vector_ref (struct loop *loop, block_stmt_iterator *bsi)
1850 {
1851   tree init, step;
1852   tree indx_before_incr, indx_after_incr;
1853
1854   /* It is assumed that the base pointer used for vectorized access contains
1855      the address of the first vector.  Therefore the index used for vectorized
1856      access must be initialized to zero and incremented by 1.  */
1857
1858   init = integer_zero_node;
1859   step = integer_one_node;
1860
1861   /* Assuming that bsi_insert is used with BSI_NEW_STMT  */
1862   create_iv (init, step, NULL_TREE, loop, bsi, false,
1863         &indx_before_incr, &indx_after_incr);
1864
1865   return indx_before_incr;
1866 }
1867
1868
1869 /* Function vect_create_addr_base_for_vector_ref.
1870
1871    Create an expression that computes the address of the first memory location
1872    that will be accessed for a data reference.
1873
1874    Input:
1875    STMT: The statement containing the data reference.
1876    NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
1877    OFFSET: Optional. If supplied, it is be added to the initial address.
1878
1879    Output:
1880    1. Return an SSA_NAME whose value is the address of the memory location of
1881       the first vector of the data reference.
1882    2. If new_stmt_list is not NULL_TREE after return then the caller must insert
1883       these statement(s) which define the returned SSA_NAME.
1884
1885    FORNOW: We are only handling array accesses with step 1.  */
1886
1887 static tree
1888 vect_create_addr_base_for_vector_ref (tree stmt,
1889                                       tree *new_stmt_list,
1890                                       tree offset)
1891 {
1892   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1893   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1894   tree data_ref_base = unshare_expr (STMT_VINFO_VECT_DR_BASE (stmt_info));
1895   tree base_name = unshare_expr (DR_BASE_NAME (dr));
1896   tree ref = DR_REF (dr);
1897   tree scalar_type = TREE_TYPE (ref);
1898   tree scalar_ptr_type = build_pointer_type (scalar_type);
1899   tree vec_stmt;
1900   tree new_temp;
1901   tree addr_base, addr_expr;
1902   tree dest, new_stmt;
1903   tree base_offset = unshare_expr (STMT_VINFO_VECT_INIT_OFFSET (stmt_info));
1904
1905   if (TREE_CODE (TREE_TYPE (data_ref_base)) != POINTER_TYPE)
1906     /* After the analysis stage, we expect to get here only with RECORD_TYPE
1907        and ARRAY_TYPE. */
1908     /* Add '&' to ref_base.  */
1909     data_ref_base = build_fold_addr_expr (data_ref_base);
1910   else
1911     {
1912       /* Create '(scalar_type*) base' for pointers.  */
1913       tree dest, new_stmt, new_temp, vec_stmt, tmp_base;
1914       tree scalar_array_type = build_array_type (scalar_type, 0);
1915       tree scalar_array_ptr_type = build_pointer_type (scalar_array_type);
1916       tree array_ptr = create_tmp_var (scalar_array_ptr_type, "array_ptr");
1917       add_referenced_tmp_var (array_ptr);
1918
1919       dest = create_tmp_var (TREE_TYPE (data_ref_base), "dataref");
1920       add_referenced_tmp_var (dest);
1921       tmp_base = force_gimple_operand (data_ref_base, &new_stmt, false, dest);
1922       append_to_statement_list_force (new_stmt,  new_stmt_list);
1923
1924       vec_stmt = fold_convert (scalar_array_ptr_type, tmp_base);
1925       vec_stmt = build2 (MODIFY_EXPR, void_type_node, array_ptr, vec_stmt);
1926       new_temp = make_ssa_name (array_ptr, vec_stmt);
1927       TREE_OPERAND (vec_stmt, 0) = new_temp;
1928       append_to_statement_list_force (vec_stmt,  new_stmt_list);
1929       data_ref_base = new_temp;
1930     }
1931
1932   /* Create base_offset */
1933   dest = create_tmp_var (TREE_TYPE (base_offset), "base_off");
1934   add_referenced_tmp_var (dest);
1935   base_offset = force_gimple_operand (base_offset, &new_stmt, false, dest);
1936   append_to_statement_list_force (new_stmt, new_stmt_list);
1937
1938   if (offset)
1939     {
1940       tree tmp = create_tmp_var (TREE_TYPE (base_offset), "offset");
1941       add_referenced_tmp_var (tmp);
1942       offset = fold (build2 (MULT_EXPR, TREE_TYPE (offset), offset,
1943                              STMT_VINFO_VECT_STEP (stmt_info)));
1944       base_offset = fold (build2 (PLUS_EXPR, TREE_TYPE (base_offset), base_offset,
1945                                   offset));
1946       base_offset = force_gimple_operand (base_offset, &new_stmt, false, tmp);
1947       append_to_statement_list_force (new_stmt, new_stmt_list);
1948     }
1949
1950   /* base + base_offset */
1951   addr_base = fold (build2 (PLUS_EXPR, TREE_TYPE (data_ref_base), data_ref_base,
1952                             base_offset));
1953
1954   /* addr_expr = addr_base */
1955   addr_expr = vect_get_new_vect_var (scalar_ptr_type, vect_pointer_var,
1956                                      get_name (base_name));
1957   add_referenced_tmp_var (addr_expr);
1958   vec_stmt = build2 (MODIFY_EXPR, void_type_node, addr_expr, addr_base);
1959   new_temp = make_ssa_name (addr_expr, vec_stmt);
1960   TREE_OPERAND (vec_stmt, 0) = new_temp;
1961   append_to_statement_list_force (vec_stmt, new_stmt_list);
1962
1963   if (vect_debug_details (NULL))
1964     {
1965       fprintf (dump_file, "created ");
1966       print_generic_expr (dump_file, vec_stmt, TDF_SLIM);
1967       fprintf (dump_file, "\n");
1968     }
1969   return new_temp;
1970 }
1971
1972
1973 /* Function get_vectype_for_scalar_type.
1974
1975    Returns the vector type corresponding to SCALAR_TYPE as supported
1976    by the target.  */
1977
1978 static tree
1979 get_vectype_for_scalar_type (tree scalar_type)
1980 {
1981   enum machine_mode inner_mode = TYPE_MODE (scalar_type);
1982   int nbytes = GET_MODE_SIZE (inner_mode);
1983   int nunits;
1984   tree vectype;
1985
1986   if (nbytes == 0)
1987     return NULL_TREE;
1988
1989   /* FORNOW: Only a single vector size per target (UNITS_PER_SIMD_WORD)
1990      is expected.  */
1991   nunits = UNITS_PER_SIMD_WORD / nbytes;
1992
1993   vectype = build_vector_type (scalar_type, nunits);
1994   if (vect_debug_details (NULL))
1995     {
1996       fprintf (dump_file, "get vectype with %d units of type ", nunits);
1997       print_generic_expr (dump_file, scalar_type, TDF_SLIM);
1998     }
1999
2000   if (!vectype)
2001     return NULL_TREE;
2002
2003   if (vect_debug_details (NULL))
2004     {
2005       fprintf (dump_file, "vectype: ");
2006       print_generic_expr (dump_file, vectype, TDF_SLIM);
2007     }
2008
2009   if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
2010     {
2011       /* TODO: tree-complex.c sometimes can parallelize operations
2012          on generic vectors.  We can vectorize the loop in that case,
2013          but then we should re-run the lowering pass.  */
2014       if (vect_debug_details (NULL))
2015         fprintf (dump_file, "mode not supported by target.");
2016       return NULL_TREE;
2017     }
2018
2019   return vectype;
2020 }
2021
2022
2023 /* Function vect_align_data_ref.
2024
2025    Handle mislignment of a memory accesses.
2026
2027    FORNOW: Can't handle misaligned accesses.
2028    Make sure that the dataref is aligned.  */
2029
2030 static void
2031 vect_align_data_ref (tree stmt)
2032 {
2033   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2034   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2035
2036   /* FORNOW: can't handle misaligned accesses;
2037              all accesses expected to be aligned.  */
2038   gcc_assert (aligned_access_p (dr));
2039 }
2040
2041
2042 /* Function vect_create_data_ref_ptr.
2043
2044    Create a memory reference expression for vector access, to be used in a
2045    vector load/store stmt. The reference is based on a new pointer to vector
2046    type (vp).
2047
2048    Input:
2049    1. STMT: a stmt that references memory. Expected to be of the form
2050          MODIFY_EXPR <name, data-ref> or MODIFY_EXPR <data-ref, name>.
2051    2. BSI: block_stmt_iterator where new stmts can be added.
2052    3. OFFSET (optional): an offset to be added to the initial address accessed
2053         by the data-ref in STMT.
2054    4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
2055         pointing to the initial address.
2056
2057    Output:
2058    1. Declare a new ptr to vector_type, and have it point to the base of the
2059       data reference (initial addressed accessed by the data reference).
2060       For example, for vector of type V8HI, the following code is generated:
2061
2062       v8hi *vp;
2063       vp = (v8hi *)initial_address;
2064
2065       if OFFSET is not supplied:
2066          initial_address = &a[init];
2067       if OFFSET is supplied:
2068          initial_address = &a[init + OFFSET];
2069
2070       Return the initial_address in INITIAL_ADDRESS.
2071
2072    2. Create a data-reference in the loop based on the new vector pointer vp,
2073       and using a new index variable 'idx' as follows:
2074
2075       vp' = vp + update
2076
2077       where if ONLY_INIT is true:
2078          update = zero
2079       and otherwise
2080          update = idx + vector_type_size
2081
2082       Return the pointer vp'.
2083
2084
2085    FORNOW: handle only aligned and consecutive accesses.  */
2086
2087 static tree
2088 vect_create_data_ref_ptr (tree stmt, block_stmt_iterator *bsi, tree offset,
2089                           tree *initial_address, bool only_init)
2090 {
2091   tree base_name;
2092   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2093   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2094   struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2095   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2096   tree vect_ptr_type;
2097   tree vect_ptr;
2098   tree tag;
2099   v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
2100   v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
2101   vuse_optype vuses = STMT_VUSE_OPS (stmt);
2102   int nvuses, nv_may_defs, nv_must_defs;
2103   int i;
2104   tree new_temp;
2105   tree vec_stmt;
2106   tree new_stmt_list = NULL_TREE;
2107   tree idx;
2108   edge pe = loop_preheader_edge (loop);
2109   basic_block new_bb;
2110   tree vect_ptr_init;
2111   tree vectype_size;
2112   tree ptr_update;
2113   tree data_ref_ptr;
2114   tree type, tmp, size;
2115
2116   base_name = unshare_expr (DR_BASE_NAME (dr));
2117   if (vect_debug_details (NULL))
2118     {
2119       tree data_ref_base = base_name;
2120       fprintf (dump_file, "create array_ref of type: ");
2121       print_generic_expr (dump_file, vectype, TDF_SLIM);
2122       if (TREE_CODE (data_ref_base) == VAR_DECL)
2123         fprintf (dump_file, "\nvectorizing a one dimensional array ref: ");
2124       else if (TREE_CODE (data_ref_base) == ARRAY_REF)
2125         fprintf (dump_file, "\nvectorizing a multidimensional array ref: ");
2126       else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
2127         fprintf (dump_file, "\nvectorizing a record based array ref: ");
2128       else if (TREE_CODE (data_ref_base) == SSA_NAME)
2129         fprintf (dump_file, "\nvectorizing a pointer ref: ");
2130       print_generic_expr (dump_file, base_name, TDF_SLIM);
2131     }
2132
2133   /** (1) Create the new vector-pointer variable:  **/
2134
2135   vect_ptr_type = build_pointer_type (vectype);
2136   vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
2137                                     get_name (base_name));
2138   add_referenced_tmp_var (vect_ptr);
2139
2140
2141   /** (2) Handle aliasing information of the new vector-pointer:  **/
2142
2143   tag = STMT_VINFO_MEMTAG (stmt_info);
2144   gcc_assert (tag);
2145   get_var_ann (vect_ptr)->type_mem_tag = tag;
2146
2147   /* Mark for renaming all aliased variables
2148      (i.e, the may-aliases of the type-mem-tag).  */
2149   nvuses = NUM_VUSES (vuses);
2150   nv_may_defs = NUM_V_MAY_DEFS (v_may_defs);
2151   nv_must_defs = NUM_V_MUST_DEFS (v_must_defs);
2152   for (i = 0; i < nvuses; i++)
2153     {
2154       tree use = VUSE_OP (vuses, i);
2155       if (TREE_CODE (use) == SSA_NAME)
2156         bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (use))->uid);
2157     }
2158   for (i = 0; i < nv_may_defs; i++)
2159     {
2160       tree def = V_MAY_DEF_RESULT (v_may_defs, i);
2161       if (TREE_CODE (def) == SSA_NAME)
2162         bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (def))->uid);
2163     }
2164   for (i = 0; i < nv_must_defs; i++)
2165     {
2166       tree def = V_MUST_DEF_RESULT (v_must_defs, i);
2167       if (TREE_CODE (def) == SSA_NAME)
2168         bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (def))->uid);
2169     }
2170
2171
2172   /** (3) Calculate the initial address the vector-pointer, and set
2173           the vector-pointer to point to it before the loop:  **/
2174
2175   /* Create: (&(base[init_val+offset]) in the loop preheader.  */
2176   new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
2177                                                    offset);
2178   pe = loop_preheader_edge (loop);
2179   new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list);
2180   gcc_assert (!new_bb);
2181   *initial_address = new_temp;
2182
2183   /* Create: p = (vectype *) initial_base  */
2184   vec_stmt = fold_convert (vect_ptr_type, new_temp);
2185   vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
2186   new_temp = make_ssa_name (vect_ptr, vec_stmt);
2187   TREE_OPERAND (vec_stmt, 0) = new_temp;
2188   new_bb = bsi_insert_on_edge_immediate (pe, vec_stmt);
2189   gcc_assert (!new_bb);
2190   vect_ptr_init = TREE_OPERAND (vec_stmt, 0);
2191
2192
2193   /** (4) Handle the updating of the vector-pointer inside the loop: **/
2194
2195   if (only_init) /* No update in loop is required.  */
2196     return vect_ptr_init;
2197
2198   idx = vect_create_index_for_vector_ref (loop, bsi);
2199
2200   /* Create: update = idx * vectype_size  */
2201   tmp = create_tmp_var (integer_type_node, "update");
2202   add_referenced_tmp_var (tmp);
2203   size = TYPE_SIZE (vect_ptr_type);
2204   type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
2205   ptr_update = create_tmp_var (type, "update");
2206   add_referenced_tmp_var (ptr_update);
2207   vectype_size = TYPE_SIZE_UNIT (vectype);
2208   vec_stmt = build2 (MULT_EXPR, integer_type_node, idx, vectype_size);
2209   vec_stmt = build2 (MODIFY_EXPR, void_type_node, tmp, vec_stmt);
2210   new_temp = make_ssa_name (tmp, vec_stmt);
2211   TREE_OPERAND (vec_stmt, 0) = new_temp;
2212   bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
2213   vec_stmt = fold_convert (type, new_temp);
2214   vec_stmt = build2 (MODIFY_EXPR, void_type_node, ptr_update, vec_stmt);
2215   new_temp = make_ssa_name (ptr_update, vec_stmt);
2216   TREE_OPERAND (vec_stmt, 0) = new_temp;
2217   bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
2218
2219   /* Create: data_ref_ptr = vect_ptr_init + update  */
2220   vec_stmt = build2 (PLUS_EXPR, vect_ptr_type, vect_ptr_init, new_temp);
2221   vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
2222   new_temp = make_ssa_name (vect_ptr, vec_stmt);
2223   TREE_OPERAND (vec_stmt, 0) = new_temp;
2224   bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
2225   data_ref_ptr = TREE_OPERAND (vec_stmt, 0);
2226
2227   return data_ref_ptr;
2228 }
2229
2230
2231 /* Function vect_create_destination_var.
2232
2233    Create a new temporary of type VECTYPE.  */
2234
2235 static tree
2236 vect_create_destination_var (tree scalar_dest, tree vectype)
2237 {
2238   tree vec_dest;
2239   const char *new_name;
2240
2241   gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
2242
2243   new_name = get_name (scalar_dest);
2244   if (!new_name)
2245     new_name = "var_";
2246   vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, new_name);
2247   add_referenced_tmp_var (vec_dest);
2248
2249   return vec_dest;
2250 }
2251
2252
2253 /* Function vect_init_vector.
2254
2255    Insert a new stmt (INIT_STMT) that initializes a new vector variable with
2256    the vector elements of VECTOR_VAR. Return the DEF of INIT_STMT. It will be
2257    used in the vectorization of STMT.  */
2258
2259 static tree
2260 vect_init_vector (tree stmt, tree vector_var)
2261 {
2262   stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2263   struct loop *loop = STMT_VINFO_LOOP (stmt_vinfo);
2264   tree new_var;
2265   tree init_stmt;
2266   tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2267   tree vec_oprnd;
2268   edge pe;
2269   tree new_temp;
2270   basic_block new_bb;
2271
2272   new_var = vect_get_new_vect_var (vectype, vect_simple_var, "cst_");
2273   add_referenced_tmp_var (new_var);
2274
2275   init_stmt = build2 (MODIFY_EXPR, vectype, new_var, vector_var);
2276   new_temp = make_ssa_name (new_var, init_stmt);
2277   TREE_OPERAND (init_stmt, 0) = new_temp;
2278
2279   pe = loop_preheader_edge (loop);
2280   new_bb = bsi_insert_on_edge_immediate (pe, init_stmt);
2281   gcc_assert (!new_bb);
2282
2283   if (vect_debug_details (NULL))
2284     {
2285       fprintf (dump_file, "created new init_stmt: ");
2286       print_generic_expr (dump_file, init_stmt, TDF_SLIM);
2287     }
2288
2289   vec_oprnd = TREE_OPERAND (init_stmt, 0);
2290   return vec_oprnd;
2291 }
2292
2293
2294 /* Function vect_get_vec_def_for_operand.
2295
2296    OP is an operand in STMT. This function returns a (vector) def that will be
2297    used in the vectorized stmt for STMT.
2298
2299    In the case that OP is an SSA_NAME which is defined in the loop, then
2300    STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
2301
2302    In case OP is an invariant or constant, a new stmt that creates a vector def
2303    needs to be introduced.  */
2304
2305 static tree
2306 vect_get_vec_def_for_operand (tree op, tree stmt)
2307 {
2308   tree vec_oprnd;
2309   tree vec_stmt;
2310   tree def_stmt;
2311   stmt_vec_info def_stmt_info = NULL;
2312   stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2313   tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2314   int nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
2315   struct loop *loop = STMT_VINFO_LOOP (stmt_vinfo);
2316   basic_block bb;
2317   tree vec_inv;
2318   tree t = NULL_TREE;
2319   tree def;
2320   int i;
2321
2322   if (vect_debug_details (NULL))
2323     {
2324       fprintf (dump_file, "vect_get_vec_def_for_operand: ");
2325       print_generic_expr (dump_file, op, TDF_SLIM);
2326     }
2327
2328   /** ===> Case 1: operand is a constant.  **/
2329
2330   if (TREE_CODE (op) == INTEGER_CST || TREE_CODE (op) == REAL_CST)
2331     {
2332       /* Create 'vect_cst_ = {cst,cst,...,cst}'  */
2333
2334       tree vec_cst;
2335
2336       /* Build a tree with vector elements.  */
2337       if (vect_debug_details (NULL))
2338         fprintf (dump_file, "Create vector_cst. nunits = %d", nunits);
2339
2340       for (i = nunits - 1; i >= 0; --i)
2341         {
2342           t = tree_cons (NULL_TREE, op, t);
2343         }
2344       vec_cst = build_vector (vectype, t);
2345       return vect_init_vector (stmt, vec_cst);
2346     }
2347
2348   gcc_assert (TREE_CODE (op) == SSA_NAME);
2349
2350   /** ===> Case 2: operand is an SSA_NAME - find the stmt that defines it.  **/
2351
2352   def_stmt = SSA_NAME_DEF_STMT (op);
2353   def_stmt_info = vinfo_for_stmt (def_stmt);
2354
2355   if (vect_debug_details (NULL))
2356     {
2357       fprintf (dump_file, "vect_get_vec_def_for_operand: def_stmt: ");
2358       print_generic_expr (dump_file, def_stmt, TDF_SLIM);
2359     }
2360
2361
2362   /** ==> Case 2.1: operand is defined inside the loop.  **/
2363
2364   if (def_stmt_info)
2365     {
2366       /* Get the def from the vectorized stmt.  */
2367
2368       vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
2369       gcc_assert (vec_stmt);
2370       vec_oprnd = TREE_OPERAND (vec_stmt, 0);
2371       return vec_oprnd;
2372     }
2373
2374
2375   /** ==> Case 2.2: operand is defined by the loop-header phi-node -
2376                     it is a reduction/induction.  **/
2377
2378   bb = bb_for_stmt (def_stmt);
2379   if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb))
2380     {
2381       if (vect_debug_details (NULL))
2382         fprintf (dump_file, "reduction/induction - unsupported.");
2383       internal_error ("no support for reduction/induction"); /* FORNOW */
2384     }
2385
2386
2387   /** ==> Case 2.3: operand is defined outside the loop -
2388                     it is a loop invariant.  */
2389
2390   switch (TREE_CODE (def_stmt))
2391     {
2392     case PHI_NODE:
2393       def = PHI_RESULT (def_stmt);
2394       break;
2395     case MODIFY_EXPR:
2396       def = TREE_OPERAND (def_stmt, 0);
2397       break;
2398     case NOP_EXPR:
2399       def = TREE_OPERAND (def_stmt, 0);
2400       gcc_assert (IS_EMPTY_STMT (def_stmt));
2401       def = op;
2402       break;
2403     default:
2404       if (vect_debug_details (NULL))
2405         {
2406           fprintf (dump_file, "unsupported defining stmt: ");
2407           print_generic_expr (dump_file, def_stmt, TDF_SLIM);
2408         }
2409       internal_error ("unsupported defining stmt");
2410     }
2411
2412   /* Build a tree with vector elements. Create 'vec_inv = {inv,inv,..,inv}'  */
2413
2414   if (vect_debug_details (NULL))
2415     fprintf (dump_file, "Create vector_inv.");
2416
2417   for (i = nunits - 1; i >= 0; --i)
2418     {
2419       t = tree_cons (NULL_TREE, def, t);
2420     }
2421
2422   vec_inv = build_constructor (vectype, t);
2423   return vect_init_vector (stmt, vec_inv);
2424 }
2425
2426
2427 /* Function vect_finish_stmt_generation.
2428
2429    Insert a new stmt.  */
2430
2431 static void
2432 vect_finish_stmt_generation (tree stmt, tree vec_stmt, block_stmt_iterator *bsi)
2433 {
2434   bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
2435
2436   if (vect_debug_details (NULL))
2437     {
2438       fprintf (dump_file, "add new stmt: ");
2439       print_generic_expr (dump_file, vec_stmt, TDF_SLIM);
2440     }
2441
2442   /* Make sure bsi points to the stmt that is being vectorized.  */
2443
2444   /* Assumption: any stmts created for the vectorization of stmt S were
2445      inserted before S. BSI is expected to point to S or some new stmt before S.
2446    */
2447
2448   while (stmt != bsi_stmt (*bsi) && !bsi_end_p (*bsi))
2449     bsi_next (bsi);
2450   gcc_assert (stmt == bsi_stmt (*bsi));
2451 }
2452
2453
2454 /* Function vectorizable_assignment.
2455
2456    Check if STMT performs an assignment (copy) that can be vectorized.
2457    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2458    stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2459    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
2460
2461 static bool
2462 vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2463 {
2464   tree vec_dest;
2465   tree scalar_dest;
2466   tree op;
2467   tree vec_oprnd;
2468   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2469   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2470   struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2471   tree new_temp;
2472
2473   /* Is vectorizable assignment?  */
2474
2475   if (TREE_CODE (stmt) != MODIFY_EXPR)
2476     return false;
2477
2478   scalar_dest = TREE_OPERAND (stmt, 0);
2479   if (TREE_CODE (scalar_dest) != SSA_NAME)
2480     return false;
2481
2482   op = TREE_OPERAND (stmt, 1);
2483   if (!vect_is_simple_use (op, loop, NULL))
2484     {
2485       if (vect_debug_details (NULL))
2486         fprintf (dump_file, "use not simple.");
2487       return false;
2488     }
2489
2490   if (!vec_stmt) /* transformation not required.  */
2491     {
2492       STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
2493       return true;
2494     }
2495
2496   /** Transform.  **/
2497   if (vect_debug_details (NULL))
2498     fprintf (dump_file, "transform assignment.");
2499
2500   /* Handle def.  */
2501   vec_dest = vect_create_destination_var (scalar_dest, vectype);
2502
2503   /* Handle use.  */
2504   op = TREE_OPERAND (stmt, 1);
2505   vec_oprnd = vect_get_vec_def_for_operand (op, stmt);
2506
2507   /* Arguments are ready. create the new vector stmt.  */
2508   *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_oprnd);
2509   new_temp = make_ssa_name (vec_dest, *vec_stmt);
2510   TREE_OPERAND (*vec_stmt, 0) = new_temp;
2511   vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2512
2513   return true;
2514 }
2515
2516
2517 /* Function vectorizable_operation.
2518
2519    Check if STMT performs a binary or unary operation that can be vectorized.
2520    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2521    stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2522    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
2523
2524 static bool
2525 vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2526 {
2527   tree vec_dest;
2528   tree scalar_dest;
2529   tree operation;
2530   tree op0, op1 = NULL;
2531   tree vec_oprnd0, vec_oprnd1=NULL;
2532   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2533   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2534   struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2535   int i;
2536   enum tree_code code;
2537   enum machine_mode vec_mode;
2538   tree new_temp;
2539   int op_type;
2540   tree op;
2541   optab optab;
2542
2543   /* Is STMT a vectorizable binary/unary operation?   */
2544   if (TREE_CODE (stmt) != MODIFY_EXPR)
2545     return false;
2546
2547   if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
2548     return false;
2549
2550   operation = TREE_OPERAND (stmt, 1);
2551   code = TREE_CODE (operation);
2552   optab = optab_for_tree_code (code, vectype);
2553
2554   /* Support only unary or binary operations.  */
2555   op_type = TREE_CODE_LENGTH (code);
2556   if (op_type != unary_op && op_type != binary_op)
2557     {
2558       if (vect_debug_details (NULL))
2559         fprintf (dump_file, "num. args = %d (not unary/binary op).", op_type);
2560       return false;
2561     }
2562
2563   for (i = 0; i < op_type; i++)
2564     {
2565       op = TREE_OPERAND (operation, i);
2566       if (!vect_is_simple_use (op, loop, NULL))
2567         {
2568           if (vect_debug_details (NULL))
2569             fprintf (dump_file, "use not simple.");
2570           return false;
2571         }
2572     }
2573
2574   /* Supportable by target?  */
2575   if (!optab)
2576     {
2577       if (vect_debug_details (NULL))
2578         fprintf (dump_file, "no optab.");
2579       return false;
2580     }
2581   vec_mode = TYPE_MODE (vectype);
2582   if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
2583     {
2584       if (vect_debug_details (NULL))
2585         fprintf (dump_file, "op not supported by target.");
2586       return false;
2587     }
2588
2589   if (!vec_stmt) /* transformation not required.  */
2590     {
2591       STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
2592       return true;
2593     }
2594
2595   /** Transform.  **/
2596
2597   if (vect_debug_details (NULL))
2598     fprintf (dump_file, "transform binary/unary operation.");
2599
2600   /* Handle def.  */
2601   scalar_dest = TREE_OPERAND (stmt, 0);
2602   vec_dest = vect_create_destination_var (scalar_dest, vectype);
2603
2604   /* Handle uses.  */
2605   op0 = TREE_OPERAND (operation, 0);
2606   vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
2607
2608   if (op_type == binary_op)
2609     {
2610       op1 = TREE_OPERAND (operation, 1);
2611       vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
2612     }
2613
2614   /* Arguments are ready. create the new vector stmt.  */
2615
2616   if (op_type == binary_op)
2617     *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
2618                 build2 (code, vectype, vec_oprnd0, vec_oprnd1));
2619   else
2620     *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
2621                 build1 (code, vectype, vec_oprnd0));
2622   new_temp = make_ssa_name (vec_dest, *vec_stmt);
2623   TREE_OPERAND (*vec_stmt, 0) = new_temp;
2624   vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2625
2626   return true;
2627 }
2628
2629
2630 /* Function vectorizable_store.
2631
2632    Check if STMT defines a non scalar data-ref (array/pointer/structure) that
2633    can be vectorized.
2634    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2635    stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2636    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
2637
2638 static bool
2639 vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2640 {
2641   tree scalar_dest;
2642   tree data_ref;
2643   tree op;
2644   tree vec_oprnd1;
2645   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2646   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2647   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2648   struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2649   enum machine_mode vec_mode;
2650   tree dummy;
2651   enum dr_alignment_support alignment_support_cheme;
2652
2653   /* Is vectorizable store? */
2654
2655   if (TREE_CODE (stmt) != MODIFY_EXPR)
2656     return false;
2657
2658   scalar_dest = TREE_OPERAND (stmt, 0);
2659   if (TREE_CODE (scalar_dest) != ARRAY_REF
2660       && TREE_CODE (scalar_dest) != INDIRECT_REF)
2661     return false;
2662
2663   op = TREE_OPERAND (stmt, 1);
2664   if (!vect_is_simple_use (op, loop, NULL))
2665     {
2666       if (vect_debug_details (NULL))
2667         fprintf (dump_file, "use not simple.");
2668       return false;
2669     }
2670
2671   vec_mode = TYPE_MODE (vectype);
2672   /* FORNOW. In some cases can vectorize even if data-type not supported
2673      (e.g. - array initialization with 0).  */
2674   if (mov_optab->handlers[(int)vec_mode].insn_code == CODE_FOR_nothing)
2675     return false;
2676
2677   if (!STMT_VINFO_DATA_REF (stmt_info))
2678     return false;
2679
2680
2681   if (!vec_stmt) /* transformation not required.  */
2682     {
2683       STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
2684       return true;
2685     }
2686
2687   /** Transform.  **/
2688
2689   if (vect_debug_details (NULL))
2690     fprintf (dump_file, "transform store");
2691
2692   alignment_support_cheme = vect_supportable_dr_alignment (dr);
2693   gcc_assert (alignment_support_cheme);
2694   gcc_assert (alignment_support_cheme = dr_aligned);  /* FORNOW */
2695
2696   /* Handle use - get the vectorized def from the defining stmt.  */
2697   vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt);
2698
2699   /* Handle def.  */
2700   /* FORNOW: make sure the data reference is aligned.  */
2701   vect_align_data_ref (stmt);
2702   data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
2703   data_ref = build_fold_indirect_ref (data_ref);
2704
2705   /* Arguments are ready. create the new vector stmt.  */
2706   *vec_stmt = build2 (MODIFY_EXPR, vectype, data_ref, vec_oprnd1);
2707   vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2708
2709   return true;
2710 }
2711
2712
2713 /* vectorizable_load.
2714
2715    Check if STMT reads a non scalar data-ref (array/pointer/structure) that
2716    can be vectorized.
2717    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2718    stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2719    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
2720
2721 static bool
2722 vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2723 {
2724   tree scalar_dest;
2725   tree vec_dest = NULL;
2726   tree data_ref = NULL;
2727   tree op;
2728   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2729   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2730   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2731   tree new_temp;
2732   int mode;
2733   tree init_addr;
2734   tree new_stmt;
2735   tree dummy;
2736   basic_block new_bb;
2737   struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2738   edge pe = loop_preheader_edge (loop);
2739   enum dr_alignment_support alignment_support_cheme;
2740
2741   /* Is vectorizable load? */
2742
2743   if (TREE_CODE (stmt) != MODIFY_EXPR)
2744     return false;
2745
2746   scalar_dest = TREE_OPERAND (stmt, 0);
2747   if (TREE_CODE (scalar_dest) != SSA_NAME)
2748     return false;
2749
2750   op = TREE_OPERAND (stmt, 1);
2751   if (TREE_CODE (op) != ARRAY_REF && TREE_CODE (op) != INDIRECT_REF)
2752     return false;
2753
2754   if (!STMT_VINFO_DATA_REF (stmt_info))
2755     return false;
2756
2757   mode = (int) TYPE_MODE (vectype);
2758
2759   /* FORNOW. In some cases can vectorize even if data-type not supported
2760     (e.g. - data copies).  */
2761   if (mov_optab->handlers[mode].insn_code == CODE_FOR_nothing)
2762     {
2763       if (vect_debug_details (loop))
2764         fprintf (dump_file, "Aligned load, but unsupported type.");
2765       return false;
2766     }
2767
2768   if (!vec_stmt) /* transformation not required.  */
2769     {
2770       STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
2771       return true;
2772     }
2773
2774   /** Transform.  **/
2775
2776   if (vect_debug_details (NULL))
2777     fprintf (dump_file, "transform load.");
2778
2779   alignment_support_cheme = vect_supportable_dr_alignment (dr);
2780   gcc_assert (alignment_support_cheme);
2781
2782   if (alignment_support_cheme == dr_aligned
2783       || alignment_support_cheme == dr_unaligned_supported)
2784     {
2785       /* Create:
2786          p = initial_addr;
2787          indx = 0;
2788          loop {
2789            vec_dest = *(p);
2790            indx = indx + 1;
2791          }
2792       */
2793
2794       vec_dest = vect_create_destination_var (scalar_dest, vectype);
2795       data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
2796       if (aligned_access_p (dr))
2797         data_ref = build_fold_indirect_ref (data_ref);
2798       else
2799         {
2800           int mis = DR_MISALIGNMENT (dr);
2801           tree tmis = (mis == -1 ? size_zero_node : size_int (mis));
2802           tmis = size_binop (MULT_EXPR, tmis, size_int(BITS_PER_UNIT));
2803           data_ref = build2 (MISALIGNED_INDIRECT_REF, vectype, data_ref, tmis);
2804         }
2805       new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
2806       new_temp = make_ssa_name (vec_dest, new_stmt);
2807       TREE_OPERAND (new_stmt, 0) = new_temp;
2808       vect_finish_stmt_generation (stmt, new_stmt, bsi);
2809     }
2810   else if (alignment_support_cheme == dr_unaligned_software_pipeline)
2811     {
2812       /* Create:
2813          p1 = initial_addr;
2814          msq_init = *(floor(p1))
2815          p2 = initial_addr + VS - 1;
2816          magic = have_builtin ? builtin_result : initial_address;
2817          indx = 0;
2818          loop {
2819            p2' = p2 + indx * vectype_size
2820            lsq = *(floor(p2'))
2821            vec_dest = realign_load (msq, lsq, magic)
2822            indx = indx + 1;
2823            msq = lsq;
2824          }
2825       */
2826
2827       tree offset;
2828       tree magic;
2829       tree phi_stmt;
2830       tree msq_init;
2831       tree msq, lsq;
2832       tree dataref_ptr;
2833       tree params;
2834
2835       /* <1> Create msq_init = *(floor(p1)) in the loop preheader  */
2836       vec_dest = vect_create_destination_var (scalar_dest, vectype);
2837       data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE,
2838                                            &init_addr, true);
2839       data_ref = build1 (ALIGN_INDIRECT_REF, vectype, data_ref);
2840       new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
2841       new_temp = make_ssa_name (vec_dest, new_stmt);
2842       TREE_OPERAND (new_stmt, 0) = new_temp;
2843       new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
2844       gcc_assert (!new_bb);
2845       msq_init = TREE_OPERAND (new_stmt, 0);
2846
2847
2848       /* <2> Create lsq = *(floor(p2')) in the loop  */
2849       offset = build_int_cst (integer_type_node,
2850                               GET_MODE_NUNITS (TYPE_MODE (vectype)));
2851       offset = int_const_binop (MINUS_EXPR, offset, integer_one_node, 1);
2852       vec_dest = vect_create_destination_var (scalar_dest, vectype);
2853       dataref_ptr = vect_create_data_ref_ptr (stmt, bsi, offset, &dummy, false);
2854       data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
2855       new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
2856       new_temp = make_ssa_name (vec_dest, new_stmt);
2857       TREE_OPERAND (new_stmt, 0) = new_temp;
2858       vect_finish_stmt_generation (stmt, new_stmt, bsi);
2859       lsq = TREE_OPERAND (new_stmt, 0);
2860
2861
2862       /* <3> */
2863       if (targetm.vectorize.builtin_mask_for_load)
2864         {
2865           /* Create permutation mask, if required, in loop preheader.  */
2866           tree builtin_decl;
2867           params = build_tree_list (NULL_TREE, init_addr);
2868           vec_dest = vect_create_destination_var (scalar_dest, vectype);
2869           builtin_decl = targetm.vectorize.builtin_mask_for_load ();
2870           new_stmt = build_function_call_expr (builtin_decl, params);
2871           new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
2872           new_temp = make_ssa_name (vec_dest, new_stmt);
2873           TREE_OPERAND (new_stmt, 0) = new_temp;
2874           new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
2875           gcc_assert (!new_bb);
2876           magic = TREE_OPERAND (new_stmt, 0);
2877
2878           /* Since we have just created a CALL_EXPR, we may need to
2879              rename call-clobbered variables.  */
2880           mark_call_clobbered_vars_to_rename ();
2881         }
2882       else
2883         {
2884           /* Use current address instead of init_addr for reduced reg pressure.
2885            */
2886           magic = dataref_ptr;
2887         }
2888
2889
2890       /* <4> Create msq = phi <msq_init, lsq> in loop  */
2891       vec_dest = vect_create_destination_var (scalar_dest, vectype);
2892       msq = make_ssa_name (vec_dest, NULL_TREE);
2893       phi_stmt = create_phi_node (msq, loop->header); /* CHECKME */
2894       SSA_NAME_DEF_STMT (msq) = phi_stmt;
2895       add_phi_arg (phi_stmt, msq_init, loop_preheader_edge (loop));
2896       add_phi_arg (phi_stmt, lsq, loop_latch_edge (loop));
2897
2898
2899       /* <5> Create <vec_dest = realign_load (msq, lsq, magic)> in loop  */
2900       vec_dest = vect_create_destination_var (scalar_dest, vectype);
2901       new_stmt = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq, magic);
2902       new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
2903       new_temp = make_ssa_name (vec_dest, new_stmt);
2904       TREE_OPERAND (new_stmt, 0) = new_temp;
2905       vect_finish_stmt_generation (stmt, new_stmt, bsi);
2906     }
2907   else
2908     gcc_unreachable ();
2909
2910   *vec_stmt = new_stmt;
2911   return true;
2912 }
2913
2914
2915 /* Function vect_supportable_dr_alignment
2916
2917    Return whether the data reference DR is supported with respect to its
2918    alignment.  */
2919
2920 static enum dr_alignment_support
2921 vect_supportable_dr_alignment (struct data_reference *dr)
2922 {
2923   tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr)));
2924   enum machine_mode mode = (int) TYPE_MODE (vectype);
2925
2926   if (aligned_access_p (dr))
2927     return dr_aligned;
2928
2929   /* Possibly unaligned access.  */
2930
2931   if (DR_IS_READ (dr))
2932     {
2933       if (vec_realign_load_optab->handlers[mode].insn_code != CODE_FOR_nothing
2934           && (!targetm.vectorize.builtin_mask_for_load
2935               || targetm.vectorize.builtin_mask_for_load ()))
2936         return dr_unaligned_software_pipeline;
2937
2938       if (movmisalign_optab->handlers[mode].insn_code != CODE_FOR_nothing)
2939         /* Can't software pipeline the loads, but can at least do them.  */
2940         return dr_unaligned_supported;
2941     }
2942
2943   /* Unsupported.  */
2944   return dr_unaligned_unsupported;
2945 }
2946
2947
2948 /* Function vect_transform_stmt.
2949
2950    Create a vectorized stmt to replace STMT, and insert it at BSI.  */
2951
2952 static bool
2953 vect_transform_stmt (tree stmt, block_stmt_iterator *bsi)
2954 {
2955   bool is_store = false;
2956   tree vec_stmt = NULL_TREE;
2957   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2958   bool done;
2959
2960   switch (STMT_VINFO_TYPE (stmt_info))
2961     {
2962     case op_vec_info_type:
2963       done = vectorizable_operation (stmt, bsi, &vec_stmt);
2964       gcc_assert (done);
2965       break;
2966
2967     case assignment_vec_info_type:
2968       done = vectorizable_assignment (stmt, bsi, &vec_stmt);
2969       gcc_assert (done);
2970       break;
2971
2972     case load_vec_info_type:
2973       done = vectorizable_load (stmt, bsi, &vec_stmt);
2974       gcc_assert (done);
2975       break;
2976
2977     case store_vec_info_type:
2978       done = vectorizable_store (stmt, bsi, &vec_stmt);
2979       gcc_assert (done);
2980       is_store = true;
2981       break;
2982     default:
2983       if (vect_debug_details (NULL))
2984         fprintf (dump_file, "stmt not supported.");
2985       gcc_unreachable ();
2986     }
2987
2988   STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
2989
2990   return is_store;
2991 }
2992
2993
2994 /* This function builds ni_name = number of iterations loop executes
2995    on the loop preheader.  */
2996
2997 static tree
2998 vect_build_loop_niters (loop_vec_info loop_vinfo)
2999 {
3000   tree ni_name, stmt, var;
3001   edge pe;
3002   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3003   tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
3004
3005   var = create_tmp_var (TREE_TYPE (ni), "niters");
3006   add_referenced_tmp_var (var);
3007   ni_name = force_gimple_operand (ni, &stmt, false, var);
3008
3009   pe = loop_preheader_edge (loop);
3010   if (stmt)
3011     {
3012       basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
3013       gcc_assert (!new_bb);
3014     }
3015
3016   return ni_name;
3017 }
3018
3019
3020 /* This function generates the following statements:
3021
3022  ni_name = number of iterations loop executes
3023  ratio = ni_name / vf
3024  ratio_mult_vf_name = ratio * vf
3025
3026  and places them at the loop preheader edge.  */
3027
3028 static void
3029 vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
3030                                  tree *ni_name_ptr,
3031                                  tree *ratio_mult_vf_name_ptr,
3032                                  tree *ratio_name_ptr)
3033 {
3034
3035   edge pe;
3036   basic_block new_bb;
3037   tree stmt, ni_name;
3038   tree var;
3039   tree ratio_name;
3040   tree ratio_mult_vf_name;
3041   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3042   tree ni = LOOP_VINFO_NITERS (loop_vinfo);
3043   int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3044   tree log_vf = build_int_cst (unsigned_type_node, exact_log2 (vf));
3045
3046   pe = loop_preheader_edge (loop);
3047
3048   /* Generate temporary variable that contains
3049      number of iterations loop executes.  */
3050
3051   ni_name = vect_build_loop_niters (loop_vinfo);
3052
3053   /* Create: ratio = ni >> log2(vf) */
3054
3055   var = create_tmp_var (TREE_TYPE (ni), "bnd");
3056   add_referenced_tmp_var (var);
3057   ratio_name = make_ssa_name (var, NULL_TREE);
3058   stmt = build2 (MODIFY_EXPR, void_type_node, ratio_name,
3059            build2 (RSHIFT_EXPR, TREE_TYPE (ni_name), ni_name, log_vf));
3060   SSA_NAME_DEF_STMT (ratio_name) = stmt;
3061
3062   pe = loop_preheader_edge (loop);
3063   new_bb = bsi_insert_on_edge_immediate (pe, stmt);
3064   gcc_assert (!new_bb);
3065
3066   /* Create: ratio_mult_vf = ratio << log2 (vf).  */
3067
3068   var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
3069   add_referenced_tmp_var (var);
3070   ratio_mult_vf_name = make_ssa_name (var, NULL_TREE);
3071   stmt = build2 (MODIFY_EXPR, void_type_node, ratio_mult_vf_name,
3072            build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name), ratio_name, log_vf));
3073   SSA_NAME_DEF_STMT (ratio_mult_vf_name) = stmt;
3074
3075   pe = loop_preheader_edge (loop);
3076   new_bb = bsi_insert_on_edge_immediate (pe, stmt);
3077   gcc_assert (!new_bb);
3078
3079   *ni_name_ptr = ni_name;
3080   *ratio_mult_vf_name_ptr = ratio_mult_vf_name;
3081   *ratio_name_ptr = ratio_name;
3082
3083   return;
3084 }
3085
3086
3087 /*   Function vect_update_ivs_after_vectorizer.
3088
3089      "Advance" the induction variables of LOOP to the value they should take
3090      after the execution of LOOP.  This is currently necessary because the
3091      vectorizer does not handle induction variables that are used after the
3092      loop.  Such a situation occurs when the last iterations of LOOP are
3093      peeled, because:
3094      1. We introduced new uses after LOOP for IVs that were not originally used
3095         after LOOP: the IVs of LOOP are now used by an epilog loop.
3096      2. LOOP is going to be vectorized; this means that it will iterate N/VF
3097         times, whereas the loop IVs should be bumped N times.
3098
3099      Input:
3100      - LOOP - a loop that is going to be vectorized. The last few iterations
3101               of LOOP were peeled.
3102      - NITERS - the number of iterations that LOOP executes (before it is
3103                 vectorized). i.e, the number of times the ivs should be bumped.
3104      - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
3105                   coming out from LOOP on which there are uses of the LOOP ivs
3106                   (this is the path from LOOP->exit to epilog_loop->preheader).
3107
3108                   The new definitions of the ivs are placed in LOOP->exit.
3109                   The phi args associated with the edge UPDATE_E in the bb
3110                   UPDATE_E->dest are updated accordingly.
3111
3112      Assumption 1: Like the rest of the vectorizer, this function assumes
3113      a single loop exit that has a single predecessor.
3114
3115      Assumption 2: The phi nodes in the LOOP header and in update_bb are
3116      organized in the same order.
3117
3118      Assumption 3: The access function of the ivs is simple enough (see
3119      vect_can_advance_ivs_p).  This assumption will be relaxed in the future.
3120
3121      Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
3122      coming out of LOOP on which the ivs of LOOP are used (this is the path
3123      that leads to the epilog loop; other paths skip the epilog loop).  This
3124      path starts with the edge UPDATE_E, and its destination (denoted update_bb)
3125      needs to have its phis updated.
3126  */
3127
3128 static void
3129 vect_update_ivs_after_vectorizer (struct loop *loop, tree niters, edge update_e)
3130 {
3131   basic_block exit_bb = loop->exit_edges[0]->dest;
3132   tree phi, phi1;
3133   basic_block update_bb = update_e->dest;
3134
3135   /* gcc_assert (vect_can_advance_ivs_p (loop)); */
3136
3137   /* Make sure there exists a single-predecessor exit bb:  */
3138   gcc_assert (EDGE_COUNT (exit_bb->preds) == 1);
3139
3140   for (phi = phi_nodes (loop->header), phi1 = phi_nodes (update_bb);
3141        phi && phi1;
3142        phi = PHI_CHAIN (phi), phi1 = PHI_CHAIN (phi1))
3143     {
3144       tree access_fn = NULL;
3145       tree evolution_part;
3146       tree init_expr;
3147       tree step_expr;
3148       tree var, stmt, ni, ni_name;
3149       block_stmt_iterator last_bsi;
3150
3151       /* Skip virtual phi's.  */
3152       if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
3153         {
3154           if (vect_debug_details (NULL))
3155             fprintf (dump_file, "virtual phi. skip.");
3156           continue;
3157         }
3158
3159       access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi));
3160       gcc_assert (access_fn);
3161       evolution_part =
3162          unshare_expr (evolution_part_in_loop_num (access_fn, loop->num));
3163       gcc_assert (evolution_part != NULL_TREE);
3164
3165       /* FORNOW: We do not support IVs whose evolution function is a polynomial
3166          of degree >= 2 or exponential.  */
3167       gcc_assert (!tree_is_chrec (evolution_part));
3168
3169       step_expr = evolution_part;
3170       init_expr = unshare_expr (initial_condition_in_loop_num (access_fn,
3171                                                                loop->num));
3172
3173       ni = build2 (PLUS_EXPR, TREE_TYPE (init_expr),
3174                   build2 (MULT_EXPR, TREE_TYPE (niters),
3175                        niters, step_expr), init_expr);
3176
3177       var = create_tmp_var (TREE_TYPE (init_expr), "tmp");
3178       add_referenced_tmp_var (var);
3179
3180       ni_name = force_gimple_operand (ni, &stmt, false, var);
3181
3182       /* Insert stmt into exit_bb.  */
3183       last_bsi = bsi_last (exit_bb);
3184       if (stmt)
3185         bsi_insert_before (&last_bsi, stmt, BSI_SAME_STMT);
3186
3187       /* Fix phi expressions in the successor bb.  */
3188       gcc_assert (PHI_ARG_DEF_FROM_EDGE (phi1, update_e) ==
3189                   PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0)));
3190       SET_PHI_ARG_DEF (phi1, update_e->dest_idx, ni_name);
3191     }
3192 }
3193
3194
3195 /* Function vect_do_peeling_for_loop_bound
3196
3197    Peel the last iterations of the loop represented by LOOP_VINFO.
3198    The peeled iterations form a new epilog loop.  Given that the loop now
3199    iterates NITERS times, the new epilog loop iterates
3200    NITERS % VECTORIZATION_FACTOR times.
3201
3202    The original loop will later be made to iterate
3203    NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO).  */
3204
3205 static void
3206 vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio,
3207                                 struct loops *loops)
3208 {
3209
3210   tree ni_name, ratio_mult_vf_name;
3211   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3212   struct loop *new_loop;
3213   edge update_e;
3214 #ifdef ENABLE_CHECKING
3215   int loop_num;
3216 #endif
3217
3218   if (vect_debug_details (NULL))
3219     fprintf (dump_file, "\n<<vect_transtorm_for_unknown_loop_bound>>\n");
3220
3221   /* Generate the following variables on the preheader of original loop:
3222
3223      ni_name = number of iteration the original loop executes
3224      ratio = ni_name / vf
3225      ratio_mult_vf_name = ratio * vf  */
3226   vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
3227                                    &ratio_mult_vf_name, ratio);
3228
3229   /* Update loop info.  */
3230   loop->pre_header = loop_preheader_edge (loop)->src;
3231   loop->pre_header_edges[0] = loop_preheader_edge (loop);
3232
3233 #ifdef ENABLE_CHECKING
3234   loop_num  = loop->num;
3235 #endif
3236   new_loop = slpeel_tree_peel_loop_to_edge (loop, loops, loop->exit_edges[0],
3237                                             ratio_mult_vf_name, ni_name, false);
3238 #ifdef ENABLE_CHECKING
3239   gcc_assert (new_loop);
3240   gcc_assert (loop_num == loop->num);
3241   slpeel_verify_cfg_after_peeling (loop, new_loop);
3242 #endif
3243
3244   /* A guard that controls whether the new_loop is to be executed or skipped
3245      is placed in LOOP->exit.  LOOP->exit therefore has two successors - one
3246      is the preheader of NEW_LOOP, where the IVs from LOOP are used.  The other
3247      is a bb after NEW_LOOP, where these IVs are not used.  Find the edge that
3248      is on the path where the LOOP IVs are used and need to be updated.  */
3249
3250   if (EDGE_PRED (new_loop->pre_header, 0)->src == loop->exit_edges[0]->dest)
3251     update_e = EDGE_PRED (new_loop->pre_header, 0);
3252   else
3253     update_e = EDGE_PRED (new_loop->pre_header, 1);
3254
3255   /* Update IVs of original loop as if they were advanced
3256      by ratio_mult_vf_name steps.  */
3257   vect_update_ivs_after_vectorizer (loop, ratio_mult_vf_name, update_e);
3258
3259   /* After peeling we have to reset scalar evolution analyzer.  */
3260   scev_reset ();
3261
3262   return;
3263 }
3264
3265
3266 /* Function vect_gen_niters_for_prolog_loop
3267
3268    Set the number of iterations for the loop represented by LOOP_VINFO
3269    to the minimum between LOOP_NITERS (the original iteration count of the loop)
3270    and the misalignment of DR - the first data reference recorded in
3271    LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO).  As a result, after the execution of
3272    this loop, the data reference DR will refer to an aligned location.
3273
3274    The following computation is generated:
3275
3276    compute address misalignment in bytes:
3277    addr_mis = addr & (vectype_size - 1)
3278
3279    prolog_niters = min ( LOOP_NITERS , (VF - addr_mis/elem_size)&(VF-1) )
3280
3281    (elem_size = element type size; an element is the scalar element
3282         whose type is the inner type of the vectype)  */
3283
3284 static tree
3285 vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
3286 {
3287   struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
3288   int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3289   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3290   tree var, stmt;
3291   tree iters, iters_name;
3292   edge pe;
3293   basic_block new_bb;
3294   tree dr_stmt = DR_STMT (dr);
3295   stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
3296   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3297   int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
3298   tree elem_misalign;
3299   tree byte_misalign;
3300   tree new_stmts = NULL_TREE;
3301   tree start_addr =
3302         vect_create_addr_base_for_vector_ref (dr_stmt, &new_stmts, NULL_TREE);
3303   tree ptr_type = TREE_TYPE (start_addr);
3304   tree size = TYPE_SIZE (ptr_type);
3305   tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
3306   tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1);
3307   tree vf_minus_1 = build_int_cst (unsigned_type_node, vf - 1);
3308   tree niters_type = TREE_TYPE (loop_niters);
3309   tree elem_size_log =
3310         build_int_cst (unsigned_type_node, exact_log2 (vectype_align/vf));
3311   tree vf_tree = build_int_cst (unsigned_type_node, vf);
3312
3313   pe = loop_preheader_edge (loop);
3314   new_bb = bsi_insert_on_edge_immediate (pe, new_stmts);
3315   gcc_assert (!new_bb);
3316
3317   /* Create:  byte_misalign = addr & (vectype_size - 1)  */
3318   byte_misalign = build2 (BIT_AND_EXPR, type, start_addr, vectype_size_minus_1);
3319
3320   /* Create:  elem_misalign = byte_misalign / element_size  */
3321   elem_misalign =
3322         build2 (RSHIFT_EXPR, unsigned_type_node, byte_misalign, elem_size_log);
3323
3324   /* Create:  (niters_type) (VF - elem_misalign)&(VF - 1)  */
3325   iters = build2 (MINUS_EXPR, unsigned_type_node, vf_tree, elem_misalign);
3326   iters = build2 (BIT_AND_EXPR, unsigned_type_node, iters, vf_minus_1);
3327   iters = fold_convert (niters_type, iters);
3328
3329   /* Create:  prolog_loop_niters = min (iters, loop_niters) */
3330   /* If the loop bound is known at compile time we already verified that it is
3331      greater than vf; since the misalignment ('iters') is at most vf, there's
3332      no need to generate the MIN_EXPR in this case.  */
3333   if (TREE_CODE (loop_niters) != INTEGER_CST)
3334     iters = build2 (MIN_EXPR, niters_type, iters, loop_niters);
3335
3336   var = create_tmp_var (niters_type, "prolog_loop_niters");
3337   add_referenced_tmp_var (var);
3338   iters_name = force_gimple_operand (iters, &stmt, false, var);
3339
3340   /* Insert stmt on loop preheader edge.  */
3341   pe = loop_preheader_edge (loop);
3342   if (stmt)
3343     {
3344       basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
3345       gcc_assert (!new_bb);
3346     }
3347
3348   return iters_name;
3349 }
3350
3351
3352 /* Function vect_update_inits_of_dr
3353
3354    NITERS iterations were peeled from LOOP.  DR represents a data reference
3355    in LOOP.  This function updates the information recorded in DR to
3356    account for the fact that the first NITERS iterations had already been
3357    executed.  Specifically, it updates the OFFSET field of stmt_info.  */
3358
3359 static void
3360 vect_update_inits_of_dr (struct data_reference *dr, tree niters)
3361 {
3362   stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
3363   tree offset = STMT_VINFO_VECT_INIT_OFFSET (stmt_info);
3364
3365   niters = fold (build2 (MULT_EXPR, TREE_TYPE (niters), niters,
3366                          STMT_VINFO_VECT_STEP (stmt_info)));
3367   offset = fold (build2 (PLUS_EXPR, TREE_TYPE (offset), offset, niters));
3368   STMT_VINFO_VECT_INIT_OFFSET (stmt_info) = offset;
3369 }
3370
3371
3372 /* Function vect_update_inits_of_drs
3373
3374    NITERS iterations were peeled from the loop represented by LOOP_VINFO.
3375    This function updates the information recorded for the data references in
3376    the loop to account for the fact that the first NITERS iterations had
3377    already been executed.  Specifically, it updates the initial_condition of the
3378    access_function of all the data_references in the loop.  */
3379
3380 static void
3381 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
3382 {
3383   unsigned int i;
3384   varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
3385   varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
3386
3387   if (dump_file && (dump_flags & TDF_DETAILS))
3388     fprintf (dump_file, "\n<<vect_update_inits_of_dr>>\n");
3389
3390   for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
3391     {
3392       struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
3393       vect_update_inits_of_dr (dr, niters);
3394     }
3395
3396   for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
3397     {
3398       struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
3399       vect_update_inits_of_dr (dr, niters);
3400     }
3401 }
3402
3403
3404 /* Function vect_do_peeling_for_alignment
3405
3406    Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
3407    'niters' is set to the misalignment of one of the data references in the
3408    loop, thereby forcing it to refer to an aligned location at the beginning
3409    of the execution of this loop.  The data reference for which we are
3410    peeling is recorded in LOOP_VINFO_UNALIGNED_DR.  */
3411
3412 static void
3413 vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, struct loops *loops)
3414 {
3415   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3416   tree niters_of_prolog_loop, ni_name;
3417   tree n_iters;
3418   struct loop *new_loop;
3419
3420   if (vect_debug_details (NULL))
3421     fprintf (dump_file, "\n<<vect_do_peeling_for_alignment>>\n");
3422
3423   ni_name = vect_build_loop_niters (loop_vinfo);
3424   niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name);
3425
3426   /* Peel the prolog loop and iterate it niters_of_prolog_loop.  */
3427   new_loop =
3428         slpeel_tree_peel_loop_to_edge (loop, loops, loop_preheader_edge (loop),
3429                                        niters_of_prolog_loop, ni_name, true);
3430 #ifdef ENABLE_CHECKING
3431   gcc_assert (new_loop);
3432   slpeel_verify_cfg_after_peeling (new_loop, loop);
3433 #endif
3434
3435   /* Update number of times loop executes.  */
3436   n_iters = LOOP_VINFO_NITERS (loop_vinfo);
3437   LOOP_VINFO_NITERS (loop_vinfo) =
3438     build2 (MINUS_EXPR, TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop);
3439
3440   /* Update the init conditions of the access functions of all data refs.  */
3441   vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop);
3442
3443   /* After peeling we have to reset scalar evolution analyzer.  */
3444   scev_reset ();
3445
3446   return;
3447 }
3448
3449
3450 /* Function vect_transform_loop.
3451
3452    The analysis phase has determined that the loop is vectorizable.
3453    Vectorize the loop - created vectorized stmts to replace the scalar
3454    stmts in the loop, and update the loop exit condition.  */
3455
3456 static void
3457 vect_transform_loop (loop_vec_info loop_vinfo,
3458                      struct loops *loops ATTRIBUTE_UNUSED)
3459 {
3460   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3461   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
3462   int nbbs = loop->num_nodes;
3463   block_stmt_iterator si;
3464   int i;
3465   tree ratio = NULL;
3466   int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3467
3468   if (vect_debug_details (NULL))
3469     fprintf (dump_file, "\n<<vec_transform_loop>>\n");
3470
3471
3472   /* Peel the loop if there are data refs with unknown alignment.
3473      Only one data ref with unknown store is allowed.  */
3474
3475   if (LOOP_DO_PEELING_FOR_ALIGNMENT (loop_vinfo))
3476     vect_do_peeling_for_alignment (loop_vinfo, loops);
3477
3478   /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
3479      compile time constant), or it is a constant that doesn't divide by the
3480      vectorization factor, then an epilog loop needs to be created.
3481      We therefore duplicate the loop: the original loop will be vectorized,
3482      and will compute the first (n/VF) iterations. The second copy of the loop
3483      will remain scalar and will compute the remaining (n%VF) iterations.
3484      (VF is the vectorization factor).  */
3485
3486   if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3487       || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3488           && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0))
3489     vect_do_peeling_for_loop_bound (loop_vinfo, &ratio, loops);
3490   else
3491     ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
3492                 LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);
3493
3494   /* 1) Make sure the loop header has exactly two entries
3495      2) Make sure we have a preheader basic block.  */
3496
3497   gcc_assert (EDGE_COUNT (loop->header->preds) == 2);
3498
3499   loop_split_edge_with (loop_preheader_edge (loop), NULL);
3500
3501
3502   /* FORNOW: the vectorizer supports only loops which body consist
3503      of one basic block (header + empty latch). When the vectorizer will
3504      support more involved loop forms, the order by which the BBs are
3505      traversed need to be reconsidered.  */
3506
3507   for (i = 0; i < nbbs; i++)
3508     {
3509       basic_block bb = bbs[i];
3510
3511       for (si = bsi_start (bb); !bsi_end_p (si);)
3512         {
3513           tree stmt = bsi_stmt (si);
3514           stmt_vec_info stmt_info;
3515           bool is_store;
3516
3517           if (vect_debug_details (NULL))
3518             {
3519               fprintf (dump_file, "------>vectorizing statement: ");
3520               print_generic_expr (dump_file, stmt, TDF_SLIM);
3521             }
3522           stmt_info = vinfo_for_stmt (stmt);
3523           gcc_assert (stmt_info);
3524           if (!STMT_VINFO_RELEVANT_P (stmt_info))
3525             {
3526               bsi_next (&si);
3527               continue;
3528             }
3529 #ifdef ENABLE_CHECKING
3530           /* FORNOW: Verify that all stmts operate on the same number of
3531                      units and no inner unrolling is necessary.  */
3532           gcc_assert
3533                 (GET_MODE_NUNITS (TYPE_MODE (STMT_VINFO_VECTYPE (stmt_info)))
3534                  == vectorization_factor);
3535 #endif
3536           /* -------- vectorize statement ------------ */
3537           if (vect_debug_details (NULL))
3538             fprintf (dump_file, "transform statement.");
3539
3540           is_store = vect_transform_stmt (stmt, &si);
3541           if (is_store)
3542             {
3543               /* free the attached stmt_vec_info and remove the stmt.  */
3544               stmt_ann_t ann = stmt_ann (stmt);
3545               free (stmt_info);
3546               set_stmt_info (ann, NULL);
3547               bsi_remove (&si);
3548               continue;
3549             }
3550
3551           bsi_next (&si);
3552         }                       /* stmts in BB */
3553     }                           /* BBs in loop */
3554
3555   slpeel_make_loop_iterate_ntimes (loop, ratio);
3556
3557   if (vect_debug_details (loop))
3558     fprintf (dump_file,"Success! loop vectorized.");
3559   if (vect_debug_stats (loop))
3560     fprintf (dump_file, "LOOP VECTORIZED.");
3561 }
3562
3563
3564 /* Function vect_is_simple_use.
3565
3566    Input:
3567    LOOP - the loop that is being vectorized.
3568    OPERAND - operand of a stmt in LOOP.
3569    DEF - the defining stmt in case OPERAND is an SSA_NAME.
3570
3571    Returns whether a stmt with OPERAND can be vectorized.
3572    Supportable operands are constants, loop invariants, and operands that are
3573    defined by the current iteration of the loop. Unsupportable operands are
3574    those that are defined by a previous iteration of the loop (as is the case
3575    in reduction/induction computations).  */
3576
3577 static bool
3578 vect_is_simple_use (tree operand, struct loop *loop, tree *def)
3579 {
3580   tree def_stmt;
3581   basic_block bb;
3582
3583   if (def)
3584     *def = NULL_TREE;
3585
3586   if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
3587     return true;
3588
3589   if (TREE_CODE (operand) != SSA_NAME)
3590     return false;
3591
3592   def_stmt = SSA_NAME_DEF_STMT (operand);
3593   if (def_stmt == NULL_TREE )
3594     {
3595       if (vect_debug_details (NULL))
3596         fprintf (dump_file, "no def_stmt.");
3597       return false;
3598     }
3599
3600   /* empty stmt is expected only in case of a function argument.
3601      (Otherwise - we expect a phi_node or a modify_expr).  */
3602   if (IS_EMPTY_STMT (def_stmt))
3603     {
3604       tree arg = TREE_OPERAND (def_stmt, 0);
3605       if (TREE_CODE (arg) == INTEGER_CST || TREE_CODE (arg) == REAL_CST)
3606         return true;
3607       if (vect_debug_details (NULL))
3608         {
3609           fprintf (dump_file, "Unexpected empty stmt: ");
3610           print_generic_expr (dump_file, def_stmt, TDF_SLIM);
3611         }
3612       return false;
3613     }
3614
3615   /* phi_node inside the loop indicates an induction/reduction pattern.
3616      This is not supported yet.  */
3617   bb = bb_for_stmt (def_stmt);
3618   if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb))
3619     {
3620       if (vect_debug_details (NULL))
3621         fprintf (dump_file, "reduction/induction - unsupported.");
3622       return false; /* FORNOW: not supported yet.  */
3623     }
3624
3625   /* Expecting a modify_expr or a phi_node.  */
3626   if (TREE_CODE (def_stmt) == MODIFY_EXPR
3627       || TREE_CODE (def_stmt) == PHI_NODE)
3628     {
3629       if (def)
3630         *def = def_stmt;
3631       return true;
3632     }
3633
3634   return false;
3635 }
3636
3637
3638 /* Function vect_analyze_operations.
3639
3640    Scan the loop stmts and make sure they are all vectorizable.  */
3641
3642 static bool
3643 vect_analyze_operations (loop_vec_info loop_vinfo)
3644 {
3645   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3646   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
3647   int nbbs = loop->num_nodes;
3648   block_stmt_iterator si;
3649   unsigned int vectorization_factor = 0;
3650   int i;
3651   bool ok;
3652   tree scalar_type;
3653
3654   if (vect_debug_details (NULL))
3655     fprintf (dump_file, "\n<<vect_analyze_operations>>\n");
3656
3657   for (i = 0; i < nbbs; i++)
3658     {
3659       basic_block bb = bbs[i];
3660
3661       for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
3662         {
3663           tree stmt = bsi_stmt (si);
3664           unsigned int nunits;
3665           stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3666           tree vectype;
3667
3668           if (vect_debug_details (NULL))
3669             {
3670               fprintf (dump_file, "==> examining statement: ");
3671               print_generic_expr (dump_file, stmt, TDF_SLIM);
3672             }
3673
3674           gcc_assert (stmt_info);
3675
3676           /* skip stmts which do not need to be vectorized.
3677              this is expected to include:
3678              - the COND_EXPR which is the loop exit condition
3679              - any LABEL_EXPRs in the loop
3680              - computations that are used only for array indexing or loop
3681              control  */
3682
3683           if (!STMT_VINFO_RELEVANT_P (stmt_info))
3684             {
3685               if (vect_debug_details (NULL))
3686                 fprintf (dump_file, "irrelevant.");
3687               continue;
3688             }
3689
3690           if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
3691             {
3692               if (vect_debug_stats (loop) || vect_debug_details (loop))
3693                 {
3694                   fprintf (dump_file, "not vectorized: vector stmt in loop:");
3695                   print_generic_expr (dump_file, stmt, TDF_SLIM);
3696                 }
3697               return false;
3698             }
3699
3700           if (STMT_VINFO_DATA_REF (stmt_info))
3701             scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info)));
3702           else if (TREE_CODE (stmt) == MODIFY_EXPR)
3703             scalar_type = TREE_TYPE (TREE_OPERAND (stmt, 0));
3704           else
3705             scalar_type = TREE_TYPE (stmt);
3706
3707           if (vect_debug_details (NULL))
3708             {
3709               fprintf (dump_file, "get vectype for scalar type:  ");
3710               print_generic_expr (dump_file, scalar_type, TDF_SLIM);
3711             }
3712
3713           vectype = get_vectype_for_scalar_type (scalar_type);
3714           if (!vectype)
3715             {
3716               if (vect_debug_stats (loop) || vect_debug_details (loop))
3717                 {
3718                   fprintf (dump_file, "not vectorized: unsupported data-type ");
3719                   print_generic_expr (dump_file, scalar_type, TDF_SLIM);
3720                 }
3721               return false;
3722             }
3723
3724           if (vect_debug_details (NULL))
3725             {
3726               fprintf (dump_file, "vectype: ");
3727               print_generic_expr (dump_file, vectype, TDF_SLIM);
3728             }
3729           STMT_VINFO_VECTYPE (stmt_info) = vectype;
3730
3731           ok = (vectorizable_operation (stmt, NULL, NULL)
3732                 || vectorizable_assignment (stmt, NULL, NULL)
3733                 || vectorizable_load (stmt, NULL, NULL)
3734                 || vectorizable_store (stmt, NULL, NULL));
3735
3736           if (!ok)
3737             {
3738               if (vect_debug_stats (loop) || vect_debug_details (loop))
3739                 {
3740                   fprintf (dump_file, "not vectorized: stmt not supported: ");
3741                   print_generic_expr (dump_file, stmt, TDF_SLIM);
3742                 }
3743               return false;
3744             }
3745
3746           nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
3747           if (vect_debug_details (NULL))
3748             fprintf (dump_file, "nunits = %d", nunits);
3749
3750           if (vectorization_factor)
3751             {
3752               /* FORNOW: don't allow mixed units.
3753                  This restriction will be relaxed in the future.  */
3754               if (nunits != vectorization_factor)
3755                 {
3756                   if (vect_debug_stats (loop) || vect_debug_details (loop))
3757                     fprintf (dump_file, "not vectorized: mixed data-types");
3758                   return false;
3759                 }
3760             }
3761           else
3762             vectorization_factor = nunits;
3763
3764 #ifdef ENABLE_CHECKING
3765           gcc_assert (GET_MODE_SIZE (TYPE_MODE (scalar_type))
3766                         * vectorization_factor == UNITS_PER_SIMD_WORD);
3767 #endif
3768         }
3769     }
3770
3771   /* TODO: Analyze cost. Decide if worth while to vectorize.  */
3772
3773   if (vectorization_factor <= 1)
3774     {
3775       if (vect_debug_stats (loop) || vect_debug_details (loop))
3776         fprintf (dump_file, "not vectorized: unsupported data-type");
3777       return false;
3778     }
3779   LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
3780
3781   if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) && vect_debug_details (NULL))
3782     fprintf (dump_file,
3783         "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
3784         vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));
3785
3786   if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3787       && LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor)
3788     {
3789       if (vect_debug_stats (loop) || vect_debug_details (loop))
3790         fprintf (dump_file, "not vectorized: iteration count too small.");
3791       return false;
3792     }
3793
3794   if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3795       || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0)
3796     {
3797       if (vect_debug_stats (loop) || vect_debug_details (loop))
3798         fprintf (dump_file, "epilog loop required.");
3799       if (!vect_can_advance_ivs_p (loop))
3800         {
3801           if (vect_debug_stats (loop) || vect_debug_details (loop))
3802             fprintf (dump_file, "not vectorized: can't create epilog loop 1.");
3803           return false;
3804         }
3805       if (!slpeel_can_duplicate_loop_p (loop, loop->exit_edges[0]))
3806         {
3807           if (vect_debug_stats (loop) || vect_debug_details (loop))
3808             fprintf (dump_file, "not vectorized: can't create epilog loop 2.");
3809           return false;
3810         }
3811     }
3812
3813   return true;
3814 }
3815
3816
3817 /* Function exist_non_indexing_operands_for_use_p
3818
3819    USE is one of the uses attached to STMT. Check if USE is
3820    used in STMT for anything other than indexing an array.  */
3821
3822 static bool
3823 exist_non_indexing_operands_for_use_p (tree use, tree stmt)
3824 {
3825   tree operand;
3826   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3827
3828   /* USE corresponds to some operand in STMT. If there is no data
3829      reference in STMT, then any operand that corresponds to USE
3830      is not indexing an array.  */
3831   if (!STMT_VINFO_DATA_REF (stmt_info))
3832     return true;
3833
3834   /* STMT has a data_ref. FORNOW this means that its of one of
3835      the following forms:
3836      -1- ARRAY_REF = var
3837      -2- var = ARRAY_REF
3838      (This should have been verified in analyze_data_refs).
3839
3840      'var' in the second case corresponds to a def, not a use,
3841      so USE cannot correspond to any operands that are not used
3842      for array indexing.
3843
3844      Therefore, all we need to check is if STMT falls into the
3845      first case, and whether var corresponds to USE.  */
3846
3847   if (TREE_CODE (TREE_OPERAND (stmt, 0)) == SSA_NAME)
3848     return false;
3849
3850   operand = TREE_OPERAND (stmt, 1);
3851
3852   if (TREE_CODE (operand) != SSA_NAME)
3853     return false;
3854
3855   if (operand == use)
3856     return true;
3857
3858   return false;
3859 }
3860
3861
3862 /* Function vect_is_simple_iv_evolution.
3863
3864    FORNOW: A simple evolution of an induction variables in the loop is
3865    considered a polynomial evolution with constant step.  */
3866
3867 static bool
3868 vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
3869                                 tree * step, bool strict)
3870 {
3871   tree init_expr;
3872   tree step_expr;
3873
3874   tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
3875
3876   /* When there is no evolution in this loop, the evolution function
3877      is not "simple".  */
3878   if (evolution_part == NULL_TREE)
3879     return false;
3880
3881   /* When the evolution is a polynomial of degree >= 2
3882      the evolution function is not "simple".  */
3883   if (tree_is_chrec (evolution_part))
3884     return false;
3885
3886   step_expr = evolution_part;
3887   init_expr = unshare_expr (initial_condition_in_loop_num (access_fn, loop_nb));
3888
3889   if (vect_debug_details (NULL))
3890     {
3891       fprintf (dump_file, "step: ");
3892       print_generic_expr (dump_file, step_expr, TDF_SLIM);
3893       fprintf (dump_file, ",  init: ");
3894       print_generic_expr (dump_file, init_expr, TDF_SLIM);
3895     }
3896
3897   *init = init_expr;
3898   *step = step_expr;
3899
3900   if (TREE_CODE (step_expr) != INTEGER_CST)
3901     {
3902       if (vect_debug_details (NULL))
3903         fprintf (dump_file, "step unknown.");
3904       return false;
3905     }
3906
3907   if (strict)
3908     if (!integer_onep (step_expr))
3909       {
3910         if (vect_debug_details (NULL))
3911           print_generic_expr (dump_file, step_expr, TDF_SLIM);
3912         return false;
3913       }
3914
3915   return true;
3916 }
3917
3918
3919 /* Function vect_analyze_scalar_cycles.
3920
3921    Examine the cross iteration def-use cycles of scalar variables, by
3922    analyzing the loop (scalar) PHIs; verify that the cross iteration def-use
3923    cycles that they represent do not impede vectorization.
3924
3925    FORNOW: Reduction as in the following loop, is not supported yet:
3926               loop1:
3927               for (i=0; i<N; i++)
3928                  sum += a[i];
3929            The cross-iteration cycle corresponding to variable 'sum' will be
3930            considered too complicated and will impede vectorization.
3931
3932    FORNOW: Induction as in the following loop, is not supported yet:
3933               loop2:
3934               for (i=0; i<N; i++)
3935                  a[i] = i;
3936
3937            However, the following loop *is* vectorizable:
3938               loop3:
3939               for (i=0; i<N; i++)
3940                  a[i] = b[i];
3941
3942            In both loops there exists a def-use cycle for the variable i:
3943               loop: i_2 = PHI (i_0, i_1)
3944                     a[i_2] = ...;
3945                     i_1 = i_2 + 1;
3946                     GOTO loop;
3947
3948            The evolution of the above cycle is considered simple enough,
3949            however, we also check that the cycle does not need to be
3950            vectorized, i.e - we check that the variable that this cycle
3951            defines is only used for array indexing or in stmts that do not
3952            need to be vectorized. This is not the case in loop2, but it
3953            *is* the case in loop3.  */
3954
3955 static bool
3956 vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
3957 {
3958   tree phi;
3959   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3960   basic_block bb = loop->header;
3961   tree dummy;
3962
3963   if (vect_debug_details (NULL))
3964     fprintf (dump_file, "\n<<vect_analyze_scalar_cycles>>\n");
3965
3966   for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
3967     {
3968       tree access_fn = NULL;
3969
3970       if (vect_debug_details (NULL))
3971         {
3972           fprintf (dump_file, "Analyze phi: ");
3973           print_generic_expr (dump_file, phi, TDF_SLIM);
3974         }
3975
3976       /* Skip virtual phi's. The data dependences that are associated with
3977          virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */
3978
3979       if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
3980         {
3981           if (vect_debug_details (NULL))
3982             fprintf (dump_file, "virtual phi. skip.");
3983           continue;
3984         }
3985
3986       /* Analyze the evolution function.  */
3987
3988       /* FORNOW: The only scalar cross-iteration cycles that we allow are
3989          those of loop induction variables; This property is verified here.
3990
3991          Furthermore, if that induction variable is used in an operation
3992          that needs to be vectorized (i.e, is not solely used to index
3993          arrays and check the exit condition) - we do not support its
3994          vectorization yet. This property is verified in vect_is_simple_use,
3995          during vect_analyze_operations.  */
3996
3997       access_fn = /* instantiate_parameters
3998                      (loop,*/
3999          analyze_scalar_evolution (loop, PHI_RESULT (phi));
4000
4001       if (!access_fn)
4002         {
4003           if (vect_debug_stats (loop) || vect_debug_details (loop))
4004             fprintf (dump_file, "not vectorized: unsupported scalar cycle.");
4005           return false;
4006         }
4007
4008       if (vect_debug_details (NULL))
4009         {
4010            fprintf (dump_file, "Access function of PHI: ");
4011            print_generic_expr (dump_file, access_fn, TDF_SLIM);
4012         }
4013
4014       if (!vect_is_simple_iv_evolution (loop->num, access_fn, &dummy,
4015                                         &dummy, false))
4016         {
4017           if (vect_debug_stats (loop) || vect_debug_details (loop))
4018             fprintf (dump_file, "not vectorized: unsupported scalar cycle.");
4019           return false;
4020         }
4021     }
4022
4023   return true;
4024 }
4025
4026
4027 /* Function vect_analyze_data_ref_dependence.
4028
4029    Return TRUE if there (might) exist a dependence between a memory-reference
4030    DRA and a memory-reference DRB.  */
4031
4032 static bool
4033 vect_analyze_data_ref_dependence (struct data_reference *dra,
4034                                   struct data_reference *drb,
4035                                   struct loop *loop)
4036 {
4037   bool differ_p;
4038   struct data_dependence_relation *ddr;
4039
4040   if (!array_base_name_differ_p (dra, drb, &differ_p))
4041     {
4042       if (vect_debug_stats (loop) || vect_debug_details (loop))
4043         {
4044           fprintf (dump_file,
4045                 "not vectorized: can't determine dependence between: ");
4046           print_generic_expr (dump_file, DR_REF (dra), TDF_SLIM);
4047           fprintf (dump_file, " and ");
4048           print_generic_expr (dump_file, DR_REF (drb), TDF_SLIM);
4049         }
4050       return true;
4051     }
4052
4053   if (differ_p)
4054     return false;
4055
4056   ddr = initialize_data_dependence_relation (dra, drb);
4057   compute_affine_dependence (ddr);
4058
4059   if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4060     return false;
4061
4062   if (vect_debug_stats (loop) || vect_debug_details (loop))
4063     {
4064       fprintf (dump_file,
4065         "not vectorized: possible dependence between data-refs ");
4066       print_generic_expr (dump_file, DR_REF (dra), TDF_SLIM);
4067       fprintf (dump_file, " and ");
4068       print_generic_expr (dump_file, DR_REF (drb), TDF_SLIM);
4069     }
4070
4071   return true;
4072 }
4073
4074
4075 /* Function vect_analyze_data_ref_dependences.
4076
4077    Examine all the data references in the loop, and make sure there do not
4078    exist any data dependences between them.
4079
4080    TODO: dependences which distance is greater than the vectorization factor
4081          can be ignored.  */
4082
4083 static bool
4084 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
4085 {
4086   unsigned int i, j;
4087   varray_type loop_write_refs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4088   varray_type loop_read_refs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4089   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4090
4091   /* Examine store-store (output) dependences.  */
4092
4093   if (vect_debug_details (NULL))
4094     fprintf (dump_file, "\n<<vect_analyze_dependences>>\n");
4095
4096   if (vect_debug_details (NULL))
4097     fprintf (dump_file, "compare all store-store pairs.");
4098
4099   for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_refs); i++)
4100     {
4101       for (j = i + 1; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
4102         {
4103           struct data_reference *dra =
4104             VARRAY_GENERIC_PTR (loop_write_refs, i);
4105           struct data_reference *drb =
4106             VARRAY_GENERIC_PTR (loop_write_refs, j);
4107           if (vect_analyze_data_ref_dependence (dra, drb, loop))
4108             return false;
4109         }
4110     }
4111
4112   /* Examine load-store (true/anti) dependences.  */
4113
4114   if (vect_debug_details (NULL))
4115     fprintf (dump_file, "compare all load-store pairs.");
4116
4117   for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_refs); i++)
4118     {
4119       for (j = 0; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
4120         {
4121           struct data_reference *dra = VARRAY_GENERIC_PTR (loop_read_refs, i);
4122           struct data_reference *drb =
4123             VARRAY_GENERIC_PTR (loop_write_refs, j);
4124           if (vect_analyze_data_ref_dependence (dra, drb, loop))
4125             return false;
4126         }
4127     }
4128
4129   return true;
4130 }
4131
4132
4133 /* Function vect_compute_data_ref_alignment
4134
4135    Compute the misalignment of the data reference DR.
4136
4137    Output:
4138    1. If during the misalignment computation it is found that the data reference
4139       cannot be vectorized then false is returned.
4140    2. DR_MISALIGNMENT (DR) is defined.
4141
4142    FOR NOW: No analysis is actually performed. Misalignment is calculated
4143    only for trivial cases. TODO.  */
4144
4145 static bool
4146 vect_compute_data_ref_alignment (struct data_reference *dr)
4147 {
4148   tree stmt = DR_STMT (dr);
4149   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4150   tree ref = DR_REF (dr);
4151   tree vectype;
4152   tree base, alignment;
4153   bool base_aligned_p;
4154   tree misalign;
4155
4156   if (vect_debug_details (NULL))
4157     fprintf (dump_file, "vect_compute_data_ref_alignment:");
4158
4159   /* Initialize misalignment to unknown.  */
4160   DR_MISALIGNMENT (dr) = -1;
4161
4162   misalign = STMT_VINFO_VECT_MISALIGNMENT (stmt_info);
4163   base_aligned_p = STMT_VINFO_VECT_BASE_ALIGNED_P (stmt_info);
4164   base = STMT_VINFO_VECT_DR_BASE (stmt_info);
4165   vectype = STMT_VINFO_VECTYPE (stmt_info);
4166
4167   if (!misalign)
4168     {
4169       if (vect_debug_details (NULL))
4170         {
4171           fprintf (dump_file, "Unknown alignment for access: ");
4172           print_generic_expr (dump_file, base, TDF_SLIM);
4173         }
4174       return true;
4175     }
4176
4177   if (!base_aligned_p)
4178     {
4179       if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype)))
4180         {
4181           if (vect_debug_details (NULL))
4182             {
4183               fprintf (dump_file, "can't force alignment of ref: ");
4184               print_generic_expr (dump_file, ref, TDF_SLIM);
4185             }
4186           return true;
4187         }
4188
4189       /* Force the alignment of the decl.
4190          NOTE: This is the only change to the code we make during
4191          the analysis phase, before deciding to vectorize the loop.  */
4192       if (vect_debug_details (NULL))
4193         fprintf (dump_file, "force alignment");
4194       DECL_ALIGN (base) = TYPE_ALIGN (vectype);
4195       DECL_USER_ALIGN (base) = 1;
4196     }
4197
4198   /* At this point we assume that the base is aligned.  */
4199   gcc_assert (base_aligned_p
4200               || (TREE_CODE (base) == VAR_DECL
4201                   && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
4202
4203   /* Alignment required, in bytes:  */
4204   alignment = size_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);
4205
4206   /* Modulo alignment.  */
4207   misalign = size_binop (TRUNC_MOD_EXPR, misalign, alignment);
4208   if (tree_int_cst_sgn (misalign) < 0)
4209     {
4210       /* Negative misalignment value.  */
4211       if (vect_debug_details (NULL))
4212         fprintf (dump_file, "unexpected misalign value");
4213       return false;
4214     }
4215
4216   DR_MISALIGNMENT (dr) = tree_low_cst (misalign, 1);
4217
4218   if (vect_debug_details (NULL))
4219     fprintf (dump_file, "misalign = %d", DR_MISALIGNMENT (dr));
4220
4221   return true;
4222 }
4223
4224
4225 /* Function vect_compute_data_refs_alignment
4226
4227    Compute the misalignment of data references in the loop.
4228    This pass may take place at function granularity instead of at loop
4229    granularity.
4230
4231    FOR NOW: No analysis is actually performed. Misalignment is calculated
4232    only for trivial cases. TODO.  */
4233
4234 static bool
4235 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
4236 {
4237   varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4238   varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4239   unsigned int i;
4240
4241   for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4242     {
4243       struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4244       if (!vect_compute_data_ref_alignment (dr))
4245         return false;
4246     }
4247
4248   for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4249     {
4250       struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4251       if (!vect_compute_data_ref_alignment (dr))
4252         return false;
4253     }
4254
4255   return true;
4256 }
4257
4258
4259 /* Function vect_enhance_data_refs_alignment
4260
4261    This pass will use loop versioning and loop peeling in order to enhance
4262    the alignment of data references in the loop.
4263
4264    FOR NOW: we assume that whatever versioning/peeling takes place, only the
4265    original loop is to be vectorized; Any other loops that are created by
4266    the transformations performed in this pass - are not supposed to be
4267    vectorized. This restriction will be relaxed.  */
4268
4269 static void
4270 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
4271 {
4272   varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4273   varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4274   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4275   unsigned int i;
4276
4277   /*
4278      This pass will require a cost model to guide it whether to apply peeling
4279      or versioning or a combination of the two. For example, the scheme that
4280      intel uses when given a loop with several memory accesses, is as follows:
4281      choose one memory access ('p') which alignment you want to force by doing
4282      peeling. Then, either (1) generate a loop in which 'p' is aligned and all
4283      other accesses are not necessarily aligned, or (2) use loop versioning to
4284      generate one loop in which all accesses are aligned, and another loop in
4285      which only 'p' is necessarily aligned.
4286
4287      ("Automatic Intra-Register Vectorization for the Intel Architecture",
4288       Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
4289       Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
4290
4291      Devising a cost model is the most critical aspect of this work. It will
4292      guide us on which access to peel for, whether to use loop versioning, how
4293      many versions to create, etc. The cost model will probably consist of
4294      generic considerations as well as target specific considerations (on
4295      powerpc for example, misaligned stores are more painful than misaligned
4296      loads).
4297
4298      Here is the general steps involved in alignment enhancements:
4299
4300      -- original loop, before alignment analysis:
4301         for (i=0; i<N; i++){
4302           x = q[i];                     # DR_MISALIGNMENT(q) = unknown
4303           p[i] = y;                     # DR_MISALIGNMENT(p) = unknown
4304         }
4305
4306      -- After vect_compute_data_refs_alignment:
4307         for (i=0; i<N; i++){
4308           x = q[i];                     # DR_MISALIGNMENT(q) = 3
4309           p[i] = y;                     # DR_MISALIGNMENT(p) = unknown
4310         }
4311
4312      -- Possibility 1: we do loop versioning:
4313      if (p is aligned) {
4314         for (i=0; i<N; i++){    # loop 1A
4315           x = q[i];                     # DR_MISALIGNMENT(q) = 3
4316           p[i] = y;                     # DR_MISALIGNMENT(p) = 0
4317         }
4318      }
4319      else {
4320         for (i=0; i<N; i++){    # loop 1B
4321           x = q[i];                     # DR_MISALIGNMENT(q) = 3
4322           p[i] = y;                     # DR_MISALIGNMENT(p) = unaligned
4323         }
4324      }
4325
4326      -- Possibility 2: we do loop peeling:
4327      for (i = 0; i < 3; i++){   # (scalar loop, not to be vectorized).
4328         x = q[i];
4329         p[i] = y;
4330      }
4331      for (i = 3; i < N; i++){   # loop 2A
4332         x = q[i];                       # DR_MISALIGNMENT(q) = 0
4333         p[i] = y;                       # DR_MISALIGNMENT(p) = unknown
4334      }
4335
4336      -- Possibility 3: combination of loop peeling and versioning:
4337      for (i = 0; i < 3; i++){   # (scalar loop, not to be vectorized).
4338         x = q[i];
4339         p[i] = y;
4340      }
4341      if (p is aligned) {
4342         for (i = 3; i<N; i++){  # loop 3A
4343           x = q[i];                     # DR_MISALIGNMENT(q) = 0
4344           p[i] = y;                     # DR_MISALIGNMENT(p) = 0
4345         }
4346      }
4347      else {
4348         for (i = 3; i<N; i++){  # loop 3B
4349           x = q[i];                     # DR_MISALIGNMENT(q) = 0
4350           p[i] = y;                     # DR_MISALIGNMENT(p) = unaligned
4351         }
4352      }
4353
4354      These loops are later passed to loop_transform to be vectorized. The
4355      vectorizer will use the alignment information to guide the transformation
4356      (whether to generate regular loads/stores, or with special handling for
4357      misalignment).
4358    */
4359
4360   /* (1) Peeling to force alignment.  */
4361
4362   /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
4363      Considerations:
4364      + How many accesses will become aligned due to the peeling
4365      - How many accesses will become unaligned due to the peeling,
4366        and the cost of misaligned accesses.
4367      - The cost of peeling (the extra runtime checks, the increase
4368        in code size).
4369
4370      The scheme we use FORNOW: peel to force the alignment of the first
4371      misaligned store in the loop.
4372      Rationale: misaligned stores are not yet supported.
4373
4374      TODO: Use a better cost model.  */
4375
4376   for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4377     {
4378       struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4379       if (!aligned_access_p (dr))
4380         {
4381           LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr;
4382           LOOP_DO_PEELING_FOR_ALIGNMENT (loop_vinfo) = true;
4383           break;
4384         }
4385     }
4386
4387   if (!LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
4388     {
4389       if (vect_debug_details (loop))
4390         fprintf (dump_file, "Peeling for alignment will not be applied.");
4391       return;
4392     }
4393   else
4394     if (vect_debug_details (loop))
4395       fprintf (dump_file, "Peeling for alignment will be applied.");
4396
4397
4398   /* (1.2) Update the alignment info according to the peeling factor.
4399            If the misalignment of the DR we peel for is M, then the
4400            peeling factor is VF - M, and the misalignment of each access DR_i
4401            in the loop is DR_MISALIGNMENT (DR_i) + VF - M.
4402            If the misalignment of the DR we peel for is unknown, then the
4403            misalignment of each access DR_i in the loop is also unknown.
4404
4405            FORNOW: set the misalignment of the accesses to unknown even
4406                    if the peeling factor is known at compile time.
4407
4408            TODO: - if the peeling factor is known at compile time, use that
4409                    when updating the misalignment info of the loop DRs.
4410                  - consider accesses that are known to have the same
4411                    alignment, even if that alignment is unknown.  */
4412
4413   for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4414     {
4415       struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4416       if (dr == LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
4417         {
4418           DR_MISALIGNMENT (dr) = 0;
4419           if (vect_debug_details (loop) || vect_debug_stats (loop))
4420             fprintf (dump_file, "Alignment of access forced using peeling.");
4421         }
4422       else
4423         DR_MISALIGNMENT (dr) = -1;
4424     }
4425   for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4426     {
4427       struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4428       if (dr == LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
4429         {
4430           DR_MISALIGNMENT (dr) = 0;
4431           if (vect_debug_details (loop) || vect_debug_stats (loop))
4432             fprintf (dump_file, "Alignment of access forced using peeling.");
4433         }
4434       else
4435         DR_MISALIGNMENT (dr) = -1;
4436     }
4437 }
4438
4439
4440 /* Function vect_analyze_data_refs_alignment
4441
4442    Analyze the alignment of the data-references in the loop.
4443    FOR NOW: Until support for misliagned accesses is in place, only if all
4444    accesses are aligned can the loop be vectorized. This restriction will be
4445    relaxed.  */
4446
4447 static bool
4448 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
4449 {
4450   varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4451   varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4452   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4453   enum dr_alignment_support supportable_dr_alignment;
4454   unsigned int i;
4455
4456   if (vect_debug_details (NULL))
4457     fprintf (dump_file, "\n<<vect_analyze_data_refs_alignment>>\n");
4458
4459
4460   /* This pass may take place at function granularity instead of at loop
4461      granularity.  */
4462
4463   if (!vect_compute_data_refs_alignment (loop_vinfo))
4464     {
4465       if (vect_debug_details (loop) || vect_debug_stats (loop))
4466         fprintf (dump_file,
4467                  "not vectorized: can't calculate alignment for data ref.");
4468       return false;
4469     }
4470
4471
4472   /* This pass will decide on using loop versioning and/or loop peeling in
4473      order to enhance the alignment of data references in the loop.  */
4474
4475   vect_enhance_data_refs_alignment (loop_vinfo);
4476
4477
4478   /* Finally, check that all the data references in the loop can be
4479      handled with respect to their alignment.  */
4480
4481   for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4482     {
4483       struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4484       supportable_dr_alignment = vect_supportable_dr_alignment (dr);
4485       if (!supportable_dr_alignment)
4486         {
4487           if (vect_debug_details (loop) || vect_debug_stats (loop))
4488             fprintf (dump_file, "not vectorized: unsupported unaligned load.");
4489           return false;
4490         }
4491       if (supportable_dr_alignment != dr_aligned
4492           && (vect_debug_details (loop) || vect_debug_stats (loop)))
4493         fprintf (dump_file, "Vectorizing an unaligned access.");
4494     }
4495   for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4496     {
4497       struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4498       supportable_dr_alignment = vect_supportable_dr_alignment (dr);
4499       if (!supportable_dr_alignment)
4500         {
4501           if (vect_debug_details (loop) || vect_debug_stats (loop))
4502             fprintf (dump_file, "not vectorized: unsupported unaligned store.");
4503           return false;
4504         }
4505       if (supportable_dr_alignment != dr_aligned
4506           && (vect_debug_details (loop) || vect_debug_stats (loop)))
4507         fprintf (dump_file, "Vectorizing an unaligned access.");
4508     }
4509
4510   return true;
4511 }
4512
4513
4514 /* Function vect_analyze_data_ref_access.
4515
4516    Analyze the access pattern of the data-reference DR. For now, a data access
4517    has to consecutive to be considered vectorizable.  */
4518
4519 static bool
4520 vect_analyze_data_ref_access (struct data_reference *dr)
4521 {
4522   tree stmt = DR_STMT (dr);
4523   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4524   tree step = STMT_VINFO_VECT_STEP (stmt_info);
4525   tree scalar_type = TREE_TYPE (DR_REF (dr));
4526
4527   if (!step || tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
4528     {
4529       if (vect_debug_details (NULL))
4530         fprintf (dump_file, "not consecutive access");
4531       return false;
4532     }
4533   return true;
4534 }
4535
4536
4537 /* Function vect_analyze_data_ref_accesses.
4538
4539    Analyze the access pattern of all the data references in the loop.
4540
4541    FORNOW: the only access pattern that is considered vectorizable is a
4542            simple step 1 (consecutive) access.
4543
4544    FORNOW: handle only arrays and pointer accesses.  */
4545
4546 static bool
4547 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
4548 {
4549   unsigned int i;
4550   varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4551   varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4552
4553   if (vect_debug_details (NULL))
4554     fprintf (dump_file, "\n<<vect_analyze_data_ref_accesses>>\n");
4555
4556   for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4557     {
4558       struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4559       bool ok = vect_analyze_data_ref_access (dr);
4560       if (!ok)
4561         {
4562           if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo))
4563               || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo)))
4564             fprintf (dump_file, "not vectorized: complicated access pattern.");
4565           return false;
4566         }
4567     }
4568
4569   for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4570     {
4571       struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4572       bool ok = vect_analyze_data_ref_access (dr);
4573       if (!ok)
4574         {
4575           if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo))
4576               || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo)))
4577             fprintf (dump_file, "not vectorized: complicated access pattern.");
4578           return false;
4579         }
4580     }
4581
4582   return true;
4583 }
4584
4585
4586 /* Function vect_analyze_pointer_ref_access.
4587
4588    Input:
4589    STMT - a stmt that contains a data-ref
4590    MEMREF - a data-ref in STMT, which is an INDIRECT_REF.
4591
4592    If the data-ref access is vectorizable, return a data_reference structure
4593    that represents it (DR). Otherwise - return NULL.  */
4594
4595 static struct data_reference *
4596 vect_analyze_pointer_ref_access (tree memref, tree stmt, bool is_read)
4597 {
4598   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4599   struct loop *loop = STMT_VINFO_LOOP (stmt_info);
4600   tree access_fn = analyze_scalar_evolution (loop, TREE_OPERAND (memref, 0));
4601   tree init, step;
4602   tree reftype, innertype;
4603   tree indx_access_fn;
4604   int loopnum = loop->num;
4605   struct data_reference *dr;
4606
4607   if (!access_fn)
4608     {
4609       if (vect_debug_stats (loop) || vect_debug_details (loop))
4610         fprintf (dump_file, "not vectorized: complicated pointer access.");
4611       return NULL;
4612     }
4613
4614   if (vect_debug_details (NULL))
4615     {
4616       fprintf (dump_file, "Access function of ptr: ");
4617       print_generic_expr (dump_file, access_fn, TDF_SLIM);
4618     }
4619
4620   if (!vect_is_simple_iv_evolution (loopnum, access_fn, &init, &step, false))
4621     {
4622       if (vect_debug_stats (loop) || vect_debug_details (loop))
4623         fprintf (dump_file, "not vectorized: pointer access is not simple.");
4624       return NULL;
4625     }
4626
4627   STRIP_NOPS (init);
4628
4629   if (!expr_invariant_in_loop_p (loop, init))
4630     {
4631       if (vect_debug_stats (loop) || vect_debug_details (loop))
4632         fprintf (dump_file,
4633                  "not vectorized: initial condition is not loop invariant.");
4634       return NULL;
4635     }
4636
4637   if (TREE_CODE (step) != INTEGER_CST)
4638     {
4639       if (vect_debug_stats (loop) || vect_debug_details (loop))
4640         fprintf (dump_file,
4641                 "not vectorized: non constant step for pointer access.");
4642       return NULL;
4643     }
4644
4645   reftype = TREE_TYPE (TREE_OPERAND (memref, 0));
4646   if (TREE_CODE (reftype) != POINTER_TYPE)
4647     {
4648       if (vect_debug_stats (loop) || vect_debug_details (loop))
4649         fprintf (dump_file, "not vectorized: unexpected pointer access form.");
4650       return NULL;
4651     }
4652
4653   reftype = TREE_TYPE (init);
4654   if (TREE_CODE (reftype) != POINTER_TYPE)
4655     {
4656       if (vect_debug_stats (loop) || vect_debug_details (loop))
4657         fprintf (dump_file, "not vectorized: unexpected pointer access form.");
4658       return NULL;
4659     }
4660
4661   innertype = TREE_TYPE (reftype);
4662   if (tree_int_cst_compare (TYPE_SIZE_UNIT (innertype), step))
4663     {
4664       /* FORNOW: support only consecutive access */
4665       if (vect_debug_stats (loop) || vect_debug_details (loop))
4666         fprintf (dump_file, "not vectorized: non consecutive access.");
4667       return NULL;
4668     }
4669
4670   STMT_VINFO_VECT_STEP (stmt_info) = fold_convert (sizetype, step);
4671   if (TREE_CODE (init) == PLUS_EXPR
4672       || TREE_CODE (init) == MINUS_EXPR)
4673     STMT_VINFO_VECT_INIT_OFFSET (stmt_info) =
4674       fold (size_binop (TREE_CODE (init), size_zero_node,
4675                         fold_convert (sizetype, TREE_OPERAND (init, 1))));
4676   else
4677     STMT_VINFO_VECT_INIT_OFFSET (stmt_info) = size_zero_node;
4678
4679   indx_access_fn =
4680         build_polynomial_chrec (loopnum, integer_zero_node, integer_one_node);
4681   if (vect_debug_details (NULL))
4682     {
4683       fprintf (dump_file, "Access function of ptr indx: ");
4684       print_generic_expr (dump_file, indx_access_fn, TDF_SLIM);
4685     }
4686   dr = init_data_ref (stmt, memref, init, indx_access_fn, is_read);
4687   return dr;
4688 }
4689
4690
4691 /* Function vect_get_memtag_and_dr.
4692
4693    The function returns the relevant variable for memory tag (for aliasing
4694    purposes). Also data reference structure DR is created.
4695
4696    This function handles three kinds of MEMREF:
4697
4698    It is called from vect_analyze_data_refs with a MEMREF that is either an
4699    ARRAY_REF or an INDIRECT_REF (this is category 1 - "recursion begins").
4700    It builds a DR for them using vect_get_base_and_offset, and calls itself
4701    recursively to retrieve the relevant memtag for the MEMREF, "peeling" the
4702    MEMREF along the way. During the recursive calls, the function may be called
4703    with a MEMREF for which the recursion has to continue - PLUS_EXPR,
4704    MINUS_EXPR, INDIRECT_REF (category 2 - "recursion continues"),
4705    and/or with a MEMREF for which a memtag can be trivially obtained - VAR_DECL
4706    and SSA_NAME (this is category 3 - "recursion stop condition").
4707
4708    When the MEMREF falls into category 1 there is still no data reference struct
4709    (DR) available. It is created by this function, and then, along the recursion,
4710    MEMREF will fall into category 2 or 3, in which case a DR will have already
4711    been created, but the analysis continues to retrieve the MEMTAG.
4712
4713    Input:
4714    MEMREF - data reference in STMT
4715    IS_READ - TRUE if STMT reads from MEMREF, FALSE if writes to MEMREF
4716
4717    Output:
4718    DR - data_reference struct for MEMREF
4719    return value - the relevant variable for memory tag (for aliasing purposes).
4720
4721 */
4722
4723 static tree
4724 vect_get_memtag_and_dr (tree memref, tree stmt, bool is_read,
4725                         loop_vec_info loop_vinfo,
4726                         tree vectype, struct data_reference **dr)
4727 {
4728   tree symbl, oprnd0, oprnd1;
4729   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4730   tree offset, misalign, step;
4731   tree ref_to_be_analyzed, tag, dr_base;
4732   struct data_reference *new_dr;
4733   bool base_aligned_p;
4734
4735   if (*dr)
4736     {
4737       /* Category 3: recursion stop condition.  */
4738       /* (1) A DR already exists. We only need to get the relevant memtag for
4739          MEMREF, the rest of the data was already initialized.  */
4740
4741       switch (TREE_CODE (memref))
4742         {
4743           /* (1.1) Stop condition: find the relevant memtag and return.  */
4744         case SSA_NAME:
4745           symbl = SSA_NAME_VAR (memref);
4746           tag = get_var_ann (symbl)->type_mem_tag;
4747           if (!tag)
4748             {
4749               tree ptr = TREE_OPERAND (DR_REF ((*dr)), 0);
4750               if (TREE_CODE (ptr) == SSA_NAME)
4751                 tag = get_var_ann (SSA_NAME_VAR (ptr))->type_mem_tag;
4752             }
4753           if (!tag)
4754             {
4755               if (vect_debug_details (NULL))
4756                 fprintf (dump_file, "not vectorized: no memtag for ref.");
4757               return NULL_TREE;
4758             }
4759           return tag;
4760
4761         case VAR_DECL:
4762         case PARM_DECL:
4763           return memref;
4764
4765           /* Category 2: recursion continues.  */
4766           /* (1.2) A recursive call to find the relevant memtag is required.  */
4767         case INDIRECT_REF:
4768           symbl = TREE_OPERAND (memref, 0);
4769           break; /* For recursive call.  */
4770
4771         case COMPONENT_REF:
4772           /* Could have recorded more accurate information -
4773              i.e, the actual FIELD_DECL that is being referenced -
4774              but later passes expect VAR_DECL as the nmt.  */
4775           /* Fall through.  */
4776
4777         case ADDR_EXPR:
4778           symbl = STMT_VINFO_VECT_DR_BASE (stmt_info);
4779           break; /* For recursive call.  */
4780
4781         case PLUS_EXPR:
4782         case MINUS_EXPR:
4783           /* Although DR exists, we have to call the function recursively to
4784              build MEMTAG for such expression. This is handled below.  */
4785           oprnd0 = TREE_OPERAND (memref, 0);
4786           oprnd1 = TREE_OPERAND (memref, 1);
4787
4788           STRIP_NOPS (oprnd1);
4789            /* Supported plus/minus expressions are of the form
4790              {address_base + offset}, such that address_base is of type
4791              POINTER/ARRAY, and offset is either an INTEGER_CST of type POINTER,
4792              or it's not of type POINTER/ARRAY.
4793              TODO: swap operands if {offset + address_base}.  */
4794           if ((TREE_CODE (TREE_TYPE (oprnd1)) == POINTER_TYPE
4795                && TREE_CODE (oprnd1) != INTEGER_CST)
4796               || TREE_CODE (TREE_TYPE (oprnd1)) == ARRAY_TYPE)
4797             return NULL_TREE;
4798
4799           symbl = oprnd0;
4800           break; /* For recursive call.  */
4801
4802         default:
4803           return NULL_TREE;
4804         }
4805     }
4806   else
4807     {
4808       /* Category 1: recursion begins.  */
4809       /* (2) A DR does not exist yet and must be built, followed by a
4810          recursive call to get the relevant memtag for MEMREF.  */
4811
4812       switch (TREE_CODE (memref))
4813         {
4814         case INDIRECT_REF:
4815           new_dr = vect_analyze_pointer_ref_access (memref, stmt, is_read);
4816           if (!new_dr)
4817             return NULL_TREE;
4818           *dr = new_dr;
4819           symbl = DR_BASE_NAME (new_dr);
4820           ref_to_be_analyzed = DR_BASE_NAME (new_dr);
4821           break;
4822
4823         case ARRAY_REF:
4824           new_dr = analyze_array (stmt, memref, is_read);
4825           *dr = new_dr;
4826           symbl = DR_BASE_NAME (new_dr);
4827           ref_to_be_analyzed = memref;
4828           break;
4829
4830         default:
4831           /* TODO: Support data-refs of form a[i].p for unions and single
4832              field structures.  */
4833           return NULL_TREE;
4834         }
4835
4836       offset = size_zero_node;
4837       misalign = size_zero_node;
4838       step = size_zero_node;
4839
4840       /* Analyze data-ref, find its base, initial offset from the base, step,
4841          and alignment.  */
4842       dr_base = vect_get_base_and_offset (new_dr, ref_to_be_analyzed,
4843                                           vectype, loop_vinfo, &offset,
4844                                           &misalign, &step, &base_aligned_p);
4845       if (!dr_base)
4846         return NULL_TREE;
4847
4848       /* Initialize information according to above analysis.  */
4849       /* Since offset and step of a pointer can be also set in
4850          vect_analyze_pointer_ref_access, we combine the values here. */
4851       if (STMT_VINFO_VECT_INIT_OFFSET (stmt_info))
4852         STMT_VINFO_VECT_INIT_OFFSET (stmt_info) =
4853           fold (build2 (PLUS_EXPR, TREE_TYPE (offset), offset,
4854                         STMT_VINFO_VECT_INIT_OFFSET (stmt_info)));
4855       else
4856         STMT_VINFO_VECT_INIT_OFFSET (stmt_info) = offset;
4857
4858       if (step && STMT_VINFO_VECT_STEP (stmt_info))
4859         STMT_VINFO_VECT_STEP (stmt_info) =
4860           size_binop (PLUS_EXPR, step, STMT_VINFO_VECT_STEP (stmt_info));
4861       else
4862         STMT_VINFO_VECT_STEP (stmt_info) = step;
4863
4864       STMT_VINFO_VECT_BASE_ALIGNED_P (stmt_info) = base_aligned_p;
4865       STMT_VINFO_VECT_MISALIGNMENT (stmt_info) = misalign;
4866       STMT_VINFO_VECT_DR_BASE (stmt_info) = dr_base;
4867     }
4868
4869   if (!symbl)
4870     return NULL_TREE;
4871   /* Recursive call to retrieve the relevant memtag.  */
4872   tag = vect_get_memtag_and_dr (symbl, stmt, is_read, loop_vinfo, vectype, dr);
4873   return tag;
4874 }
4875
4876
4877
4878 /* Function vect_analyze_data_refs.
4879
4880    Find all the data references in the loop.
4881
4882    The general structure of the analysis of data refs in the vectorizer is as
4883    follows:
4884    1- vect_analyze_data_refs(loop):
4885       Find and analyze all data-refs in the loop:
4886           foreach ref
4887              ref_stmt.memtag =  vect_get_memtag_and_dr (ref)
4888    1.1- vect_get_memtag_and_dr(ref):
4889       Analyze ref, and build a DR (data_referece struct) for it;
4890       call vect_get_base_and_offset to compute base, initial_offset,
4891       step and alignment. Set ref_stmt.base, ref_stmt.initial_offset,
4892       ref_stmt.alignment, and ref_stmt.step accordingly.
4893    1.1.1- vect_get_base_and_offset():
4894       Calculate base, initial_offset, step and alignment.
4895       For ARRAY_REFs and COMPONENT_REFs use call get_inner_reference.
4896    2- vect_analyze_dependences(): apply dependence testing using ref_stmt.DR
4897    3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
4898    4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
4899
4900    FORNOW: Handle aligned INDIRECT_REFs and ARRAY_REFs
4901            which base is really an array (not a pointer) and which alignment
4902            can be forced. This restriction will be relaxed.  */
4903
4904 static bool
4905 vect_analyze_data_refs (loop_vec_info loop_vinfo)
4906 {
4907   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4908   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
4909   int nbbs = loop->num_nodes;
4910   block_stmt_iterator si;
4911   int j;
4912   struct data_reference *dr;
4913
4914   if (vect_debug_details (NULL))
4915     fprintf (dump_file, "\n<<vect_analyze_data_refs>>\n");
4916
4917   for (j = 0; j < nbbs; j++)
4918     {
4919       basic_block bb = bbs[j];
4920       for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
4921         {
4922           bool is_read = false;
4923           tree stmt = bsi_stmt (si);
4924           stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4925           v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
4926           v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
4927           vuse_optype vuses = STMT_VUSE_OPS (stmt);
4928           varray_type *datarefs = NULL;
4929           int nvuses, nv_may_defs, nv_must_defs;
4930           tree memref = NULL;
4931           tree symbl;
4932           tree scalar_type, vectype;
4933
4934           /* Assumption: there exists a data-ref in stmt, if and only if
4935              it has vuses/vdefs.  */
4936
4937           if (!vuses && !v_may_defs && !v_must_defs)
4938             continue;
4939
4940           nvuses = NUM_VUSES (vuses);
4941           nv_may_defs = NUM_V_MAY_DEFS (v_may_defs);
4942           nv_must_defs = NUM_V_MUST_DEFS (v_must_defs);
4943
4944           if (nvuses && (nv_may_defs || nv_must_defs))
4945             {
4946               if (vect_debug_details (NULL))
4947                 {
4948                   fprintf (dump_file, "unexpected vdefs and vuses in stmt: ");
4949                   print_generic_expr (dump_file, stmt, TDF_SLIM);
4950                 }
4951               return false;
4952             }
4953
4954           if (TREE_CODE (stmt) != MODIFY_EXPR)
4955             {
4956               if (vect_debug_details (NULL))
4957                 {
4958                   fprintf (dump_file, "unexpected vops in stmt: ");
4959                   print_generic_expr (dump_file, stmt, TDF_SLIM);
4960                 }
4961               return false;
4962             }
4963
4964           if (vuses)
4965             {
4966               memref = TREE_OPERAND (stmt, 1);
4967               datarefs = &(LOOP_VINFO_DATAREF_READS (loop_vinfo));
4968               is_read = true;
4969             }
4970           else /* vdefs */
4971             {
4972               memref = TREE_OPERAND (stmt, 0);
4973               datarefs = &(LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
4974               is_read = false;
4975             }
4976
4977           scalar_type = TREE_TYPE (memref);
4978           vectype = get_vectype_for_scalar_type (scalar_type);
4979           if (!vectype)
4980             {
4981               if (vect_debug_details (NULL))
4982                 {
4983                   fprintf (dump_file, "no vectype for stmt: ");
4984                   print_generic_expr (dump_file, stmt, TDF_SLIM);
4985                   fprintf (dump_file, " scalar_type: ");
4986                   print_generic_expr (dump_file, scalar_type, TDF_DETAILS);
4987                 }
4988               /* It is not possible to vectorize this data reference.  */
4989               return false;
4990             }
4991           /* Analyze MEMREF. If it is of a supported form, build data_reference
4992              struct for it (DR) and find memtag for aliasing purposes.  */
4993           dr = NULL;
4994           symbl = vect_get_memtag_and_dr (memref, stmt, is_read, loop_vinfo,
4995                                           vectype, &dr);
4996           if (!symbl)
4997             {
4998               if (vect_debug_stats (loop) || vect_debug_details (loop))
4999                 {
5000                   fprintf (dump_file, "not vectorized: unhandled data ref: ");
5001                   print_generic_expr (dump_file, stmt, TDF_SLIM);
5002                 }
5003               return false;
5004             }
5005           STMT_VINFO_MEMTAG (stmt_info) = symbl;
5006           STMT_VINFO_VECTYPE (stmt_info) = vectype;
5007           VARRAY_PUSH_GENERIC_PTR (*datarefs, dr);
5008           STMT_VINFO_DATA_REF (stmt_info) = dr;
5009         }
5010     }
5011
5012   return true;
5013 }
5014
5015
5016 /* Utility functions used by vect_mark_stmts_to_be_vectorized.  */
5017
5018 /* Function vect_mark_relevant.
5019
5020    Mark STMT as "relevant for vectorization" and add it to WORKLIST.  */
5021
5022 static void
5023 vect_mark_relevant (varray_type *worklist, tree stmt)
5024 {
5025   stmt_vec_info stmt_info;
5026
5027   if (vect_debug_details (NULL))
5028     fprintf (dump_file, "mark relevant.");
5029
5030   if (TREE_CODE (stmt) == PHI_NODE)
5031     {
5032       VARRAY_PUSH_TREE (*worklist, stmt);
5033       return;
5034     }
5035
5036   stmt_info = vinfo_for_stmt (stmt);
5037
5038   if (!stmt_info)
5039     {
5040       if (vect_debug_details (NULL))
5041         {
5042           fprintf (dump_file, "mark relevant: no stmt info!!.");
5043           print_generic_expr (dump_file, stmt, TDF_SLIM);
5044         }
5045       return;
5046     }
5047
5048   if (STMT_VINFO_RELEVANT_P (stmt_info))
5049     {
5050       if (vect_debug_details (NULL))
5051         fprintf (dump_file, "already marked relevant.");
5052       return;
5053     }
5054
5055   STMT_VINFO_RELEVANT_P (stmt_info) = 1;
5056   VARRAY_PUSH_TREE (*worklist, stmt);
5057 }
5058
5059
5060 /* Function vect_stmt_relevant_p.
5061
5062    Return true if STMT in loop that is represented by LOOP_VINFO is
5063    "relevant for vectorization".
5064
5065    A stmt is considered "relevant for vectorization" if:
5066    - it has uses outside the loop.
5067    - it has vdefs (it alters memory).
5068    - control stmts in the loop (except for the exit condition).
5069
5070    CHECKME: what other side effects would the vectorizer allow?  */
5071
5072 static bool
5073 vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo)
5074 {
5075   v_may_def_optype v_may_defs;
5076   v_must_def_optype v_must_defs;
5077   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5078   int i;
5079   dataflow_t df;
5080   int num_uses;
5081
5082   /* cond stmt other than loop exit cond.  */
5083   if (is_ctrl_stmt (stmt) && (stmt != LOOP_VINFO_EXIT_COND (loop_vinfo)))
5084     return true;
5085
5086   /* changing memory.  */
5087   v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
5088   v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
5089   if (v_may_defs || v_must_defs)
5090     {
5091       if (vect_debug_details (NULL))
5092         fprintf (dump_file, "vec_stmt_relevant_p: stmt has vdefs.");
5093       return true;
5094     }
5095
5096   /* uses outside the loop.  */
5097   df = get_immediate_uses (stmt);
5098   num_uses = num_immediate_uses (df);
5099   for (i = 0; i < num_uses; i++)
5100     {
5101       tree use = immediate_use (df, i);
5102       basic_block bb = bb_for_stmt (use);
5103       if (!flow_bb_inside_loop_p (loop, bb))
5104         {
5105           if (vect_debug_details (NULL))
5106             fprintf (dump_file, "vec_stmt_relevant_p: used out of loop.");
5107           return true;
5108         }
5109     }
5110
5111   return false;
5112 }
5113
5114
5115 /* Function vect_mark_stmts_to_be_vectorized.
5116
5117    Not all stmts in the loop need to be vectorized. For example:
5118
5119      for i...
5120        for j...
5121    1.    T0 = i + j
5122    2.    T1 = a[T0]
5123
5124    3.    j = j + 1
5125
5126    Stmt 1 and 3 do not need to be vectorized, because loop control and
5127    addressing of vectorized data-refs are handled differently.
5128
5129    This pass detects such stmts.  */
5130
5131 static bool
5132 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
5133 {
5134   varray_type worklist;
5135   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5136   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
5137   unsigned int nbbs = loop->num_nodes;
5138   block_stmt_iterator si;
5139   tree stmt;
5140   stmt_ann_t ann;
5141   unsigned int i;
5142   int j;
5143   use_optype use_ops;
5144   stmt_vec_info stmt_info;
5145
5146   if (vect_debug_details (NULL))
5147     fprintf (dump_file, "\n<<vect_mark_stmts_to_be_vectorized>>\n");
5148
5149   VARRAY_TREE_INIT (worklist, 64, "work list");
5150
5151   /* 1. Init worklist.  */
5152
5153   for (i = 0; i < nbbs; i++)
5154     {
5155       basic_block bb = bbs[i];
5156       for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
5157         {
5158           stmt = bsi_stmt (si);
5159
5160           if (vect_debug_details (NULL))
5161             {
5162               fprintf (dump_file, "init: stmt relevant? ");
5163               print_generic_expr (dump_file, stmt, TDF_SLIM);
5164             }
5165
5166           stmt_info = vinfo_for_stmt (stmt);
5167           STMT_VINFO_RELEVANT_P (stmt_info) = 0;
5168
5169           if (vect_stmt_relevant_p (stmt, loop_vinfo))
5170             vect_mark_relevant (&worklist, stmt);
5171         }
5172     }
5173
5174
5175   /* 2. Process_worklist */
5176
5177   while (VARRAY_ACTIVE_SIZE (worklist) > 0)
5178     {
5179       stmt = VARRAY_TOP_TREE (worklist);
5180       VARRAY_POP (worklist);
5181
5182       if (vect_debug_details (NULL))
5183         {
5184           fprintf (dump_file, "worklist: examine stmt: ");
5185           print_generic_expr (dump_file, stmt, TDF_SLIM);
5186         }
5187
5188       /* Examine the USES in this statement. Mark all the statements which
5189          feed this statement's uses as "relevant", unless the USE is used as
5190          an array index.  */
5191
5192       if (TREE_CODE (stmt) == PHI_NODE)
5193         {
5194           /* follow the def-use chain inside the loop.  */
5195           for (j = 0; j < PHI_NUM_ARGS (stmt); j++)
5196             {
5197               tree arg = PHI_ARG_DEF (stmt, j);
5198               tree def_stmt = NULL_TREE;
5199               basic_block bb;
5200               if (!vect_is_simple_use (arg, loop, &def_stmt))
5201                 {
5202                   if (vect_debug_details (NULL))
5203                     fprintf (dump_file, "worklist: unsupported use.");
5204                   varray_clear (worklist);
5205                   return false;
5206                 }
5207               if (!def_stmt)
5208                 continue;
5209
5210               if (vect_debug_details (NULL))
5211                 {
5212                   fprintf (dump_file, "worklist: def_stmt: ");
5213                   print_generic_expr (dump_file, def_stmt, TDF_SLIM);
5214                 }
5215
5216               bb = bb_for_stmt (def_stmt);
5217               if (flow_bb_inside_loop_p (loop, bb))
5218                 vect_mark_relevant (&worklist, def_stmt);
5219             }
5220         }
5221
5222       ann = stmt_ann (stmt);
5223       use_ops = USE_OPS (ann);
5224
5225       for (i = 0; i < NUM_USES (use_ops); i++)
5226         {
5227           tree use = USE_OP (use_ops, i);
5228
5229           /* We are only interested in uses that need to be vectorized. Uses
5230              that are used for address computation are not considered relevant.
5231            */
5232           if (exist_non_indexing_operands_for_use_p (use, stmt))
5233             {
5234               tree def_stmt = NULL_TREE;
5235               basic_block bb;
5236               if (!vect_is_simple_use (use, loop, &def_stmt))
5237                 {
5238                   if (vect_debug_details (NULL))
5239                     fprintf (dump_file, "worklist: unsupported use.");
5240                   varray_clear (worklist);
5241                   return false;
5242                 }
5243
5244               if (!def_stmt)
5245                 continue;
5246
5247               if (vect_debug_details (NULL))
5248                 {
5249                   fprintf (dump_file, "worklist: examine use %d: ", i);
5250                   print_generic_expr (dump_file, use, TDF_SLIM);
5251                 }
5252
5253               bb = bb_for_stmt (def_stmt);
5254               if (flow_bb_inside_loop_p (loop, bb))
5255                 vect_mark_relevant (&worklist, def_stmt);
5256             }
5257         }
5258     }                           /* while worklist */
5259
5260   varray_clear (worklist);
5261   return true;
5262 }
5263
5264
5265 /* Function vect_can_advance_ivs_p
5266
5267    In case the number of iterations that LOOP iterates in unknown at compile
5268    time, an epilog loop will be generated, and the loop induction variables
5269    (IVs) will be "advanced" to the value they are supposed to take just before
5270    the epilog loop.  Here we check that the access function of the loop IVs
5271    and the expression that represents the loop bound are simple enough.
5272    These restrictions will be relaxed in the future.  */
5273
5274 static bool
5275 vect_can_advance_ivs_p (struct loop *loop)
5276 {
5277   basic_block bb = loop->header;
5278   tree phi;
5279
5280   /* Analyze phi functions of the loop header.  */
5281
5282   for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
5283     {
5284       tree access_fn = NULL;
5285       tree evolution_part;
5286
5287       if (vect_debug_details (NULL))
5288         {
5289           fprintf (dump_file, "Analyze phi: ");
5290           print_generic_expr (dump_file, phi, TDF_SLIM);
5291         }
5292
5293       /* Skip virtual phi's. The data dependences that are associated with
5294          virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */
5295
5296       if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
5297         {
5298           if (vect_debug_details (NULL))
5299             fprintf (dump_file, "virtual phi. skip.");
5300           continue;
5301         }
5302
5303       /* Analyze the evolution function.  */
5304
5305       access_fn = instantiate_parameters
5306         (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
5307
5308       if (!access_fn)
5309         {
5310           if (vect_debug_details (NULL))
5311             fprintf (dump_file, "No Access function.");
5312           return false;
5313         }
5314
5315       if (vect_debug_details (NULL))
5316         {
5317           fprintf (dump_file, "Access function of PHI: ");
5318           print_generic_expr (dump_file, access_fn, TDF_SLIM);
5319         }
5320
5321       evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
5322
5323       if (evolution_part == NULL_TREE)
5324         return false;
5325
5326       /* FORNOW: We do not transform initial conditions of IVs
5327          which evolution functions are a polynomial of degree >= 2.  */
5328
5329       if (tree_is_chrec (evolution_part))
5330         return false;
5331     }
5332
5333   return true;
5334 }
5335
5336
5337 /* Function vect_get_loop_niters.
5338
5339    Determine how many iterations the loop is executed.
5340    If an expression that represents the number of iterations
5341    can be constructed, place it in NUMBER_OF_ITERATIONS.
5342    Return the loop exit condition.  */
5343
5344 static tree
5345 vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
5346 {
5347   tree niters;
5348
5349   if (vect_debug_details (NULL))
5350     fprintf (dump_file, "\n<<get_loop_niters>>\n");
5351
5352   niters = number_of_iterations_in_loop (loop);
5353
5354   if (niters != NULL_TREE
5355       && niters != chrec_dont_know)
5356     {
5357       *number_of_iterations = niters;
5358
5359       if (vect_debug_details (NULL))
5360         {
5361           fprintf (dump_file, "==> get_loop_niters:" );
5362           print_generic_expr (dump_file, *number_of_iterations, TDF_SLIM);
5363         }
5364     }
5365
5366   return get_loop_exit_condition (loop);
5367 }
5368
5369
5370 /* Function vect_analyze_loop_form.
5371
5372    Verify the following restrictions (some may be relaxed in the future):
5373    - it's an inner-most loop
5374    - number of BBs = 2 (which are the loop header and the latch)
5375    - the loop has a pre-header
5376    - the loop has a single entry and exit
5377    - the loop exit condition is simple enough, and the number of iterations
5378      can be analyzed (a countable loop).  */
5379
5380 static loop_vec_info
5381 vect_analyze_loop_form (struct loop *loop)
5382 {
5383   loop_vec_info loop_vinfo;
5384   tree loop_cond;
5385   tree number_of_iterations = NULL;
5386   bool rescan = false;
5387
5388   if (vect_debug_details (loop))
5389     fprintf (dump_file, "\n<<vect_analyze_loop_form>>\n");
5390
5391   if (loop->inner
5392       || !loop->single_exit
5393       || loop->num_nodes != 2
5394       || EDGE_COUNT (loop->header->preds) != 2
5395       || loop->num_entries != 1)
5396     {
5397       if (vect_debug_stats (loop) || vect_debug_details (loop))
5398         {
5399           fprintf (dump_file, "not vectorized: bad loop form. ");
5400           if (loop->inner)
5401             fprintf (dump_file, "nested loop.");
5402           else if (!loop->single_exit)
5403             fprintf (dump_file, "multiple exits.");
5404           else if (loop->num_nodes != 2)
5405             fprintf (dump_file, "too many BBs in loop.");
5406           else if (EDGE_COUNT (loop->header->preds) != 2)
5407             fprintf (dump_file, "too many incoming edges.");
5408           else if (loop->num_entries != 1)
5409             fprintf (dump_file, "too many entries.");
5410         }
5411
5412       return NULL;
5413     }
5414
5415   /* We assume that the loop exit condition is at the end of the loop. i.e,
5416      that the loop is represented as a do-while (with a proper if-guard
5417      before the loop if needed), where the loop header contains all the
5418      executable statements, and the latch is empty.  */
5419   if (!empty_block_p (loop->latch))
5420     {
5421       if (vect_debug_stats (loop) || vect_debug_details (loop))
5422         fprintf (dump_file, "not vectorized: unexpectd loop form.");
5423       return NULL;
5424     }
5425
5426   /* Make sure we have a preheader basic block.  */
5427   if (!loop->pre_header)
5428     {
5429       rescan = true;
5430       loop_split_edge_with (loop_preheader_edge (loop), NULL);
5431     }
5432
5433   /* Make sure there exists a single-predecessor exit bb:  */
5434   if (EDGE_COUNT (loop->exit_edges[0]->dest->preds) != 1)
5435     {
5436       rescan = true;
5437       loop_split_edge_with (loop->exit_edges[0], NULL);
5438     }
5439
5440   if (rescan)
5441     {
5442       flow_loop_scan (loop, LOOP_ALL);
5443       /* Flow loop scan does not update loop->single_exit field.  */
5444       loop->single_exit = loop->exit_edges[0];
5445     }
5446
5447   if (empty_block_p (loop->header))
5448     {
5449       if (vect_debug_stats (loop) || vect_debug_details (loop))
5450         fprintf (dump_file, "not vectorized: empty loop.");
5451       return NULL;
5452     }
5453
5454   loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
5455   if (!loop_cond)
5456     {
5457       if (vect_debug_stats (loop) || vect_debug_details (loop))
5458         fprintf (dump_file, "not vectorized: complicated exit condition.");
5459       return NULL;
5460     }
5461
5462   if (!number_of_iterations)
5463     {
5464       if (vect_debug_stats (loop) || vect_debug_details (loop))
5465         fprintf (dump_file,
5466                  "not vectorized: number of iterations cannot be computed.");
5467       return NULL;
5468     }
5469
5470   if (chrec_contains_undetermined (number_of_iterations))
5471     {
5472       if (vect_debug_details (NULL))
5473         fprintf (dump_file, "Infinite number of iterations.");
5474       return false;
5475     }
5476
5477   loop_vinfo = new_loop_vec_info (loop);
5478   LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
5479
5480   if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
5481     {
5482       if (vect_debug_details (loop))
5483         {
5484           fprintf (dump_file, "loop bound unknown.\n");
5485           fprintf (dump_file, "Symbolic number of iterations is ");
5486           print_generic_expr (dump_file, number_of_iterations, TDF_DETAILS);
5487         }
5488     }
5489   else
5490   if (LOOP_VINFO_INT_NITERS (loop_vinfo) == 0)
5491     {
5492       if (vect_debug_stats (loop) || vect_debug_details (loop))
5493         fprintf (dump_file, "not vectorized: number of iterations = 0.");
5494       return NULL;
5495     }
5496
5497   LOOP_VINFO_EXIT_COND (loop_vinfo) = loop_cond;
5498
5499   return loop_vinfo;
5500 }
5501
5502
5503 /* Function vect_analyze_loop.
5504
5505    Apply a set of analyses on LOOP, and create a loop_vec_info struct
5506    for it. The different analyses will record information in the
5507    loop_vec_info struct.  */
5508
5509 static loop_vec_info
5510 vect_analyze_loop (struct loop *loop)
5511 {
5512   bool ok;
5513   loop_vec_info loop_vinfo;
5514
5515   if (vect_debug_details (NULL))
5516     fprintf (dump_file, "\n<<<<<<< analyze_loop_nest >>>>>>>\n");
5517
5518   /* Check the CFG characteristics of the loop (nesting, entry/exit, etc.  */
5519
5520   loop_vinfo = vect_analyze_loop_form (loop);
5521   if (!loop_vinfo)
5522     {
5523       if (vect_debug_details (loop))
5524         fprintf (dump_file, "bad loop form.");
5525       return NULL;
5526     }
5527
5528   /* Find all data references in the loop (which correspond to vdefs/vuses)
5529      and analyze their evolution in the loop.
5530
5531      FORNOW: Handle only simple, array references, which
5532      alignment can be forced, and aligned pointer-references.  */
5533
5534   ok = vect_analyze_data_refs (loop_vinfo);
5535   if (!ok)
5536     {
5537       if (vect_debug_details (loop))
5538         fprintf (dump_file, "bad data references.");
5539       destroy_loop_vec_info (loop_vinfo);
5540       return NULL;
5541     }
5542
5543   /* Data-flow analysis to detect stmts that do not need to be vectorized.  */
5544
5545   ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
5546   if (!ok)
5547     {
5548       if (vect_debug_details (loop))
5549         fprintf (dump_file, "unexpected pattern.");
5550       if (vect_debug_details (loop))
5551         fprintf (dump_file, "not vectorized: unexpected pattern.");
5552       destroy_loop_vec_info (loop_vinfo);
5553       return NULL;
5554     }
5555
5556   /* Check that all cross-iteration scalar data-flow cycles are OK.
5557      Cross-iteration cycles caused by virtual phis are analyzed separately.  */
5558
5559   ok = vect_analyze_scalar_cycles (loop_vinfo);
5560   if (!ok)
5561     {
5562       if (vect_debug_details (loop))
5563         fprintf (dump_file, "bad scalar cycle.");
5564       destroy_loop_vec_info (loop_vinfo);
5565       return NULL;
5566     }
5567
5568   /* Analyze data dependences between the data-refs in the loop.
5569      FORNOW: fail at the first data dependence that we encounter.  */
5570
5571   ok = vect_analyze_data_ref_dependences (loop_vinfo);
5572   if (!ok)
5573     {
5574       if (vect_debug_details (loop))
5575         fprintf (dump_file, "bad data dependence.");
5576       destroy_loop_vec_info (loop_vinfo);
5577       return NULL;
5578     }
5579
5580   /* Analyze the access patterns of the data-refs in the loop (consecutive,
5581      complex, etc.). FORNOW: Only handle consecutive access pattern.  */
5582
5583   ok = vect_analyze_data_ref_accesses (loop_vinfo);
5584   if (!ok)
5585     {
5586       if (vect_debug_details (loop))
5587         fprintf (dump_file, "bad data access.");
5588       destroy_loop_vec_info (loop_vinfo);
5589       return NULL;
5590     }
5591
5592   /* Analyze the alignment of the data-refs in the loop.
5593      FORNOW: Only aligned accesses are handled.  */
5594
5595   ok = vect_analyze_data_refs_alignment (loop_vinfo);
5596   if (!ok)
5597     {
5598       if (vect_debug_details (loop))
5599         fprintf (dump_file, "bad data alignment.");
5600       destroy_loop_vec_info (loop_vinfo);
5601       return NULL;
5602     }
5603
5604   /* Scan all the operations in the loop and make sure they are
5605      vectorizable.  */
5606
5607   ok = vect_analyze_operations (loop_vinfo);
5608   if (!ok)
5609     {
5610       if (vect_debug_details (loop))
5611         fprintf (dump_file, "bad operation or unsupported loop bound.");
5612       destroy_loop_vec_info (loop_vinfo);
5613       return NULL;
5614     }
5615
5616   LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
5617
5618   return loop_vinfo;
5619 }
5620
5621
5622 /* Function need_imm_uses_for.
5623
5624    Return whether we ought to include information for 'var'
5625    when calculating immediate uses.  For this pass we only want use
5626    information for non-virtual variables.  */
5627
5628 static bool
5629 need_imm_uses_for (tree var)
5630 {
5631   return is_gimple_reg (var);
5632 }
5633
5634
5635 /* Function vectorize_loops.
5636
5637    Entry Point to loop vectorization phase.  */
5638
5639 void
5640 vectorize_loops (struct loops *loops)
5641 {
5642   unsigned int i, loops_num;
5643   unsigned int num_vectorized_loops = 0;
5644
5645   /* Does the target support SIMD?  */
5646   /* FORNOW: until more sophisticated machine modelling is in place.  */
5647   if (!UNITS_PER_SIMD_WORD)
5648     {
5649       if (vect_debug_details (NULL))
5650         fprintf (dump_file, "vectorizer: target vector size is not defined.");
5651       return;
5652     }
5653
5654 #ifdef ENABLE_CHECKING
5655   verify_loop_closed_ssa ();
5656 #endif
5657
5658   compute_immediate_uses (TDFA_USE_OPS, need_imm_uses_for);
5659
5660   /*  ----------- Analyze loops. -----------  */
5661
5662   /* If some loop was duplicated, it gets bigger number
5663      than all previously defined loops. This fact allows us to run
5664      only over initial loops skipping newly generated ones.  */
5665   loops_num = loops->num;
5666   for (i = 1; i < loops_num; i++)
5667     {
5668       loop_vec_info loop_vinfo;
5669       struct loop *loop = loops->parray[i];
5670
5671       if (!loop)
5672         continue;
5673
5674       loop_vinfo = vect_analyze_loop (loop);
5675       loop->aux = loop_vinfo;
5676
5677       if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
5678         continue;
5679
5680       vect_transform_loop (loop_vinfo, loops);
5681       num_vectorized_loops++;
5682     }
5683
5684   if (vect_debug_stats (NULL) || vect_debug_details (NULL))
5685     fprintf (dump_file, "\nvectorized %u loops in function.\n",
5686              num_vectorized_loops);
5687
5688   /*  ----------- Finalize. -----------  */
5689
5690   free_df ();
5691   for (i = 1; i < loops_num; i++)
5692     {
5693       struct loop *loop = loops->parray[i];
5694       loop_vec_info loop_vinfo;
5695
5696       if (!loop)
5697         continue;
5698       loop_vinfo = loop->aux;
5699       destroy_loop_vec_info (loop_vinfo);
5700       loop->aux = NULL;
5701     }
5702
5703   rewrite_into_ssa (false);
5704   rewrite_into_loop_closed_ssa (); /* FORNOW */
5705   bitmap_clear (vars_to_rename);
5706 }