gcc/tree-vectorizer.c

   1 /* Loop Vectorization
   2    Copyright (C) 2003, 2004 Free Software Foundation, Inc.
   3    Contributed by Dorit Naishlos <dorit@il.ibm.com>
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 2, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING.  If not, write to the Free
  19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
  20 02111-1307, USA.  */
  21
  22 /* Loop Vectorization Pass.
  23
  24    This pass tries to vectorize loops. This first implementation focuses on
  25    simple inner-most loops, with no conditional control flow, and a set of
  26    simple operations which vector form can be expressed using existing
  27    tree codes (PLUS, MULT etc).
  28
  29    For example, the vectorizer transforms the following simple loop:
  30
  31         short a[N]; short b[N]; short c[N]; int i;
  32
  33         for (i=0; i<N; i++){
  34           a[i] = b[i] + c[i];
  35         }
  36
  37    as if it was manually vectorized by rewriting the source code into:
  38
  39         typedef int __attribute__((mode(V8HI))) v8hi;
  40         short a[N];  short b[N]; short c[N];   int i;
  41         v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
  42         v8hi va, vb, vc;
  43
  44         for (i=0; i<N/8; i++){
  45           vb = pb[i];
  46           vc = pc[i];
  47           va = vb + vc;
  48           pa[i] = va;
  49         }
  50
  51         The main entry to this pass is vectorize_loops(), in which
  52    the vectorizer applies a set of analyses on a given set of loops,
  53    followed by the actual vectorization transformation for the loops that
  54    had successfully passed the analysis phase.
  55
  56         Throughout this pass we make a distinction between two types of
  57    data: scalars (which are represented by SSA_NAMES), and memory references
  58    ("data-refs"). These two types of data require different handling both
  59    during analysis and transformation. The types of data-refs that the
  60    vectorizer currently supports are ARRAY_REFS which base is an array DECL
  61    (not a pointer), and INDIRECT_REFS through pointers; both array and pointer
  62    accesses are required to have a  simple (consecutive) access pattern.
  63
  64    Analysis phase:
  65    ===============
  66         The driver for the analysis phase is vect_analyze_loop_nest().
  67    It applies a set of analyses, some of which rely on the scalar evolution
  68    analyzer (scev) developed by Sebastian Pop.
  69
  70         During the analysis phase the vectorizer records some information
  71    per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
  72    loop, as well as general information about the loop as a whole, which is
  73    recorded in a "loop_vec_info" struct attached to each loop.
  74
  75    Transformation phase:
  76    =====================
  77         The loop transformation phase scans all the stmts in the loop, and
  78    creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
  79    the loop that needs to be vectorized. It insert the vector code sequence
  80    just before the scalar stmt S, and records a pointer to the vector code
  81    in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
  82    attached to S). This pointer will be used for the vectorization of following
  83    stmts which use the def of stmt S. Stmt S is removed if it writes to memory;
  84    otherwise, we rely on dead code elimination for removing it.
  85
  86         For example, say stmt S1 was vectorized into stmt VS1:
  87
  88    VS1: vb = px[i];
  89    S1:  b = x[i];    STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
  90    S2:  a = b;
  91
  92    To vectorize stmt S2, the vectorizer first finds the stmt that defines
  93    the operand 'b' (S1), and gets the relevant vector def 'vb' from the
  94    vector stmt VS1 pointed by STMT_VINFO_VEC_STMT (stmt_info (S1)). The
  95    resulting sequence would be:
  96
  97    VS1: vb = px[i];
  98    S1:  b = x[i];       STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
  99    VS2: va = vb;
 100    S2:  a = b;          STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2
 101
 102         Operands that are not SSA_NAMEs, are data-refs that appear in
 103    load/store operations (like 'x[i]' in S1), and are handled differently.
 104
 105    Target modeling:
 106    =================
 107         Currently the only target specific information that is used is the
 108    size of the vector (in bytes) - "UNITS_PER_SIMD_WORD". Targets that can
 109    support different sizes of vectors, for now will need to specify one value
 110    for "UNITS_PER_SIMD_WORD". More flexibility will be added in the future.
 111
 112         Since we only vectorize operations which vector form can be
 113    expressed using existing tree codes, to verify that an operation is
 114    supported, the vectorizer checks the relevant optab at the relevant
 115    machine_mode (e.g, add_optab->handlers[(int) V8HImode].insn_code). If
 116    the value found is CODE_FOR_nothing, then there's no target support, and
 117    we can't vectorize the stmt.
 118
 119    For additional information on this project see:
 120    http://gcc.gnu.org/projects/tree-ssa/vectorization.html
 121 */
 122
 123 #include "config.h"
 124 #include "system.h"
 125 #include "coretypes.h"
 126 #include "tm.h"
 127 #include "errors.h"
 128 #include "ggc.h"
 129 #include "tree.h"
 130 #include "target.h"
 131
 132 #include "rtl.h"
 133 #include "basic-block.h"
 134 #include "diagnostic.h"
 135 #include "tree-flow.h"
 136 #include "tree-dump.h"
 137 #include "timevar.h"
 138 #include "cfgloop.h"
 139 #include "cfglayout.h"
 140 #include "expr.h"
 141 #include "optabs.h"
 142 #include "toplev.h"
 143 #include "tree-chrec.h"
 144 #include "tree-data-ref.h"
 145 #include "tree-scalar-evolution.h"
 146 #include "tree-vectorizer.h"
 147 #include "tree-pass.h"
 148 #include "langhooks.h"
 149
 150
 151 /*************************************************************************
 152   Simple Loop Peeling Utilities
 153  *************************************************************************/
 154
 155 /* Entry point for peeling of simple loops.
 156    Peel the first/last iterations of a loop.
 157    It can be used outside of the vectorizer for loops that are simple enough
 158    (see function documentation).  In the vectorizer it is used to peel the
 159    last few iterations when the loop bound is unknown or does not evenly
 160    divide by the vectorization factor, and to peel the first few iterations
 161    to force the alignment of data references in the loop.  */
 162 struct loop *slpeel_tree_peel_loop_to_edge
 163   (struct loop *, struct loops *, edge, tree, tree, bool);
 164 static struct loop *slpeel_tree_duplicate_loop_to_edge_cfg
 165   (struct loop *, struct loops *, edge);
 166 static void slpeel_update_phis_for_duplicate_loop
 167   (struct loop *, struct loop *, bool after);
 168 static void slpeel_update_phi_nodes_for_guard (edge, struct loop *, bool, bool);
 169 static void slpeel_make_loop_iterate_ntimes (struct loop *, tree);
 170 static edge slpeel_add_loop_guard (basic_block, tree, basic_block, basic_block);
 171 static bool slpeel_can_duplicate_loop_p (struct loop *, edge);
 172 static void allocate_new_names (bitmap);
 173 static void rename_use_op (use_operand_p);
 174 static void rename_def_op (def_operand_p, tree);
 175 static void rename_variables_in_bb (basic_block);
 176 static void free_new_names (bitmap);
 177 static void rename_variables_in_loop (struct loop *);
 178 #ifdef ENABLE_CHECKING
 179 static void slpeel_verify_cfg_after_peeling (struct loop *, struct loop *);
 180 #endif
 181
 182
 183 /*************************************************************************
 184   Vectorization Utilities.
 185  *************************************************************************/
 186
 187 /* Main analysis functions.  */
 188 static loop_vec_info vect_analyze_loop (struct loop *);
 189 static loop_vec_info vect_analyze_loop_form (struct loop *);
 190 static bool vect_analyze_data_refs (loop_vec_info);
 191 static bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
 192 static bool vect_analyze_scalar_cycles (loop_vec_info);
 193 static bool vect_analyze_data_ref_accesses (loop_vec_info);
 194 static bool vect_analyze_data_refs_alignment (loop_vec_info);
 195 static bool vect_compute_data_refs_alignment (loop_vec_info);
 196 static bool vect_analyze_operations (loop_vec_info);
 197
 198 /* Main code transformation functions.  */
 199 static void vect_transform_loop (loop_vec_info, struct loops *);
 200 static bool vect_transform_stmt (tree, block_stmt_iterator *);
 201 static bool vectorizable_load (tree, block_stmt_iterator *, tree *);
 202 static bool vectorizable_store (tree, block_stmt_iterator *, tree *);
 203 static bool vectorizable_operation (tree, block_stmt_iterator *, tree *);
 204 static bool vectorizable_assignment (tree, block_stmt_iterator *, tree *);
 205 static enum dr_alignment_support vect_supportable_dr_alignment
 206   (struct data_reference *);
 207 static void vect_align_data_ref (tree);
 208 static void vect_enhance_data_refs_alignment (loop_vec_info);
 209
 210 /* Utility functions for the analyses.  */
 211 static bool vect_is_simple_use (tree , struct loop *, tree *);
 212 static bool exist_non_indexing_operands_for_use_p (tree, tree);
 213 static bool vect_is_simple_iv_evolution (unsigned, tree, tree *, tree *, bool);
 214 static void vect_mark_relevant (varray_type *, tree);
 215 static bool vect_stmt_relevant_p (tree, loop_vec_info);
 216 static tree vect_get_loop_niters (struct loop *, tree *);
 217 static bool vect_compute_data_ref_alignment (struct data_reference *);
 218 static bool vect_analyze_data_ref_access (struct data_reference *);
 219 static bool vect_can_force_dr_alignment_p (tree, unsigned int);
 220 static struct data_reference * vect_analyze_pointer_ref_access
 221   (tree, tree, bool);
 222 static bool vect_can_advance_ivs_p (struct loop *);
 223 static tree vect_get_base_and_offset (struct data_reference *, tree, tree,
 224                                       loop_vec_info, tree *, tree *, tree *,
 225                                       bool*);
 226 static struct data_reference * vect_analyze_pointer_ref_access
 227   (tree, tree, bool);
 228 static tree vect_get_ptr_offset (tree, tree, tree *);
 229 static tree vect_get_memtag_and_dr
 230   (tree, tree, bool, loop_vec_info, tree, struct data_reference **);
 231 static bool vect_analyze_offset_expr (tree, struct loop *, tree, tree *,
 232                                       tree *, tree *);
 233
 234 /* Utility functions for the code transformation.  */
 235 static tree vect_create_destination_var (tree, tree);
 236 static tree vect_create_data_ref_ptr
 237   (tree, block_stmt_iterator *, tree, tree *, bool);
 238 static tree vect_create_index_for_vector_ref
 239   (struct loop *, block_stmt_iterator *);
 240 static tree vect_create_addr_base_for_vector_ref (tree, tree *, tree);
 241 static tree get_vectype_for_scalar_type (tree);
 242 static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
 243 static tree vect_get_vec_def_for_operand (tree, tree);
 244 static tree vect_init_vector (tree, tree);
 245 static void vect_finish_stmt_generation
 246   (tree stmt, tree vec_stmt, block_stmt_iterator *bsi);
 247
 248 /* Utility function dealing with loop peeling (not peeling itself).  */
 249 static void vect_generate_tmps_on_preheader
 250   (loop_vec_info, tree *, tree *, tree *);
 251 static tree vect_build_loop_niters (loop_vec_info);
 252 static void vect_update_ivs_after_vectorizer (struct loop *, tree, edge);
 253 static tree vect_gen_niters_for_prolog_loop (loop_vec_info, tree);
 254 static void vect_update_inits_of_dr (struct data_reference *, tree niters);
 255 static void vect_update_inits_of_drs (loop_vec_info, tree);
 256 static void vect_do_peeling_for_alignment (loop_vec_info, struct loops *);
 257 static void vect_do_peeling_for_loop_bound
 258   (loop_vec_info, tree *, struct loops *);
 259
 260 /* Utilities for creation and deletion of vec_info structs.  */
 261 loop_vec_info new_loop_vec_info (struct loop *loop);
 262 void destroy_loop_vec_info (loop_vec_info);
 263 stmt_vec_info new_stmt_vec_info (tree stmt, struct loop *loop);
 264
 265 static bool vect_debug_stats (struct loop *loop);
 266 static bool vect_debug_details (struct loop *loop);
 267
 268 \f
 269 /*************************************************************************
 270   Simple Loop Peeling Utilities
 271
 272   Utilities to support loop peeling for vectorization purposes.
 273  *************************************************************************/
 274
 275
 276 /* For each definition in DEFINITIONS this function allocates
 277    new ssa name.  */
 278
 279 static void
 280 allocate_new_names (bitmap definitions)
 281 {
 282   unsigned ver;
 283   bitmap_iterator bi;
 284
 285   EXECUTE_IF_SET_IN_BITMAP (definitions, 0, ver, bi)
 286     {
 287       tree def = ssa_name (ver);
 288       tree *new_name_ptr = xmalloc (sizeof (tree));
 289
 290       bool abnormal = SSA_NAME_OCCURS_IN_ABNORMAL_PHI (def);
 291
 292       *new_name_ptr = duplicate_ssa_name (def, SSA_NAME_DEF_STMT (def));
 293       SSA_NAME_OCCURS_IN_ABNORMAL_PHI (*new_name_ptr) = abnormal;
 294
 295       SSA_NAME_AUX (def) = new_name_ptr;
 296     }
 297 }
 298
 299
 300 /* Renames the use *OP_P.  */
 301
 302 static void
 303 rename_use_op (use_operand_p op_p)
 304 {
 305   tree *new_name_ptr;
 306
 307   if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
 308     return;
 309
 310   new_name_ptr = SSA_NAME_AUX (USE_FROM_PTR (op_p));
 311
 312   /* Something defined outside of the loop.  */
 313   if (!new_name_ptr)
 314     return;
 315
 316   /* An ordinary ssa name defined in the loop.  */
 317
 318   SET_USE (op_p, *new_name_ptr);
 319 }
 320
 321
 322 /* Renames the def *OP_P in statement STMT.  */
 323
 324 static void
 325 rename_def_op (def_operand_p op_p, tree stmt)
 326 {
 327   tree *new_name_ptr;
 328
 329   if (TREE_CODE (DEF_FROM_PTR (op_p)) != SSA_NAME)
 330     return;
 331
 332   new_name_ptr = SSA_NAME_AUX (DEF_FROM_PTR (op_p));
 333
 334   /* Something defined outside of the loop.  */
 335   if (!new_name_ptr)
 336     return;
 337
 338   /* An ordinary ssa name defined in the loop.  */
 339
 340   SET_DEF (op_p, *new_name_ptr);
 341   SSA_NAME_DEF_STMT (DEF_FROM_PTR (op_p)) = stmt;
 342 }
 343
 344
 345 /* Renames the variables in basic block BB.  */
 346
 347 static void
 348 rename_variables_in_bb (basic_block bb)
 349 {
 350   tree phi;
 351   block_stmt_iterator bsi;
 352   tree stmt;
 353   stmt_ann_t ann;
 354   use_optype uses;
 355   vuse_optype vuses;
 356   def_optype defs;
 357   v_may_def_optype v_may_defs;
 358   v_must_def_optype v_must_defs;
 359   unsigned i;
 360   edge e;
 361   edge_iterator ei;
 362   struct loop *loop = bb->loop_father;
 363
 364   for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
 365     rename_def_op (PHI_RESULT_PTR (phi), phi);
 366
 367   for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
 368     {
 369       stmt = bsi_stmt (bsi);
 370       get_stmt_operands (stmt);
 371       ann = stmt_ann (stmt);
 372
 373       uses = USE_OPS (ann);
 374       for (i = 0; i < NUM_USES (uses); i++)
 375         rename_use_op (USE_OP_PTR (uses, i));
 376
 377       defs = DEF_OPS (ann);
 378       for (i = 0; i < NUM_DEFS (defs); i++)
 379         rename_def_op (DEF_OP_PTR (defs, i), stmt);
 380
 381       vuses = VUSE_OPS (ann);
 382       for (i = 0; i < NUM_VUSES (vuses); i++)
 383         rename_use_op (VUSE_OP_PTR (vuses, i));
 384
 385       v_may_defs = V_MAY_DEF_OPS (ann);
 386       for (i = 0; i < NUM_V_MAY_DEFS (v_may_defs); i++)
 387         {
 388           rename_use_op (V_MAY_DEF_OP_PTR (v_may_defs, i));
 389           rename_def_op (V_MAY_DEF_RESULT_PTR (v_may_defs, i), stmt);
 390         }
 391
 392       v_must_defs = V_MUST_DEF_OPS (ann);
 393       for (i = 0; i < NUM_V_MUST_DEFS (v_must_defs); i++)
 394         {
 395           rename_use_op (V_MUST_DEF_KILL_PTR (v_must_defs, i));
 396           rename_def_op (V_MUST_DEF_RESULT_PTR (v_must_defs, i), stmt);
 397         }
 398     }
 399
 400   FOR_EACH_EDGE (e, ei, bb->succs)
 401     {
 402       if (!flow_bb_inside_loop_p (loop, e->dest))
 403         continue;
 404       for (phi = phi_nodes (e->dest); phi; phi = PHI_CHAIN (phi))
 405         rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e));
 406     }
 407 }
 408
 409
 410 /* Releases the structures holding the new ssa names.  */
 411
 412 static void
 413 free_new_names (bitmap definitions)
 414 {
 415   unsigned ver;
 416   bitmap_iterator bi;
 417
 418   EXECUTE_IF_SET_IN_BITMAP (definitions, 0, ver, bi)
 419     {
 420       tree def = ssa_name (ver);
 421
 422       if (SSA_NAME_AUX (def))
 423         {
 424           free (SSA_NAME_AUX (def));
 425           SSA_NAME_AUX (def) = NULL;
 426         }
 427     }
 428 }
 429
 430
 431 /* Renames variables in new generated LOOP.  */
 432
 433 static void
 434 rename_variables_in_loop (struct loop *loop)
 435 {
 436   unsigned i;
 437   basic_block *bbs;
 438
 439   bbs = get_loop_body (loop);
 440
 441   for (i = 0; i < loop->num_nodes; i++)
 442     rename_variables_in_bb (bbs[i]);
 443
 444   free (bbs);
 445 }
 446
 447
 448 /* Update the PHI nodes of NEW_LOOP.
 449
 450    NEW_LOOP is a duplicate of ORIG_LOOP.
 451    AFTER indicates whether NEW_LOOP executes before or after ORIG_LOOP:
 452    AFTER is true if NEW_LOOP executes after ORIG_LOOP, and false if it
 453    executes before it.  */
 454
 455 static void
 456 slpeel_update_phis_for_duplicate_loop (struct loop *orig_loop,
 457                                        struct loop *new_loop, bool after)
 458 {
 459   tree *new_name_ptr, new_ssa_name;
 460   tree phi_new, phi_orig;
 461   tree def;
 462   edge orig_loop_latch = loop_latch_edge (orig_loop);
 463   edge orig_entry_e = loop_preheader_edge (orig_loop);
 464   edge new_loop_exit_e = new_loop->exit_edges[0];
 465   edge new_loop_entry_e = loop_preheader_edge (new_loop);
 466   edge entry_arg_e = (after ? orig_loop_latch : orig_entry_e);
 467
 468   /*
 469      step 1. For each loop-header-phi:
 470              Add the first phi argument for the phi in NEW_LOOP
 471             (the one associated with the entry of NEW_LOOP)
 472
 473      step 2. For each loop-header-phi:
 474              Add the second phi argument for the phi in NEW_LOOP
 475             (the one associated with the latch of NEW_LOOP)
 476
 477      step 3. Update the phis in the successor block of NEW_LOOP.
 478
 479         case 1: NEW_LOOP was placed before ORIG_LOOP:
 480                 The successor block of NEW_LOOP is the header of ORIG_LOOP.
 481                 Updating the phis in the successor block can therefore be done
 482                 along with the scanning of the loop header phis, because the
 483                 header blocks of ORIG_LOOP and NEW_LOOP have exactly the same
 484                 phi nodes, organized in the same order.
 485
 486         case 2: NEW_LOOP was placed after ORIG_LOOP:
 487                 The successor block of NEW_LOOP is the original exit block of
 488                 ORIG_LOOP - the phis to be updated are the loop-closed-ssa phis.
 489                 We postpone updating these phis to a later stage (when
 490                 loop guards are added).
 491    */
 492
 493
 494   /* Scan the phis in the headers of the old and new loops
 495      (they are organized in exactly the same order).  */
 496
 497   for (phi_new = phi_nodes (new_loop->header),
 498        phi_orig = phi_nodes (orig_loop->header);
 499        phi_new && phi_orig;
 500        phi_new = PHI_CHAIN (phi_new), phi_orig = PHI_CHAIN (phi_orig))
 501     {
 502       /* step 1.  */
 503       def = PHI_ARG_DEF_FROM_EDGE (phi_orig, entry_arg_e);
 504       add_phi_arg (phi_new, def, new_loop_entry_e);
 505
 506       /* step 2.  */
 507       def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_loop_latch);
 508       if (TREE_CODE (def) != SSA_NAME)
 509         continue;
 510
 511       new_name_ptr = SSA_NAME_AUX (def);
 512       if (!new_name_ptr)
 513         /* Something defined outside of the loop.  */
 514         continue;
 515
 516       /* An ordinary ssa name defined in the loop.  */
 517       new_ssa_name = *new_name_ptr;
 518       add_phi_arg (phi_new, new_ssa_name, loop_latch_edge (new_loop));
 519
 520       /* step 3 (case 1).  */
 521       if (!after)
 522         {
 523           gcc_assert (new_loop_exit_e == orig_entry_e);
 524           SET_PHI_ARG_DEF (phi_orig,
 525                            phi_arg_from_edge (phi_orig, new_loop_exit_e),
 526                            new_ssa_name);
 527         }
 528     }
 529 }
 530
 531
 532 /* Update PHI nodes for a guard of the LOOP.
 533
 534    Input:
 535    - LOOP, GUARD_EDGE: LOOP is a loop for which we added guard code that
 536         controls whether LOOP is to be executed.  GUARD_EDGE is the edge that
 537         originates from the guard-bb, skips LOOP and reaches the (unique) exit
 538         bb of LOOP.  This loop-exit-bb is an empty bb with one successor.
 539         We denote this bb NEW_MERGE_BB because it had a single predecessor (the
 540         LOOP header) before the guard code was added, and now it became a merge
 541         point of two paths - the path that ends with the LOOP exit-edge, and
 542         the path that ends with GUARD_EDGE.
 543
 544         This function creates and updates the relevant phi nodes to account for
 545         the new incoming edge (GUARD_EDGE) into NEW_MERGE_BB:
 546         1. Create phi nodes at NEW_MERGE_BB.
 547         2. Update the phi nodes at the successor of NEW_MERGE_BB (denoted
 548            UPDATE_BB).  UPDATE_BB was the exit-bb of LOOP before NEW_MERGE_BB
 549            was added:
 550
 551         ===> The CFG before the guard-code was added:
 552         LOOP_header_bb:
 553           if (exit_loop) goto update_bb : LOOP_header_bb
 554         update_bb:
 555
 556         ==> The CFG after the guard-code was added:
 557         guard_bb:
 558           if (LOOP_guard_condition) goto new_merge_bb : LOOP_header_bb
 559         LOOP_header_bb:
 560           if (exit_loop_condition) goto new_merge_bb : LOOP_header_bb
 561         new_merge_bb:
 562           goto update_bb
 563         update_bb:
 564
 565    - ENTRY_PHIS: If ENTRY_PHIS is TRUE, this indicates that the phis in
 566         UPDATE_BB are loop entry phis, like the phis in the LOOP header,
 567         organized in the same order.
 568         If ENTRY_PHIs is FALSE, this indicates that the phis in UPDATE_BB are
 569         loop exit phis.
 570
 571    - IS_NEW_LOOP: TRUE if LOOP is a new loop (a duplicated copy of another
 572         "original" loop).  FALSE if LOOP is an original loop (not a newly
 573         created copy).  The SSA_NAME_AUX fields of the defs in the original
 574         loop are the corresponding new ssa-names used in the new duplicated
 575         loop copy.  IS_NEW_LOOP indicates which of the two args of the phi
 576         nodes in UPDATE_BB takes the original ssa-name, and which takes the
 577         new name: If IS_NEW_LOOP is TRUE, the phi-arg that is associated with
 578         the LOOP-exit-edge takes the new-name, and the phi-arg that is
 579         associated with GUARD_EDGE takes the original name.  If IS_NEW_LOOP is
 580         FALSE, it's the other way around.
 581   */
 582
 583 static void
 584 slpeel_update_phi_nodes_for_guard (edge guard_edge,
 585                                    struct loop *loop,
 586                                    bool entry_phis,
 587                                    bool is_new_loop)
 588 {
 589   tree orig_phi, new_phi, update_phi;
 590   tree guard_arg, loop_arg;
 591   basic_block new_merge_bb = guard_edge->dest;
 592   edge e = EDGE_SUCC (new_merge_bb, 0);
 593   basic_block update_bb = e->dest;
 594   basic_block orig_bb = (entry_phis ? loop->header : update_bb);
 595
 596   for (orig_phi = phi_nodes (orig_bb), update_phi = phi_nodes (update_bb);
 597        orig_phi && update_phi;
 598        orig_phi = PHI_CHAIN (orig_phi), update_phi = PHI_CHAIN (update_phi))
 599     {
 600       /* 1. Generate new phi node in NEW_MERGE_BB:  */
 601       new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
 602                                  new_merge_bb);
 603
 604       /* 2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
 605             of LOOP. Set the two phi args in NEW_PHI for these edges:  */
 606       if (entry_phis)
 607         {
 608           loop_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi,
 609                                             EDGE_SUCC (loop->latch, 0));
 610           guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, loop->entry_edges[0]);
 611         }
 612       else /* exit phis */
 613         {
 614           tree orig_def = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
 615           tree *new_name_ptr = SSA_NAME_AUX (orig_def);
 616           tree new_name;
 617
 618           if (new_name_ptr)
 619             new_name = *new_name_ptr;
 620           else
 621             /* Something defined outside of the loop  */
 622             new_name = orig_def;
 623
 624           if (is_new_loop)
 625             {
 626               guard_arg = orig_def;
 627               loop_arg = new_name;
 628             }
 629           else
 630             {
 631               guard_arg = new_name;
 632               loop_arg = orig_def;
 633             }
 634         }
 635       add_phi_arg (new_phi, loop_arg, loop->exit_edges[0]);
 636       add_phi_arg (new_phi, guard_arg, guard_edge);
 637
 638       /* 3. Update phi in successor block.  */
 639       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == loop_arg
 640                   || PHI_ARG_DEF_FROM_EDGE (update_phi, e) == guard_arg);
 641       SET_PHI_ARG_DEF (update_phi, phi_arg_from_edge (update_phi, e),
 642                        PHI_RESULT (new_phi));
 643     }
 644
 645   set_phi_nodes (new_merge_bb, phi_reverse (phi_nodes (new_merge_bb)));
 646 }
 647
 648
 649 /* Make the LOOP iterate NITERS times. This is done by adding a new IV
 650    that starts at zero, increases by one and its limit is NITERS.
 651
 652    Assumption: the exit-condition of LOOP is the last stmt in the loop.  */
 653
 654 static void
 655 slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
 656 {
 657   tree indx_before_incr, indx_after_incr, cond_stmt, cond;
 658   tree orig_cond;
 659   edge exit_edge = loop->exit_edges[0];
 660   block_stmt_iterator loop_exit_bsi = bsi_last (exit_edge->src);
 661   tree begin_label = tree_block_label (loop->latch);
 662   tree exit_label = tree_block_label (loop->single_exit->dest);
 663   tree init = build_int_cst (TREE_TYPE (niters), 0);
 664   tree step = build_int_cst (TREE_TYPE (niters), 1);
 665   tree then_label;
 666   tree else_label;
 667
 668   orig_cond = get_loop_exit_condition (loop);
 669   gcc_assert (orig_cond);
 670   create_iv (init, step, NULL_TREE, loop,
 671              &loop_exit_bsi, false, &indx_before_incr, &indx_after_incr);
 672
 673   /* CREATE_IV uses BSI_INSERT with TSI_NEW_STMT, so we want to get
 674      back to the exit condition statement.  */
 675   bsi_next (&loop_exit_bsi);
 676   gcc_assert (bsi_stmt (loop_exit_bsi) == orig_cond);
 677
 678   if (exit_edge->flags & EDGE_TRUE_VALUE) /* 'then' edge exits the loop.  */
 679     {
 680       cond = build2 (GE_EXPR, boolean_type_node, indx_after_incr, niters);
 681       then_label = build1 (GOTO_EXPR, void_type_node, exit_label);
 682       else_label = build1 (GOTO_EXPR, void_type_node, begin_label);
 683     }
 684   else /* 'then' edge loops back.  */
 685     {
 686       cond = build2 (LT_EXPR, boolean_type_node, indx_after_incr, niters);
 687       then_label = build1 (GOTO_EXPR, void_type_node, begin_label);
 688       else_label = build1 (GOTO_EXPR, void_type_node, exit_label);
 689     }
 690
 691   cond_stmt = build3 (COND_EXPR, TREE_TYPE (orig_cond), cond,
 692                      then_label, else_label);
 693   bsi_insert_before (&loop_exit_bsi, cond_stmt, BSI_SAME_STMT);
 694
 695   /* Remove old loop exit test:  */
 696   bsi_remove (&loop_exit_bsi);
 697
 698   if (vect_debug_stats (loop) || vect_debug_details (loop))
 699     print_generic_expr (dump_file, cond_stmt, TDF_SLIM);
 700
 701   loop->nb_iterations = niters;
 702 }
 703
 704
 705 /* Given LOOP this function generates a new copy of it and puts it
 706    on E which is either the entry or exit of LOOP.  */
 707
 708 static struct loop *
 709 slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, struct loops *loops,
 710                                         edge e)
 711 {
 712   struct loop *new_loop;
 713   basic_block *new_bbs, *bbs;
 714   bool at_exit;
 715   bool was_imm_dom;
 716   basic_block exit_dest;
 717   tree phi, phi_arg;
 718
 719   at_exit = (e == loop->exit_edges[0]);
 720   if (!at_exit && e != loop_preheader_edge (loop))
 721     {
 722       if (dump_file && (dump_flags & TDF_DETAILS))
 723           fprintf (dump_file, "Edge is not an entry nor an exit edge.\n");
 724       return NULL;
 725     }
 726
 727   bbs = get_loop_body (loop);
 728
 729   /* Check whether duplication is possible.  */
 730   if (!can_copy_bbs_p (bbs, loop->num_nodes))
 731     {
 732       if (vect_debug_stats (loop) || vect_debug_details (loop))
 733           fprintf (dump_file, "Cannot copy basic blocks.\n");
 734       free (bbs);
 735       return NULL;
 736     }
 737
 738   /* Generate new loop structure.  */
 739   new_loop = duplicate_loop (loops, loop, loop->outer);
 740   if (!new_loop)
 741     {
 742       if (vect_debug_stats (loop) || vect_debug_details (loop))
 743           fprintf (dump_file, "duplicate_loop returns NULL.\n");
 744       free (bbs);
 745       return NULL;
 746     }
 747
 748   exit_dest = loop->exit_edges[0]->dest;
 749   was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
 750                                           exit_dest) == loop->header ?
 751                  true : false);
 752
 753   new_bbs = xmalloc (sizeof (basic_block) * loop->num_nodes);
 754
 755   copy_bbs (bbs, loop->num_nodes, new_bbs, NULL, 0, NULL, NULL);
 756
 757   /* Duplicating phi args at exit bbs as coming
 758      also from exit of duplicated loop.  */
 759   for (phi = phi_nodes (exit_dest); phi; phi = PHI_CHAIN (phi))
 760     {
 761       phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, loop->exit_edges[0]);
 762       if (phi_arg)
 763         {
 764           edge new_loop_exit_edge;
 765
 766           if (EDGE_SUCC (new_loop->header, 0)->dest == new_loop->latch)
 767             new_loop_exit_edge = EDGE_SUCC (new_loop->header, 1);
 768           else
 769             new_loop_exit_edge = EDGE_SUCC (new_loop->header, 0);
 770
 771           add_phi_arg (phi, phi_arg, new_loop_exit_edge);
 772         }
 773     }
 774
 775   if (at_exit) /* Add the loop copy at exit.  */
 776     {
 777       redirect_edge_and_branch_force (e, new_loop->header);
 778       set_immediate_dominator (CDI_DOMINATORS, new_loop->header, e->src);
 779       if (was_imm_dom)
 780         set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_loop->header);
 781     }
 782   else /* Add the copy at entry.  */
 783     {
 784       edge new_exit_e;
 785       edge entry_e = loop_preheader_edge (loop);
 786       basic_block preheader = entry_e->src;
 787
 788       if (!flow_bb_inside_loop_p (new_loop,
 789                                   EDGE_SUCC (new_loop->header, 0)->dest))
 790         new_exit_e = EDGE_SUCC (new_loop->header, 0);
 791       else
 792         new_exit_e = EDGE_SUCC (new_loop->header, 1);
 793
 794       redirect_edge_and_branch_force (new_exit_e, loop->header);
 795       set_immediate_dominator (CDI_DOMINATORS, loop->header,
 796                                new_exit_e->src);
 797
 798       /* We have to add phi args to the loop->header here as coming
 799          from new_exit_e edge.  */
 800       for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
 801         {
 802           phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, entry_e);
 803           if (phi_arg)
 804             add_phi_arg (phi, phi_arg, new_exit_e);
 805         }
 806
 807       redirect_edge_and_branch_force (entry_e, new_loop->header);
 808       set_immediate_dominator (CDI_DOMINATORS, new_loop->header, preheader);
 809     }
 810
 811   flow_loop_scan (new_loop, LOOP_ALL);
 812   flow_loop_scan (loop, LOOP_ALL);
 813   free (new_bbs);
 814   free (bbs);
 815
 816   return new_loop;
 817 }
 818
 819
 820 /* Given the condition statement COND, put it as the last statement
 821    of GUARD_BB; EXIT_BB is the basic block to skip the loop;
 822    Assumes that this is the single exit of the guarded loop.
 823    Returns the skip edge.  */
 824
 825 static edge
 826 slpeel_add_loop_guard (basic_block guard_bb, tree cond, basic_block exit_bb,
 827                         basic_block dom_bb)
 828 {
 829   block_stmt_iterator bsi;
 830   edge new_e, enter_e;
 831   tree cond_stmt, then_label, else_label;
 832
 833   enter_e = EDGE_SUCC (guard_bb, 0);
 834   enter_e->flags &= ~EDGE_FALLTHRU;
 835   enter_e->flags |= EDGE_FALSE_VALUE;
 836   bsi = bsi_last (guard_bb);
 837
 838   then_label = build1 (GOTO_EXPR, void_type_node,
 839                        tree_block_label (exit_bb));
 840   else_label = build1 (GOTO_EXPR, void_type_node,
 841                        tree_block_label (enter_e->dest));
 842   cond_stmt = build3 (COND_EXPR, void_type_node, cond,
 843                      then_label, else_label);
 844   bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT);
 845   /* Add new edge to connect entry block to the second loop.  */
 846   new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);
 847   set_immediate_dominator (CDI_DOMINATORS, exit_bb, dom_bb);
 848   return new_e;
 849 }
 850
 851
 852 /* This function verifies that the following restrictions apply to LOOP:
 853    (1) it is innermost
 854    (2) it consists of exactly 2 basic blocks - header, and an empty latch.
 855    (3) it is single entry, single exit
 856    (4) its exit condition is the last stmt in the header
 857    (5) E is the entry/exit edge of LOOP.
 858  */
 859
 860 static bool
 861 slpeel_can_duplicate_loop_p (struct loop *loop, edge e)
 862 {
 863   edge exit_e = loop->exit_edges [0];
 864   edge entry_e = loop_preheader_edge (loop);
 865   tree orig_cond = get_loop_exit_condition (loop);
 866   block_stmt_iterator loop_exit_bsi = bsi_last (exit_e->src);
 867
 868   if (any_marked_for_rewrite_p ())
 869     return false;
 870
 871   if (loop->inner
 872       /* All loops have an outer scope; the only case loop->outer is NULL is for
 873          the function itself.  */
 874       || !loop->outer
 875       || loop->num_nodes != 2
 876       || !empty_block_p (loop->latch)
 877       || loop->num_exits != 1
 878       || loop->num_entries != 1
 879       /* Verify that new loop exit condition can be trivially modified.  */
 880       || (!orig_cond || orig_cond != bsi_stmt (loop_exit_bsi))
 881       || (e != exit_e && e != entry_e))
 882     return false;
 883
 884   return true;
 885 }
 886
 887 #ifdef ENABLE_CHECKING
 888 static void
 889 slpeel_verify_cfg_after_peeling (struct loop *first_loop,
 890                                  struct loop *second_loop)
 891 {
 892   basic_block loop1_exit_bb = first_loop->exit_edges[0]->dest;
 893   basic_block loop2_entry_bb = second_loop->pre_header;
 894   basic_block loop1_entry_bb = loop_preheader_edge (first_loop)->src;
 895
 896   /* A guard that controls whether the second_loop is to be executed or skipped
 897      is placed in first_loop->exit.  first_loopt->exit therefore has two
 898      successors - one is the preheader of second_loop, and the other is a bb
 899      after second_loop.
 900    */
 901   gcc_assert (EDGE_COUNT (loop1_exit_bb->succs) == 2);
 902
 903
 904   /* 1. Verify that one of the successors of first_loopt->exit is the preheader
 905         of second_loop.  */
 906
 907   /* The preheader of new_loop is expected to have two predessors:
 908      first_loop->exit and the block that precedes first_loop.  */
 909
 910   gcc_assert (EDGE_COUNT (loop2_entry_bb->preds) == 2
 911               && ((EDGE_PRED (loop2_entry_bb, 0)->src == loop1_exit_bb
 912                    && EDGE_PRED (loop2_entry_bb, 1)->src == loop1_entry_bb)
 913                || (EDGE_PRED (loop2_entry_bb, 1)->src ==  loop1_exit_bb
 914                    && EDGE_PRED (loop2_entry_bb, 0)->src == loop1_entry_bb)));
 915
 916   /* Verify that the other successor of first_loopt->exit is after the
 917      second_loop.  */
 918   /* TODO */
 919 }
 920 #endif
 921
 922 /* Function slpeel_tree_peel_loop_to_edge.
 923
 924    Peel the first (last) iterations of LOOP into a new prolog (epilog) loop
 925    that is placed on the entry (exit) edge E of LOOP. After this transformation
 926    we have two loops one after the other - first-loop iterates FIRST_NITERS
 927    times, and second-loop iterates the remainder NITERS - FIRST_NITERS times.
 928
 929    Input:
 930    - LOOP: the loop to be peeled.
 931    - E: the exit or entry edge of LOOP.
 932         If it is the entry edge, we peel the first iterations of LOOP. In this
 933         case first-loop is LOOP, and second-loop is the newly created loop.
 934         If it is the exit edge, we peel the last iterations of LOOP. In this
 935         case, first-loop is the newly created loop, and second-loop is LOOP.
 936    - NITERS: the number of iterations that LOOP iterates.
 937    - FIRST_NITERS: the number of iterations that the first-loop should iterate.
 938    - UPDATE_FIRST_LOOP_COUNT:  specified whether this function is responsible
 939         for updating the loop bound of the first-loop to FIRST_NITERS.  If it
 940         is false, the caller of this function may want to take care of this
 941         (this can be useful if we don't want new stmts added to first-loop).
 942
 943    Output:
 944    The function returns a pointer to the new loop-copy, or NULL if it failed
 945    to perform the transformation.
 946
 947    The function generates two if-then-else guards: one before the first loop,
 948    and the other before the second loop:
 949    The first guard is:
 950      if (FIRST_NITERS == 0) then skip the first loop,
 951      and go directly to the second loop.
 952    The second guard is:
 953      if (FIRST_NITERS == NITERS) then skip the second loop.
 954
 955    FORNOW only simple loops are supported (see slpeel_can_duplicate_loop_p).
 956    FORNOW the resulting code will not be in loop-closed-ssa form.
 957 */
 958
 959 struct loop*
 960 slpeel_tree_peel_loop_to_edge (struct loop *loop, struct loops *loops,
 961                                edge e, tree first_niters,
 962                                tree niters, bool update_first_loop_count)
 963 {
 964   struct loop *new_loop = NULL, *first_loop, *second_loop;
 965   edge skip_e;
 966   tree pre_condition;
 967   bitmap definitions;
 968   basic_block bb_before_second_loop, bb_after_second_loop;
 969   basic_block bb_before_first_loop;
 970   basic_block bb_between_loops;
 971   edge exit_e = loop->exit_edges [0];
 972
 973   if (!slpeel_can_duplicate_loop_p (loop, e))
 974     return NULL;
 975
 976   /* We have to initialize cfg_hooks. Then, when calling
 977    cfg_hooks->split_edge, the function tree_split_edge
 978    is actually called and, when calling cfg_hooks->duplicate_block,
 979    the function tree_duplicate_bb is called.  */
 980   tree_register_cfg_hooks ();
 981
 982
 983   /* 1. Generate a copy of LOOP and put it on E (E is the entry/exit of LOOP).
 984         Resulting CFG would be:
 985
 986         first_loop:
 987         do {
 988         } while ...
 989
 990         second_loop:
 991         do {
 992         } while ...
 993
 994         orig_exit_bb:
 995    */
 996
 997   if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, loops, e)))
 998     {
 999       if (vect_debug_stats (loop) || vect_debug_details (loop))
1000         fprintf (dump_file, "tree_duplicate_loop_to_edge_cfg failed.\n");
1001       return NULL;
1002     }
1003
1004   if (e == exit_e)
1005     {
1006       /* NEW_LOOP was placed after LOOP.  */
1007       first_loop = loop;
1008       second_loop = new_loop;
1009     }
1010   else
1011     {
1012       /* NEW_LOOP was placed before LOOP.  */
1013       first_loop = new_loop;
1014       second_loop = loop;
1015     }
1016
1017   definitions = marked_ssa_names ();
1018   allocate_new_names (definitions);
1019   slpeel_update_phis_for_duplicate_loop (loop, new_loop, e == exit_e);
1020   rename_variables_in_loop (new_loop);
1021
1022
1023   /* 2. Add the guard that controls whether the first loop is executed.
1024         Resulting CFG would be:
1025
1026         bb_before_first_loop:
1027         if (FIRST_NITERS == 0) GOTO bb_before_second_loop
1028                                GOTO first-loop
1029
1030         first_loop:
1031         do {
1032         } while ...
1033
1034         bb_before_second_loop:
1035
1036         second_loop:
1037         do {
1038         } while ...
1039
1040         orig_exit_bb:
1041    */
1042
1043   bb_before_first_loop = split_edge (loop_preheader_edge (first_loop));
1044   add_bb_to_loop (bb_before_first_loop, first_loop->outer);
1045   bb_before_second_loop = split_edge (first_loop->exit_edges[0]);
1046   add_bb_to_loop (bb_before_second_loop, first_loop->outer);
1047   flow_loop_scan (first_loop, LOOP_ALL);
1048   flow_loop_scan (second_loop, LOOP_ALL);
1049
1050   pre_condition =
1051         build2 (LE_EXPR, boolean_type_node, first_niters, integer_zero_node);
1052   skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
1053                                   bb_before_second_loop, bb_before_first_loop);
1054   slpeel_update_phi_nodes_for_guard (skip_e, first_loop, true /* entry-phis */,
1055                                      first_loop == new_loop);
1056
1057
1058   /* 3. Add the guard that controls whether the second loop is executed.
1059         Resulting CFG would be:
1060
1061         bb_before_first_loop:
1062         if (FIRST_NITERS == 0) GOTO bb_before_second_loop (skip first loop)
1063                                GOTO first-loop
1064
1065         first_loop:
1066         do {
1067         } while ...
1068
1069         bb_between_loops:
1070         if (FIRST_NITERS == NITERS) GOTO bb_after_second_loop (skip second loop)
1071                                     GOTO bb_before_second_loop
1072
1073         bb_before_second_loop:
1074
1075         second_loop:
1076         do {
1077         } while ...
1078
1079         bb_after_second_loop:
1080
1081         orig_exit_bb:
1082    */
1083
1084   bb_between_loops = split_edge (first_loop->exit_edges[0]);
1085   add_bb_to_loop (bb_between_loops, first_loop->outer);
1086   bb_after_second_loop = split_edge (second_loop->exit_edges[0]);
1087   add_bb_to_loop (bb_after_second_loop, second_loop->outer);
1088   flow_loop_scan (first_loop, LOOP_ALL);
1089   flow_loop_scan (second_loop, LOOP_ALL);
1090
1091   pre_condition = build2 (EQ_EXPR, boolean_type_node, first_niters, niters);
1092   skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition,
1093                                   bb_after_second_loop, bb_before_first_loop);
1094   slpeel_update_phi_nodes_for_guard (skip_e, second_loop, false /* exit-phis */,
1095                                      second_loop == new_loop);
1096
1097   /* Flow loop scan does not update loop->single_exit field.  */
1098   first_loop->single_exit = first_loop->exit_edges[0];
1099   second_loop->single_exit = second_loop->exit_edges[0];
1100
1101   /* 4. Make first-loop iterate FIRST_NITERS times, if requested.
1102    */
1103   if (update_first_loop_count)
1104     slpeel_make_loop_iterate_ntimes (first_loop, first_niters);
1105
1106   free_new_names (definitions);
1107   BITMAP_XFREE (definitions);
1108   unmark_all_for_rewrite ();
1109
1110   return new_loop;
1111 }
1112
1113 \f
1114 /* Here the proper Vectorizer starts.  */
1115
1116 /*************************************************************************
1117   Vectorization Utilities.
1118  *************************************************************************/
1119
1120 /* Function new_stmt_vec_info.
1121
1122    Create and initialize a new stmt_vec_info struct for STMT.  */
1123
1124 stmt_vec_info
1125 new_stmt_vec_info (tree stmt, struct loop *loop)
1126 {
1127   stmt_vec_info res;
1128   res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
1129
1130   STMT_VINFO_TYPE (res) = undef_vec_info_type;
1131   STMT_VINFO_STMT (res) = stmt;
1132   STMT_VINFO_LOOP (res) = loop;
1133   STMT_VINFO_RELEVANT_P (res) = 0;
1134   STMT_VINFO_VECTYPE (res) = NULL;
1135   STMT_VINFO_VEC_STMT (res) = NULL;
1136   STMT_VINFO_DATA_REF (res) = NULL;
1137   STMT_VINFO_MEMTAG (res) = NULL;
1138   STMT_VINFO_VECT_DR_BASE (res) = NULL;
1139   STMT_VINFO_VECT_INIT_OFFSET (res) = NULL_TREE;
1140   STMT_VINFO_VECT_STEP (res) = NULL_TREE;
1141   STMT_VINFO_VECT_BASE_ALIGNED_P (res) = false;
1142   STMT_VINFO_VECT_MISALIGNMENT (res) = NULL_TREE;
1143
1144   return res;
1145 }
1146
1147
1148 /* Function new_loop_vec_info.
1149
1150    Create and initialize a new loop_vec_info struct for LOOP, as well as
1151    stmt_vec_info structs for all the stmts in LOOP.  */
1152
1153 loop_vec_info
1154 new_loop_vec_info (struct loop *loop)
1155 {
1156   loop_vec_info res;
1157   basic_block *bbs;
1158   block_stmt_iterator si;
1159   unsigned int i;
1160
1161   res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));
1162
1163   bbs = get_loop_body (loop);
1164
1165   /* Create stmt_info for all stmts in the loop.  */
1166   for (i = 0; i < loop->num_nodes; i++)
1167     {
1168       basic_block bb = bbs[i];
1169       for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1170         {
1171           tree stmt = bsi_stmt (si);
1172           stmt_ann_t ann;
1173
1174           get_stmt_operands (stmt);
1175           ann = stmt_ann (stmt);
1176           set_stmt_info (ann, new_stmt_vec_info (stmt, loop));
1177         }
1178     }
1179
1180   LOOP_VINFO_LOOP (res) = loop;
1181   LOOP_VINFO_BBS (res) = bbs;
1182   LOOP_VINFO_EXIT_COND (res) = NULL;
1183   LOOP_VINFO_NITERS (res) = NULL;
1184   LOOP_VINFO_VECTORIZABLE_P (res) = 0;
1185   LOOP_DO_PEELING_FOR_ALIGNMENT (res) = false;
1186   LOOP_VINFO_VECT_FACTOR (res) = 0;
1187   VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_WRITES (res), 20,
1188                            "loop_write_datarefs");
1189   VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_READS (res), 20,
1190                            "loop_read_datarefs");
1191   LOOP_VINFO_UNALIGNED_DR (res) = NULL;
1192
1193   return res;
1194 }
1195
1196
1197 /* Function destroy_loop_vec_info.
1198
1199    Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the
1200    stmts in the loop.  */
1201
1202 void
1203 destroy_loop_vec_info (loop_vec_info loop_vinfo)
1204 {
1205   struct loop *loop;
1206   basic_block *bbs;
1207   int nbbs;
1208   block_stmt_iterator si;
1209   int j;
1210
1211   if (!loop_vinfo)
1212     return;
1213
1214   loop = LOOP_VINFO_LOOP (loop_vinfo);
1215
1216   bbs = LOOP_VINFO_BBS (loop_vinfo);
1217   nbbs = loop->num_nodes;
1218
1219   for (j = 0; j < nbbs; j++)
1220     {
1221       basic_block bb = bbs[j];
1222       for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1223         {
1224           tree stmt = bsi_stmt (si);
1225           stmt_ann_t ann = stmt_ann (stmt);
1226           stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1227           free (stmt_info);
1228           set_stmt_info (ann, NULL);
1229         }
1230     }
1231
1232   free (LOOP_VINFO_BBS (loop_vinfo));
1233   varray_clear (LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
1234   varray_clear (LOOP_VINFO_DATAREF_READS (loop_vinfo));
1235
1236   free (loop_vinfo);
1237 }
1238
1239
1240 /* Function debug_loop_stats.
1241
1242    For vectorization statistics dumps.  */
1243
1244 static bool
1245 vect_debug_stats (struct loop *loop)
1246 {
1247   basic_block bb;
1248   block_stmt_iterator si;
1249   tree node = NULL_TREE;
1250
1251   if (!dump_file || !(dump_flags & TDF_STATS))
1252     return false;
1253
1254   if (!loop)
1255     {
1256       fprintf (dump_file, "\n");
1257       return true;
1258     }
1259
1260   if (!loop->header)
1261     return false;
1262
1263   bb = loop->header;
1264
1265   for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1266     {
1267       node = bsi_stmt (si);
1268       if (node && EXPR_P (node) && EXPR_LOCUS (node))
1269         break;
1270     }
1271
1272   if (node && EXPR_P (node) && EXPR_LOCUS (node)
1273       && EXPR_FILENAME (node) && EXPR_LINENO (node))
1274     {
1275       fprintf (dump_file, "\nloop at %s:%d: ",
1276         EXPR_FILENAME (node), EXPR_LINENO (node));
1277       return true;
1278     }
1279
1280   return false;
1281 }
1282
1283
1284 /* Function debug_loop_details.
1285
1286    For vectorization debug dumps.  */
1287
1288 static bool
1289 vect_debug_details (struct loop *loop)
1290 {
1291    basic_block bb;
1292    block_stmt_iterator si;
1293    tree node = NULL_TREE;
1294
1295   if (!dump_file || !(dump_flags & TDF_DETAILS))
1296     return false;
1297
1298   if (!loop)
1299     {
1300       fprintf (dump_file, "\n");
1301       return true;
1302     }
1303
1304   if (!loop->header)
1305     return false;
1306
1307   bb = loop->header;
1308
1309   for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1310     {
1311       node = bsi_stmt (si);
1312       if (node && EXPR_P (node) && EXPR_LOCUS (node))
1313         break;
1314     }
1315
1316   if (node && EXPR_P (node) && EXPR_LOCUS (node)
1317       && EXPR_FILENAME (node) && EXPR_LINENO (node))
1318     {
1319       fprintf (dump_file, "\nloop at %s:%d: ",
1320                EXPR_FILENAME (node), EXPR_LINENO (node));
1321       return true;
1322     }
1323
1324   return false;
1325 }
1326
1327
1328 /* Function vect_get_ptr_offset
1329
1330    Compute the OFFSET modulo vector-type alignment of pointer REF in bits.  */
1331
1332 static tree
1333 vect_get_ptr_offset (tree ref ATTRIBUTE_UNUSED,
1334                      tree vectype ATTRIBUTE_UNUSED,
1335                      tree *offset ATTRIBUTE_UNUSED)
1336 {
1337   /* TODO: Use alignment information.  */
1338   return NULL_TREE;
1339 }
1340
1341
1342 /* Function vect_analyze_offset_expr
1343
1344    Given an offset expression EXPR received from get_inner_reference, analyze
1345    it and create an expression for INITIAL_OFFSET by substituting the variables
1346    of EXPR with initial_condition of the corresponding access_fn in the loop.
1347    E.g.,
1348       for i
1349          for (j = 3; j < N; j++)
1350             a[j].b[i][j] = 0;
1351
1352    For a[j].b[i][j], EXPR will be 'i * C_i + j * C_j + C'. 'i' cannot be
1353    subsituted, since its access_fn in the inner loop is i. 'j' will be
1354    substituted with 3. An INITIAL_OFFSET will be 'i * C_i + C`', where
1355    C` =  3 * C_j + C.
1356
1357    Compute MISALIGN (the misalignment of the data reference initial access from
1358    its base) if possible. Misalignment can be calculated only if all the
1359    variables can be substitued with constants, or if a variable is multiplied
1360    by a multiple of VECTYPE_ALIGNMENT. In the above example, since 'i' cannot
1361    be substituted, MISALIGN will be NULL_TREE in case that C_i is not a multiple
1362    of VECTYPE_ALIGNMENT, and C` otherwise. (We perform MISALIGN modulo
1363    VECTYPE_ALIGNMENT computation in the caller of this function).
1364
1365    STEP is an evolution of the data reference in this loop in bytes.
1366    In the above example, STEP is C_j.
1367
1368    Return FALSE, if the analysis fails, e.g., there is no access_fn for a
1369    variable. In this case, all the outputs (INITIAL_OFFSET, MISALIGN and STEP)
1370    are NULL_TREEs. Otherwise, return TRUE.
1371
1372 */
1373
1374 static bool
1375 vect_analyze_offset_expr (tree expr,
1376                           struct loop *loop,
1377                           tree vectype_alignment,
1378                           tree *initial_offset,
1379                           tree *misalign,
1380                           tree *step)
1381 {
1382   tree oprnd0;
1383   tree oprnd1;
1384   tree left_offset = size_zero_node;
1385   tree right_offset = size_zero_node;
1386   tree left_misalign = size_zero_node;
1387   tree right_misalign = size_zero_node;
1388   tree left_step = size_zero_node;
1389   tree right_step = size_zero_node;
1390   enum tree_code code;
1391   tree init, evolution, def_stmt;
1392
1393   STRIP_NOPS (expr);
1394
1395   *step = NULL_TREE;
1396   *misalign = NULL_TREE;
1397   *initial_offset = NULL_TREE;
1398
1399   /* Stop conditions:
1400      1. Constant.  */
1401   if (TREE_CONSTANT (expr))
1402     {
1403       *initial_offset = fold_convert (sizetype, expr);
1404       *misalign = fold_convert (sizetype, expr);
1405       *step = size_zero_node;
1406       return true;
1407     }
1408
1409   /* 2. Variable. Try to substitute with initial_condition of the corresponding
1410      access_fn in the current loop.  */
1411   if (SSA_VAR_P (expr))
1412     {
1413       tree access_fn = analyze_scalar_evolution (loop, expr);
1414
1415       if (access_fn == chrec_dont_know)
1416         /* No access_fn.  */
1417         return false;
1418
1419       init = initial_condition_in_loop_num (access_fn, loop->num);
1420       if (init == expr)
1421         {
1422           def_stmt = SSA_NAME_DEF_STMT (init);
1423           if (def_stmt
1424               && !IS_EMPTY_STMT (def_stmt)
1425               && flow_bb_inside_loop_p (loop, bb_for_stmt (def_stmt)))
1426             /* Not enough information: may be not loop invariant.
1427                E.g., for a[b[i]], we get a[D], where D=b[i]. EXPR is D, its
1428                initial_condition is D, but it depends on i - loop's induction
1429                variable.  */
1430             return false;
1431         }
1432
1433       evolution = evolution_part_in_loop_num (access_fn, loop->num);
1434       if (evolution && TREE_CODE (evolution) != INTEGER_CST)
1435         /* Evolution is not constant.  */
1436         return false;
1437
1438       if (TREE_CONSTANT (init))
1439         *misalign = fold_convert (sizetype, init);
1440       else
1441         /* Not constant, misalignment cannot be calculated.  */
1442         *misalign = NULL_TREE;
1443
1444       *initial_offset = fold_convert (sizetype, init);
1445
1446       *step = evolution ? fold_convert (sizetype, evolution) : size_zero_node;
1447       return true;
1448     }
1449
1450   /* Recursive computation.  */
1451   oprnd0 = TREE_OPERAND (expr, 0);
1452   oprnd1 = TREE_OPERAND (expr, 1);
1453
1454   if (!vect_analyze_offset_expr (oprnd0, loop, vectype_alignment, &left_offset,
1455                                 &left_misalign, &left_step)
1456       || !vect_analyze_offset_expr (oprnd1, loop, vectype_alignment,
1457                                     &right_offset, &right_misalign, &right_step))
1458       return false;
1459
1460   /* The type of the operation: plus, minus or mult.  */
1461   code = TREE_CODE (expr);
1462   switch (code)
1463     {
1464     case MULT_EXPR:
1465       if (!TREE_CONSTANT (right_offset))
1466         /* RIGHT_OFFSET can be not constant. For example, for arrays of variable
1467            sized types.
1468            FORNOW: We don't support such cases.  */
1469         return false;
1470
1471       /* Misalignment computation.  */
1472       if (SSA_VAR_P (left_offset))
1473         {
1474           /* If the left side contains variable that cannot be substituted with
1475              constant, we check if the right side is a multiple of ALIGNMENT.  */
1476           if (integer_zerop (size_binop (TRUNC_MOD_EXPR, right_offset,
1477                                          vectype_alignment)))
1478             *misalign = size_zero_node;
1479           else
1480             /* If the remainder is not zero or the right side isn't constant, we
1481                can't compute  misalignment.  */
1482             *misalign = NULL_TREE;
1483         }
1484       else
1485         {
1486           /* The left operand was successfully substituted with constant.  */
1487           if (left_misalign)
1488             /* In case of EXPR '(i * C1 + j) * C2', LEFT_MISALIGN is
1489                NULL_TREE.  */
1490             *misalign  = size_binop (code, left_misalign, right_misalign);
1491           else
1492             *misalign = NULL_TREE;
1493         }
1494
1495       /* Step calculation.  */
1496       /* Multiply the step by the right operand.  */
1497       *step  = size_binop (MULT_EXPR, left_step, right_offset);
1498       break;
1499
1500     case PLUS_EXPR:
1501     case MINUS_EXPR:
1502       /* Combine the recursive calculations for step and misalignment.  */
1503       *step = size_binop (code, left_step, right_step);
1504
1505       if (left_misalign && right_misalign)
1506         *misalign  = size_binop (code, left_misalign, right_misalign);
1507       else
1508         *misalign = NULL_TREE;
1509
1510       break;
1511
1512     default:
1513       gcc_unreachable ();
1514     }
1515
1516   /* Compute offset.  */
1517   *initial_offset = fold_convert (sizetype,
1518                                   fold (build2 (code, TREE_TYPE (left_offset),
1519                                                 left_offset,
1520                                                 right_offset)));
1521   return true;
1522 }
1523
1524
1525 /* Function vect_get_base_and_offset
1526
1527    Return the BASE of the data reference EXPR.
1528    If VECTYPE is given, also compute the INITIAL_OFFSET from BASE, MISALIGN and
1529    STEP.
1530    E.g., for EXPR a.b[i] + 4B, BASE is a, and OFFSET is the overall offset
1531    'a.b[i] + 4B' from a (can be an expression), MISALIGN is an OFFSET
1532    instantiated with initial_conditions of access_functions of variables,
1533    modulo alignment, and STEP is the evolution of the DR_REF in this loop.
1534
1535    Function get_inner_reference is used for the above in case of ARRAY_REF and
1536    COMPONENT_REF.
1537
1538    Input:
1539    EXPR - the memory reference that is being analyzed
1540    DR - the data_reference struct of the _original_ memory reference
1541         (Note: DR_REF (DR) is not necessarily EXPR)
1542    VECTYPE - the type that defines the alignment (i.e, we compute
1543              alignment relative to TYPE_ALIGN(VECTYPE))
1544
1545    Output:
1546    BASE (returned value) - the base of the data reference EXPR.
1547                            E.g, if EXPR is a.b[k].c[i][j] the returned
1548                            base is a.
1549    INITIAL_OFFSET - initial offset of EXPR from BASE (an expression)
1550    MISALIGN - offset of EXPR from BASE in bytes (a constant) or NULL_TREE if the
1551               computation is impossible
1552    STEP - evolution of the DR_REF in the loop
1553    BASE_ALIGNED_P - indicates if BASE is aligned
1554
1555    If something unexpected is encountered (an unsupported form of data-ref),
1556    then NULL_TREE is returned.  */
1557
1558 static tree
1559 vect_get_base_and_offset (struct data_reference *dr,
1560                           tree expr,
1561                           tree vectype,
1562                           loop_vec_info loop_vinfo,
1563                           tree *initial_offset,
1564                           tree *misalign,
1565                           tree *step,
1566                           bool *base_aligned_p)
1567 {
1568   tree this_offset = size_zero_node;
1569   tree this_misalign = size_zero_node;
1570   tree this_step = size_zero_node;
1571   tree base = NULL_TREE;
1572   tree next_ref;
1573   tree oprnd0, oprnd1;
1574   enum tree_code code = TREE_CODE (expr);
1575   HOST_WIDE_INT pbitsize;
1576   HOST_WIDE_INT pbitpos;
1577   tree poffset;
1578   enum machine_mode pmode;
1579   int punsignedp, pvolatilep;
1580   tree bit_pos_in_bytes;
1581   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1582
1583   *base_aligned_p = false;
1584
1585   switch (code)
1586     {
1587     /* These cases end the recursion:  */
1588     case VAR_DECL:
1589     case PARM_DECL:
1590       *initial_offset = size_zero_node;
1591       *step = size_zero_node;
1592       *misalign = size_zero_node;
1593       if (DECL_ALIGN (expr) >= TYPE_ALIGN (vectype))
1594         *base_aligned_p = true;
1595       return expr;
1596
1597     case SSA_NAME:
1598       if (TREE_CODE (TREE_TYPE (expr)) != POINTER_TYPE)
1599         return NULL_TREE;
1600
1601       if (TYPE_ALIGN (TREE_TYPE (TREE_TYPE (expr))) < TYPE_ALIGN (vectype))
1602         {
1603           base = vect_get_ptr_offset (expr, vectype, misalign);
1604           if (base)
1605             *base_aligned_p = true;
1606         }
1607       else
1608         {
1609           *base_aligned_p = true;
1610           *misalign = size_zero_node;
1611         }
1612       *initial_offset = size_zero_node;
1613       *step = size_zero_node;
1614       return expr;
1615
1616     case INTEGER_CST:
1617       *initial_offset = fold_convert (sizetype, expr);
1618       *misalign = fold_convert (sizetype, expr);
1619       *step = size_zero_node;
1620       return expr;
1621
1622     /* These cases continue the recursion:  */
1623     case ADDR_EXPR:
1624       oprnd0 = TREE_OPERAND (expr, 0);
1625       next_ref = oprnd0;
1626       break;
1627
1628     case INDIRECT_REF:
1629       oprnd0 = TREE_OPERAND (expr, 0);
1630       next_ref = oprnd0;
1631       break;
1632
1633     case PLUS_EXPR:
1634     case MINUS_EXPR:
1635       oprnd0 = TREE_OPERAND (expr, 0);
1636       oprnd1 = TREE_OPERAND (expr, 1);
1637
1638       /* In case we have a PLUS_EXPR of the form
1639          (oprnd0 + oprnd1), we assume that only oprnd0 determines the base.
1640          This is verified in vect_get_memtag_and_dr.  */
1641       base = vect_get_base_and_offset (dr, oprnd1, vectype, loop_vinfo,
1642                                        &this_offset, &this_misalign,
1643                                        &this_step, base_aligned_p);
1644       /* Offset was already computed in vect_analyze_pointer_ref_access.  */
1645       this_offset = size_zero_node;
1646
1647       if (!base)
1648         this_misalign = NULL_TREE;
1649
1650       next_ref = oprnd0;
1651       break;
1652
1653     default:
1654       if (!handled_component_p (expr))
1655         /* Unsupported expression.  */
1656         return NULL_TREE;
1657
1658       /* Find the base and the offset from it.  */
1659       next_ref = get_inner_reference (expr, &pbitsize, &pbitpos, &poffset,
1660                                       &pmode, &punsignedp, &pvolatilep);
1661       if (!next_ref)
1662         return NULL_TREE;
1663
1664       if (poffset
1665           && !vect_analyze_offset_expr (poffset, loop, TYPE_SIZE_UNIT (vectype),
1666                                         &this_offset, &this_misalign,
1667                                         &this_step))
1668         {
1669           /* Failed to compute offset or step.  */
1670           *step = NULL_TREE;
1671           *initial_offset = NULL_TREE;
1672           *misalign = NULL_TREE;
1673           return NULL_TREE;
1674         }
1675
1676       /* Add bit position to OFFSET and MISALIGN.  */
1677
1678       bit_pos_in_bytes = size_int (pbitpos/BITS_PER_UNIT);
1679       /* Check that there is no remainder in bits.  */
1680       if (pbitpos%BITS_PER_UNIT)
1681         {
1682           if (vect_debug_details (NULL))
1683             fprintf (dump_file, "bit offset alignment.");
1684           return NULL_TREE;
1685         }
1686       this_offset = fold (size_binop (PLUS_EXPR, bit_pos_in_bytes,
1687                                       fold_convert (sizetype, this_offset)));
1688       if (this_misalign)
1689         this_misalign = size_binop (PLUS_EXPR, this_misalign, bit_pos_in_bytes);
1690
1691       /* Continue the recursion to refine the base (get_inner_reference returns
1692          &a for &a[i], and not a).  */
1693       break;
1694     }
1695
1696   base = vect_get_base_and_offset (dr, next_ref, vectype, loop_vinfo,
1697                                    initial_offset, misalign, step,
1698                                    base_aligned_p);
1699   if (base)
1700     {
1701       /* Combine the results.  */
1702       if (this_misalign && *misalign)
1703         *misalign = size_binop (PLUS_EXPR, *misalign, this_misalign);
1704       else
1705         *misalign = NULL_TREE;
1706
1707       *step = size_binop (PLUS_EXPR, *step, this_step);
1708
1709       *initial_offset = fold (build2 (PLUS_EXPR, TREE_TYPE (*initial_offset),
1710                                       *initial_offset, this_offset));
1711
1712       if (vect_debug_details (NULL))
1713         {
1714           print_generic_expr (dump_file, expr, TDF_SLIM);
1715           fprintf (dump_file, "\n --> total offset for ref: ");
1716           print_generic_expr (dump_file, *initial_offset, TDF_SLIM);
1717           fprintf (dump_file, "\n --> total misalign for ref: ");
1718           print_generic_expr (dump_file, *misalign, TDF_SLIM);
1719           fprintf (dump_file, "\n --> total step for ref: ");
1720           print_generic_expr (dump_file, *step, TDF_SLIM);
1721         }
1722     }
1723   return base;
1724 }
1725
1726
1727 /* Function vect_force_dr_alignment_p.
1728
1729    Returns whether the alignment of a DECL can be forced to be aligned
1730    on ALIGNMENT bit boundary.  */
1731
1732 static bool
1733 vect_can_force_dr_alignment_p (tree decl, unsigned int alignment)
1734 {
1735   if (TREE_CODE (decl) != VAR_DECL)
1736     return false;
1737
1738   if (DECL_EXTERNAL (decl))
1739     return false;
1740
1741   if (TREE_ASM_WRITTEN (decl))
1742     return false;
1743
1744   if (TREE_STATIC (decl))
1745     return (alignment <= MAX_OFILE_ALIGNMENT);
1746   else
1747     /* This is not 100% correct.  The absolute correct stack alignment
1748        is STACK_BOUNDARY.  We're supposed to hope, but not assume, that
1749        PREFERRED_STACK_BOUNDARY is honored by all translation units.
1750        However, until someone implements forced stack alignment, SSE
1751        isn't really usable without this.  */
1752     return (alignment <= PREFERRED_STACK_BOUNDARY);
1753 }
1754
1755
1756 /* Function vect_get_new_vect_var.
1757
1758    Returns a name for a new variable. The current naming scheme appends the
1759    prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
1760    the name of vectorizer generated variables, and appends that to NAME if
1761    provided.  */
1762
1763 static tree
1764 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
1765 {
1766   const char *prefix;
1767   int prefix_len;
1768   tree new_vect_var;
1769
1770   if (var_kind == vect_simple_var)
1771     prefix = "vect_";
1772   else
1773     prefix = "vect_p";
1774
1775   prefix_len = strlen (prefix);
1776
1777   if (name)
1778     new_vect_var = create_tmp_var (type, concat (prefix, name, NULL));
1779   else
1780     new_vect_var = create_tmp_var (type, prefix);
1781
1782   return new_vect_var;
1783 }
1784
1785
1786 /* Function vect_create_index_for_vector_ref.
1787
1788    Create (and return) an index variable, along with it's update chain in the
1789    loop. This variable will be used to access a memory location in a vector
1790    operation.
1791
1792    Input:
1793    LOOP: The loop being vectorized.
1794    BSI: The block_stmt_iterator where STMT is. Any new stmts created by this
1795         function can be added here, or in the loop pre-header.
1796
1797    Output:
1798    Return an index that will be used to index a vector array.  It is expected
1799    that a pointer to the first vector will be used as the base address for the
1800    indexed reference.
1801
1802    FORNOW: we are not trying to be efficient, just creating a new index each
1803    time from scratch.  At this time all vector references could use the same
1804    index.
1805
1806    TODO: create only one index to be used by all vector references.  Record
1807    the index in the LOOP_VINFO the first time this procedure is called and
1808    return it on subsequent calls.  The increment of this index must be placed
1809    just before the conditional expression that ends the single block loop.  */
1810
1811 static tree
1812 vect_create_index_for_vector_ref (struct loop *loop, block_stmt_iterator *bsi)
1813 {
1814   tree init, step;
1815   tree indx_before_incr, indx_after_incr;
1816
1817   /* It is assumed that the base pointer used for vectorized access contains
1818      the address of the first vector.  Therefore the index used for vectorized
1819      access must be initialized to zero and incremented by 1.  */
1820
1821   init = integer_zero_node;
1822   step = integer_one_node;
1823
1824   /* Assuming that bsi_insert is used with BSI_NEW_STMT  */
1825   create_iv (init, step, NULL_TREE, loop, bsi, false,
1826         &indx_before_incr, &indx_after_incr);
1827
1828   return indx_before_incr;
1829 }
1830
1831
1832 /* Function vect_create_addr_base_for_vector_ref.
1833
1834    Create an expression that computes the address of the first memory location
1835    that will be accessed for a data reference.
1836
1837    Input:
1838    STMT: The statement containing the data reference.
1839    NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
1840    OFFSET: Optional. If supplied, it is be added to the initial address.
1841
1842    Output:
1843    1. Return an SSA_NAME whose value is the address of the memory location of
1844       the first vector of the data reference.
1845    2. If new_stmt_list is not NULL_TREE after return then the caller must insert
1846       these statement(s) which define the returned SSA_NAME.
1847
1848    FORNOW: We are only handling array accesses with step 1.  */
1849
1850 static tree
1851 vect_create_addr_base_for_vector_ref (tree stmt,
1852                                       tree *new_stmt_list,
1853                                       tree offset)
1854 {
1855   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1856   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1857   tree data_ref_base = unshare_expr (STMT_VINFO_VECT_DR_BASE (stmt_info));
1858   tree base_name = unshare_expr (DR_BASE_NAME (dr));
1859   tree ref = DR_REF (dr);
1860   tree scalar_type = TREE_TYPE (ref);
1861   tree scalar_ptr_type = build_pointer_type (scalar_type);
1862   tree vec_stmt;
1863   tree new_temp;
1864   tree addr_base, addr_expr;
1865   tree dest, new_stmt;
1866   tree base_offset = unshare_expr (STMT_VINFO_VECT_INIT_OFFSET (stmt_info));
1867
1868   if (TREE_CODE (TREE_TYPE (data_ref_base)) != POINTER_TYPE)
1869     /* After the analysis stage, we expect to get here only with RECORD_TYPE
1870        and ARRAY_TYPE. */
1871     /* Add '&' to ref_base.  */
1872     data_ref_base = build_fold_addr_expr (data_ref_base);
1873   else
1874     {
1875       /* Create '(scalar_type*) base' for pointers.  */
1876       tree dest, new_stmt, new_temp, vec_stmt, tmp_base;
1877       tree scalar_array_type = build_array_type (scalar_type, 0);
1878       tree scalar_array_ptr_type = build_pointer_type (scalar_array_type);
1879       tree array_ptr = create_tmp_var (scalar_array_ptr_type, "array_ptr");
1880       add_referenced_tmp_var (array_ptr);
1881
1882       dest = create_tmp_var (TREE_TYPE (data_ref_base), "dataref");
1883       add_referenced_tmp_var (dest);
1884       tmp_base = force_gimple_operand (data_ref_base, &new_stmt, false, dest);
1885       append_to_statement_list_force (new_stmt,  new_stmt_list);
1886
1887       vec_stmt = fold_convert (scalar_array_ptr_type, tmp_base);
1888       vec_stmt = build2 (MODIFY_EXPR, void_type_node, array_ptr, vec_stmt);
1889       new_temp = make_ssa_name (array_ptr, vec_stmt);
1890       TREE_OPERAND (vec_stmt, 0) = new_temp;
1891       append_to_statement_list_force (vec_stmt,  new_stmt_list);
1892       data_ref_base = new_temp;
1893     }
1894
1895   /* Create base_offset */
1896   dest = create_tmp_var (TREE_TYPE (base_offset), "base_off");
1897   add_referenced_tmp_var (dest);
1898   base_offset = force_gimple_operand (base_offset, &new_stmt, false, dest);
1899   append_to_statement_list_force (new_stmt, new_stmt_list);
1900
1901   if (offset)
1902     {
1903       tree tmp = create_tmp_var (TREE_TYPE (base_offset), "offset");
1904       add_referenced_tmp_var (tmp);
1905       offset = fold (build2 (MULT_EXPR, TREE_TYPE (offset), offset,
1906                              STMT_VINFO_VECT_STEP (stmt_info)));
1907       base_offset = fold (build2 (PLUS_EXPR, TREE_TYPE (base_offset), base_offset,
1908                                   offset));
1909       base_offset = force_gimple_operand (base_offset, &new_stmt, false, tmp);
1910       append_to_statement_list_force (new_stmt, new_stmt_list);
1911     }
1912
1913   /* base + base_offset */
1914   addr_base = fold (build2 (PLUS_EXPR, TREE_TYPE (data_ref_base), data_ref_base,
1915                             base_offset));
1916
1917   /* addr_expr = addr_base */
1918   addr_expr = vect_get_new_vect_var (scalar_ptr_type, vect_pointer_var,
1919                                      get_name (base_name));
1920   add_referenced_tmp_var (addr_expr);
1921   vec_stmt = build2 (MODIFY_EXPR, void_type_node, addr_expr, addr_base);
1922   new_temp = make_ssa_name (addr_expr, vec_stmt);
1923   TREE_OPERAND (vec_stmt, 0) = new_temp;
1924   append_to_statement_list_force (vec_stmt, new_stmt_list);
1925
1926   if (vect_debug_details (NULL))
1927     {
1928       fprintf (dump_file, "created ");
1929       print_generic_expr (dump_file, vec_stmt, TDF_SLIM);
1930       fprintf (dump_file, "\n");
1931     }
1932   return new_temp;
1933 }
1934
1935
1936 /* Function get_vectype_for_scalar_type.
1937
1938    Returns the vector type corresponding to SCALAR_TYPE as supported
1939    by the target.  */
1940
1941 static tree
1942 get_vectype_for_scalar_type (tree scalar_type)
1943 {
1944   enum machine_mode inner_mode = TYPE_MODE (scalar_type);
1945   int nbytes = GET_MODE_SIZE (inner_mode);
1946   int nunits;
1947   tree vectype;
1948
1949   if (nbytes == 0)
1950     return NULL_TREE;
1951
1952   /* FORNOW: Only a single vector size per target (UNITS_PER_SIMD_WORD)
1953      is expected.  */
1954   nunits = UNITS_PER_SIMD_WORD / nbytes;
1955
1956   vectype = build_vector_type (scalar_type, nunits);
1957   if (vect_debug_details (NULL))
1958     {
1959       fprintf (dump_file, "get vectype with %d units of type ", nunits);
1960       print_generic_expr (dump_file, scalar_type, TDF_SLIM);
1961     }
1962
1963   if (!vectype)
1964     return NULL_TREE;
1965
1966   if (vect_debug_details (NULL))
1967     {
1968       fprintf (dump_file, "vectype: ");
1969       print_generic_expr (dump_file, vectype, TDF_SLIM);
1970     }
1971
1972   if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
1973     {
1974       /* TODO: tree-complex.c sometimes can parallelize operations
1975          on generic vectors.  We can vectorize the loop in that case,
1976          but then we should re-run the lowering pass.  */
1977       if (vect_debug_details (NULL))
1978         fprintf (dump_file, "mode not supported by target.");
1979       return NULL_TREE;
1980     }
1981
1982   return vectype;
1983 }
1984
1985
1986 /* Function vect_align_data_ref.
1987
1988    Handle mislignment of a memory accesses.
1989
1990    FORNOW: Can't handle misaligned accesses.
1991    Make sure that the dataref is aligned.  */
1992
1993 static void
1994 vect_align_data_ref (tree stmt)
1995 {
1996   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1997   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1998
1999   /* FORNOW: can't handle misaligned accesses;
2000              all accesses expected to be aligned.  */
2001   gcc_assert (aligned_access_p (dr));
2002 }
2003
2004
2005 /* Function vect_create_data_ref_ptr.
2006
2007    Create a memory reference expression for vector access, to be used in a
2008    vector load/store stmt. The reference is based on a new pointer to vector
2009    type (vp).
2010
2011    Input:
2012    1. STMT: a stmt that references memory. Expected to be of the form
2013          MODIFY_EXPR <name, data-ref> or MODIFY_EXPR <data-ref, name>.
2014    2. BSI: block_stmt_iterator where new stmts can be added.
2015    3. OFFSET (optional): an offset to be added to the initial address accessed
2016         by the data-ref in STMT.
2017    4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
2018         pointing to the initial address.
2019
2020    Output:
2021    1. Declare a new ptr to vector_type, and have it point to the base of the
2022       data reference (initial addressed accessed by the data reference).
2023       For example, for vector of type V8HI, the following code is generated:
2024
2025       v8hi *vp;
2026       vp = (v8hi *)initial_address;
2027
2028       if OFFSET is not supplied:
2029          initial_address = &a[init];
2030       if OFFSET is supplied:
2031          initial_address = &a[init + OFFSET];
2032
2033       Return the initial_address in INITIAL_ADDRESS.
2034
2035    2. Create a data-reference in the loop based on the new vector pointer vp,
2036       and using a new index variable 'idx' as follows:
2037
2038       vp' = vp + update
2039
2040       where if ONLY_INIT is true:
2041          update = zero
2042       and otherwise
2043          update = idx + vector_type_size
2044
2045       Return the pointer vp'.
2046
2047
2048    FORNOW: handle only aligned and consecutive accesses.  */
2049
2050 static tree
2051 vect_create_data_ref_ptr (tree stmt, block_stmt_iterator *bsi, tree offset,
2052                           tree *initial_address, bool only_init)
2053 {
2054   tree base_name;
2055   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2056   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2057   struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2058   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2059   tree vect_ptr_type;
2060   tree vect_ptr;
2061   tree tag;
2062   v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
2063   v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
2064   vuse_optype vuses = STMT_VUSE_OPS (stmt);
2065   int nvuses, nv_may_defs, nv_must_defs;
2066   int i;
2067   tree new_temp;
2068   tree vec_stmt;
2069   tree new_stmt_list = NULL_TREE;
2070   tree idx;
2071   edge pe = loop_preheader_edge (loop);
2072   basic_block new_bb;
2073   tree vect_ptr_init;
2074   tree vectype_size;
2075   tree ptr_update;
2076   tree data_ref_ptr;
2077   tree type, tmp, size;
2078
2079   base_name = unshare_expr (DR_BASE_NAME (dr));
2080   if (vect_debug_details (NULL))
2081     {
2082       tree data_ref_base = base_name;
2083       fprintf (dump_file, "create array_ref of type: ");
2084       print_generic_expr (dump_file, vectype, TDF_SLIM);
2085       if (TREE_CODE (data_ref_base) == VAR_DECL)
2086         fprintf (dump_file, "\nvectorizing a one dimensional array ref: ");
2087       else if (TREE_CODE (data_ref_base) == ARRAY_REF)
2088         fprintf (dump_file, "\nvectorizing a multidimensional array ref: ");
2089       else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
2090         fprintf (dump_file, "\nvectorizing a record based array ref: ");
2091       else if (TREE_CODE (data_ref_base) == SSA_NAME)
2092         fprintf (dump_file, "\nvectorizing a pointer ref: ");
2093       print_generic_expr (dump_file, base_name, TDF_SLIM);
2094     }
2095
2096   /** (1) Create the new vector-pointer variable:  **/
2097
2098   vect_ptr_type = build_pointer_type (vectype);
2099   vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
2100                                     get_name (base_name));
2101   add_referenced_tmp_var (vect_ptr);
2102
2103
2104   /** (2) Handle aliasing information of the new vector-pointer:  **/
2105
2106   tag = STMT_VINFO_MEMTAG (stmt_info);
2107   gcc_assert (tag);
2108   get_var_ann (vect_ptr)->type_mem_tag = tag;
2109
2110   /* Mark for renaming all aliased variables
2111      (i.e, the may-aliases of the type-mem-tag).  */
2112   nvuses = NUM_VUSES (vuses);
2113   nv_may_defs = NUM_V_MAY_DEFS (v_may_defs);
2114   nv_must_defs = NUM_V_MUST_DEFS (v_must_defs);
2115   for (i = 0; i < nvuses; i++)
2116     {
2117       tree use = VUSE_OP (vuses, i);
2118       if (TREE_CODE (use) == SSA_NAME)
2119         bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (use))->uid);
2120     }
2121   for (i = 0; i < nv_may_defs; i++)
2122     {
2123       tree def = V_MAY_DEF_RESULT (v_may_defs, i);
2124       if (TREE_CODE (def) == SSA_NAME)
2125         bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (def))->uid);
2126     }
2127   for (i = 0; i < nv_must_defs; i++)
2128     {
2129       tree def = V_MUST_DEF_RESULT (v_must_defs, i);
2130       if (TREE_CODE (def) == SSA_NAME)
2131         bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (def))->uid);
2132     }
2133
2134
2135   /** (3) Calculate the initial address the vector-pointer, and set
2136           the vector-pointer to point to it before the loop:  **/
2137
2138   /* Create: (&(base[init_val+offset]) in the loop preheader.  */
2139   new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
2140                                                    offset);
2141   pe = loop_preheader_edge (loop);
2142   new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list);
2143   gcc_assert (!new_bb);
2144   *initial_address = new_temp;
2145
2146   /* Create: p = (vectype *) initial_base  */
2147   vec_stmt = fold_convert (vect_ptr_type, new_temp);
2148   vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
2149   new_temp = make_ssa_name (vect_ptr, vec_stmt);
2150   TREE_OPERAND (vec_stmt, 0) = new_temp;
2151   new_bb = bsi_insert_on_edge_immediate (pe, vec_stmt);
2152   gcc_assert (!new_bb);
2153   vect_ptr_init = TREE_OPERAND (vec_stmt, 0);
2154
2155
2156   /** (4) Handle the updating of the vector-pointer inside the loop: **/
2157
2158   if (only_init) /* No update in loop is required.  */
2159     return vect_ptr_init;
2160
2161   idx = vect_create_index_for_vector_ref (loop, bsi);
2162
2163   /* Create: update = idx * vectype_size  */
2164   tmp = create_tmp_var (integer_type_node, "update");
2165   add_referenced_tmp_var (tmp);
2166   size = TYPE_SIZE (vect_ptr_type);
2167   type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
2168   ptr_update = create_tmp_var (type, "update");
2169   add_referenced_tmp_var (ptr_update);
2170   vectype_size = TYPE_SIZE_UNIT (vectype);
2171   vec_stmt = build2 (MULT_EXPR, integer_type_node, idx, vectype_size);
2172   vec_stmt = build2 (MODIFY_EXPR, void_type_node, tmp, vec_stmt);
2173   new_temp = make_ssa_name (tmp, vec_stmt);
2174   TREE_OPERAND (vec_stmt, 0) = new_temp;
2175   bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
2176   vec_stmt = fold_convert (type, new_temp);
2177   vec_stmt = build2 (MODIFY_EXPR, void_type_node, ptr_update, vec_stmt);
2178   new_temp = make_ssa_name (ptr_update, vec_stmt);
2179   TREE_OPERAND (vec_stmt, 0) = new_temp;
2180   bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
2181
2182   /* Create: data_ref_ptr = vect_ptr_init + update  */
2183   vec_stmt = build2 (PLUS_EXPR, vect_ptr_type, vect_ptr_init, new_temp);
2184   vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
2185   new_temp = make_ssa_name (vect_ptr, vec_stmt);
2186   TREE_OPERAND (vec_stmt, 0) = new_temp;
2187   bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
2188   data_ref_ptr = TREE_OPERAND (vec_stmt, 0);
2189
2190   return data_ref_ptr;
2191 }
2192
2193
2194 /* Function vect_create_destination_var.
2195
2196    Create a new temporary of type VECTYPE.  */
2197
2198 static tree
2199 vect_create_destination_var (tree scalar_dest, tree vectype)
2200 {
2201   tree vec_dest;
2202   const char *new_name;
2203
2204   gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
2205
2206   new_name = get_name (scalar_dest);
2207   if (!new_name)
2208     new_name = "var_";
2209   vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, new_name);
2210   add_referenced_tmp_var (vec_dest);
2211
2212   return vec_dest;
2213 }
2214
2215
2216 /* Function vect_init_vector.
2217
2218    Insert a new stmt (INIT_STMT) that initializes a new vector variable with
2219    the vector elements of VECTOR_VAR. Return the DEF of INIT_STMT. It will be
2220    used in the vectorization of STMT.  */
2221
2222 static tree
2223 vect_init_vector (tree stmt, tree vector_var)
2224 {
2225   stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2226   struct loop *loop = STMT_VINFO_LOOP (stmt_vinfo);
2227   tree new_var;
2228   tree init_stmt;
2229   tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2230   tree vec_oprnd;
2231   edge pe;
2232   tree new_temp;
2233   basic_block new_bb;
2234
2235   new_var = vect_get_new_vect_var (vectype, vect_simple_var, "cst_");
2236   add_referenced_tmp_var (new_var);
2237
2238   init_stmt = build2 (MODIFY_EXPR, vectype, new_var, vector_var);
2239   new_temp = make_ssa_name (new_var, init_stmt);
2240   TREE_OPERAND (init_stmt, 0) = new_temp;
2241
2242   pe = loop_preheader_edge (loop);
2243   new_bb = bsi_insert_on_edge_immediate (pe, init_stmt);
2244   gcc_assert (!new_bb);
2245
2246   if (vect_debug_details (NULL))
2247     {
2248       fprintf (dump_file, "created new init_stmt: ");
2249       print_generic_expr (dump_file, init_stmt, TDF_SLIM);
2250     }
2251
2252   vec_oprnd = TREE_OPERAND (init_stmt, 0);
2253   return vec_oprnd;
2254 }
2255
2256
2257 /* Function vect_get_vec_def_for_operand.
2258
2259    OP is an operand in STMT. This function returns a (vector) def that will be
2260    used in the vectorized stmt for STMT.
2261
2262    In the case that OP is an SSA_NAME which is defined in the loop, then
2263    STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
2264
2265    In case OP is an invariant or constant, a new stmt that creates a vector def
2266    needs to be introduced.  */
2267
2268 static tree
2269 vect_get_vec_def_for_operand (tree op, tree stmt)
2270 {
2271   tree vec_oprnd;
2272   tree vec_stmt;
2273   tree def_stmt;
2274   stmt_vec_info def_stmt_info = NULL;
2275   stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2276   tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2277   int nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
2278   struct loop *loop = STMT_VINFO_LOOP (stmt_vinfo);
2279   basic_block bb;
2280   tree vec_inv;
2281   tree t = NULL_TREE;
2282   tree def;
2283   int i;
2284
2285   if (vect_debug_details (NULL))
2286     {
2287       fprintf (dump_file, "vect_get_vec_def_for_operand: ");
2288       print_generic_expr (dump_file, op, TDF_SLIM);
2289     }
2290
2291   /** ===> Case 1: operand is a constant.  **/
2292
2293   if (TREE_CODE (op) == INTEGER_CST || TREE_CODE (op) == REAL_CST)
2294     {
2295       /* Create 'vect_cst_ = {cst,cst,...,cst}'  */
2296
2297       tree vec_cst;
2298
2299       /* Build a tree with vector elements.  */
2300       if (vect_debug_details (NULL))
2301         fprintf (dump_file, "Create vector_cst. nunits = %d", nunits);
2302
2303       for (i = nunits - 1; i >= 0; --i)
2304         {
2305           t = tree_cons (NULL_TREE, op, t);
2306         }
2307       vec_cst = build_vector (vectype, t);
2308       return vect_init_vector (stmt, vec_cst);
2309     }
2310
2311   gcc_assert (TREE_CODE (op) == SSA_NAME);
2312
2313   /** ===> Case 2: operand is an SSA_NAME - find the stmt that defines it.  **/
2314
2315   def_stmt = SSA_NAME_DEF_STMT (op);
2316   def_stmt_info = vinfo_for_stmt (def_stmt);
2317
2318   if (vect_debug_details (NULL))
2319     {
2320       fprintf (dump_file, "vect_get_vec_def_for_operand: def_stmt: ");
2321       print_generic_expr (dump_file, def_stmt, TDF_SLIM);
2322     }
2323
2324
2325   /** ==> Case 2.1: operand is defined inside the loop.  **/
2326
2327   if (def_stmt_info)
2328     {
2329       /* Get the def from the vectorized stmt.  */
2330
2331       vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
2332       gcc_assert (vec_stmt);
2333       vec_oprnd = TREE_OPERAND (vec_stmt, 0);
2334       return vec_oprnd;
2335     }
2336
2337
2338   /** ==> Case 2.2: operand is defined by the loop-header phi-node -
2339                     it is a reduction/induction.  **/
2340
2341   bb = bb_for_stmt (def_stmt);
2342   if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb))
2343     {
2344       if (vect_debug_details (NULL))
2345         fprintf (dump_file, "reduction/induction - unsupported.");
2346       internal_error ("no support for reduction/induction"); /* FORNOW */
2347     }
2348
2349
2350   /** ==> Case 2.3: operand is defined outside the loop -
2351                     it is a loop invariant.  */
2352
2353   switch (TREE_CODE (def_stmt))
2354     {
2355     case PHI_NODE:
2356       def = PHI_RESULT (def_stmt);
2357       break;
2358     case MODIFY_EXPR:
2359       def = TREE_OPERAND (def_stmt, 0);
2360       break;
2361     case NOP_EXPR:
2362       def = TREE_OPERAND (def_stmt, 0);
2363       gcc_assert (IS_EMPTY_STMT (def_stmt));
2364       def = op;
2365       break;
2366     default:
2367       if (vect_debug_details (NULL))
2368         {
2369           fprintf (dump_file, "unsupported defining stmt: ");
2370           print_generic_expr (dump_file, def_stmt, TDF_SLIM);
2371         }
2372       internal_error ("unsupported defining stmt");
2373     }
2374
2375   /* Build a tree with vector elements. Create 'vec_inv = {inv,inv,..,inv}'  */
2376
2377   if (vect_debug_details (NULL))
2378     fprintf (dump_file, "Create vector_inv.");
2379
2380   for (i = nunits - 1; i >= 0; --i)
2381     {
2382       t = tree_cons (NULL_TREE, def, t);
2383     }
2384
2385   vec_inv = build_constructor (vectype, t);
2386   return vect_init_vector (stmt, vec_inv);
2387 }
2388
2389
2390 /* Function vect_finish_stmt_generation.
2391
2392    Insert a new stmt.  */
2393
2394 static void
2395 vect_finish_stmt_generation (tree stmt, tree vec_stmt, block_stmt_iterator *bsi)
2396 {
2397   bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
2398
2399   if (vect_debug_details (NULL))
2400     {
2401       fprintf (dump_file, "add new stmt: ");
2402       print_generic_expr (dump_file, vec_stmt, TDF_SLIM);
2403     }
2404
2405   /* Make sure bsi points to the stmt that is being vectorized.  */
2406
2407   /* Assumption: any stmts created for the vectorization of stmt S were
2408      inserted before S. BSI is expected to point to S or some new stmt before S.
2409    */
2410
2411   while (stmt != bsi_stmt (*bsi) && !bsi_end_p (*bsi))
2412     bsi_next (bsi);
2413   gcc_assert (stmt == bsi_stmt (*bsi));
2414 }
2415
2416
2417 /* Function vectorizable_assignment.
2418
2419    Check if STMT performs an assignment (copy) that can be vectorized.
2420    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2421    stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2422    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
2423
2424 static bool
2425 vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2426 {
2427   tree vec_dest;
2428   tree scalar_dest;
2429   tree op;
2430   tree vec_oprnd;
2431   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2432   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2433   struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2434   tree new_temp;
2435
2436   /* Is vectorizable assignment?  */
2437
2438   if (TREE_CODE (stmt) != MODIFY_EXPR)
2439     return false;
2440
2441   scalar_dest = TREE_OPERAND (stmt, 0);
2442   if (TREE_CODE (scalar_dest) != SSA_NAME)
2443     return false;
2444
2445   op = TREE_OPERAND (stmt, 1);
2446   if (!vect_is_simple_use (op, loop, NULL))
2447     {
2448       if (vect_debug_details (NULL))
2449         fprintf (dump_file, "use not simple.");
2450       return false;
2451     }
2452
2453   if (!vec_stmt) /* transformation not required.  */
2454     {
2455       STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
2456       return true;
2457     }
2458
2459   /** Trasform.  **/
2460   if (vect_debug_details (NULL))
2461     fprintf (dump_file, "transform assignment.");
2462
2463   /* Handle def.  */
2464   vec_dest = vect_create_destination_var (scalar_dest, vectype);
2465
2466   /* Handle use.  */
2467   op = TREE_OPERAND (stmt, 1);
2468   vec_oprnd = vect_get_vec_def_for_operand (op, stmt);
2469
2470   /* Arguments are ready. create the new vector stmt.  */
2471   *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_oprnd);
2472   new_temp = make_ssa_name (vec_dest, *vec_stmt);
2473   TREE_OPERAND (*vec_stmt, 0) = new_temp;
2474   vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2475
2476   return true;
2477 }
2478
2479
2480 /* Function vectorizable_operation.
2481
2482    Check if STMT performs a binary or unary operation that can be vectorized.
2483    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2484    stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2485    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
2486
2487 static bool
2488 vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2489 {
2490   tree vec_dest;
2491   tree scalar_dest;
2492   tree operation;
2493   tree op0, op1 = NULL;
2494   tree vec_oprnd0, vec_oprnd1=NULL;
2495   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2496   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2497   struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2498   int i;
2499   enum tree_code code;
2500   enum machine_mode vec_mode;
2501   tree new_temp;
2502   int op_type;
2503   tree op;
2504   optab optab;
2505
2506   /* Is STMT a vectorizable binary/unary operation?   */
2507   if (TREE_CODE (stmt) != MODIFY_EXPR)
2508     return false;
2509
2510   if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
2511     return false;
2512
2513   operation = TREE_OPERAND (stmt, 1);
2514   code = TREE_CODE (operation);
2515   optab = optab_for_tree_code (code, vectype);
2516
2517   /* Support only unary or binary operations.  */
2518   op_type = TREE_CODE_LENGTH (code);
2519   if (op_type != unary_op && op_type != binary_op)
2520     {
2521       if (vect_debug_details (NULL))
2522         fprintf (dump_file, "num. args = %d (not unary/binary op).", op_type);
2523       return false;
2524     }
2525
2526   for (i = 0; i < op_type; i++)
2527     {
2528       op = TREE_OPERAND (operation, i);
2529       if (!vect_is_simple_use (op, loop, NULL))
2530         {
2531           if (vect_debug_details (NULL))
2532             fprintf (dump_file, "use not simple.");
2533           return false;
2534         }
2535     }
2536
2537   /* Supportable by target?  */
2538   if (!optab)
2539     {
2540       if (vect_debug_details (NULL))
2541         fprintf (dump_file, "no optab.");
2542       return false;
2543     }
2544   vec_mode = TYPE_MODE (vectype);
2545   if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
2546     {
2547       if (vect_debug_details (NULL))
2548         fprintf (dump_file, "op not supported by target.");
2549       return false;
2550     }
2551
2552   if (!vec_stmt) /* transformation not required.  */
2553     {
2554       STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
2555       return true;
2556     }
2557
2558   /** Transform.  **/
2559
2560   if (vect_debug_details (NULL))
2561     fprintf (dump_file, "transform binary/unary operation.");
2562
2563   /* Handle def.  */
2564   scalar_dest = TREE_OPERAND (stmt, 0);
2565   vec_dest = vect_create_destination_var (scalar_dest, vectype);
2566
2567   /* Handle uses.  */
2568   op0 = TREE_OPERAND (operation, 0);
2569   vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
2570
2571   if (op_type == binary_op)
2572     {
2573       op1 = TREE_OPERAND (operation, 1);
2574       vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
2575     }
2576
2577   /* Arguments are ready. create the new vector stmt.  */
2578
2579   if (op_type == binary_op)
2580     *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
2581                 build2 (code, vectype, vec_oprnd0, vec_oprnd1));
2582   else
2583     *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
2584                 build1 (code, vectype, vec_oprnd0));
2585   new_temp = make_ssa_name (vec_dest, *vec_stmt);
2586   TREE_OPERAND (*vec_stmt, 0) = new_temp;
2587   vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2588
2589   return true;
2590 }
2591
2592
2593 /* Function vectorizable_store.
2594
2595    Check if STMT defines a non scalar data-ref (array/pointer/structure) that
2596    can be vectorized.
2597    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2598    stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2599    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
2600
2601 static bool
2602 vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2603 {
2604   tree scalar_dest;
2605   tree data_ref;
2606   tree op;
2607   tree vec_oprnd1;
2608   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2609   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2610   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2611   struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2612   enum machine_mode vec_mode;
2613   tree dummy;
2614   enum dr_alignment_support alignment_support_cheme;
2615
2616   /* Is vectorizable store? */
2617
2618   if (TREE_CODE (stmt) != MODIFY_EXPR)
2619     return false;
2620
2621   scalar_dest = TREE_OPERAND (stmt, 0);
2622   if (TREE_CODE (scalar_dest) != ARRAY_REF
2623       && TREE_CODE (scalar_dest) != INDIRECT_REF)
2624     return false;
2625
2626   op = TREE_OPERAND (stmt, 1);
2627   if (!vect_is_simple_use (op, loop, NULL))
2628     {
2629       if (vect_debug_details (NULL))
2630         fprintf (dump_file, "use not simple.");
2631       return false;
2632     }
2633
2634   vec_mode = TYPE_MODE (vectype);
2635   /* FORNOW. In some cases can vectorize even if data-type not supported
2636      (e.g. - array initialization with 0).  */
2637   if (mov_optab->handlers[(int)vec_mode].insn_code == CODE_FOR_nothing)
2638     return false;
2639
2640   if (!STMT_VINFO_DATA_REF (stmt_info))
2641     return false;
2642
2643
2644   if (!vec_stmt) /* transformation not required.  */
2645     {
2646       STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
2647       return true;
2648     }
2649
2650   /** Trasform.  **/
2651
2652   if (vect_debug_details (NULL))
2653     fprintf (dump_file, "transform store");
2654
2655   alignment_support_cheme = vect_supportable_dr_alignment (dr);
2656   gcc_assert (alignment_support_cheme);
2657   gcc_assert (alignment_support_cheme = dr_aligned);  /* FORNOW */
2658
2659   /* Handle use - get the vectorized def from the defining stmt.  */
2660   vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt);
2661
2662   /* Handle def.  */
2663   /* FORNOW: make sure the data reference is aligned.  */
2664   vect_align_data_ref (stmt);
2665   data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
2666   data_ref = build_fold_indirect_ref (data_ref);
2667
2668   /* Arguments are ready. create the new vector stmt.  */
2669   *vec_stmt = build2 (MODIFY_EXPR, vectype, data_ref, vec_oprnd1);
2670   vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2671
2672   return true;
2673 }
2674
2675
2676 /* vectorizable_load.
2677
2678    Check if STMT reads a non scalar data-ref (array/pointer/structure) that
2679    can be vectorized.
2680    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2681    stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2682    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
2683
2684 static bool
2685 vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2686 {
2687   tree scalar_dest;
2688   tree vec_dest = NULL;
2689   tree data_ref = NULL;
2690   tree op;
2691   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2692   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2693   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2694   tree new_temp;
2695   int mode;
2696   tree init_addr;
2697   tree new_stmt;
2698   tree dummy;
2699   basic_block new_bb;
2700   struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2701   edge pe = loop_preheader_edge (loop);
2702   enum dr_alignment_support alignment_support_cheme;
2703
2704   /* Is vectorizable load? */
2705
2706   if (TREE_CODE (stmt) != MODIFY_EXPR)
2707     return false;
2708
2709   scalar_dest = TREE_OPERAND (stmt, 0);
2710   if (TREE_CODE (scalar_dest) != SSA_NAME)
2711     return false;
2712
2713   op = TREE_OPERAND (stmt, 1);
2714   if (TREE_CODE (op) != ARRAY_REF && TREE_CODE (op) != INDIRECT_REF)
2715     return false;
2716
2717   if (!STMT_VINFO_DATA_REF (stmt_info))
2718     return false;
2719
2720   mode = (int) TYPE_MODE (vectype);
2721
2722   /* FORNOW. In some cases can vectorize even if data-type not supported
2723     (e.g. - data copies).  */
2724   if (mov_optab->handlers[mode].insn_code == CODE_FOR_nothing)
2725     {
2726       if (vect_debug_details (loop))
2727         fprintf (dump_file, "Aligned load, but unsupported type.");
2728       return false;
2729     }
2730
2731   if (!vec_stmt) /* transformation not required.  */
2732     {
2733       STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
2734       return true;
2735     }
2736
2737   /** Trasform.  **/
2738
2739   if (vect_debug_details (NULL))
2740     fprintf (dump_file, "transform load.");
2741
2742   alignment_support_cheme = vect_supportable_dr_alignment (dr);
2743   gcc_assert (alignment_support_cheme);
2744
2745   if (alignment_support_cheme == dr_aligned
2746       || alignment_support_cheme == dr_unaligned_supported)
2747     {
2748       /* Create:
2749          p = initial_addr;
2750          indx = 0;
2751          loop {
2752            vec_dest = *(p);
2753            indx = indx + 1;
2754          }
2755       */
2756
2757       vec_dest = vect_create_destination_var (scalar_dest, vectype);
2758       data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
2759       if (aligned_access_p (dr))
2760         data_ref = build_fold_indirect_ref (data_ref);
2761       else
2762         {
2763           int mis = DR_MISALIGNMENT (dr);
2764           tree tmis = (mis == -1 ? size_zero_node : size_int (mis));
2765           tmis = size_binop (MULT_EXPR, tmis, size_int(BITS_PER_UNIT));
2766           data_ref = build2 (MISALIGNED_INDIRECT_REF, vectype, data_ref, tmis);
2767         }
2768       new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
2769       new_temp = make_ssa_name (vec_dest, new_stmt);
2770       TREE_OPERAND (new_stmt, 0) = new_temp;
2771       vect_finish_stmt_generation (stmt, new_stmt, bsi);
2772     }
2773   else if (alignment_support_cheme == dr_unaligned_software_pipeline)
2774     {
2775       /* Create:
2776          p1 = initial_addr;
2777          msq_init = *(floor(p1))
2778          p2 = initial_addr + VS - 1;
2779          magic = have_builtin ? builtin_result : initial_address;
2780          indx = 0;
2781          loop {
2782            p2' = p2 + indx * vectype_size
2783            lsq = *(floor(p2'))
2784            vec_dest = realign_load (msq, lsq, magic)
2785            indx = indx + 1;
2786            msq = lsq;
2787          }
2788       */
2789
2790       tree offset;
2791       tree magic;
2792       tree phi_stmt;
2793       tree msq_init;
2794       tree msq, lsq;
2795       tree dataref_ptr;
2796       tree params;
2797
2798       /* <1> Create msq_init = *(floor(p1)) in the loop preheader  */
2799       vec_dest = vect_create_destination_var (scalar_dest, vectype);
2800       data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE,
2801                                            &init_addr, true);
2802       data_ref = build1 (ALIGN_INDIRECT_REF, vectype, data_ref);
2803       new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
2804       new_temp = make_ssa_name (vec_dest, new_stmt);
2805       TREE_OPERAND (new_stmt, 0) = new_temp;
2806       new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
2807       gcc_assert (!new_bb);
2808       msq_init = TREE_OPERAND (new_stmt, 0);
2809
2810
2811       /* <2> Create lsq = *(floor(p2')) in the loop  */
2812       offset = build_int_cst (integer_type_node,
2813                               GET_MODE_NUNITS (TYPE_MODE (vectype)));
2814       offset = int_const_binop (MINUS_EXPR, offset, integer_one_node, 1);
2815       vec_dest = vect_create_destination_var (scalar_dest, vectype);
2816       dataref_ptr = vect_create_data_ref_ptr (stmt, bsi, offset, &dummy, false);
2817       data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
2818       new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
2819       new_temp = make_ssa_name (vec_dest, new_stmt);
2820       TREE_OPERAND (new_stmt, 0) = new_temp;
2821       vect_finish_stmt_generation (stmt, new_stmt, bsi);
2822       lsq = TREE_OPERAND (new_stmt, 0);
2823
2824
2825       /* <3> */
2826       if (targetm.vectorize.builtin_mask_for_load)
2827         {
2828           /* Create permutation mask, if required, in loop preheader.  */
2829           tree builtin_decl;
2830           params = build_tree_list (NULL_TREE, init_addr);
2831           vec_dest = vect_create_destination_var (scalar_dest, vectype);
2832           builtin_decl = targetm.vectorize.builtin_mask_for_load ();
2833           new_stmt = build_function_call_expr (builtin_decl, params);
2834           new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
2835           new_temp = make_ssa_name (vec_dest, new_stmt);
2836           TREE_OPERAND (new_stmt, 0) = new_temp;
2837           new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
2838           gcc_assert (!new_bb);
2839           magic = TREE_OPERAND (new_stmt, 0);
2840
2841           /* Since we have just created a CALL_EXPR, we may need to
2842              rename call-clobbered variables.  */
2843           mark_call_clobbered_vars_to_rename ();
2844         }
2845       else
2846         {
2847           /* Use current address instead of init_addr for reduced reg pressure.
2848            */
2849           magic = dataref_ptr;
2850         }
2851
2852
2853       /* <4> Create msq = phi <msq_init, lsq> in loop  */
2854       vec_dest = vect_create_destination_var (scalar_dest, vectype);
2855       msq = make_ssa_name (vec_dest, NULL_TREE);
2856       phi_stmt = create_phi_node (msq, loop->header); /* CHECKME */
2857       SSA_NAME_DEF_STMT (msq) = phi_stmt;
2858       add_phi_arg (phi_stmt, msq_init, loop_preheader_edge (loop));
2859       add_phi_arg (phi_stmt, lsq, loop_latch_edge (loop));
2860
2861
2862       /* <5> Create <vec_dest = realign_load (msq, lsq, magic)> in loop  */
2863       vec_dest = vect_create_destination_var (scalar_dest, vectype);
2864       new_stmt = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq, magic);
2865       new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
2866       new_temp = make_ssa_name (vec_dest, new_stmt);
2867       TREE_OPERAND (new_stmt, 0) = new_temp;
2868       vect_finish_stmt_generation (stmt, new_stmt, bsi);
2869     }
2870   else
2871     gcc_unreachable ();
2872
2873   *vec_stmt = new_stmt;
2874   return true;
2875 }
2876
2877
2878 /* Function vect_supportable_dr_alignment
2879
2880    Return whether the data reference DR is supported with respect to its
2881    alignment.  */
2882
2883 static enum dr_alignment_support
2884 vect_supportable_dr_alignment (struct data_reference *dr)
2885 {
2886   tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr)));
2887   enum machine_mode mode = (int) TYPE_MODE (vectype);
2888
2889   if (aligned_access_p (dr))
2890     return dr_aligned;
2891
2892   /* Possibly unaligned access.  */
2893
2894   if (DR_IS_READ (dr))
2895     {
2896       if (vec_realign_load_optab->handlers[mode].insn_code != CODE_FOR_nothing
2897           && (!targetm.vectorize.builtin_mask_for_load
2898               || targetm.vectorize.builtin_mask_for_load ()))
2899         return dr_unaligned_software_pipeline;
2900
2901       if (movmisalign_optab->handlers[mode].insn_code != CODE_FOR_nothing)
2902         /* Can't software pipeline the loads, but can at least do them.  */
2903         return dr_unaligned_supported;
2904     }
2905
2906   /* Unsupported.  */
2907   return dr_unaligned_unsupported;
2908 }
2909
2910
2911 /* Function vect_transform_stmt.
2912
2913    Create a vectorized stmt to replace STMT, and insert it at BSI.  */
2914
2915 static bool
2916 vect_transform_stmt (tree stmt, block_stmt_iterator *bsi)
2917 {
2918   bool is_store = false;
2919   tree vec_stmt = NULL_TREE;
2920   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2921   bool done;
2922
2923   switch (STMT_VINFO_TYPE (stmt_info))
2924     {
2925     case op_vec_info_type:
2926       done = vectorizable_operation (stmt, bsi, &vec_stmt);
2927       gcc_assert (done);
2928       break;
2929
2930     case assignment_vec_info_type:
2931       done = vectorizable_assignment (stmt, bsi, &vec_stmt);
2932       gcc_assert (done);
2933       break;
2934
2935     case load_vec_info_type:
2936       done = vectorizable_load (stmt, bsi, &vec_stmt);
2937       gcc_assert (done);
2938       break;
2939
2940     case store_vec_info_type:
2941       done = vectorizable_store (stmt, bsi, &vec_stmt);
2942       gcc_assert (done);
2943       is_store = true;
2944       break;
2945     default:
2946       if (vect_debug_details (NULL))
2947         fprintf (dump_file, "stmt not supported.");
2948       gcc_unreachable ();
2949     }
2950
2951   STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
2952
2953   return is_store;
2954 }
2955
2956
2957 /* This function builds ni_name = number of iterations loop executes
2958    on the loop preheader.  */
2959
2960 static tree
2961 vect_build_loop_niters (loop_vec_info loop_vinfo)
2962 {
2963   tree ni_name, stmt, var;
2964   edge pe;
2965   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2966   tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
2967
2968   var = create_tmp_var (TREE_TYPE (ni), "niters");
2969   add_referenced_tmp_var (var);
2970   ni_name = force_gimple_operand (ni, &stmt, false, var);
2971
2972   pe = loop_preheader_edge (loop);
2973   if (stmt)
2974     {
2975       basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2976       gcc_assert (!new_bb);
2977     }
2978
2979   return ni_name;
2980 }
2981
2982
2983 /* This function generates the following statements:
2984
2985  ni_name = number of iterations loop executes
2986  ratio = ni_name / vf
2987  ratio_mult_vf_name = ratio * vf
2988
2989  and places them at the loop preheader edge.  */
2990
2991 static void
2992 vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
2993                                  tree *ni_name_ptr,
2994                                  tree *ratio_mult_vf_name_ptr,
2995                                  tree *ratio_name_ptr)
2996 {
2997
2998   edge pe;
2999   basic_block new_bb;
3000   tree stmt, ni_name;
3001   tree var;
3002   tree ratio_name;
3003   tree ratio_mult_vf_name;
3004   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3005   tree ni = LOOP_VINFO_NITERS (loop_vinfo);
3006   int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3007   tree log_vf = build_int_cst (unsigned_type_node, exact_log2 (vf));
3008
3009   pe = loop_preheader_edge (loop);
3010
3011   /* Generate temporary variable that contains
3012      number of iterations loop executes.  */
3013
3014   ni_name = vect_build_loop_niters (loop_vinfo);
3015
3016   /* Create: ratio = ni >> log2(vf) */
3017
3018   var = create_tmp_var (TREE_TYPE (ni), "bnd");
3019   add_referenced_tmp_var (var);
3020   ratio_name = make_ssa_name (var, NULL_TREE);
3021   stmt = build2 (MODIFY_EXPR, void_type_node, ratio_name,
3022            build2 (RSHIFT_EXPR, TREE_TYPE (ni_name), ni_name, log_vf));
3023   SSA_NAME_DEF_STMT (ratio_name) = stmt;
3024
3025   pe = loop_preheader_edge (loop);
3026   new_bb = bsi_insert_on_edge_immediate (pe, stmt);
3027   gcc_assert (!new_bb);
3028
3029   /* Create: ratio_mult_vf = ratio << log2 (vf).  */
3030
3031   var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
3032   add_referenced_tmp_var (var);
3033   ratio_mult_vf_name = make_ssa_name (var, NULL_TREE);
3034   stmt = build2 (MODIFY_EXPR, void_type_node, ratio_mult_vf_name,
3035            build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name), ratio_name, log_vf));
3036   SSA_NAME_DEF_STMT (ratio_mult_vf_name) = stmt;
3037
3038   pe = loop_preheader_edge (loop);
3039   new_bb = bsi_insert_on_edge_immediate (pe, stmt);
3040   gcc_assert (!new_bb);
3041
3042   *ni_name_ptr = ni_name;
3043   *ratio_mult_vf_name_ptr = ratio_mult_vf_name;
3044   *ratio_name_ptr = ratio_name;
3045
3046   return;
3047 }
3048
3049
3050 /*   Function vect_update_ivs_after_vectorizer.
3051
3052      "Advance" the induction variables of LOOP to the value they should take
3053      after the execution of LOOP.  This is currently necessary because the
3054      vectorizer does not handle induction variables that are used after the
3055      loop.  Such a situation occurs when the last iterations of LOOP are
3056      peeled, because:
3057      1. We introduced new uses after LOOP for IVs that were not originally used
3058         after LOOP: the IVs of LOOP are now used by an epilog loop.
3059      2. LOOP is going to be vectorized; this means that it will iterate N/VF
3060         times, whereas the loop IVs should be bumped N times.
3061
3062      Input:
3063      - LOOP - a loop that is going to be vectorized. The last few iterations
3064               of LOOP were peeled.
3065      - NITERS - the number of iterations that LOOP executes (before it is
3066                 vectorized). i.e, the number of times the ivs should be bumped.
3067      - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
3068                   coming out from LOOP on which there are uses of the LOOP ivs
3069                   (this is the path from LOOP->exit to epilog_loop->preheader).
3070
3071                   The new definitions of the ivs are placed in LOOP->exit.
3072                   The phi args associated with the edge UPDATE_E in the bb
3073                   UPDATE_E->dest are updated accordingly.
3074
3075      Assumption 1: Like the rest of the vectorizer, this function assumes
3076      a single loop exit that has a single predecessor.
3077
3078      Assumption 2: The phi nodes in the LOOP header and in update_bb are
3079      organized in the same order.
3080
3081      Assumption 3: The access function of the ivs is simple enough (see
3082      vect_can_advance_ivs_p).  This assumption will be relaxed in the future.
3083
3084      Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
3085      coming out of LOOP on which the ivs of LOOP are used (this is the path
3086      that leads to the epilog loop; other paths skip the epilog loop).  This
3087      path starts with the edge UPDATE_E, and its destination (denoted update_bb)
3088      needs to have its phis updated.
3089  */
3090
3091 static void
3092 vect_update_ivs_after_vectorizer (struct loop *loop, tree niters, edge update_e)
3093 {
3094   basic_block exit_bb = loop->exit_edges[0]->dest;
3095   tree phi, phi1;
3096   basic_block update_bb = update_e->dest;
3097
3098   /* gcc_assert (vect_can_advance_ivs_p (loop)); */
3099
3100   /* Make sure there exists a single-predecessor exit bb:  */
3101   gcc_assert (EDGE_COUNT (exit_bb->preds) == 1);
3102
3103   for (phi = phi_nodes (loop->header), phi1 = phi_nodes (update_bb);
3104        phi && phi1;
3105        phi = PHI_CHAIN (phi), phi1 = PHI_CHAIN (phi1))
3106     {
3107       tree access_fn = NULL;
3108       tree evolution_part;
3109       tree init_expr;
3110       tree step_expr;
3111       tree var, stmt, ni, ni_name;
3112       block_stmt_iterator last_bsi;
3113
3114       /* Skip virtual phi's.  */
3115       if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
3116         {
3117           if (vect_debug_details (NULL))
3118             fprintf (dump_file, "virtual phi. skip.");
3119           continue;
3120         }
3121
3122       access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi));
3123       gcc_assert (access_fn);
3124       evolution_part =
3125          unshare_expr (evolution_part_in_loop_num (access_fn, loop->num));
3126       gcc_assert (evolution_part != NULL_TREE);
3127
3128       /* FORNOW: We do not support IVs whose evolution function is a polynomial
3129          of degree >= 2 or exponential.  */
3130       gcc_assert (!tree_is_chrec (evolution_part));
3131
3132       step_expr = evolution_part;
3133       init_expr = unshare_expr (initial_condition (access_fn));
3134
3135       ni = build2 (PLUS_EXPR, TREE_TYPE (init_expr),
3136                   build2 (MULT_EXPR, TREE_TYPE (niters),
3137                        niters, step_expr), init_expr);
3138
3139       var = create_tmp_var (TREE_TYPE (init_expr), "tmp");
3140       add_referenced_tmp_var (var);
3141
3142       ni_name = force_gimple_operand (ni, &stmt, false, var);
3143
3144       /* Insert stmt into exit_bb.  */
3145       last_bsi = bsi_last (exit_bb);
3146       if (stmt)
3147         bsi_insert_before (&last_bsi, stmt, BSI_SAME_STMT);
3148
3149       /* Fix phi expressions in the successor bb.  */
3150       gcc_assert (PHI_ARG_DEF_FROM_EDGE (phi1, update_e) ==
3151                   PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0)));
3152       SET_PHI_ARG_DEF (phi1, phi_arg_from_edge (phi1, update_e), ni_name);
3153     }
3154 }
3155
3156
3157 /* Function vect_do_peeling_for_loop_bound
3158
3159    Peel the last iterations of the loop represented by LOOP_VINFO.
3160    The peeled iterations form a new epilog loop.  Given that the loop now
3161    iterates NITERS times, the new epilog loop iterates
3162    NITERS % VECTORIZATION_FACTOR times.
3163
3164    The original loop will later be made to iterate
3165    NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO).  */
3166
3167 static void
3168 vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio,
3169                                 struct loops *loops)
3170 {
3171
3172   tree ni_name, ratio_mult_vf_name;
3173   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3174   struct loop *new_loop;
3175   edge update_e;
3176 #ifdef ENABLE_CHECKING
3177   int loop_num;
3178 #endif
3179
3180   if (vect_debug_details (NULL))
3181     fprintf (dump_file, "\n<<vect_transtorm_for_unknown_loop_bound>>\n");
3182
3183   /* Generate the following variables on the preheader of original loop:
3184
3185      ni_name = number of iteration the original loop executes
3186      ratio = ni_name / vf
3187      ratio_mult_vf_name = ratio * vf  */
3188   vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
3189                                    &ratio_mult_vf_name, ratio);
3190
3191   /* Update loop info.  */
3192   loop->pre_header = loop_preheader_edge (loop)->src;
3193   loop->pre_header_edges[0] = loop_preheader_edge (loop);
3194
3195 #ifdef ENABLE_CHECKING
3196   loop_num  = loop->num;
3197 #endif
3198   new_loop = slpeel_tree_peel_loop_to_edge (loop, loops, loop->exit_edges[0],
3199                                             ratio_mult_vf_name, ni_name, false);
3200 #ifdef ENABLE_CHECKING
3201   gcc_assert (new_loop);
3202   gcc_assert (loop_num == loop->num);
3203   slpeel_verify_cfg_after_peeling (loop, new_loop);
3204 #endif
3205
3206   /* A guard that controls whether the new_loop is to be executed or skipped
3207      is placed in LOOP->exit.  LOOP->exit therefore has two successors - one
3208      is the preheader of NEW_LOOP, where the IVs from LOOP are used.  The other
3209      is a bb after NEW_LOOP, where these IVs are not used.  Find the edge that
3210      is on the path where the LOOP IVs are used and need to be updated.  */
3211
3212   if (EDGE_PRED (new_loop->pre_header, 0)->src == loop->exit_edges[0]->dest)
3213     update_e = EDGE_PRED (new_loop->pre_header, 0);
3214   else
3215     update_e = EDGE_PRED (new_loop->pre_header, 1);
3216
3217   /* Update IVs of original loop as if they were advanced
3218      by ratio_mult_vf_name steps.  */
3219   vect_update_ivs_after_vectorizer (loop, ratio_mult_vf_name, update_e);
3220
3221   /* After peeling we have to reset scalar evolution analyzer.  */
3222   scev_reset ();
3223
3224   return;
3225 }
3226
3227
3228 /* Function vect_gen_niters_for_prolog_loop
3229
3230    Set the number of iterations for the loop represented by LOOP_VINFO
3231    to the minimum between LOOP_NITERS (the original iteration count of the loop)
3232    and the misalignment of DR - the first data reference recorded in
3233    LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO).  As a result, after the execution of
3234    this loop, the data reference DR will refer to an aligned location.
3235
3236    The following computation is generated:
3237
3238    compute address misalignment in bytes:
3239    addr_mis = addr & (vectype_size - 1)
3240
3241    prolog_niters = min ( LOOP_NITERS , (VF - addr_mis/elem_size)&(VF-1) )
3242
3243    (elem_size = element type size; an element is the scalar element
3244         whose type is the inner type of the vectype)  */
3245
3246 static tree
3247 vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
3248 {
3249   struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
3250   int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3251   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3252   tree var, stmt;
3253   tree iters, iters_name;
3254   edge pe;
3255   basic_block new_bb;
3256   tree dr_stmt = DR_STMT (dr);
3257   stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
3258   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3259   int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
3260   tree elem_misalign;
3261   tree byte_misalign;
3262   tree new_stmts = NULL_TREE;
3263   tree start_addr =
3264         vect_create_addr_base_for_vector_ref (dr_stmt, &new_stmts, NULL_TREE);
3265   tree ptr_type = TREE_TYPE (start_addr);
3266   tree size = TYPE_SIZE (ptr_type);
3267   tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
3268   tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1);
3269   tree vf_minus_1 = build_int_cst (unsigned_type_node, vf - 1);
3270   tree niters_type = TREE_TYPE (loop_niters);
3271   tree elem_size_log =
3272         build_int_cst (unsigned_type_node, exact_log2 (vectype_align/vf));
3273   tree vf_tree = build_int_cst (unsigned_type_node, vf);
3274
3275   pe = loop_preheader_edge (loop);
3276   new_bb = bsi_insert_on_edge_immediate (pe, new_stmts);
3277   gcc_assert (!new_bb);
3278
3279   /* Create:  byte_misalign = addr & (vectype_size - 1)  */
3280   byte_misalign = build2 (BIT_AND_EXPR, type, start_addr, vectype_size_minus_1);
3281
3282   /* Create:  elem_misalign = byte_misalign / element_size  */
3283   elem_misalign =
3284         build2 (RSHIFT_EXPR, unsigned_type_node, byte_misalign, elem_size_log);
3285
3286   /* Create:  (niters_type) (VF - elem_misalign)&(VF - 1)  */
3287   iters = build2 (MINUS_EXPR, unsigned_type_node, vf_tree, elem_misalign);
3288   iters = build2 (BIT_AND_EXPR, unsigned_type_node, iters, vf_minus_1);
3289   iters = fold_convert (niters_type, iters);
3290
3291   /* Create:  prolog_loop_niters = min (iters, loop_niters) */
3292   /* If the loop bound is known at compile time we already verified that it is
3293      greater than vf; since the misalignment ('iters') is at most vf, there's
3294      no need to generate the MIN_EXPR in this case.  */
3295   if (!TREE_CONSTANT (loop_niters))
3296     iters = build2 (MIN_EXPR, niters_type, iters, loop_niters);
3297
3298   var = create_tmp_var (niters_type, "prolog_loop_niters");
3299   add_referenced_tmp_var (var);
3300   iters_name = force_gimple_operand (iters, &stmt, false, var);
3301
3302   /* Insert stmt on loop preheader edge.  */
3303   pe = loop_preheader_edge (loop);
3304   if (stmt)
3305     {
3306       basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
3307       gcc_assert (!new_bb);
3308     }
3309
3310   return iters_name;
3311 }
3312
3313
3314 /* Function vect_update_inits_of_dr
3315
3316    NITERS iterations were peeled from LOOP.  DR represents a data reference
3317    in LOOP.  This function updates the information recorded in DR to
3318    account for the fact that the first NITERS iterations had already been
3319    executed.  Specifically, it updates the OFFSET field of stmt_info.  */
3320
3321 static void
3322 vect_update_inits_of_dr (struct data_reference *dr, tree niters)
3323 {
3324   stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
3325   tree offset = STMT_VINFO_VECT_INIT_OFFSET (stmt_info);
3326
3327   niters = fold (build2 (MULT_EXPR, TREE_TYPE (niters), niters,
3328                          STMT_VINFO_VECT_STEP (stmt_info)));
3329   offset = fold (build2 (PLUS_EXPR, TREE_TYPE (offset), offset, niters));
3330   STMT_VINFO_VECT_INIT_OFFSET (stmt_info) = offset;
3331 }
3332
3333
3334 /* Function vect_update_inits_of_drs
3335
3336    NITERS iterations were peeled from the loop represented by LOOP_VINFO.
3337    This function updates the information recorded for the data references in
3338    the loop to account for the fact that the first NITERS iterations had
3339    already been executed.  Specifically, it updates the initial_condition of the
3340    access_function of all the data_references in the loop.  */
3341
3342 static void
3343 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
3344 {
3345   unsigned int i;
3346   varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
3347   varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
3348
3349   if (dump_file && (dump_flags & TDF_DETAILS))
3350     fprintf (dump_file, "\n<<vect_update_inits_of_dr>>\n");
3351
3352   for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
3353     {
3354       struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
3355       vect_update_inits_of_dr (dr, niters);
3356     }
3357
3358   for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
3359     {
3360       struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
3361       vect_update_inits_of_dr (dr, niters);
3362     }
3363 }
3364
3365
3366 /* Function vect_do_peeling_for_alignment
3367
3368    Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
3369    'niters' is set to the misalignment of one of the data references in the
3370    loop, thereby forcing it to refer to an aligned location at the beginning
3371    of the execution of this loop.  The data reference for which we are
3372    peeling is recorded in LOOP_VINFO_UNALIGNED_DR.  */
3373
3374 static void
3375 vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, struct loops *loops)
3376 {
3377   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3378   tree niters_of_prolog_loop, ni_name;
3379   tree n_iters;
3380   struct loop *new_loop;
3381
3382   if (vect_debug_details (NULL))
3383     fprintf (dump_file, "\n<<vect_do_peeling_for_alignment>>\n");
3384
3385   ni_name = vect_build_loop_niters (loop_vinfo);
3386   niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name);
3387
3388   /* Peel the prolog loop and iterate it niters_of_prolog_loop.  */
3389   new_loop =
3390         slpeel_tree_peel_loop_to_edge (loop, loops, loop_preheader_edge (loop),
3391                                        niters_of_prolog_loop, ni_name, true);
3392 #ifdef ENABLE_CHECKING
3393   gcc_assert (new_loop);
3394   slpeel_verify_cfg_after_peeling (new_loop, loop);
3395 #endif
3396
3397   /* Update number of times loop executes.  */
3398   n_iters = LOOP_VINFO_NITERS (loop_vinfo);
3399   LOOP_VINFO_NITERS (loop_vinfo) =
3400     build2 (MINUS_EXPR, TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop);
3401
3402   /* Update the init conditions of the access functions of all data refs.  */
3403   vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop);
3404
3405   /* After peeling we have to reset scalar evolution analyzer.  */
3406   scev_reset ();
3407
3408   return;
3409 }
3410
3411
3412 /* Function vect_transform_loop.
3413
3414    The analysis phase has determined that the loop is vectorizable.
3415    Vectorize the loop - created vectorized stmts to replace the scalar
3416    stmts in the loop, and update the loop exit condition.  */
3417
3418 static void
3419 vect_transform_loop (loop_vec_info loop_vinfo,
3420                      struct loops *loops ATTRIBUTE_UNUSED)
3421 {
3422   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3423   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
3424   int nbbs = loop->num_nodes;
3425   block_stmt_iterator si;
3426   int i;
3427   tree ratio = NULL;
3428   int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3429
3430   if (vect_debug_details (NULL))
3431     fprintf (dump_file, "\n<<vec_transform_loop>>\n");
3432
3433
3434   /* Peel the loop if there are data refs with unknown alignment.
3435      Only one data ref with unknown store is allowed.  */
3436
3437   if (LOOP_DO_PEELING_FOR_ALIGNMENT (loop_vinfo))
3438     vect_do_peeling_for_alignment (loop_vinfo, loops);
3439
3440   /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
3441      compile time constant), or it is a constant that doesn't divide by the
3442      vectorization factor, then an epilog loop needs to be created.
3443      We therefore duplicate the loop: the original loop will be vectorized,
3444      and will compute the first (n/VF) iterations. The second copy of the loop
3445      will remain scalar and will compute the remaining (n%VF) iterations.
3446      (VF is the vectorization factor).  */
3447
3448   if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3449       || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3450           && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0))
3451     vect_do_peeling_for_loop_bound (loop_vinfo, &ratio, loops);
3452   else
3453     ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
3454                 LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);
3455
3456   /* 1) Make sure the loop header has exactly two entries
3457      2) Make sure we have a preheader basic block.  */
3458
3459   gcc_assert (EDGE_COUNT (loop->header->preds) == 2);
3460
3461   loop_split_edge_with (loop_preheader_edge (loop), NULL);
3462
3463
3464   /* FORNOW: the vectorizer supports only loops which body consist
3465      of one basic block (header + empty latch). When the vectorizer will
3466      support more involved loop forms, the order by which the BBs are
3467      traversed need to be reconsidered.  */
3468
3469   for (i = 0; i < nbbs; i++)
3470     {
3471       basic_block bb = bbs[i];
3472
3473       for (si = bsi_start (bb); !bsi_end_p (si);)
3474         {
3475           tree stmt = bsi_stmt (si);
3476           stmt_vec_info stmt_info;
3477           bool is_store;
3478
3479           if (vect_debug_details (NULL))
3480             {
3481               fprintf (dump_file, "------>vectorizing statement: ");
3482               print_generic_expr (dump_file, stmt, TDF_SLIM);
3483             }
3484           stmt_info = vinfo_for_stmt (stmt);
3485           gcc_assert (stmt_info);
3486           if (!STMT_VINFO_RELEVANT_P (stmt_info))
3487             {
3488               bsi_next (&si);
3489               continue;
3490             }
3491 #ifdef ENABLE_CHECKING
3492           /* FORNOW: Verify that all stmts operate on the same number of
3493                      units and no inner unrolling is necessary.  */
3494           gcc_assert
3495                 (GET_MODE_NUNITS (TYPE_MODE (STMT_VINFO_VECTYPE (stmt_info)))
3496                  == vectorization_factor);
3497 #endif
3498           /* -------- vectorize statement ------------ */
3499           if (vect_debug_details (NULL))
3500             fprintf (dump_file, "transform statement.");
3501
3502           is_store = vect_transform_stmt (stmt, &si);
3503           if (is_store)
3504             {
3505               /* free the attached stmt_vec_info and remove the stmt.  */
3506               stmt_ann_t ann = stmt_ann (stmt);
3507               free (stmt_info);
3508               set_stmt_info (ann, NULL);
3509               bsi_remove (&si);
3510               continue;
3511             }
3512
3513           bsi_next (&si);
3514         }                       /* stmts in BB */
3515     }                           /* BBs in loop */
3516
3517   slpeel_make_loop_iterate_ntimes (loop, ratio);
3518
3519   if (vect_debug_details (loop))
3520     fprintf (dump_file,"Success! loop vectorized.");
3521   if (vect_debug_stats (loop))
3522     fprintf (dump_file, "LOOP VECTORIZED.");
3523 }
3524
3525
3526 /* Function vect_is_simple_use.
3527
3528    Input:
3529    LOOP - the loop that is being vectorized.
3530    OPERAND - operand of a stmt in LOOP.
3531    DEF - the defining stmt in case OPERAND is an SSA_NAME.
3532
3533    Returns whether a stmt with OPERAND can be vectorized.
3534    Supportable operands are constants, loop invariants, and operands that are
3535    defined by the current iteration of the loop. Unsupportable operands are
3536    those that are defined by a previous iteration of the loop (as is the case
3537    in reduction/induction computations).  */
3538
3539 static bool
3540 vect_is_simple_use (tree operand, struct loop *loop, tree *def)
3541 {
3542   tree def_stmt;
3543   basic_block bb;
3544
3545   if (def)
3546     *def = NULL_TREE;
3547
3548   if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
3549     return true;
3550
3551   if (TREE_CODE (operand) != SSA_NAME)
3552     return false;
3553
3554   def_stmt = SSA_NAME_DEF_STMT (operand);
3555   if (def_stmt == NULL_TREE )
3556     {
3557       if (vect_debug_details (NULL))
3558         fprintf (dump_file, "no def_stmt.");
3559       return false;
3560     }
3561
3562   /* empty stmt is expected only in case of a function argument.
3563      (Otherwise - we expect a phi_node or a modify_expr).  */
3564   if (IS_EMPTY_STMT (def_stmt))
3565     {
3566       tree arg = TREE_OPERAND (def_stmt, 0);
3567       if (TREE_CODE (arg) == INTEGER_CST || TREE_CODE (arg) == REAL_CST)
3568         return true;
3569       if (vect_debug_details (NULL))
3570         {
3571           fprintf (dump_file, "Unexpected empty stmt: ");
3572           print_generic_expr (dump_file, def_stmt, TDF_SLIM);
3573         }
3574       return false;
3575     }
3576
3577   /* phi_node inside the loop indicates an induction/reduction pattern.
3578      This is not supported yet.  */
3579   bb = bb_for_stmt (def_stmt);
3580   if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb))
3581     {
3582       if (vect_debug_details (NULL))
3583         fprintf (dump_file, "reduction/induction - unsupported.");
3584       return false; /* FORNOW: not supported yet.  */
3585     }
3586
3587   /* Expecting a modify_expr or a phi_node.  */
3588   if (TREE_CODE (def_stmt) == MODIFY_EXPR
3589       || TREE_CODE (def_stmt) == PHI_NODE)
3590     {
3591       if (def)
3592         *def = def_stmt;
3593       return true;
3594     }
3595
3596   return false;
3597 }
3598
3599
3600 /* Function vect_analyze_operations.
3601
3602    Scan the loop stmts and make sure they are all vectorizable.  */
3603
3604 static bool
3605 vect_analyze_operations (loop_vec_info loop_vinfo)
3606 {
3607   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3608   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
3609   int nbbs = loop->num_nodes;
3610   block_stmt_iterator si;
3611   unsigned int vectorization_factor = 0;
3612   int i;
3613   bool ok;
3614   tree scalar_type;
3615
3616   if (vect_debug_details (NULL))
3617     fprintf (dump_file, "\n<<vect_analyze_operations>>\n");
3618
3619   for (i = 0; i < nbbs; i++)
3620     {
3621       basic_block bb = bbs[i];
3622
3623       for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
3624         {
3625           tree stmt = bsi_stmt (si);
3626           unsigned int nunits;
3627           stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3628           tree vectype;
3629
3630           if (vect_debug_details (NULL))
3631             {
3632               fprintf (dump_file, "==> examining statement: ");
3633               print_generic_expr (dump_file, stmt, TDF_SLIM);
3634             }
3635
3636           gcc_assert (stmt_info);
3637
3638           /* skip stmts which do not need to be vectorized.
3639              this is expected to include:
3640              - the COND_EXPR which is the loop exit condition
3641              - any LABEL_EXPRs in the loop
3642              - computations that are used only for array indexing or loop
3643              control  */
3644
3645           if (!STMT_VINFO_RELEVANT_P (stmt_info))
3646             {
3647               if (vect_debug_details (NULL))
3648                 fprintf (dump_file, "irrelevant.");
3649               continue;
3650             }
3651
3652           if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
3653             {
3654               if (vect_debug_stats (loop) || vect_debug_details (loop))
3655                 {
3656                   fprintf (dump_file, "not vectorized: vector stmt in loop:");
3657                   print_generic_expr (dump_file, stmt, TDF_SLIM);
3658                 }
3659               return false;
3660             }
3661
3662           if (STMT_VINFO_DATA_REF (stmt_info))
3663             scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info)));
3664           else if (TREE_CODE (stmt) == MODIFY_EXPR)
3665             scalar_type = TREE_TYPE (TREE_OPERAND (stmt, 0));
3666           else
3667             scalar_type = TREE_TYPE (stmt);
3668
3669           if (vect_debug_details (NULL))
3670             {
3671               fprintf (dump_file, "get vectype for scalar type:  ");
3672               print_generic_expr (dump_file, scalar_type, TDF_SLIM);
3673             }
3674
3675           vectype = get_vectype_for_scalar_type (scalar_type);
3676           if (!vectype)
3677             {
3678               if (vect_debug_stats (loop) || vect_debug_details (loop))
3679                 {
3680                   fprintf (dump_file, "not vectorized: unsupported data-type ");
3681                   print_generic_expr (dump_file, scalar_type, TDF_SLIM);
3682                 }
3683               return false;
3684             }
3685
3686           if (vect_debug_details (NULL))
3687             {
3688               fprintf (dump_file, "vectype: ");
3689               print_generic_expr (dump_file, vectype, TDF_SLIM);
3690             }
3691           STMT_VINFO_VECTYPE (stmt_info) = vectype;
3692
3693           ok = (vectorizable_operation (stmt, NULL, NULL)
3694                 || vectorizable_assignment (stmt, NULL, NULL)
3695                 || vectorizable_load (stmt, NULL, NULL)
3696                 || vectorizable_store (stmt, NULL, NULL));
3697
3698           if (!ok)
3699             {
3700               if (vect_debug_stats (loop) || vect_debug_details (loop))
3701                 {
3702                   fprintf (dump_file, "not vectorized: stmt not supported: ");
3703                   print_generic_expr (dump_file, stmt, TDF_SLIM);
3704                 }
3705               return false;
3706             }
3707
3708           nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
3709           if (vect_debug_details (NULL))
3710             fprintf (dump_file, "nunits = %d", nunits);
3711
3712           if (vectorization_factor)
3713             {
3714               /* FORNOW: don't allow mixed units.
3715                  This restriction will be relaxed in the future.  */
3716               if (nunits != vectorization_factor)
3717                 {
3718                   if (vect_debug_stats (loop) || vect_debug_details (loop))
3719                     fprintf (dump_file, "not vectorized: mixed data-types");
3720                   return false;
3721                 }
3722             }
3723           else
3724             vectorization_factor = nunits;
3725
3726 #ifdef ENABLE_CHECKING
3727           gcc_assert (GET_MODE_SIZE (TYPE_MODE (scalar_type))
3728                         * vectorization_factor == UNITS_PER_SIMD_WORD);
3729 #endif
3730         }
3731     }
3732
3733   /* TODO: Analyze cost. Decide if worth while to vectorize.  */
3734
3735   if (vectorization_factor <= 1)
3736     {
3737       if (vect_debug_stats (loop) || vect_debug_details (loop))
3738         fprintf (dump_file, "not vectorized: unsupported data-type");
3739       return false;
3740     }
3741   LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
3742
3743   if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) && vect_debug_details (NULL))
3744     fprintf (dump_file,
3745         "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
3746         vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));
3747
3748   if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3749       && LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor)
3750     {
3751       if (vect_debug_stats (loop) || vect_debug_details (loop))
3752         fprintf (dump_file, "not vectorized: iteration count too small.");
3753       return false;
3754     }
3755
3756   if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3757       || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0)
3758     {
3759       if (vect_debug_stats (loop) || vect_debug_details (loop))
3760         fprintf (dump_file, "epilog loop required.");
3761       if (!vect_can_advance_ivs_p (loop))
3762         {
3763           if (vect_debug_stats (loop) || vect_debug_details (loop))
3764             fprintf (dump_file, "not vectorized: can't create epilog loop 1.");
3765           return false;
3766         }
3767       if (!slpeel_can_duplicate_loop_p (loop, loop->exit_edges[0]))
3768         {
3769           if (vect_debug_stats (loop) || vect_debug_details (loop))
3770             fprintf (dump_file, "not vectorized: can't create epilog loop 2.");
3771           return false;
3772         }
3773     }
3774
3775   return true;
3776 }
3777
3778
3779 /* Function exist_non_indexing_operands_for_use_p
3780
3781    USE is one of the uses attached to STMT. Check if USE is
3782    used in STMT for anything other than indexing an array.  */
3783
3784 static bool
3785 exist_non_indexing_operands_for_use_p (tree use, tree stmt)
3786 {
3787   tree operand;
3788   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3789
3790   /* USE corresponds to some operand in STMT. If there is no data
3791      reference in STMT, then any operand that corresponds to USE
3792      is not indexing an array.  */
3793   if (!STMT_VINFO_DATA_REF (stmt_info))
3794     return true;
3795
3796   /* STMT has a data_ref. FORNOW this means that its of one of
3797      the following forms:
3798      -1- ARRAY_REF = var
3799      -2- var = ARRAY_REF
3800      (This should have been verified in analyze_data_refs).
3801
3802      'var' in the second case corresponds to a def, not a use,
3803      so USE cannot correspond to any operands that are not used
3804      for array indexing.
3805
3806      Therefore, all we need to check is if STMT falls into the
3807      first case, and whether var corresponds to USE.  */
3808
3809   if (TREE_CODE (TREE_OPERAND (stmt, 0)) == SSA_NAME)
3810     return false;
3811
3812   operand = TREE_OPERAND (stmt, 1);
3813
3814   if (TREE_CODE (operand) != SSA_NAME)
3815     return false;
3816
3817   if (operand == use)
3818     return true;
3819
3820   return false;
3821 }
3822
3823
3824 /* Function vect_is_simple_iv_evolution.
3825
3826    FORNOW: A simple evolution of an induction variables in the loop is
3827    considered a polynomial evolution with constant step.  */
3828
3829 static bool
3830 vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
3831                                 tree * step, bool strict)
3832 {
3833   tree init_expr;
3834   tree step_expr;
3835
3836   tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
3837
3838   /* When there is no evolution in this loop, the evolution function
3839      is not "simple".  */
3840   if (evolution_part == NULL_TREE)
3841     return false;
3842
3843   /* When the evolution is a polynomial of degree >= 2
3844      the evolution function is not "simple".  */
3845   if (tree_is_chrec (evolution_part))
3846     return false;
3847
3848   step_expr = evolution_part;
3849   init_expr = unshare_expr (initial_condition (access_fn));
3850
3851   if (vect_debug_details (NULL))
3852     {
3853       fprintf (dump_file, "step: ");
3854       print_generic_expr (dump_file, step_expr, TDF_SLIM);
3855       fprintf (dump_file, ",  init: ");
3856       print_generic_expr (dump_file, init_expr, TDF_SLIM);
3857     }
3858
3859   *init = init_expr;
3860   *step = step_expr;
3861
3862   if (TREE_CODE (step_expr) != INTEGER_CST)
3863     {
3864       if (vect_debug_details (NULL))
3865         fprintf (dump_file, "step unknown.");
3866       return false;
3867     }
3868
3869   if (strict)
3870     if (!integer_onep (step_expr))
3871       {
3872         if (vect_debug_details (NULL))
3873           print_generic_expr (dump_file, step_expr, TDF_SLIM);
3874         return false;
3875       }
3876
3877   return true;
3878 }
3879
3880
3881 /* Function vect_analyze_scalar_cycles.
3882
3883    Examine the cross iteration def-use cycles of scalar variables, by
3884    analyzing the loop (scalar) PHIs; verify that the cross iteration def-use
3885    cycles that they represent do not impede vectorization.
3886
3887    FORNOW: Reduction as in the following loop, is not supported yet:
3888               loop1:
3889               for (i=0; i<N; i++)
3890                  sum += a[i];
3891            The cross-iteration cycle corresponding to variable 'sum' will be
3892            considered too complicated and will impede vectorization.
3893
3894    FORNOW: Induction as in the following loop, is not supported yet:
3895               loop2:
3896               for (i=0; i<N; i++)
3897                  a[i] = i;
3898
3899            However, the following loop *is* vectorizable:
3900               loop3:
3901               for (i=0; i<N; i++)
3902                  a[i] = b[i];
3903
3904            In both loops there exists a def-use cycle for the variable i:
3905               loop: i_2 = PHI (i_0, i_1)
3906                     a[i_2] = ...;
3907                     i_1 = i_2 + 1;
3908                     GOTO loop;
3909
3910            The evolution of the above cycle is considered simple enough,
3911            however, we also check that the cycle does not need to be
3912            vectorized, i.e - we check that the variable that this cycle
3913            defines is only used for array indexing or in stmts that do not
3914            need to be vectorized. This is not the case in loop2, but it
3915            *is* the case in loop3.  */
3916
3917 static bool
3918 vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
3919 {
3920   tree phi;
3921   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3922   basic_block bb = loop->header;
3923   tree dummy;
3924
3925   if (vect_debug_details (NULL))
3926     fprintf (dump_file, "\n<<vect_analyze_scalar_cycles>>\n");
3927
3928   for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
3929     {
3930       tree access_fn = NULL;
3931
3932       if (vect_debug_details (NULL))
3933         {
3934           fprintf (dump_file, "Analyze phi: ");
3935           print_generic_expr (dump_file, phi, TDF_SLIM);
3936         }
3937
3938       /* Skip virtual phi's. The data dependences that are associated with
3939          virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */
3940
3941       if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
3942         {
3943           if (vect_debug_details (NULL))
3944             fprintf (dump_file, "virtual phi. skip.");
3945           continue;
3946         }
3947
3948       /* Analyze the evolution function.  */
3949
3950       /* FORNOW: The only scalar cross-iteration cycles that we allow are
3951          those of loop induction variables; This property is verified here.
3952
3953          Furthermore, if that induction variable is used in an operation
3954          that needs to be vectorized (i.e, is not solely used to index
3955          arrays and check the exit condition) - we do not support its
3956          vectorization yet. This property is verified in vect_is_simple_use,
3957          during vect_analyze_operations.  */
3958
3959       access_fn = /* instantiate_parameters
3960                      (loop,*/
3961          analyze_scalar_evolution (loop, PHI_RESULT (phi));
3962
3963       if (!access_fn)
3964         {
3965           if (vect_debug_stats (loop) || vect_debug_details (loop))
3966             fprintf (dump_file, "not vectorized: unsupported scalar cycle.");
3967           return false;
3968         }
3969
3970       if (vect_debug_details (NULL))
3971         {
3972            fprintf (dump_file, "Access function of PHI: ");
3973            print_generic_expr (dump_file, access_fn, TDF_SLIM);
3974         }
3975
3976       if (!vect_is_simple_iv_evolution (loop->num, access_fn, &dummy,
3977                                         &dummy, false))
3978         {
3979           if (vect_debug_stats (loop) || vect_debug_details (loop))
3980             fprintf (dump_file, "not vectorized: unsupported scalar cycle.");
3981           return false;
3982         }
3983     }
3984
3985   return true;
3986 }
3987
3988
3989 /* Function vect_analyze_data_ref_dependence.
3990
3991    Return TRUE if there (might) exist a dependence between a memory-reference
3992    DRA and a memory-reference DRB.  */
3993
3994 static bool
3995 vect_analyze_data_ref_dependence (struct data_reference *dra,
3996                                   struct data_reference *drb,
3997                                   struct loop *loop)
3998 {
3999   bool differ_p;
4000   struct data_dependence_relation *ddr;
4001
4002   if (!array_base_name_differ_p (dra, drb, &differ_p))
4003     {
4004       if (vect_debug_stats (loop) || vect_debug_details (loop))
4005         {
4006           fprintf (dump_file,
4007                 "not vectorized: can't determine dependence between: ");
4008           print_generic_expr (dump_file, DR_REF (dra), TDF_SLIM);
4009           fprintf (dump_file, " and ");
4010           print_generic_expr (dump_file, DR_REF (drb), TDF_SLIM);
4011         }
4012       return true;
4013     }
4014
4015   if (differ_p)
4016     return false;
4017
4018   ddr = initialize_data_dependence_relation (dra, drb);
4019   compute_affine_dependence (ddr);
4020
4021   if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4022     return false;
4023
4024   if (vect_debug_stats (loop) || vect_debug_details (loop))
4025     {
4026       fprintf (dump_file,
4027         "not vectorized: possible dependence between data-refs ");
4028       print_generic_expr (dump_file, DR_REF (dra), TDF_SLIM);
4029       fprintf (dump_file, " and ");
4030       print_generic_expr (dump_file, DR_REF (drb), TDF_SLIM);
4031     }
4032
4033   return true;
4034 }
4035
4036
4037 /* Function vect_analyze_data_ref_dependences.
4038
4039    Examine all the data references in the loop, and make sure there do not
4040    exist any data dependences between them.
4041
4042    TODO: dependences which distance is greater than the vectorization factor
4043          can be ignored.  */
4044
4045 static bool
4046 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
4047 {
4048   unsigned int i, j;
4049   varray_type loop_write_refs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4050   varray_type loop_read_refs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4051   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4052
4053   /* Examine store-store (output) dependences.  */
4054
4055   if (vect_debug_details (NULL))
4056     fprintf (dump_file, "\n<<vect_analyze_dependences>>\n");
4057
4058   if (vect_debug_details (NULL))
4059     fprintf (dump_file, "compare all store-store pairs.");
4060
4061   for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_refs); i++)
4062     {
4063       for (j = i + 1; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
4064         {
4065           struct data_reference *dra =
4066             VARRAY_GENERIC_PTR (loop_write_refs, i);
4067           struct data_reference *drb =
4068             VARRAY_GENERIC_PTR (loop_write_refs, j);
4069           if (vect_analyze_data_ref_dependence (dra, drb, loop))
4070             return false;
4071         }
4072     }
4073
4074   /* Examine load-store (true/anti) dependences.  */
4075
4076   if (vect_debug_details (NULL))
4077     fprintf (dump_file, "compare all load-store pairs.");
4078
4079   for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_refs); i++)
4080     {
4081       for (j = 0; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
4082         {
4083           struct data_reference *dra = VARRAY_GENERIC_PTR (loop_read_refs, i);
4084           struct data_reference *drb =
4085             VARRAY_GENERIC_PTR (loop_write_refs, j);
4086           if (vect_analyze_data_ref_dependence (dra, drb, loop))
4087             return false;
4088         }
4089     }
4090
4091   return true;
4092 }
4093
4094
4095 /* Function vect_compute_data_ref_alignment
4096
4097    Compute the misalignment of the data reference DR.
4098
4099    Output:
4100    1. If during the misalignment computation it is found that the data reference
4101       cannot be vectorized then false is returned.
4102    2. DR_MISALIGNMENT (DR) is defined.
4103
4104    FOR NOW: No analysis is actually performed. Misalignment is calculated
4105    only for trivial cases. TODO.  */
4106
4107 static bool
4108 vect_compute_data_ref_alignment (struct data_reference *dr)
4109 {
4110   tree stmt = DR_STMT (dr);
4111   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4112   tree ref = DR_REF (dr);
4113   tree vectype;
4114   tree base, alignment;
4115   bool base_aligned_p;
4116   tree misalign;
4117
4118   if (vect_debug_details (NULL))
4119     fprintf (dump_file, "vect_compute_data_ref_alignment:");
4120
4121   /* Initialize misalignment to unknown.  */
4122   DR_MISALIGNMENT (dr) = -1;
4123
4124   misalign = STMT_VINFO_VECT_MISALIGNMENT (stmt_info);
4125   base_aligned_p = STMT_VINFO_VECT_BASE_ALIGNED_P (stmt_info);
4126   base = STMT_VINFO_VECT_DR_BASE (stmt_info);
4127   vectype = STMT_VINFO_VECTYPE (stmt_info);
4128
4129   if (!misalign)
4130     {
4131       if (vect_debug_details (NULL))
4132         {
4133           fprintf (dump_file, "Unknown alignment for access: ");
4134           print_generic_expr (dump_file, base, TDF_SLIM);
4135         }
4136       return true;
4137     }
4138
4139   if (!base_aligned_p)
4140     {
4141       if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype)))
4142         {
4143           if (vect_debug_details (NULL))
4144             {
4145               fprintf (dump_file, "can't force alignment of ref: ");
4146               print_generic_expr (dump_file, ref, TDF_SLIM);
4147             }
4148           return true;
4149         }
4150
4151       /* Force the alignment of the decl.
4152          NOTE: This is the only change to the code we make during
4153          the analysis phase, before deciding to vectorize the loop.  */
4154       if (vect_debug_details (NULL))
4155         fprintf (dump_file, "force alignment");
4156       DECL_ALIGN (base) = TYPE_ALIGN (vectype);
4157       DECL_USER_ALIGN (base) = 1;
4158     }
4159
4160   /* At this point we assume that the base is aligned.  */
4161   gcc_assert (base_aligned_p
4162               || (TREE_CODE (base) == VAR_DECL
4163                   && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
4164
4165   /* Alignment required, in bytes:  */
4166   alignment = size_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);
4167
4168   /* Modulo alignment.  */
4169   misalign = size_binop (TRUNC_MOD_EXPR, misalign, alignment);
4170   if (tree_int_cst_sgn (misalign) < 0)
4171     {
4172       /* Negative misalignment value.  */
4173       if (vect_debug_details (NULL))
4174         fprintf (dump_file, "unexpected misalign value");
4175       return false;
4176     }
4177
4178   DR_MISALIGNMENT (dr) = tree_low_cst (misalign, 1);
4179
4180   if (vect_debug_details (NULL))
4181     fprintf (dump_file, "misalign = %d", DR_MISALIGNMENT (dr));
4182
4183   return true;
4184 }
4185
4186
4187 /* Function vect_compute_data_refs_alignment
4188
4189    Compute the misalignment of data references in the loop.
4190    This pass may take place at function granularity instead of at loop
4191    granularity.
4192
4193    FOR NOW: No analysis is actually performed. Misalignment is calculated
4194    only for trivial cases. TODO.  */
4195
4196 static bool
4197 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
4198 {
4199   varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4200   varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4201   unsigned int i;
4202
4203   for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4204     {
4205       struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4206       if (!vect_compute_data_ref_alignment (dr))
4207         return false;
4208     }
4209
4210   for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4211     {
4212       struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4213       if (!vect_compute_data_ref_alignment (dr))
4214         return false;
4215     }
4216
4217   return true;
4218 }
4219
4220
4221 /* Function vect_enhance_data_refs_alignment
4222
4223    This pass will use loop versioning and loop peeling in order to enhance
4224    the alignment of data references in the loop.
4225
4226    FOR NOW: we assume that whatever versioning/peeling takes place, only the
4227    original loop is to be vectorized; Any other loops that are created by
4228    the transformations performed in this pass - are not supposed to be
4229    vectorized. This restriction will be relaxed.  */
4230
4231 static void
4232 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
4233 {
4234   varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4235   varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4236   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4237   unsigned int i;
4238
4239   /*
4240      This pass will require a cost model to guide it whether to apply peeling
4241      or versioning or a combination of the two. For example, the scheme that
4242      intel uses when given a loop with several memory accesses, is as follows:
4243      choose one memory access ('p') which alignment you want to force by doing
4244      peeling. Then, either (1) generate a loop in which 'p' is aligned and all
4245      other accesses are not necessarily aligned, or (2) use loop versioning to
4246      generate one loop in which all accesses are aligned, and another loop in
4247      which only 'p' is necessarily aligned.
4248
4249      ("Automatic Intra-Register Vectorization for the Intel Architecture",
4250       Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
4251       Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
4252
4253      Devising a cost model is the most critical aspect of this work. It will
4254      guide us on which access to peel for, whether to use loop versioning, how
4255      many versions to create, etc. The cost model will probably consist of
4256      generic considerations as well as target specific considerations (on
4257      powerpc for example, misaligned stores are more painful than misaligned
4258      loads).
4259
4260      Here is the general steps involved in alignment enhancements:
4261
4262      -- original loop, before alignment analysis:
4263         for (i=0; i<N; i++){
4264           x = q[i];                     # DR_MISALIGNMENT(q) = unknown
4265           p[i] = y;                     # DR_MISALIGNMENT(p) = unknown
4266         }
4267
4268      -- After vect_compute_data_refs_alignment:
4269         for (i=0; i<N; i++){
4270           x = q[i];                     # DR_MISALIGNMENT(q) = 3
4271           p[i] = y;                     # DR_MISALIGNMENT(p) = unknown
4272         }
4273
4274      -- Possibility 1: we do loop versioning:
4275      if (p is aligned) {
4276         for (i=0; i<N; i++){    # loop 1A
4277           x = q[i];                     # DR_MISALIGNMENT(q) = 3
4278           p[i] = y;                     # DR_MISALIGNMENT(p) = 0
4279         }
4280      }
4281      else {
4282         for (i=0; i<N; i++){    # loop 1B
4283           x = q[i];                     # DR_MISALIGNMENT(q) = 3
4284           p[i] = y;                     # DR_MISALIGNMENT(p) = unaligned
4285         }
4286      }
4287
4288      -- Possibility 2: we do loop peeling:
4289      for (i = 0; i < 3; i++){   # (scalar loop, not to be vectorized).
4290         x = q[i];
4291         p[i] = y;
4292      }
4293      for (i = 3; i < N; i++){   # loop 2A
4294         x = q[i];                       # DR_MISALIGNMENT(q) = 0
4295         p[i] = y;                       # DR_MISALIGNMENT(p) = unknown
4296      }
4297
4298      -- Possibility 3: combination of loop peeling and versioning:
4299      for (i = 0; i < 3; i++){   # (scalar loop, not to be vectorized).
4300         x = q[i];
4301         p[i] = y;
4302      }
4303      if (p is aligned) {
4304         for (i = 3; i<N; i++){  # loop 3A
4305           x = q[i];                     # DR_MISALIGNMENT(q) = 0
4306           p[i] = y;                     # DR_MISALIGNMENT(p) = 0
4307         }
4308      }
4309      else {
4310         for (i = 3; i<N; i++){  # loop 3B
4311           x = q[i];                     # DR_MISALIGNMENT(q) = 0
4312           p[i] = y;                     # DR_MISALIGNMENT(p) = unaligned
4313         }
4314      }
4315
4316      These loops are later passed to loop_transform to be vectorized. The
4317      vectorizer will use the alignment information to guide the transformation
4318      (whether to generate regular loads/stores, or with special handling for
4319      misalignment).
4320    */
4321
4322   /* (1) Peeling to force alignment.  */
4323
4324   /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
4325      Considerations:
4326      + How many accesses will become aligned due to the peeling
4327      - How many accesses will become unaligned due to the peeling,
4328        and the cost of misaligned accesses.
4329      - The cost of peeling (the extra runtime checks, the increase
4330        in code size).
4331
4332      The scheme we use FORNOW: peel to force the alignment of the first
4333      misaligned store in the loop.
4334      Rationale: misaligned stores are not yet supported.
4335
4336      TODO: Use a better cost model.  */
4337
4338   for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4339     {
4340       struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4341       if (!aligned_access_p (dr))
4342         {
4343           LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr;
4344           LOOP_DO_PEELING_FOR_ALIGNMENT (loop_vinfo) = true;
4345           break;
4346         }
4347     }
4348
4349   if (!LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
4350     {
4351       if (vect_debug_details (loop))
4352         fprintf (dump_file, "Peeling for alignment will not be applied.");
4353       return;
4354     }
4355   else
4356     if (vect_debug_details (loop))
4357       fprintf (dump_file, "Peeling for alignment will be applied.");
4358
4359
4360   /* (1.2) Update the alignment info according to the peeling factor.
4361            If the misalignment of the DR we peel for is M, then the
4362            peeling factor is VF - M, and the misalignment of each access DR_i
4363            in the loop is DR_MISALIGNMENT (DR_i) + VF - M.
4364            If the misalignment of the DR we peel for is unknown, then the
4365            misalignment of each access DR_i in the loop is also unknown.
4366
4367            FORNOW: set the misalignment of the accesses to unknown even
4368                    if the peeling factor is known at compile time.
4369
4370            TODO: - if the peeling factor is known at compile time, use that
4371                    when updating the misalignment info of the loop DRs.
4372                  - consider accesses that are known to have the same
4373                    alignment, even if that alignment is unknown.  */
4374
4375   for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4376     {
4377       struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4378       if (dr == LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
4379         DR_MISALIGNMENT (dr) = 0;
4380       else
4381         DR_MISALIGNMENT (dr) = -1;
4382     }
4383   for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4384     {
4385       struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4386       if (dr == LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
4387         DR_MISALIGNMENT (dr) = 0;
4388       else
4389         DR_MISALIGNMENT (dr) = -1;
4390     }
4391 }
4392
4393
4394 /* Function vect_analyze_data_refs_alignment
4395
4396    Analyze the alignment of the data-references in the loop.
4397    FOR NOW: Until support for misliagned accesses is in place, only if all
4398    accesses are aligned can the loop be vectorized. This restriction will be
4399    relaxed.  */
4400
4401 static bool
4402 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
4403 {
4404   varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4405   varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4406   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4407   enum dr_alignment_support supportable_dr_alignment;
4408   unsigned int i;
4409
4410   if (vect_debug_details (NULL))
4411     fprintf (dump_file, "\n<<vect_analyze_data_refs_alignment>>\n");
4412
4413
4414   /* This pass may take place at function granularity instead of at loop
4415      granularity.  */
4416
4417   if (!vect_compute_data_refs_alignment (loop_vinfo))
4418     {
4419       if (vect_debug_details (loop) || vect_debug_stats (loop))
4420         fprintf (dump_file,
4421                  "not vectorized: can't calculate alignment for data ref.");
4422       return false;
4423     }
4424
4425
4426   /* This pass will decide on using loop versioning and/or loop peeling in
4427      order to enhance the alignment of data references in the loop.  */
4428
4429   vect_enhance_data_refs_alignment (loop_vinfo);
4430
4431
4432   /* Finally, check that all the data references in the loop can be
4433      handled with respect to their alignment.  */
4434
4435   for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4436     {
4437       struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4438       supportable_dr_alignment = vect_supportable_dr_alignment (dr);
4439       if (!supportable_dr_alignment)
4440         {
4441           if (vect_debug_details (loop) || vect_debug_stats (loop))
4442             fprintf (dump_file, "not vectorized: unsupported unaligned load.");
4443           return false;
4444         }
4445     }
4446   for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4447     {
4448       struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4449       supportable_dr_alignment = vect_supportable_dr_alignment (dr);
4450       if (!supportable_dr_alignment)
4451         {
4452           if (vect_debug_details (loop) || vect_debug_stats (loop))
4453             fprintf (dump_file, "not vectorized: unsupported unaligned store.");
4454           return false;
4455         }
4456     }
4457
4458   return true;
4459 }
4460
4461
4462 /* Function vect_analyze_data_ref_access.
4463
4464    Analyze the access pattern of the data-reference DR. For now, a data access
4465    has to consecutive to be considered vectorizable.  */
4466
4467 static bool
4468 vect_analyze_data_ref_access (struct data_reference *dr)
4469 {
4470   tree stmt = DR_STMT (dr);
4471   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4472   tree step = STMT_VINFO_VECT_STEP (stmt_info);
4473   tree scalar_type = TREE_TYPE (DR_REF (dr));
4474
4475   if (!step || tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
4476     {
4477       if (vect_debug_details (NULL))
4478         fprintf (dump_file, "not consecutive access");
4479       return false;
4480     }
4481   return true;
4482 }
4483
4484
4485 /* Function vect_analyze_data_ref_accesses.
4486
4487    Analyze the access pattern of all the data references in the loop.
4488
4489    FORNOW: the only access pattern that is considered vectorizable is a
4490            simple step 1 (consecutive) access.
4491
4492    FORNOW: handle only arrays and pointer accesses.  */
4493
4494 static bool
4495 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
4496 {
4497   unsigned int i;
4498   varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4499   varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4500
4501   if (vect_debug_details (NULL))
4502     fprintf (dump_file, "\n<<vect_analyze_data_ref_accesses>>\n");
4503
4504   for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4505     {
4506       struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4507       bool ok = vect_analyze_data_ref_access (dr);
4508       if (!ok)
4509         {
4510           if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo))
4511               || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo)))
4512             fprintf (dump_file, "not vectorized: complicated access pattern.");
4513           return false;
4514         }
4515     }
4516
4517   for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4518     {
4519       struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4520       bool ok = vect_analyze_data_ref_access (dr);
4521       if (!ok)
4522         {
4523           if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo))
4524               || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo)))
4525             fprintf (dump_file, "not vectorized: complicated access pattern.");
4526           return false;
4527         }
4528     }
4529
4530   return true;
4531 }
4532
4533
4534 /* Function vect_analyze_pointer_ref_access.
4535
4536    Input:
4537    STMT - a stmt that contains a data-ref
4538    MEMREF - a data-ref in STMT, which is an INDIRECT_REF.
4539
4540    If the data-ref access is vectorizable, return a data_reference structure
4541    that represents it (DR). Otherwise - return NULL.  */
4542
4543 static struct data_reference *
4544 vect_analyze_pointer_ref_access (tree memref, tree stmt, bool is_read)
4545 {
4546   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4547   struct loop *loop = STMT_VINFO_LOOP (stmt_info);
4548   tree access_fn = analyze_scalar_evolution (loop, TREE_OPERAND (memref, 0));
4549   tree init, step;
4550   tree reftype, innertype;
4551   tree indx_access_fn;
4552   int loopnum = loop->num;
4553   struct data_reference *dr;
4554
4555   if (!access_fn)
4556     {
4557       if (vect_debug_stats (loop) || vect_debug_details (loop))
4558         fprintf (dump_file, "not vectorized: complicated pointer access.");
4559       return NULL;
4560     }
4561
4562   if (vect_debug_details (NULL))
4563     {
4564       fprintf (dump_file, "Access function of ptr: ");
4565       print_generic_expr (dump_file, access_fn, TDF_SLIM);
4566     }
4567
4568   if (!vect_is_simple_iv_evolution (loopnum, access_fn, &init, &step, false))
4569     {
4570       if (vect_debug_stats (loop) || vect_debug_details (loop))
4571         fprintf (dump_file, "not vectorized: pointer access is not simple.");
4572       return NULL;
4573     }
4574
4575   STRIP_NOPS (init);
4576
4577   if (!TREE_CONSTANT (step))
4578     {
4579       if (vect_debug_stats (loop) || vect_debug_details (loop))
4580         fprintf (dump_file,
4581                 "not vectorized: non constant step for pointer access.");
4582       return NULL;
4583     }
4584
4585   reftype = TREE_TYPE (TREE_OPERAND (memref, 0));
4586   if (TREE_CODE (reftype) != POINTER_TYPE)
4587     {
4588       if (vect_debug_stats (loop) || vect_debug_details (loop))
4589         fprintf (dump_file, "not vectorized: unexpected pointer access form.");
4590       return NULL;
4591     }
4592
4593   reftype = TREE_TYPE (init);
4594   if (TREE_CODE (reftype) != POINTER_TYPE)
4595     {
4596       if (vect_debug_stats (loop) || vect_debug_details (loop))
4597         fprintf (dump_file, "not vectorized: unexpected pointer access form.");
4598       return NULL;
4599     }
4600
4601   innertype = TREE_TYPE (reftype);
4602   if (tree_int_cst_compare (TYPE_SIZE_UNIT (innertype), step))
4603     {
4604       /* FORNOW: support only consecutive access */
4605       if (vect_debug_stats (loop) || vect_debug_details (loop))
4606         fprintf (dump_file, "not vectorized: non consecutive access.");
4607       return NULL;
4608     }
4609
4610   STMT_VINFO_VECT_STEP (stmt_info) = fold_convert (sizetype, step);
4611   if (TREE_CODE (init) == PLUS_EXPR
4612       || TREE_CODE (init) == MINUS_EXPR)
4613     STMT_VINFO_VECT_INIT_OFFSET (stmt_info) =
4614       fold (size_binop (TREE_CODE (init), size_zero_node,
4615                         fold_convert (sizetype, TREE_OPERAND (init, 1))));
4616   else
4617     STMT_VINFO_VECT_INIT_OFFSET (stmt_info) = size_zero_node;
4618
4619   indx_access_fn =
4620         build_polynomial_chrec (loopnum, integer_zero_node, integer_one_node);
4621   if (vect_debug_details (NULL))
4622     {
4623       fprintf (dump_file, "Access function of ptr indx: ");
4624       print_generic_expr (dump_file, indx_access_fn, TDF_SLIM);
4625     }
4626   dr = init_data_ref (stmt, memref, init, indx_access_fn, is_read);
4627   return dr;
4628 }
4629
4630
4631 /* Function vect_get_memtag_and_dr.
4632
4633    The function returns the relevant variable for memory tag (for aliasing
4634    purposes). Also data reference structure DR is created.
4635
4636    This function handles three kinds of MEMREF:
4637
4638    It is called from vect_analyze_data_refs with a MEMREF that is either an
4639    ARRAY_REF or an INDIRECT_REF (this is category 1 - "recursion begins").
4640    It builds a DR for them using vect_get_base_and_offset, and calls itself
4641    recursively to retrieve the relevant memtag for the MEMREF, "peeling" the
4642    MEMREF along the way. During the recursive calls, the function may be called
4643    with a MEMREF for which the recursion has to continue - PLUS_EXPR,
4644    MINUS_EXPR, INDIRECT_REF (category 2 - "recursion continues"),
4645    and/or with a MEMREF for which a memtag can be trivially obtained - VAR_DECL
4646    and SSA_NAME (this is category 3 - "recursion stop condition").
4647
4648    When the MEMREF falls into category 1 there is still no data reference struct
4649    (DR) available. It is created by this function, and then, along the recursion,
4650    MEMREF will fall into category 2 or 3, in which case a DR will have already
4651    been created, but the analysis continues to retrieve the MEMTAG.
4652
4653    Input:
4654    MEMREF - data reference in STMT
4655    IS_READ - TRUE if STMT reads from MEMREF, FALSE if writes to MEMREF
4656
4657    Output:
4658    DR - data_reference struct for MEMREF
4659    return value - the relevant variable for memory tag (for aliasing purposes).
4660
4661 */
4662
4663 static tree
4664 vect_get_memtag_and_dr (tree memref, tree stmt, bool is_read,
4665                         loop_vec_info loop_vinfo,
4666                         tree vectype, struct data_reference **dr)
4667 {
4668   tree symbl, oprnd0, oprnd1;
4669   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4670   tree offset, misalign, step;
4671   tree ref_to_be_analyzed, tag, dr_base;
4672   struct data_reference *new_dr;
4673   bool base_aligned_p;
4674
4675   if (*dr)
4676     {
4677       /* Category 3: recursion stop condition.  */
4678       /* (1) A DR already exists. We only need to get the relevant memtag for
4679          MEMREF, the rest of the data was already initialized.  */
4680
4681       switch (TREE_CODE (memref))
4682         {
4683           /* (1.1) Stop condition: find the relevant memtag and return.  */
4684         case SSA_NAME:
4685           symbl = SSA_NAME_VAR (memref);
4686           tag = get_var_ann (symbl)->type_mem_tag;
4687           if (!tag)
4688             {
4689               tree ptr = TREE_OPERAND (DR_REF ((*dr)), 0);
4690               if (TREE_CODE (ptr) == SSA_NAME)
4691                 tag = get_var_ann (SSA_NAME_VAR (ptr))->type_mem_tag;
4692             }
4693           if (!tag)
4694             {
4695               if (vect_debug_details (NULL))
4696                 fprintf (dump_file, "not vectorized: no memtag for ref.");
4697               return NULL_TREE;
4698             }
4699           return tag;
4700
4701         case VAR_DECL:
4702         case PARM_DECL:
4703           return memref;
4704
4705           /* Category 2: recursion continues.  */
4706           /* (1.2) A recursive call to find the relevant memtag is required.  */
4707         case INDIRECT_REF:
4708           symbl = TREE_OPERAND (memref, 0);
4709           break; /* For recursive call.  */
4710
4711         case COMPONENT_REF:
4712           /* Could have recorded more accurate information -
4713              i.e, the actual FIELD_DECL that is being referenced -
4714              but later passes expect VAR_DECL as the nmt.  */
4715           /* Fall through.  */
4716
4717         case ADDR_EXPR:
4718           symbl = STMT_VINFO_VECT_DR_BASE (stmt_info);
4719           break; /* For recursive call.  */
4720
4721         case PLUS_EXPR:
4722         case MINUS_EXPR:
4723           /* Although DR exists, we have to call the function recursively to
4724              build MEMTAG for such expression. This is handled below.  */
4725           oprnd0 = TREE_OPERAND (memref, 0);
4726           oprnd1 = TREE_OPERAND (memref, 1);
4727
4728           STRIP_NOPS (oprnd1);
4729            /* Supported plus/minus expressions are of the form
4730              {address_base + offset}, such that address_base is of type
4731              POINTER/ARRAY, and offset is either an INTEGER_CST of type POINTER,
4732              or it's not of type POINTER/ARRAY.
4733              TODO: swap operands if {offset + address_base}.  */
4734           if ((TREE_CODE (TREE_TYPE (oprnd1)) == POINTER_TYPE
4735                && TREE_CODE (oprnd1) != INTEGER_CST)
4736               || TREE_CODE (TREE_TYPE (oprnd1)) == ARRAY_TYPE)
4737             return NULL_TREE;
4738
4739           symbl = oprnd0;
4740           break; /* For recursive call.  */
4741
4742         default:
4743           return NULL_TREE;
4744         }
4745     }
4746   else
4747     {
4748       /* Category 1: recursion begins.  */
4749       /* (2) A DR does not exist yet and must be built, followed by a
4750          recursive call to get the relevant memtag for MEMREF.  */
4751
4752       switch (TREE_CODE (memref))
4753         {
4754         case INDIRECT_REF:
4755           new_dr = vect_analyze_pointer_ref_access (memref, stmt, is_read);
4756           if (!new_dr)
4757             return NULL_TREE;
4758           *dr = new_dr;
4759           symbl = DR_BASE_NAME (new_dr);
4760           ref_to_be_analyzed = DR_BASE_NAME (new_dr);
4761           break;
4762
4763         case ARRAY_REF:
4764           new_dr = analyze_array (stmt, memref, is_read);
4765           *dr = new_dr;
4766           symbl = DR_BASE_NAME (new_dr);
4767           ref_to_be_analyzed = memref;
4768           break;
4769
4770         default:
4771           /* TODO: Support data-refs of form a[i].p for unions and single
4772              field structures.  */
4773           return NULL_TREE;
4774         }
4775
4776       offset = size_zero_node;
4777       misalign = size_zero_node;
4778       step = size_zero_node;
4779
4780       /* Analyze data-ref, find its base, initial offset from the base, step,
4781          and alignment.  */
4782       dr_base = vect_get_base_and_offset (new_dr, ref_to_be_analyzed,
4783                                           vectype, loop_vinfo, &offset,
4784                                           &misalign, &step, &base_aligned_p);
4785       if (!dr_base)
4786         return NULL_TREE;
4787
4788       /* Initialize information according to above analysis.  */
4789       /* Since offset and step of a pointer can be also set in
4790          vect_analyze_pointer_ref_access, we combine the values here. */
4791       if (STMT_VINFO_VECT_INIT_OFFSET (stmt_info))
4792         STMT_VINFO_VECT_INIT_OFFSET (stmt_info) =
4793           fold (build2 (PLUS_EXPR, TREE_TYPE (offset), offset,
4794                         STMT_VINFO_VECT_INIT_OFFSET (stmt_info)));
4795       else
4796         STMT_VINFO_VECT_INIT_OFFSET (stmt_info) = offset;
4797
4798       if (step && STMT_VINFO_VECT_STEP (stmt_info))
4799         STMT_VINFO_VECT_STEP (stmt_info) =
4800           size_binop (PLUS_EXPR, step, STMT_VINFO_VECT_STEP (stmt_info));
4801       else
4802         STMT_VINFO_VECT_STEP (stmt_info) = step;
4803
4804       STMT_VINFO_VECT_BASE_ALIGNED_P (stmt_info) = base_aligned_p;
4805       STMT_VINFO_VECT_MISALIGNMENT (stmt_info) = misalign;
4806       STMT_VINFO_VECT_DR_BASE (stmt_info) = dr_base;
4807     }
4808
4809   if (!symbl)
4810     return NULL_TREE;
4811   /* Recursive call to retrieve the relevant memtag.  */
4812   tag = vect_get_memtag_and_dr (symbl, stmt, is_read, loop_vinfo, vectype, dr);
4813   return tag;
4814 }
4815
4816
4817
4818 /* Function vect_analyze_data_refs.
4819
4820    Find all the data references in the loop.
4821
4822    The general structure of the analysis of data refs in the vectorizer is as
4823    follows:
4824    1- vect_analyze_data_refs(loop):
4825       Find and analyze all data-refs in the loop:
4826           foreach ref
4827              ref_stmt.memtag =  vect_get_memtag_and_dr (ref)
4828    1.1- vect_get_memtag_and_dr(ref):
4829       Analyze ref, and build a DR (data_referece struct) for it;
4830       call vect_get_base_and_offset to compute base, initial_offset,
4831       step and alignment. Set ref_stmt.base, ref_stmt.initial_offset,
4832       ref_stmt.alignment, and ref_stmt.step accordingly.
4833    1.1.1- vect_get_base_and_offset():
4834       Calculate base, initial_offset, step and alignment.
4835       For ARRAY_REFs and COMPONENT_REFs use call get_inner_reference.
4836    2- vect_analyze_dependences(): apply dependece testing using ref_stmt.DR
4837    3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
4838    4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
4839
4840    FORNOW: Handle aligned INDIRECT_REFs and ARRAY_REFs
4841            which base is really an array (not a pointer) and which alignment
4842            can be forced. This restriction will be relaxed.  */
4843
4844 static bool
4845 vect_analyze_data_refs (loop_vec_info loop_vinfo)
4846 {
4847   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4848   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
4849   int nbbs = loop->num_nodes;
4850   block_stmt_iterator si;
4851   int j;
4852   struct data_reference *dr;
4853
4854   if (vect_debug_details (NULL))
4855     fprintf (dump_file, "\n<<vect_analyze_data_refs>>\n");
4856
4857   for (j = 0; j < nbbs; j++)
4858     {
4859       basic_block bb = bbs[j];
4860       for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
4861         {
4862           bool is_read = false;
4863           tree stmt = bsi_stmt (si);
4864           stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4865           v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
4866           v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
4867           vuse_optype vuses = STMT_VUSE_OPS (stmt);
4868           varray_type *datarefs = NULL;
4869           int nvuses, nv_may_defs, nv_must_defs;
4870           tree memref = NULL;
4871           tree symbl;
4872           tree scalar_type, vectype;
4873
4874           /* Assumption: there exists a data-ref in stmt, if and only if
4875              it has vuses/vdefs.  */
4876
4877           if (!vuses && !v_may_defs && !v_must_defs)
4878             continue;
4879
4880           nvuses = NUM_VUSES (vuses);
4881           nv_may_defs = NUM_V_MAY_DEFS (v_may_defs);
4882           nv_must_defs = NUM_V_MUST_DEFS (v_must_defs);
4883
4884           if (nvuses && (nv_may_defs || nv_must_defs))
4885             {
4886               if (vect_debug_details (NULL))
4887                 {
4888                   fprintf (dump_file, "unexpected vdefs and vuses in stmt: ");
4889                   print_generic_expr (dump_file, stmt, TDF_SLIM);
4890                 }
4891               return false;
4892             }
4893
4894           if (TREE_CODE (stmt) != MODIFY_EXPR)
4895             {
4896               if (vect_debug_details (NULL))
4897                 {
4898                   fprintf (dump_file, "unexpected vops in stmt: ");
4899                   print_generic_expr (dump_file, stmt, TDF_SLIM);
4900                 }
4901               return false;
4902             }
4903
4904           if (vuses)
4905             {
4906               memref = TREE_OPERAND (stmt, 1);
4907               datarefs = &(LOOP_VINFO_DATAREF_READS (loop_vinfo));
4908               is_read = true;
4909             }
4910           else /* vdefs */
4911             {
4912               memref = TREE_OPERAND (stmt, 0);
4913               datarefs = &(LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
4914               is_read = false;
4915             }
4916
4917           scalar_type = TREE_TYPE (memref);
4918           vectype = get_vectype_for_scalar_type (scalar_type);
4919           if (!vectype)
4920             {
4921               if (vect_debug_details (NULL))
4922                 {
4923                   fprintf (dump_file, "no vectype for stmt: ");
4924                   print_generic_expr (dump_file, stmt, TDF_SLIM);
4925                   fprintf (dump_file, " scalar_type: ");
4926                   print_generic_expr (dump_file, scalar_type, TDF_DETAILS);
4927                 }
4928               /* It is not possible to vectorize this data reference.  */
4929               return false;
4930             }
4931           /* Analyze MEMREF. If it is of a supported form, build data_reference
4932              struct for it (DR) and find memtag for aliasing purposes.  */
4933           dr = NULL;
4934           symbl = vect_get_memtag_and_dr (memref, stmt, is_read, loop_vinfo,
4935                                           vectype, &dr);
4936           if (!symbl)
4937             {
4938               if (vect_debug_stats (loop) || vect_debug_details (loop))
4939                 {
4940                   fprintf (dump_file, "not vectorized: unhandled data ref: ");
4941                   print_generic_expr (dump_file, stmt, TDF_SLIM);
4942                 }
4943               return false;
4944             }
4945           STMT_VINFO_MEMTAG (stmt_info) = symbl;
4946           STMT_VINFO_VECTYPE (stmt_info) = vectype;
4947           VARRAY_PUSH_GENERIC_PTR (*datarefs, dr);
4948           STMT_VINFO_DATA_REF (stmt_info) = dr;
4949         }
4950     }
4951
4952   return true;
4953 }
4954
4955
4956 /* Utility functions used by vect_mark_stmts_to_be_vectorized.  */
4957
4958 /* Function vect_mark_relevant.
4959
4960    Mark STMT as "relevant for vectorization" and add it to WORKLIST.  */
4961
4962 static void
4963 vect_mark_relevant (varray_type *worklist, tree stmt)
4964 {
4965   stmt_vec_info stmt_info;
4966
4967   if (vect_debug_details (NULL))
4968     fprintf (dump_file, "mark relevant.");
4969
4970   if (TREE_CODE (stmt) == PHI_NODE)
4971     {
4972       VARRAY_PUSH_TREE (*worklist, stmt);
4973       return;
4974     }
4975
4976   stmt_info = vinfo_for_stmt (stmt);
4977
4978   if (!stmt_info)
4979     {
4980       if (vect_debug_details (NULL))
4981         {
4982           fprintf (dump_file, "mark relevant: no stmt info!!.");
4983           print_generic_expr (dump_file, stmt, TDF_SLIM);
4984         }
4985       return;
4986     }
4987
4988   if (STMT_VINFO_RELEVANT_P (stmt_info))
4989     {
4990       if (vect_debug_details (NULL))
4991         fprintf (dump_file, "already marked relevant.");
4992       return;
4993     }
4994
4995   STMT_VINFO_RELEVANT_P (stmt_info) = 1;
4996   VARRAY_PUSH_TREE (*worklist, stmt);
4997 }
4998
4999
5000 /* Function vect_stmt_relevant_p.
5001
5002    Return true if STMT in loop that is represented by LOOP_VINFO is
5003    "relevant for vectorization".
5004
5005    A stmt is considered "relevant for vectorization" if:
5006    - it has uses outside the loop.
5007    - it has vdefs (it alters memory).
5008    - control stmts in the loop (except for the exit condition).
5009
5010    CHECKME: what other side effects would the vectorizer allow?  */
5011
5012 static bool
5013 vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo)
5014 {
5015   v_may_def_optype v_may_defs;
5016   v_must_def_optype v_must_defs;
5017   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5018   int i;
5019   dataflow_t df;
5020   int num_uses;
5021
5022   /* cond stmt other than loop exit cond.  */
5023   if (is_ctrl_stmt (stmt) && (stmt != LOOP_VINFO_EXIT_COND (loop_vinfo)))
5024     return true;
5025
5026   /* changing memory.  */
5027   v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
5028   v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
5029   if (v_may_defs || v_must_defs)
5030     {
5031       if (vect_debug_details (NULL))
5032         fprintf (dump_file, "vec_stmt_relevant_p: stmt has vdefs.");
5033       return true;
5034     }
5035
5036   /* uses outside the loop.  */
5037   df = get_immediate_uses (stmt);
5038   num_uses = num_immediate_uses (df);
5039   for (i = 0; i < num_uses; i++)
5040     {
5041       tree use = immediate_use (df, i);
5042       basic_block bb = bb_for_stmt (use);
5043       if (!flow_bb_inside_loop_p (loop, bb))
5044         {
5045           if (vect_debug_details (NULL))
5046             fprintf (dump_file, "vec_stmt_relevant_p: used out of loop.");
5047           return true;
5048         }
5049     }
5050
5051   return false;
5052 }
5053
5054
5055 /* Function vect_mark_stmts_to_be_vectorized.
5056
5057    Not all stmts in the loop need to be vectorized. For example:
5058
5059      for i...
5060        for j...
5061    1.    T0 = i + j
5062    2.    T1 = a[T0]
5063
5064    3.    j = j + 1
5065
5066    Stmt 1 and 3 do not need to be vectorized, because loop control and
5067    addressing of vectorized data-refs are handled differently.
5068
5069    This pass detects such stmts.  */
5070
5071 static bool
5072 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
5073 {
5074   varray_type worklist;
5075   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5076   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
5077   unsigned int nbbs = loop->num_nodes;
5078   block_stmt_iterator si;
5079   tree stmt;
5080   stmt_ann_t ann;
5081   unsigned int i;
5082   int j;
5083   use_optype use_ops;
5084   stmt_vec_info stmt_info;
5085
5086   if (vect_debug_details (NULL))
5087     fprintf (dump_file, "\n<<vect_mark_stmts_to_be_vectorized>>\n");
5088
5089   VARRAY_TREE_INIT (worklist, 64, "work list");
5090
5091   /* 1. Init worklist.  */
5092
5093   for (i = 0; i < nbbs; i++)
5094     {
5095       basic_block bb = bbs[i];
5096       for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
5097         {
5098           stmt = bsi_stmt (si);
5099
5100           if (vect_debug_details (NULL))
5101             {
5102               fprintf (dump_file, "init: stmt relevant? ");
5103               print_generic_expr (dump_file, stmt, TDF_SLIM);
5104             }
5105
5106           stmt_info = vinfo_for_stmt (stmt);
5107           STMT_VINFO_RELEVANT_P (stmt_info) = 0;
5108
5109           if (vect_stmt_relevant_p (stmt, loop_vinfo))
5110             vect_mark_relevant (&worklist, stmt);
5111         }
5112     }
5113
5114
5115   /* 2. Process_worklist */
5116
5117   while (VARRAY_ACTIVE_SIZE (worklist) > 0)
5118     {
5119       stmt = VARRAY_TOP_TREE (worklist);
5120       VARRAY_POP (worklist);
5121
5122       if (vect_debug_details (NULL))
5123         {
5124           fprintf (dump_file, "worklist: examine stmt: ");
5125           print_generic_expr (dump_file, stmt, TDF_SLIM);
5126         }
5127
5128       /* Examine the USES in this statement. Mark all the statements which
5129          feed this statement's uses as "relevant", unless the USE is used as
5130          an array index.  */
5131
5132       if (TREE_CODE (stmt) == PHI_NODE)
5133         {
5134           /* follow the def-use chain inside the loop.  */
5135           for (j = 0; j < PHI_NUM_ARGS (stmt); j++)
5136             {
5137               tree arg = PHI_ARG_DEF (stmt, j);
5138               tree def_stmt = NULL_TREE;
5139               basic_block bb;
5140               if (!vect_is_simple_use (arg, loop, &def_stmt))
5141                 {
5142                   if (vect_debug_details (NULL))
5143                     fprintf (dump_file, "worklist: unsupported use.");
5144                   varray_clear (worklist);
5145                   return false;
5146                 }
5147               if (!def_stmt)
5148                 continue;
5149
5150               if (vect_debug_details (NULL))
5151                 {
5152                   fprintf (dump_file, "worklist: def_stmt: ");
5153                   print_generic_expr (dump_file, def_stmt, TDF_SLIM);
5154                 }
5155
5156               bb = bb_for_stmt (def_stmt);
5157               if (flow_bb_inside_loop_p (loop, bb))
5158                 vect_mark_relevant (&worklist, def_stmt);
5159             }
5160         }
5161
5162       ann = stmt_ann (stmt);
5163       use_ops = USE_OPS (ann);
5164
5165       for (i = 0; i < NUM_USES (use_ops); i++)
5166         {
5167           tree use = USE_OP (use_ops, i);
5168
5169           /* We are only interested in uses that need to be vectorized. Uses
5170              that are used for address computation are not considered relevant.
5171            */
5172           if (exist_non_indexing_operands_for_use_p (use, stmt))
5173             {
5174               tree def_stmt = NULL_TREE;
5175               basic_block bb;
5176               if (!vect_is_simple_use (use, loop, &def_stmt))
5177                 {
5178                   if (vect_debug_details (NULL))
5179                     fprintf (dump_file, "worklist: unsupported use.");
5180                   varray_clear (worklist);
5181                   return false;
5182                 }
5183
5184               if (!def_stmt)
5185                 continue;
5186
5187               if (vect_debug_details (NULL))
5188                 {
5189                   fprintf (dump_file, "worklist: examine use %d: ", i);
5190                   print_generic_expr (dump_file, use, TDF_SLIM);
5191                 }
5192
5193               bb = bb_for_stmt (def_stmt);
5194               if (flow_bb_inside_loop_p (loop, bb))
5195                 vect_mark_relevant (&worklist, def_stmt);
5196             }
5197         }
5198     }                           /* while worklist */
5199
5200   varray_clear (worklist);
5201   return true;
5202 }
5203
5204
5205 /* Function vect_can_advance_ivs_p
5206
5207    In case the number of iterations that LOOP iterates in unknown at compile
5208    time, an epilog loop will be generated, and the loop induction variables
5209    (IVs) will be "advanced" to the value they are supposed to take just before
5210    the epilog loop.  Here we check that the access function of the loop IVs
5211    and the expression that represents the loop bound are simple enough.
5212    These restrictions will be relaxed in the future.  */
5213
5214 static bool
5215 vect_can_advance_ivs_p (struct loop *loop)
5216 {
5217   basic_block bb = loop->header;
5218   tree phi;
5219
5220   /* Analyze phi functions of the loop header.  */
5221
5222   for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
5223     {
5224       tree access_fn = NULL;
5225       tree evolution_part;
5226
5227       if (vect_debug_details (NULL))
5228         {
5229           fprintf (dump_file, "Analyze phi: ");
5230           print_generic_expr (dump_file, phi, TDF_SLIM);
5231         }
5232
5233       /* Skip virtual phi's. The data dependences that are associated with
5234          virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */
5235
5236       if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
5237         {
5238           if (vect_debug_details (NULL))
5239             fprintf (dump_file, "virtual phi. skip.");
5240           continue;
5241         }
5242
5243       /* Analyze the evolution function.  */
5244
5245       access_fn = instantiate_parameters
5246         (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
5247
5248       if (!access_fn)
5249         {
5250           if (vect_debug_details (NULL))
5251             fprintf (dump_file, "No Access function.");
5252           return false;
5253         }
5254
5255       if (vect_debug_details (NULL))
5256         {
5257           fprintf (dump_file, "Access function of PHI: ");
5258           print_generic_expr (dump_file, access_fn, TDF_SLIM);
5259         }
5260
5261       evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
5262
5263       if (evolution_part == NULL_TREE)
5264         return false;
5265
5266       /* FORNOW: We do not transform initial conditions of IVs
5267          which evolution functions are a polynomial of degree >= 2.  */
5268
5269       if (tree_is_chrec (evolution_part))
5270         return false;
5271     }
5272
5273   return true;
5274 }
5275
5276
5277 /* Function vect_get_loop_niters.
5278
5279    Determine how many iterations the loop is executed.
5280    If an expression that represents the number of iterations
5281    can be constructed, place it in NUMBER_OF_ITERATIONS.
5282    Return the loop exit condition.  */
5283
5284 static tree
5285 vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
5286 {
5287   tree niters;
5288
5289   if (vect_debug_details (NULL))
5290     fprintf (dump_file, "\n<<get_loop_niters>>\n");
5291
5292   niters = number_of_iterations_in_loop (loop);
5293
5294   if (niters != NULL_TREE
5295       && niters != chrec_dont_know)
5296     {
5297       *number_of_iterations = niters;
5298
5299       if (vect_debug_details (NULL))
5300         {
5301           fprintf (dump_file, "==> get_loop_niters:" );
5302           print_generic_expr (dump_file, *number_of_iterations, TDF_SLIM);
5303         }
5304     }
5305
5306   return get_loop_exit_condition (loop);
5307 }
5308
5309
5310 /* Function vect_analyze_loop_form.
5311
5312    Verify the following restrictions (some may be relaxed in the future):
5313    - it's an inner-most loop
5314    - number of BBs = 2 (which are the loop header and the latch)
5315    - the loop has a pre-header
5316    - the loop has a single entry and exit
5317    - the loop exit condition is simple enough, and the number of iterations
5318      can be analyzed (a countable loop).  */
5319
5320 static loop_vec_info
5321 vect_analyze_loop_form (struct loop *loop)
5322 {
5323   loop_vec_info loop_vinfo;
5324   tree loop_cond;
5325   tree number_of_iterations = NULL;
5326   bool rescan = false;
5327
5328   if (vect_debug_details (loop))
5329     fprintf (dump_file, "\n<<vect_analyze_loop_form>>\n");
5330
5331   if (loop->inner
5332       || !loop->single_exit
5333       || loop->num_nodes != 2
5334       || EDGE_COUNT (loop->header->preds) != 2
5335       || loop->num_entries != 1)
5336     {
5337       if (vect_debug_stats (loop) || vect_debug_details (loop))
5338         {
5339           fprintf (dump_file, "not vectorized: bad loop form. ");
5340           if (loop->inner)
5341             fprintf (dump_file, "nested loop.");
5342           else if (!loop->single_exit)
5343             fprintf (dump_file, "multiple exits.");
5344           else if (loop->num_nodes != 2)
5345             fprintf (dump_file, "too many BBs in loop.");
5346           else if (EDGE_COUNT (loop->header->preds) != 2)
5347             fprintf (dump_file, "too many incoming edges.");
5348           else if (loop->num_entries != 1)
5349             fprintf (dump_file, "too many entries.");
5350         }
5351
5352       return NULL;
5353     }
5354
5355   /* We assume that the loop exit condition is at the end of the loop. i.e,
5356      that the loop is represented as a do-while (with a proper if-guard
5357      before the loop if needed), where the loop header contains all the
5358      executable statements, and the latch is empty.  */
5359   if (!empty_block_p (loop->latch))
5360     {
5361       if (vect_debug_stats (loop) || vect_debug_details (loop))
5362         fprintf (dump_file, "not vectorized: unexpectd loop form.");
5363       return NULL;
5364     }
5365
5366   /* Make sure we have a preheader basic block.  */
5367   if (!loop->pre_header)
5368     {
5369       rescan = true;
5370       loop_split_edge_with (loop_preheader_edge (loop), NULL);
5371     }
5372
5373   /* Make sure there exists a single-predecessor exit bb:  */
5374   if (EDGE_COUNT (loop->exit_edges[0]->dest->preds) != 1)
5375     {
5376       rescan = true;
5377       loop_split_edge_with (loop->exit_edges[0], NULL);
5378     }
5379
5380   if (rescan)
5381     {
5382       flow_loop_scan (loop, LOOP_ALL);
5383       /* Flow loop scan does not update loop->single_exit field.  */
5384       loop->single_exit = loop->exit_edges[0];
5385     }
5386
5387   if (empty_block_p (loop->header))
5388     {
5389       if (vect_debug_stats (loop) || vect_debug_details (loop))
5390         fprintf (dump_file, "not vectorized: empty loop.");
5391       return NULL;
5392     }
5393
5394   loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
5395   if (!loop_cond)
5396     {
5397       if (vect_debug_stats (loop) || vect_debug_details (loop))
5398         fprintf (dump_file, "not vectorized: complicated exit condition.");
5399       return NULL;
5400     }
5401
5402   if (!number_of_iterations)
5403     {
5404       if (vect_debug_stats (loop) || vect_debug_details (loop))
5405         fprintf (dump_file,
5406                  "not vectorized: number of iterations cannot be computed.");
5407       return NULL;
5408     }
5409
5410   if (chrec_contains_undetermined (number_of_iterations))
5411     {
5412       if (vect_debug_details (NULL))
5413         fprintf (dump_file, "Infinite number of iterations.");
5414       return false;
5415     }
5416
5417   loop_vinfo = new_loop_vec_info (loop);
5418   LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
5419
5420   if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
5421     {
5422       if (vect_debug_details (loop))
5423         {
5424           fprintf (dump_file, "loop bound unknown.\n");
5425           fprintf (dump_file, "Symbolic number of iterations is ");
5426           print_generic_expr (dump_file, number_of_iterations, TDF_DETAILS);
5427         }
5428     }
5429   else
5430   if (LOOP_VINFO_INT_NITERS (loop_vinfo) == 0)
5431     {
5432       if (vect_debug_stats (loop) || vect_debug_details (loop))
5433         fprintf (dump_file, "not vectorized: number of iterations = 0.");
5434       return NULL;
5435     }
5436
5437   LOOP_VINFO_EXIT_COND (loop_vinfo) = loop_cond;
5438
5439   return loop_vinfo;
5440 }
5441
5442
5443 /* Function vect_analyze_loop.
5444
5445    Apply a set of analyses on LOOP, and create a loop_vec_info struct
5446    for it. The different analyses will record information in the
5447    loop_vec_info struct.  */
5448
5449 static loop_vec_info
5450 vect_analyze_loop (struct loop *loop)
5451 {
5452   bool ok;
5453   loop_vec_info loop_vinfo;
5454
5455   if (vect_debug_details (NULL))
5456     fprintf (dump_file, "\n<<<<<<< analyze_loop_nest >>>>>>>\n");
5457
5458   /* Check the CFG characteristics of the loop (nesting, entry/exit, etc.  */
5459
5460   loop_vinfo = vect_analyze_loop_form (loop);
5461   if (!loop_vinfo)
5462     {
5463       if (vect_debug_details (loop))
5464         fprintf (dump_file, "bad loop form.");
5465       return NULL;
5466     }
5467
5468   /* Find all data references in the loop (which correspond to vdefs/vuses)
5469      and analyze their evolution in the loop.
5470
5471      FORNOW: Handle only simple, array references, which
5472      alignment can be forced, and aligned pointer-references.  */
5473
5474   ok = vect_analyze_data_refs (loop_vinfo);
5475   if (!ok)
5476     {
5477       if (vect_debug_details (loop))
5478         fprintf (dump_file, "bad data references.");
5479       destroy_loop_vec_info (loop_vinfo);
5480       return NULL;
5481     }
5482
5483   /* Data-flow analysis to detect stmts that do not need to be vectorized.  */
5484
5485   ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
5486   if (!ok)
5487     {
5488       if (vect_debug_details (loop))
5489         fprintf (dump_file, "unexpected pattern.");
5490       if (vect_debug_details (loop))
5491         fprintf (dump_file, "not vectorized: unexpected pattern.");
5492       destroy_loop_vec_info (loop_vinfo);
5493       return NULL;
5494     }
5495
5496   /* Check that all cross-iteration scalar data-flow cycles are OK.
5497      Cross-iteration cycles caused by virtual phis are analyzed separately.  */
5498
5499   ok = vect_analyze_scalar_cycles (loop_vinfo);
5500   if (!ok)
5501     {
5502       if (vect_debug_details (loop))
5503         fprintf (dump_file, "bad scalar cycle.");
5504       destroy_loop_vec_info (loop_vinfo);
5505       return NULL;
5506     }
5507
5508   /* Analyze data dependences between the data-refs in the loop.
5509      FORNOW: fail at the first data dependence that we encounter.  */
5510
5511   ok = vect_analyze_data_ref_dependences (loop_vinfo);
5512   if (!ok)
5513     {
5514       if (vect_debug_details (loop))
5515         fprintf (dump_file, "bad data dependence.");
5516       destroy_loop_vec_info (loop_vinfo);
5517       return NULL;
5518     }
5519
5520   /* Analyze the access patterns of the data-refs in the loop (consecutive,
5521      complex, etc.). FORNOW: Only handle consecutive access pattern.  */
5522
5523   ok = vect_analyze_data_ref_accesses (loop_vinfo);
5524   if (!ok)
5525     {
5526       if (vect_debug_details (loop))
5527         fprintf (dump_file, "bad data access.");
5528       destroy_loop_vec_info (loop_vinfo);
5529       return NULL;
5530     }
5531
5532   /* Analyze the alignment of the data-refs in the loop.
5533      FORNOW: Only aligned accesses are handled.  */
5534
5535   ok = vect_analyze_data_refs_alignment (loop_vinfo);
5536   if (!ok)
5537     {
5538       if (vect_debug_details (loop))
5539         fprintf (dump_file, "bad data alignment.");
5540       destroy_loop_vec_info (loop_vinfo);
5541       return NULL;
5542     }
5543
5544   /* Scan all the operations in the loop and make sure they are
5545      vectorizable.  */
5546
5547   ok = vect_analyze_operations (loop_vinfo);
5548   if (!ok)
5549     {
5550       if (vect_debug_details (loop))
5551         fprintf (dump_file, "bad operation or unsupported loop bound.");
5552       destroy_loop_vec_info (loop_vinfo);
5553       return NULL;
5554     }
5555
5556   LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
5557
5558   return loop_vinfo;
5559 }
5560
5561
5562 /* Function need_imm_uses_for.
5563
5564    Return whether we ought to include information for 'var'
5565    when calculating immediate uses.  For this pass we only want use
5566    information for non-virtual variables.  */
5567
5568 static bool
5569 need_imm_uses_for (tree var)
5570 {
5571   return is_gimple_reg (var);
5572 }
5573
5574
5575 /* Function vectorize_loops.
5576
5577    Entry Point to loop vectorization phase.  */
5578
5579 void
5580 vectorize_loops (struct loops *loops)
5581 {
5582   unsigned int i, loops_num;
5583   unsigned int num_vectorized_loops = 0;
5584
5585   /* Does the target support SIMD?  */
5586   /* FORNOW: until more sophisticated machine modelling is in place.  */
5587   if (!UNITS_PER_SIMD_WORD)
5588     {
5589       if (vect_debug_details (NULL))
5590         fprintf (dump_file, "vectorizer: target vector size is not defined.");
5591       return;
5592     }
5593
5594 #ifdef ENABLE_CHECKING
5595   verify_loop_closed_ssa ();
5596 #endif
5597
5598   compute_immediate_uses (TDFA_USE_OPS, need_imm_uses_for);
5599
5600   /*  ----------- Analyze loops. -----------  */
5601
5602   /* If some loop was duplicated, it gets bigger number
5603      than all previously defined loops. This fact allows us to run
5604      only over initial loops skipping newly generated ones.  */
5605   loops_num = loops->num;
5606   for (i = 1; i < loops_num; i++)
5607     {
5608       loop_vec_info loop_vinfo;
5609       struct loop *loop = loops->parray[i];
5610
5611       if (!loop)
5612         continue;
5613
5614       loop_vinfo = vect_analyze_loop (loop);
5615       loop->aux = loop_vinfo;
5616
5617       if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
5618         continue;
5619
5620       vect_transform_loop (loop_vinfo, loops);
5621       num_vectorized_loops++;
5622     }
5623
5624   if (vect_debug_stats (NULL) || vect_debug_details (NULL))
5625     fprintf (dump_file, "\nvectorized %u loops in function.\n",
5626              num_vectorized_loops);
5627
5628   /*  ----------- Finalize. -----------  */
5629
5630   free_df ();
5631   for (i = 1; i < loops_num; i++)
5632     {
5633       struct loop *loop = loops->parray[i];
5634       loop_vec_info loop_vinfo;
5635
5636       if (!loop)
5637         continue;
5638       loop_vinfo = loop->aux;
5639       destroy_loop_vec_info (loop_vinfo);
5640       loop->aux = NULL;
5641     }
5642
5643   rewrite_into_ssa (false);
5644   rewrite_into_loop_closed_ssa (); /* FORNOW */
5645   bitmap_clear (vars_to_rename);
5646 }