gcc/tree-vectorizer.c

   1 /* Loop Vectorization
   2    Copyright (C) 2003, 2004 Free Software Foundation, Inc.
   3    Contributed by Dorit Naishlos <dorit@il.ibm.com>
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 2, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING.  If not, write to the Free
  19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
  20 02111-1307, USA.  */
  21
  22 /* Loop Vectorization Pass.
  23
  24    This pass tries to vectorize loops. This first implementation focuses on
  25    simple inner-most loops, with no conditional control flow, and a set of
  26    simple operations which vector form can be expressed using existing
  27    tree codes (PLUS, MULT etc).
  28
  29    For example, the vectorizer transforms the following simple loop:
  30
  31         short a[N]; short b[N]; short c[N]; int i;
  32
  33         for (i=0; i<N; i++){
  34           a[i] = b[i] + c[i];
  35         }
  36
  37    as if it was manually vectorized by rewriting the source code into:
  38
  39         typedef int __attribute__((mode(V8HI))) v8hi;
  40         short a[N];  short b[N]; short c[N];   int i;
  41         v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
  42         v8hi va, vb, vc;
  43
  44         for (i=0; i<N/8; i++){
  45           vb = pb[i];
  46           vc = pc[i];
  47           va = vb + vc;
  48           pa[i] = va;
  49         }
  50
  51         The main entry to this pass is vectorize_loops(), in which
  52    the vectorizer applies a set of analyses on a given set of loops,
  53    followed by the actual vectorization transformation for the loops that
  54    had successfully passed the analysis phase.
  55
  56         Throughout this pass we make a distinction between two types of
  57    data: scalars (which are represented by SSA_NAMES), and memory references
  58    ("data-refs"). These two types of data require different handling both
  59    during analysis and transformation. The types of data-refs that the
  60    vectorizer currently supports are ARRAY_REFS which base is an array DECL
  61    (not a pointer), and INDIRECT_REFS through pointers; both array and pointer
  62    accesses are required to have a  simple (consecutive) access pattern.
  63
  64    Analysis phase:
  65    ===============
  66         The driver for the analysis phase is vect_analyze_loop_nest().
  67    It applies a set of analyses, some of which rely on the scalar evolution
  68    analyzer (scev) developed by Sebastian Pop.
  69
  70         During the analysis phase the vectorizer records some information
  71    per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
  72    loop, as well as general information about the loop as a whole, which is
  73    recorded in a "loop_vec_info" struct attached to each loop.
  74
  75    Transformation phase:
  76    =====================
  77         The loop transformation phase scans all the stmts in the loop, and
  78    creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
  79    the loop that needs to be vectorized. It insert the vector code sequence
  80    just before the scalar stmt S, and records a pointer to the vector code
  81    in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
  82    attached to S). This pointer will be used for the vectorization of following
  83    stmts which use the def of stmt S. Stmt S is removed if it writes to memory;
  84    otherwise, we rely on dead code elimination for removing it.
  85
  86         For example, say stmt S1 was vectorized into stmt VS1:
  87
  88    VS1: vb = px[i];
  89    S1:  b = x[i];    STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
  90    S2:  a = b;
  91
  92    To vectorize stmt S2, the vectorizer first finds the stmt that defines
  93    the operand 'b' (S1), and gets the relevant vector def 'vb' from the
  94    vector stmt VS1 pointed by STMT_VINFO_VEC_STMT (stmt_info (S1)). The
  95    resulting sequence would be:
  96
  97    VS1: vb = px[i];
  98    S1:  b = x[i];       STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
  99    VS2: va = vb;
 100    S2:  a = b;          STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2
 101
 102         Operands that are not SSA_NAMEs, are data-refs that appear in
 103    load/store operations (like 'x[i]' in S1), and are handled differently.
 104
 105    Target modeling:
 106    =================
 107         Currently the only target specific information that is used is the
 108    size of the vector (in bytes) - "UNITS_PER_SIMD_WORD". Targets that can
 109    support different sizes of vectors, for now will need to specify one value
 110    for "UNITS_PER_SIMD_WORD". More flexibility will be added in the future.
 111
 112         Since we only vectorize operations which vector form can be
 113    expressed using existing tree codes, to verify that an operation is
 114    supported, the vectorizer checks the relevant optab at the relevant
 115    machine_mode (e.g, add_optab->handlers[(int) V8HImode].insn_code). If
 116    the value found is CODE_FOR_nothing, then there's no target support, and
 117    we can't vectorize the stmt.
 118
 119    For additional information on this project see:
 120    http://gcc.gnu.org/projects/tree-ssa/vectorization.html
 121 */
 122
 123 #include "config.h"
 124 #include "system.h"
 125 #include "coretypes.h"
 126 #include "tm.h"
 127 #include "errors.h"
 128 #include "ggc.h"
 129 #include "tree.h"
 130 #include "target.h"
 131
 132 #include "rtl.h"
 133 #include "basic-block.h"
 134 #include "diagnostic.h"
 135 #include "tree-flow.h"
 136 #include "tree-dump.h"
 137 #include "timevar.h"
 138 #include "cfgloop.h"
 139 #include "cfglayout.h"
 140 #include "expr.h"
 141 #include "optabs.h"
 142 #include "toplev.h"
 143 #include "tree-chrec.h"
 144 #include "tree-data-ref.h"
 145 #include "tree-scalar-evolution.h"
 146 #include "tree-vectorizer.h"
 147 #include "tree-pass.h"
 148
 149
 150 /*************************************************************************
 151   Simple Loop Peeling Utilities
 152  *************************************************************************/
 153
 154 /* Entry point for peeling of simple loops.
 155    Peel the first/last iterations of a loop.
 156    It can be used outside of the vectorizer for loops that are simple enough
 157    (see function documentation).  In the vectorizer it is used to peel the
 158    last few iterations when the loop bound is unknown or does not evenly
 159    divide by the vectorization factor, and to peel the first few iterations
 160    to force the alignment of data references in the loop.  */
 161 struct loop *slpeel_tree_peel_loop_to_edge
 162   (struct loop *, struct loops *, edge, tree, tree, bool);
 163 static struct loop *slpeel_tree_duplicate_loop_to_edge_cfg
 164   (struct loop *, struct loops *, edge);
 165 static void slpeel_update_phis_for_duplicate_loop
 166   (struct loop *, struct loop *, bool after);
 167 static void slpeel_update_phi_nodes_for_guard (edge, struct loop *, bool, bool);
 168 static void slpeel_make_loop_iterate_ntimes (struct loop *, tree);
 169 static edge slpeel_add_loop_guard (basic_block, tree, basic_block, basic_block);
 170 static bool slpeel_can_duplicate_loop_p (struct loop *, edge);
 171 static void allocate_new_names (bitmap);
 172 static void rename_use_op (use_operand_p);
 173 static void rename_def_op (def_operand_p, tree);
 174 static void rename_variables_in_bb (basic_block);
 175 static void free_new_names (bitmap);
 176 static void rename_variables_in_loop (struct loop *);
 177 #ifdef ENABLE_CHECKING
 178 static void slpeel_verify_cfg_after_peeling (struct loop *, struct loop *);
 179 #endif
 180
 181
 182 /*************************************************************************
 183   Vectorization Utilities.
 184  *************************************************************************/
 185
 186 /* Main analysis functions.  */
 187 static loop_vec_info vect_analyze_loop (struct loop *);
 188 static loop_vec_info vect_analyze_loop_form (struct loop *);
 189 static bool vect_analyze_data_refs (loop_vec_info);
 190 static bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
 191 static bool vect_analyze_scalar_cycles (loop_vec_info);
 192 static bool vect_analyze_data_ref_accesses (loop_vec_info);
 193 static bool vect_analyze_data_refs_alignment (loop_vec_info);
 194 static bool vect_compute_data_refs_alignment (loop_vec_info);
 195 static bool vect_analyze_operations (loop_vec_info);
 196
 197 /* Main code transformation functions.  */
 198 static void vect_transform_loop (loop_vec_info, struct loops *);
 199 static void vect_transform_loop_bound (loop_vec_info, tree niters);
 200 static bool vect_transform_stmt (tree, block_stmt_iterator *);
 201 static bool vectorizable_load (tree, block_stmt_iterator *, tree *);
 202 static bool vectorizable_store (tree, block_stmt_iterator *, tree *);
 203 static bool vectorizable_operation (tree, block_stmt_iterator *, tree *);
 204 static bool vectorizable_assignment (tree, block_stmt_iterator *, tree *);
 205 static enum dr_alignment_support vect_supportable_dr_alignment
 206   (struct data_reference *);
 207 static void vect_align_data_ref (tree);
 208 static void vect_enhance_data_refs_alignment (loop_vec_info);
 209
 210 /* Utility functions for the analyses.  */
 211 static bool vect_is_simple_use (tree , struct loop *, tree *);
 212 static bool exist_non_indexing_operands_for_use_p (tree, tree);
 213 static bool vect_is_simple_iv_evolution (unsigned, tree, tree *, tree *, bool);
 214 static void vect_mark_relevant (varray_type, tree);
 215 static bool vect_stmt_relevant_p (tree, loop_vec_info);
 216 static tree vect_get_loop_niters (struct loop *, tree *);
 217 static bool vect_compute_data_ref_alignment
 218   (struct data_reference *, loop_vec_info);
 219 static bool vect_analyze_data_ref_access (struct data_reference *);
 220 static bool vect_get_first_index (tree, tree *);
 221 static bool vect_can_force_dr_alignment_p (tree, unsigned int);
 222 static struct data_reference * vect_analyze_pointer_ref_access
 223   (tree, tree, bool);
 224 static bool vect_can_advance_ivs_p (struct loop *);
 225 static tree vect_get_base_and_bit_offset
 226   (struct data_reference *, tree, tree, loop_vec_info, tree *, bool*);
 227 static struct data_reference * vect_analyze_pointer_ref_access
 228   (tree, tree, bool);
 229 static tree vect_compute_array_base_alignment (tree, tree, tree *, tree *);
 230 static tree vect_compute_array_ref_alignment
 231   (struct data_reference *, loop_vec_info, tree, tree *);
 232 static tree vect_get_ptr_offset (tree, tree, tree *);
 233 static tree vect_get_symbl_and_dr
 234   (tree, tree, bool, loop_vec_info, struct data_reference **);
 235
 236 /* Utility functions for the code transformation.  */
 237 static tree vect_create_destination_var (tree, tree);
 238 static tree vect_create_data_ref_ptr
 239   (tree, block_stmt_iterator *, tree, tree *, bool);
 240 static tree vect_create_index_for_vector_ref
 241   (struct loop *, block_stmt_iterator *);
 242 static tree vect_create_addr_base_for_vector_ref (tree, tree *, tree);
 243 static tree get_vectype_for_scalar_type (tree);
 244 static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
 245 static tree vect_get_vec_def_for_operand (tree, tree);
 246 static tree vect_init_vector (tree, tree);
 247 static tree vect_build_symbol_bound (tree, int, struct loop *);
 248 static void vect_finish_stmt_generation
 249   (tree stmt, tree vec_stmt, block_stmt_iterator *bsi);
 250
 251 /* Utility function dealing with loop peeling (not peeling itself).  */
 252 static void vect_generate_tmps_on_preheader
 253   (loop_vec_info, tree *, tree *, tree *);
 254 static tree vect_build_loop_niters (loop_vec_info);
 255 static void vect_update_ivs_after_vectorizer (struct loop *, tree, edge);
 256 static tree vect_gen_niters_for_prolog_loop (loop_vec_info, tree);
 257 static void vect_update_inits_of_dr
 258   (struct data_reference *, struct loop *, tree niters);
 259 static void vect_update_inits_of_drs (loop_vec_info, tree);
 260 static void vect_do_peeling_for_alignment (loop_vec_info, struct loops *);
 261 static void vect_do_peeling_for_loop_bound
 262   (loop_vec_info, tree *, struct loops *);
 263
 264 /* Utilities for creation and deletion of vec_info structs.  */
 265 loop_vec_info new_loop_vec_info (struct loop *loop);
 266 void destroy_loop_vec_info (loop_vec_info);
 267 stmt_vec_info new_stmt_vec_info (tree stmt, struct loop *loop);
 268
 269 static bool vect_debug_stats (struct loop *loop);
 270 static bool vect_debug_details (struct loop *loop);
 271
 272 \f
 273 /*************************************************************************
 274   Simple Loop Peeling Utilities
 275
 276   Utilities to support loop peeling for vectorization purposes.
 277  *************************************************************************/
 278
 279
 280 /* For each definition in DEFINITIONS this function allocates
 281    new ssa name.  */
 282
 283 static void
 284 allocate_new_names (bitmap definitions)
 285 {
 286   unsigned ver;
 287   bitmap_iterator bi;
 288
 289   EXECUTE_IF_SET_IN_BITMAP (definitions, 0, ver, bi)
 290     {
 291       tree def = ssa_name (ver);
 292       tree *new_name_ptr = xmalloc (sizeof (tree));
 293
 294       bool abnormal = SSA_NAME_OCCURS_IN_ABNORMAL_PHI (def);
 295
 296       *new_name_ptr = duplicate_ssa_name (def, SSA_NAME_DEF_STMT (def));
 297       SSA_NAME_OCCURS_IN_ABNORMAL_PHI (*new_name_ptr) = abnormal;
 298
 299       SSA_NAME_AUX (def) = new_name_ptr;
 300     }
 301 }
 302
 303
 304 /* Renames the use *OP_P.  */
 305
 306 static void
 307 rename_use_op (use_operand_p op_p)
 308 {
 309   tree *new_name_ptr;
 310
 311   if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
 312     return;
 313
 314   new_name_ptr = SSA_NAME_AUX (USE_FROM_PTR (op_p));
 315
 316   /* Something defined outside of the loop.  */
 317   if (!new_name_ptr)
 318     return;
 319
 320   /* An ordinary ssa name defined in the loop.  */
 321
 322   SET_USE (op_p, *new_name_ptr);
 323 }
 324
 325
 326 /* Renames the def *OP_P in statement STMT.  */
 327
 328 static void
 329 rename_def_op (def_operand_p op_p, tree stmt)
 330 {
 331   tree *new_name_ptr;
 332
 333   if (TREE_CODE (DEF_FROM_PTR (op_p)) != SSA_NAME)
 334     return;
 335
 336   new_name_ptr = SSA_NAME_AUX (DEF_FROM_PTR (op_p));
 337
 338   /* Something defined outside of the loop.  */
 339   if (!new_name_ptr)
 340     return;
 341
 342   /* An ordinary ssa name defined in the loop.  */
 343
 344   SET_DEF (op_p, *new_name_ptr);
 345   SSA_NAME_DEF_STMT (DEF_FROM_PTR (op_p)) = stmt;
 346 }
 347
 348
 349 /* Renames the variables in basic block BB.  */
 350
 351 static void
 352 rename_variables_in_bb (basic_block bb)
 353 {
 354   tree phi;
 355   block_stmt_iterator bsi;
 356   tree stmt;
 357   stmt_ann_t ann;
 358   use_optype uses;
 359   vuse_optype vuses;
 360   def_optype defs;
 361   v_may_def_optype v_may_defs;
 362   v_must_def_optype v_must_defs;
 363   unsigned i;
 364   edge e;
 365   edge_iterator ei;
 366   struct loop *loop = bb->loop_father;
 367
 368   for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
 369     rename_def_op (PHI_RESULT_PTR (phi), phi);
 370
 371   for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
 372     {
 373       stmt = bsi_stmt (bsi);
 374       get_stmt_operands (stmt);
 375       ann = stmt_ann (stmt);
 376
 377       uses = USE_OPS (ann);
 378       for (i = 0; i < NUM_USES (uses); i++)
 379         rename_use_op (USE_OP_PTR (uses, i));
 380
 381       defs = DEF_OPS (ann);
 382       for (i = 0; i < NUM_DEFS (defs); i++)
 383         rename_def_op (DEF_OP_PTR (defs, i), stmt);
 384
 385       vuses = VUSE_OPS (ann);
 386       for (i = 0; i < NUM_VUSES (vuses); i++)
 387         rename_use_op (VUSE_OP_PTR (vuses, i));
 388
 389       v_may_defs = V_MAY_DEF_OPS (ann);
 390       for (i = 0; i < NUM_V_MAY_DEFS (v_may_defs); i++)
 391         {
 392           rename_use_op (V_MAY_DEF_OP_PTR (v_may_defs, i));
 393           rename_def_op (V_MAY_DEF_RESULT_PTR (v_may_defs, i), stmt);
 394         }
 395
 396       v_must_defs = V_MUST_DEF_OPS (ann);
 397       for (i = 0; i < NUM_V_MUST_DEFS (v_must_defs); i++)
 398         {
 399           rename_use_op (V_MUST_DEF_KILL_PTR (v_must_defs, i));
 400           rename_def_op (V_MUST_DEF_RESULT_PTR (v_must_defs, i), stmt);
 401         }
 402     }
 403
 404   FOR_EACH_EDGE (e, ei, bb->succs)
 405     {
 406       if (!flow_bb_inside_loop_p (loop, e->dest))
 407         continue;
 408       for (phi = phi_nodes (e->dest); phi; phi = PHI_CHAIN (phi))
 409         rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e));
 410     }
 411 }
 412
 413
 414 /* Releases the structures holding the new ssa names.  */
 415
 416 static void
 417 free_new_names (bitmap definitions)
 418 {
 419   unsigned ver;
 420   bitmap_iterator bi;
 421
 422   EXECUTE_IF_SET_IN_BITMAP (definitions, 0, ver, bi)
 423     {
 424       tree def = ssa_name (ver);
 425
 426       if (SSA_NAME_AUX (def))
 427         {
 428           free (SSA_NAME_AUX (def));
 429           SSA_NAME_AUX (def) = NULL;
 430         }
 431     }
 432 }
 433
 434
 435 /* Renames variables in new generated LOOP.  */
 436
 437 static void
 438 rename_variables_in_loop (struct loop *loop)
 439 {
 440   unsigned i;
 441   basic_block *bbs;
 442
 443   bbs = get_loop_body (loop);
 444
 445   for (i = 0; i < loop->num_nodes; i++)
 446     rename_variables_in_bb (bbs[i]);
 447
 448   free (bbs);
 449 }
 450
 451
 452 /* Update the PHI nodes of NEW_LOOP.
 453
 454    NEW_LOOP is a duplicate of ORIG_LOOP.
 455    AFTER indicates whether NEW_LOOP executes before or after ORIG_LOOP:
 456    AFTER is true if NEW_LOOP executes after ORIG_LOOP, and false if it
 457    executes before it.  */
 458
 459 static void
 460 slpeel_update_phis_for_duplicate_loop (struct loop *orig_loop,
 461                                        struct loop *new_loop, bool after)
 462 {
 463   tree *new_name_ptr, new_ssa_name;
 464   tree phi_new, phi_orig;
 465   tree def;
 466   edge orig_loop_latch = loop_latch_edge (orig_loop);
 467   edge orig_entry_e = loop_preheader_edge (orig_loop);
 468   edge new_loop_exit_e = new_loop->exit_edges[0];
 469   edge new_loop_entry_e = loop_preheader_edge (new_loop);
 470   edge entry_arg_e = (after ? orig_loop_latch : orig_entry_e);
 471
 472   /*
 473      step 1. For each loop-header-phi:
 474              Add the first phi argument for the phi in NEW_LOOP
 475             (the one associated with the entry of NEW_LOOP)
 476
 477      step 2. For each loop-header-phi:
 478              Add the second phi argument for the phi in NEW_LOOP
 479             (the one associated with the latch of NEW_LOOP)
 480
 481      step 3. Update the phis in the successor block of NEW_LOOP.
 482
 483         case 1: NEW_LOOP was placed before ORIG_LOOP:
 484                 The successor block of NEW_LOOP is the header of ORIG_LOOP.
 485                 Updating the phis in the successor block can therefore be done
 486                 along with the scanning of the loop header phis, because the
 487                 header blocks of ORIG_LOOP and NEW_LOOP have exactly the same
 488                 phi nodes, organized in the same order.
 489
 490         case 2: NEW_LOOP was placed after ORIG_LOOP:
 491                 The successor block of NEW_LOOP is the original exit block of
 492                 ORIG_LOOP - the phis to be updated are the loop-closed-ssa phis.
 493                 We postpone updating these phis to a later stage (when
 494                 loop guards are added).
 495    */
 496
 497
 498   /* Scan the phis in the headers of the old and new loops
 499      (they are organized in exactly the same order).  */
 500
 501   for (phi_new = phi_nodes (new_loop->header),
 502        phi_orig = phi_nodes (orig_loop->header);
 503        phi_new && phi_orig;
 504        phi_new = PHI_CHAIN (phi_new), phi_orig = PHI_CHAIN (phi_orig))
 505     {
 506       /* step 1.  */
 507       def = PHI_ARG_DEF_FROM_EDGE (phi_orig, entry_arg_e);
 508       add_phi_arg (&phi_new, def, new_loop_entry_e);
 509
 510       /* step 2.  */
 511       def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_loop_latch);
 512       if (TREE_CODE (def) != SSA_NAME)
 513         continue;
 514
 515       new_name_ptr = SSA_NAME_AUX (def);
 516       if (!new_name_ptr)
 517         /* Something defined outside of the loop.  */
 518         continue;
 519
 520       /* An ordinary ssa name defined in the loop.  */
 521       new_ssa_name = *new_name_ptr;
 522       add_phi_arg (&phi_new, new_ssa_name, loop_latch_edge (new_loop));
 523
 524       /* step 3 (case 1).  */
 525       if (!after)
 526         {
 527           gcc_assert (new_loop_exit_e == orig_entry_e);
 528           SET_PHI_ARG_DEF (phi_orig,
 529                            phi_arg_from_edge (phi_orig, new_loop_exit_e),
 530                            new_ssa_name);
 531         }
 532     }
 533 }
 534
 535
 536 /* Update PHI nodes for a guard of the LOOP.
 537
 538    Input:
 539    - LOOP, GUARD_EDGE: LOOP is a loop for which we added guard code that
 540         controls whether LOOP is to be executed.  GUARD_EDGE is the edge that
 541         originates from the guard-bb, skips LOOP and reaches the (unique) exit
 542         bb of LOOP.  This loop-exit-bb is an empty bb with one successor.
 543         We denote this bb NEW_MERGE_BB because it had a single predecessor (the
 544         LOOP header) before the guard code was added, and now it became a merge
 545         point of two paths - the path that ends with the LOOP exit-edge, and
 546         the path that ends with GUARD_EDGE.
 547
 548         This function creates and updates the relevant phi nodes to account for
 549         the new incoming edge (GUARD_EDGE) into NEW_MERGE_BB:
 550         1. Create phi nodes at NEW_MERGE_BB.
 551         2. Update the phi nodes at the successor of NEW_MERGE_BB (denoted
 552            UPDATE_BB).  UPDATE_BB was the exit-bb of LOOP before NEW_MERGE_BB
 553            was added:
 554
 555         ===> The CFG before the guard-code was added:
 556         LOOP_header_bb:
 557           if (exit_loop) goto update_bb : LOOP_header_bb
 558         update_bb:
 559
 560         ==> The CFG after the guard-code was added:
 561         guard_bb:
 562           if (LOOP_guard_condition) goto new_merge_bb : LOOP_header_bb
 563         LOOP_header_bb:
 564           if (exit_loop_condition) goto new_merge_bb : LOOP_header_bb
 565         new_merge_bb:
 566           goto update_bb
 567         update_bb:
 568
 569    - ENTRY_PHIS: If ENTRY_PHIS is TRUE, this indicates that the phis in
 570         UPDATE_BB are loop entry phis, like the phis in the LOOP header,
 571         organized in the same order.
 572         If ENTRY_PHIs is FALSE, this indicates that the phis in UPDATE_BB are
 573         loop exit phis.
 574
 575    - IS_NEW_LOOP: TRUE if LOOP is a new loop (a duplicated copy of another
 576         "original" loop).  FALSE if LOOP is an original loop (not a newly
 577         created copy).  The SSA_NAME_AUX fields of the defs in the original
 578         loop are the corresponding new ssa-names used in the new duplicated
 579         loop copy.  IS_NEW_LOOP indicates which of the two args of the phi
 580         nodes in UPDATE_BB takes the original ssa-name, and which takes the
 581         new name: If IS_NEW_LOOP is TRUE, the phi-arg that is associated with
 582         the LOOP-exit-edge takes the new-name, and the phi-arg that is
 583         associated with GUARD_EDGE takes the original name.  If IS_NEW_LOOP is
 584         FALSE, it's the other way around.
 585   */
 586
 587 static void
 588 slpeel_update_phi_nodes_for_guard (edge guard_edge,
 589                                    struct loop *loop,
 590                                    bool entry_phis,
 591                                    bool is_new_loop)
 592 {
 593   tree orig_phi, new_phi, update_phi;
 594   tree guard_arg, loop_arg;
 595   basic_block new_merge_bb = guard_edge->dest;
 596   edge e = EDGE_SUCC (new_merge_bb, 0);
 597   basic_block update_bb = e->dest;
 598   basic_block orig_bb = (entry_phis ? loop->header : update_bb);
 599
 600   for (orig_phi = phi_nodes (orig_bb), update_phi = phi_nodes (update_bb);
 601        orig_phi && update_phi;
 602        orig_phi = PHI_CHAIN (orig_phi), update_phi = PHI_CHAIN (update_phi))
 603     {
 604       /* 1. Generate new phi node in NEW_MERGE_BB:  */
 605       new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
 606                                  new_merge_bb);
 607
 608       /* 2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
 609             of LOOP. Set the two phi args in NEW_PHI for these edges:  */
 610       if (entry_phis)
 611         {
 612           loop_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi,
 613                                             EDGE_SUCC (loop->latch, 0));
 614           guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, loop->entry_edges[0]);
 615         }
 616       else /* exit phis */
 617         {
 618           tree orig_def = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
 619           tree *new_name_ptr = SSA_NAME_AUX (orig_def);
 620           tree new_name;
 621
 622           if (new_name_ptr)
 623             new_name = *new_name_ptr;
 624           else
 625             /* Something defined outside of the loop  */
 626             new_name = orig_def;
 627
 628           if (is_new_loop)
 629             {
 630               guard_arg = orig_def;
 631               loop_arg = new_name;
 632             }
 633           else
 634             {
 635               guard_arg = new_name;
 636               loop_arg = orig_def;
 637             }
 638         }
 639       add_phi_arg (&new_phi, loop_arg, loop->exit_edges[0]);
 640       add_phi_arg (&new_phi, guard_arg, guard_edge);
 641
 642       /* 3. Update phi in successor block.  */
 643       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == loop_arg
 644                   || PHI_ARG_DEF_FROM_EDGE (update_phi, e) == guard_arg);
 645       SET_PHI_ARG_DEF (update_phi, phi_arg_from_edge (update_phi, e),
 646                        PHI_RESULT (new_phi));
 647     }
 648
 649   set_phi_nodes (new_merge_bb, phi_reverse (phi_nodes (new_merge_bb)));
 650 }
 651
 652
 653 /* Make the LOOP iterate NITERS times. This is done by adding a new IV
 654    that starts at zero, increases by one and its limit is NITERS.
 655
 656    Assumption: the exit-condition of LOOP is the last stmt in the loop.  */
 657
 658 static void
 659 slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
 660 {
 661   tree indx_before_incr, indx_after_incr, cond_stmt, cond;
 662   tree orig_cond;
 663   edge exit_edge = loop->exit_edges[0];
 664   block_stmt_iterator loop_exit_bsi = bsi_last (exit_edge->src);
 665   tree begin_label = tree_block_label (loop->latch);
 666   tree exit_label = tree_block_label (loop->single_exit->dest);
 667
 668   orig_cond = get_loop_exit_condition (loop);
 669   gcc_assert (orig_cond);
 670   create_iv (integer_zero_node, integer_one_node, NULL_TREE, loop,
 671              &loop_exit_bsi, false, &indx_before_incr, &indx_after_incr);
 672
 673   /* CREATE_IV uses BSI_INSERT with TSI_NEW_STMT, so we want to get
 674      back to the exit condition statement.  */
 675   bsi_next (&loop_exit_bsi);
 676   gcc_assert (bsi_stmt (loop_exit_bsi) == orig_cond);
 677
 678   if (exit_edge->flags & EDGE_TRUE_VALUE) /* 'then' edge exits the loop.  */
 679     cond = build2 (GE_EXPR, boolean_type_node, indx_after_incr, niters);
 680   else /* 'then' edge loops back.  */
 681     cond = build2 (LT_EXPR, boolean_type_node, indx_after_incr, niters);
 682
 683   begin_label = build1 (GOTO_EXPR, void_type_node, begin_label);
 684   exit_label = build1 (GOTO_EXPR, void_type_node, exit_label);
 685   cond_stmt = build (COND_EXPR, TREE_TYPE (orig_cond), cond,
 686                      begin_label, exit_label);
 687   bsi_insert_before (&loop_exit_bsi, cond_stmt, BSI_SAME_STMT);
 688
 689   /* Remove old loop exit test:  */
 690   bsi_remove (&loop_exit_bsi);
 691
 692   if (vect_debug_stats (loop) || vect_debug_details (loop))
 693     print_generic_expr (dump_file, cond_stmt, TDF_SLIM);
 694
 695   loop->nb_iterations = niters;
 696 }
 697
 698
 699 /* Given LOOP this function generates a new copy of it and puts it
 700    on E which is either the entry or exit of LOOP.  */
 701
 702 static struct loop *
 703 slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, struct loops *loops,
 704                                         edge e)
 705 {
 706   struct loop *new_loop;
 707   basic_block *new_bbs, *bbs;
 708   bool at_exit;
 709   bool was_imm_dom;
 710   basic_block exit_dest;
 711   tree phi, phi_arg;
 712
 713   at_exit = (e == loop->exit_edges[0]);
 714   if (!at_exit && e != loop_preheader_edge (loop))
 715     {
 716       if (dump_file && (dump_flags & TDF_DETAILS))
 717           fprintf (dump_file, "Edge is not an entry nor an exit edge.\n");
 718       return NULL;
 719     }
 720
 721   bbs = get_loop_body (loop);
 722
 723   /* Check whether duplication is possible.  */
 724   if (!can_copy_bbs_p (bbs, loop->num_nodes))
 725     {
 726       if (vect_debug_stats (loop) || vect_debug_details (loop))
 727           fprintf (dump_file, "Cannot copy basic blocks.\n");
 728       free (bbs);
 729       return NULL;
 730     }
 731
 732   /* Generate new loop structure.  */
 733   new_loop = duplicate_loop (loops, loop, loop->outer);
 734   if (!new_loop)
 735     {
 736       if (vect_debug_stats (loop) || vect_debug_details (loop))
 737           fprintf (dump_file, "duplicate_loop returns NULL.\n");
 738       free (bbs);
 739       return NULL;
 740     }
 741
 742   exit_dest = loop->exit_edges[0]->dest;
 743   was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
 744                                           exit_dest) == loop->header ?
 745                  true : false);
 746
 747   new_bbs = xmalloc (sizeof (basic_block) * loop->num_nodes);
 748
 749   copy_bbs (bbs, loop->num_nodes, new_bbs, NULL, 0, NULL, NULL);
 750
 751   /* Duplicating phi args at exit bbs as coming
 752      also from exit of duplicated loop.  */
 753   for (phi = phi_nodes (exit_dest); phi; phi = PHI_CHAIN (phi))
 754     {
 755       phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, loop->exit_edges[0]);
 756       if (phi_arg)
 757         {
 758           edge new_loop_exit_edge;
 759
 760           if (EDGE_SUCC (new_loop->header, 0)->dest == new_loop->latch)
 761             new_loop_exit_edge = EDGE_SUCC (new_loop->header, 1);
 762           else
 763             new_loop_exit_edge = EDGE_SUCC (new_loop->header, 0);
 764
 765           add_phi_arg (&phi, phi_arg, new_loop_exit_edge);
 766         }
 767     }
 768
 769   if (at_exit) /* Add the loop copy at exit.  */
 770     {
 771       redirect_edge_and_branch_force (e, new_loop->header);
 772       set_immediate_dominator (CDI_DOMINATORS, new_loop->header, e->src);
 773       if (was_imm_dom)
 774         set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_loop->header);
 775     }
 776   else /* Add the copy at entry.  */
 777     {
 778       edge new_exit_e;
 779       edge entry_e = loop_preheader_edge (loop);
 780       basic_block preheader = entry_e->src;
 781
 782       if (!flow_bb_inside_loop_p (new_loop,
 783                                   EDGE_SUCC (new_loop->header, 0)->dest))
 784         new_exit_e = EDGE_SUCC (new_loop->header, 0);
 785       else
 786         new_exit_e = EDGE_SUCC (new_loop->header, 1);
 787
 788       redirect_edge_and_branch_force (new_exit_e, loop->header);
 789       set_immediate_dominator (CDI_DOMINATORS, loop->header,
 790                                new_exit_e->src);
 791
 792       /* We have to add phi args to the loop->header here as coming
 793          from new_exit_e edge.  */
 794       for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
 795         {
 796           phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, entry_e);
 797           if (phi_arg)
 798             add_phi_arg (&phi, phi_arg, new_exit_e);
 799         }
 800
 801       redirect_edge_and_branch_force (entry_e, new_loop->header);
 802       set_immediate_dominator (CDI_DOMINATORS, new_loop->header, preheader);
 803     }
 804
 805   flow_loop_scan (new_loop, LOOP_ALL);
 806   flow_loop_scan (loop, LOOP_ALL);
 807   free (new_bbs);
 808   free (bbs);
 809
 810   return new_loop;
 811 }
 812
 813
 814 /* Given the condition statement COND, put it as the last statement
 815    of GUARD_BB; EXIT_BB is the basic block to skip the loop;
 816    Assumes that this is the single exit of the guarded loop.
 817    Returns the skip edge.  */
 818
 819 static edge
 820 slpeel_add_loop_guard (basic_block guard_bb, tree cond, basic_block exit_bb,
 821                         basic_block dom_bb)
 822 {
 823   block_stmt_iterator bsi;
 824   edge new_e, enter_e;
 825   tree cond_stmt, then_label, else_label;
 826
 827   enter_e = EDGE_SUCC (guard_bb, 0);
 828   enter_e->flags &= ~EDGE_FALLTHRU;
 829   enter_e->flags |= EDGE_FALSE_VALUE;
 830   bsi = bsi_last (guard_bb);
 831
 832   then_label = build1 (GOTO_EXPR, void_type_node,
 833                        tree_block_label (exit_bb));
 834   else_label = build1 (GOTO_EXPR, void_type_node,
 835                        tree_block_label (enter_e->dest));
 836   cond_stmt = build (COND_EXPR, void_type_node, cond,
 837                      then_label, else_label);
 838   bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT);
 839   /* Add new edge to connect entry block to the second loop.  */
 840   new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);
 841   set_immediate_dominator (CDI_DOMINATORS, exit_bb, dom_bb);
 842   return new_e;
 843 }
 844
 845
 846 /* This function verifies that the following restrictions apply to LOOP:
 847    (1) it is innermost
 848    (2) it consists of exactly 2 basic blocks - header, and an empty latch.
 849    (3) it is single entry, single exit
 850    (4) its exit condition is the last stmt in the header
 851    (5) E is the entry/exit edge of LOOP.
 852  */
 853
 854 static bool
 855 slpeel_can_duplicate_loop_p (struct loop *loop, edge e)
 856 {
 857   edge exit_e = loop->exit_edges [0];
 858   edge entry_e = loop_preheader_edge (loop);
 859   tree orig_cond = get_loop_exit_condition (loop);
 860   block_stmt_iterator loop_exit_bsi = bsi_last (exit_e->src);
 861
 862   if (any_marked_for_rewrite_p ())
 863     return false;
 864
 865   if (loop->inner
 866       /* All loops have an outer scope; the only case loop->outer is NULL is for
 867          the function itself.  */
 868       || !loop->outer
 869       || loop->num_nodes != 2
 870       || !empty_block_p (loop->latch)
 871       || loop->num_exits != 1
 872       || loop->num_entries != 1
 873       /* Verify that new loop exit condition can be trivially modified.  */
 874       || (!orig_cond || orig_cond != bsi_stmt (loop_exit_bsi))
 875       || (e != exit_e && e != entry_e))
 876     return false;
 877
 878   return true;
 879 }
 880
 881 #ifdef ENABLE_CHECKING
 882 static void
 883 slpeel_verify_cfg_after_peeling (struct loop *first_loop,
 884                                  struct loop *second_loop)
 885 {
 886   basic_block loop1_exit_bb = first_loop->exit_edges[0]->dest;
 887   basic_block loop2_entry_bb = second_loop->pre_header;
 888   basic_block loop1_entry_bb = loop_preheader_edge (first_loop)->src;
 889
 890   /* A guard that controls whether the second_loop is to be executed or skipped
 891      is placed in first_loop->exit.  first_loopt->exit therefore has two
 892      successors - one is the preheader of second_loop, and the other is a bb
 893      after second_loop.
 894    */
 895   gcc_assert (EDGE_COUNT (loop1_exit_bb->succs) == 2);
 896
 897
 898   /* 1. Verify that one of the successors of first_loopt->exit is the preheader
 899         of second_loop.  */
 900
 901   /* The preheader of new_loop is expected to have two predessors:
 902      first_loop->exit and the block that precedes first_loop.  */
 903
 904   gcc_assert (EDGE_COUNT (loop2_entry_bb->preds) == 2
 905               && ((EDGE_PRED (loop2_entry_bb, 0)->src == loop1_exit_bb
 906                    && EDGE_PRED (loop2_entry_bb, 1)->src == loop1_entry_bb)
 907                || (EDGE_PRED (loop2_entry_bb, 1)->src ==  loop1_exit_bb
 908                    && EDGE_PRED (loop2_entry_bb, 0)->src == loop1_entry_bb)));
 909
 910   /* Verify that the other successor of first_loopt->exit is after the
 911      second_loop.  */
 912   /* TODO */
 913 }
 914 #endif
 915
 916 /* Function slpeel_tree_peel_loop_to_edge.
 917
 918    Peel the first (last) iterations of LOOP into a new prolog (epilog) loop
 919    that is placed on the entry (exit) edge E of LOOP. After this transformation
 920    we have two loops one after the other - first-loop iterates FIRST_NITERS
 921    times, and second-loop iterates the remainder NITERS - FIRST_NITERS times.
 922
 923    Input:
 924    - LOOP: the loop to be peeled.
 925    - E: the exit or entry edge of LOOP.
 926         If it is the entry edge, we peel the first iterations of LOOP. In this
 927         case first-loop is LOOP, and second-loop is the newly created loop.
 928         If it is the exit edge, we peel the last iterations of LOOP. In this
 929         case, first-loop is the newly created loop, and second-loop is LOOP.
 930    - NITERS: the number of iterations that LOOP iterates.
 931    - FIRST_NITERS: the number of iterations that the first-loop should iterate.
 932    - UPDATE_FIRST_LOOP_COUNT:  specified whether this function is responsible
 933         for updating the loop bound of the first-loop to FIRST_NITERS.  If it
 934         is false, the caller of this function may want to take care of this
 935         (this can be useful if we don't want new stmts added to first-loop).
 936
 937    Output:
 938    The function returns a pointer to the new loop-copy, or NULL if it failed
 939    to perform the transformation.
 940
 941    The function generates two if-then-else guards: one before the first loop,
 942    and the other before the second loop:
 943    The first guard is:
 944      if (FIRST_NITERS == 0) then skip the first loop,
 945      and go directly to the second loop.
 946    The second guard is:
 947      if (FIRST_NITERS == NITERS) then skip the second loop.
 948
 949    FORNOW only simple loops are supported (see slpeel_can_duplicate_loop_p).
 950    FORNOW the resulting code will not be in loop-closed-ssa form.
 951 */
 952
 953 struct loop*
 954 slpeel_tree_peel_loop_to_edge (struct loop *loop, struct loops *loops,
 955                                edge e, tree first_niters,
 956                                tree niters, bool update_first_loop_count)
 957 {
 958   struct loop *new_loop = NULL, *first_loop, *second_loop;
 959   edge skip_e;
 960   tree pre_condition;
 961   bitmap definitions;
 962   basic_block bb_before_second_loop, bb_after_second_loop;
 963   basic_block bb_before_first_loop;
 964   basic_block bb_between_loops;
 965   edge exit_e = loop->exit_edges [0];
 966
 967   if (!slpeel_can_duplicate_loop_p (loop, e))
 968     return NULL;
 969
 970   /* We have to initialize cfg_hooks. Then, when calling
 971    cfg_hooks->split_edge, the function tree_split_edge
 972    is actually called and, when calling cfg_hooks->duplicate_block,
 973    the function tree_duplicate_bb is called.  */
 974   tree_register_cfg_hooks ();
 975
 976
 977   /* 1. Generate a copy of LOOP and put it on E (E is the entry/exit of LOOP).
 978         Resulting CFG would be:
 979
 980         first_loop:
 981         do {
 982         } while ...
 983
 984         second_loop:
 985         do {
 986         } while ...
 987
 988         orig_exit_bb:
 989    */
 990
 991   if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, loops, e)))
 992     {
 993       if (vect_debug_stats (loop) || vect_debug_details (loop))
 994         fprintf (dump_file, "tree_duplicate_loop_to_edge_cfg failed.\n");
 995       return NULL;
 996     }
 997
 998   if (e == exit_e)
 999     {
1000       /* NEW_LOOP was placed after LOOP.  */
1001       first_loop = loop;
1002       second_loop = new_loop;
1003     }
1004   else
1005     {
1006       /* NEW_LOOP was placed before LOOP.  */
1007       first_loop = new_loop;
1008       second_loop = loop;
1009     }
1010
1011   definitions = marked_ssa_names ();
1012   allocate_new_names (definitions);
1013   slpeel_update_phis_for_duplicate_loop (loop, new_loop, e == exit_e);
1014   rename_variables_in_loop (new_loop);
1015
1016
1017   /* 2. Add the guard that controls whether the first loop is executed.
1018         Resulting CFG would be:
1019
1020         bb_before_first_loop:
1021         if (FIRST_NITERS == 0) GOTO bb_before_second_loop
1022                                GOTO first-loop
1023
1024         first_loop:
1025         do {
1026         } while ...
1027
1028         bb_before_second_loop:
1029
1030         second_loop:
1031         do {
1032         } while ...
1033
1034         orig_exit_bb:
1035    */
1036
1037   bb_before_first_loop = split_edge (loop_preheader_edge (first_loop));
1038   add_bb_to_loop (bb_before_first_loop, first_loop->outer);
1039   bb_before_second_loop = split_edge (first_loop->exit_edges[0]);
1040   add_bb_to_loop (bb_before_second_loop, first_loop->outer);
1041   flow_loop_scan (first_loop, LOOP_ALL);
1042   flow_loop_scan (second_loop, LOOP_ALL);
1043
1044   pre_condition =
1045         build (LE_EXPR, boolean_type_node, first_niters, integer_zero_node);
1046   skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
1047                                   bb_before_second_loop, bb_before_first_loop);
1048   slpeel_update_phi_nodes_for_guard (skip_e, first_loop, true /* entry-phis */,
1049                                      first_loop == new_loop);
1050
1051
1052   /* 3. Add the guard that controls whether the second loop is executed.
1053         Resulting CFG would be:
1054
1055         bb_before_first_loop:
1056         if (FIRST_NITERS == 0) GOTO bb_before_second_loop (skip first loop)
1057                                GOTO first-loop
1058
1059         first_loop:
1060         do {
1061         } while ...
1062
1063         bb_between_loops:
1064         if (FIRST_NITERS == NITERS) GOTO bb_after_second_loop (skip second loop)
1065                                     GOTO bb_before_second_loop
1066
1067         bb_before_second_loop:
1068
1069         second_loop:
1070         do {
1071         } while ...
1072
1073         bb_after_second_loop:
1074
1075         orig_exit_bb:
1076    */
1077
1078   bb_between_loops = split_edge (first_loop->exit_edges[0]);
1079   add_bb_to_loop (bb_between_loops, first_loop->outer);
1080   bb_after_second_loop = split_edge (second_loop->exit_edges[0]);
1081   add_bb_to_loop (bb_after_second_loop, second_loop->outer);
1082   flow_loop_scan (first_loop, LOOP_ALL);
1083   flow_loop_scan (second_loop, LOOP_ALL);
1084
1085   pre_condition = build (EQ_EXPR, boolean_type_node, first_niters, niters);
1086   skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition,
1087                                   bb_after_second_loop, bb_before_first_loop);
1088   slpeel_update_phi_nodes_for_guard (skip_e, second_loop, false /* exit-phis */,
1089                                      second_loop == new_loop);
1090
1091   /* Flow loop scan does not update loop->single_exit field.  */
1092   first_loop->single_exit = first_loop->exit_edges[0];
1093   second_loop->single_exit = second_loop->exit_edges[0];
1094
1095   /* 4. Make first-loop iterate FIRST_NITERS times, if requested.
1096    */
1097   if (update_first_loop_count)
1098     slpeel_make_loop_iterate_ntimes (first_loop, first_niters);
1099
1100   free_new_names (definitions);
1101   BITMAP_XFREE (definitions);
1102   unmark_all_for_rewrite ();
1103
1104   return new_loop;
1105 }
1106
1107 \f
1108 /* Here the proper Vectorizer starts.  */
1109
1110 /*************************************************************************
1111   Vectorization Utilities.
1112  *************************************************************************/
1113
1114 /* Function new_stmt_vec_info.
1115
1116    Create and initialize a new stmt_vec_info struct for STMT.  */
1117
1118 stmt_vec_info
1119 new_stmt_vec_info (tree stmt, struct loop *loop)
1120 {
1121   stmt_vec_info res;
1122   res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
1123
1124   STMT_VINFO_TYPE (res) = undef_vec_info_type;
1125   STMT_VINFO_STMT (res) = stmt;
1126   STMT_VINFO_LOOP (res) = loop;
1127   STMT_VINFO_RELEVANT_P (res) = 0;
1128   STMT_VINFO_VECTYPE (res) = NULL;
1129   STMT_VINFO_VEC_STMT (res) = NULL;
1130   STMT_VINFO_DATA_REF (res) = NULL;
1131   STMT_VINFO_MEMTAG (res) = NULL;
1132   STMT_VINFO_VECT_DR_BASE (res) = NULL;
1133
1134   return res;
1135 }
1136
1137
1138 /* Function new_loop_vec_info.
1139
1140    Create and initialize a new loop_vec_info struct for LOOP, as well as
1141    stmt_vec_info structs for all the stmts in LOOP.  */
1142
1143 loop_vec_info
1144 new_loop_vec_info (struct loop *loop)
1145 {
1146   loop_vec_info res;
1147   basic_block *bbs;
1148   block_stmt_iterator si;
1149   unsigned int i;
1150
1151   res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));
1152
1153   bbs = get_loop_body (loop);
1154
1155   /* Create stmt_info for all stmts in the loop.  */
1156   for (i = 0; i < loop->num_nodes; i++)
1157     {
1158       basic_block bb = bbs[i];
1159       for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1160         {
1161           tree stmt = bsi_stmt (si);
1162           stmt_ann_t ann;
1163
1164           get_stmt_operands (stmt);
1165           ann = stmt_ann (stmt);
1166           set_stmt_info (ann, new_stmt_vec_info (stmt, loop));
1167         }
1168     }
1169
1170   LOOP_VINFO_LOOP (res) = loop;
1171   LOOP_VINFO_BBS (res) = bbs;
1172   LOOP_VINFO_EXIT_COND (res) = NULL;
1173   LOOP_VINFO_NITERS (res) = NULL;
1174   LOOP_VINFO_VECTORIZABLE_P (res) = 0;
1175   LOOP_DO_PEELING_FOR_ALIGNMENT (res) = false;
1176   LOOP_VINFO_VECT_FACTOR (res) = 0;
1177   VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_WRITES (res), 20,
1178                            "loop_write_datarefs");
1179   VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_READS (res), 20,
1180                            "loop_read_datarefs");
1181   LOOP_VINFO_UNALIGNED_DR (res) = NULL;
1182
1183   return res;
1184 }
1185
1186
1187 /* Function destroy_loop_vec_info.
1188
1189    Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the
1190    stmts in the loop.  */
1191
1192 void
1193 destroy_loop_vec_info (loop_vec_info loop_vinfo)
1194 {
1195   struct loop *loop;
1196   basic_block *bbs;
1197   int nbbs;
1198   block_stmt_iterator si;
1199   int j;
1200
1201   if (!loop_vinfo)
1202     return;
1203
1204   loop = LOOP_VINFO_LOOP (loop_vinfo);
1205
1206   bbs = LOOP_VINFO_BBS (loop_vinfo);
1207   nbbs = loop->num_nodes;
1208
1209   for (j = 0; j < nbbs; j++)
1210     {
1211       basic_block bb = bbs[j];
1212       for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1213         {
1214           tree stmt = bsi_stmt (si);
1215           stmt_ann_t ann = stmt_ann (stmt);
1216           stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1217           free (stmt_info);
1218           set_stmt_info (ann, NULL);
1219         }
1220     }
1221
1222   free (LOOP_VINFO_BBS (loop_vinfo));
1223   varray_clear (LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
1224   varray_clear (LOOP_VINFO_DATAREF_READS (loop_vinfo));
1225
1226   free (loop_vinfo);
1227 }
1228
1229
1230 /* Function debug_loop_stats.
1231
1232    For vectorization statistics dumps.  */
1233
1234 static bool
1235 vect_debug_stats (struct loop *loop)
1236 {
1237   basic_block bb;
1238   block_stmt_iterator si;
1239   tree node = NULL_TREE;
1240
1241   if (!dump_file || !(dump_flags & TDF_STATS))
1242     return false;
1243
1244   if (!loop)
1245     {
1246       fprintf (dump_file, "\n");
1247       return true;
1248     }
1249
1250   if (!loop->header)
1251     return false;
1252
1253   bb = loop->header;
1254
1255   for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1256     {
1257       node = bsi_stmt (si);
1258       if (node && EXPR_P (node) && EXPR_LOCUS (node))
1259         break;
1260     }
1261
1262   if (node && EXPR_P (node) && EXPR_LOCUS (node)
1263       && EXPR_FILENAME (node) && EXPR_LINENO (node))
1264     {
1265       fprintf (dump_file, "\nloop at %s:%d: ",
1266         EXPR_FILENAME (node), EXPR_LINENO (node));
1267       return true;
1268     }
1269
1270   return false;
1271 }
1272
1273
1274 /* Function debug_loop_details.
1275
1276    For vectorization debug dumps.  */
1277
1278 static bool
1279 vect_debug_details (struct loop *loop)
1280 {
1281    basic_block bb;
1282    block_stmt_iterator si;
1283    tree node = NULL_TREE;
1284
1285   if (!dump_file || !(dump_flags & TDF_DETAILS))
1286     return false;
1287
1288   if (!loop)
1289     {
1290       fprintf (dump_file, "\n");
1291       return true;
1292     }
1293
1294   if (!loop->header)
1295     return false;
1296
1297   bb = loop->header;
1298
1299   for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1300     {
1301       node = bsi_stmt (si);
1302       if (node && EXPR_P (node) && EXPR_LOCUS (node))
1303         break;
1304     }
1305
1306   if (node && EXPR_P (node) && EXPR_LOCUS (node)
1307       && EXPR_FILENAME (node) && EXPR_LINENO (node))
1308     {
1309       fprintf (dump_file, "\nloop at %s:%d: ",
1310                EXPR_FILENAME (node), EXPR_LINENO (node));
1311       return true;
1312     }
1313
1314   return false;
1315 }
1316
1317
1318 /* Function vect_get_ptr_offset
1319
1320    Compute the OFFSET modulo vector-type alignment of pointer REF in bits.  */
1321
1322 static tree
1323 vect_get_ptr_offset (tree ref ATTRIBUTE_UNUSED,
1324                      tree vectype ATTRIBUTE_UNUSED,
1325                      tree *offset ATTRIBUTE_UNUSED)
1326 {
1327   /* TODO: Use alignment information.  */
1328   return NULL_TREE;
1329 }
1330
1331
1332 /* Function vect_get_base_and_bit_offset
1333
1334    Return the BASE of the data reference EXPR.
1335    If VECTYPE is given, also compute the OFFSET from BASE in bits.
1336    E.g., for EXPR a.b[i] + 4B, BASE is a, and OFFSET is the overall offset in
1337    bits of 'a.b[i] + 4B' from a.
1338
1339    Input:
1340    EXPR - the memory reference that is being analyzed
1341    DR - the data_reference struct of the _original_ memory reference
1342         (Note: DR_REF (DR) is not necessarily EXPR)
1343    VECTYPE - the type that defines the alignment (i.e, we compute
1344              alignment relative to TYPE_ALIGN(VECTYPE))
1345
1346    Output:
1347    BASE (returned value) - the base of the data reference EXPR.
1348                            E.g, if EXPR is a.b[k].c[i][j] the returned
1349                            base is a.
1350    OFFSET - offset of EXPR from BASE in bits
1351    BASE_ALIGNED_P - indicates if BASE is aligned
1352
1353    If something unexpected is encountered (an unsupported form of data-ref),
1354    or if VECTYPE is given but OFFSET cannot be determined:
1355    then NULL_TREE is returned.  */
1356
1357 static tree
1358 vect_get_base_and_bit_offset (struct data_reference *dr,
1359                               tree expr,
1360                               tree vectype,
1361                               loop_vec_info loop_vinfo,
1362                               tree *offset,
1363                               bool *base_aligned_p)
1364 {
1365   tree this_offset = size_zero_node;
1366   tree base = NULL_TREE;
1367   tree next_ref;
1368   tree oprnd0, oprnd1;
1369   struct data_reference *array_dr;
1370   enum tree_code code = TREE_CODE (expr);
1371
1372   *base_aligned_p = false;
1373
1374   switch (code)
1375     {
1376     /* These cases end the recursion:  */
1377     case VAR_DECL:
1378       *offset = size_zero_node;
1379       if (vectype && DECL_ALIGN (expr) >= TYPE_ALIGN (vectype))
1380         *base_aligned_p = true;
1381       return expr;
1382
1383     case SSA_NAME:
1384       if (!vectype)
1385         return expr;
1386
1387       if (TREE_CODE (TREE_TYPE (expr)) != POINTER_TYPE)
1388         return NULL_TREE;
1389
1390       if (TYPE_ALIGN (TREE_TYPE (TREE_TYPE (expr))) < TYPE_ALIGN (vectype))
1391         {
1392           base = vect_get_ptr_offset (expr, vectype, offset);
1393           if (base)
1394             *base_aligned_p = true;
1395         }
1396       else
1397         {
1398           *base_aligned_p = true;
1399           *offset = size_zero_node;
1400           base = expr;
1401         }
1402       return base;
1403
1404     case INTEGER_CST:
1405       *offset = int_const_binop (MULT_EXPR, expr,
1406                                  build_int_cst (NULL_TREE, BITS_PER_UNIT), 1);
1407       return expr;
1408
1409     /* These cases continue the recursion:  */
1410     case COMPONENT_REF:
1411       oprnd0 = TREE_OPERAND (expr, 0);
1412       oprnd1 = TREE_OPERAND (expr, 1);
1413
1414       this_offset = bit_position (oprnd1);
1415       if (vectype && !host_integerp (this_offset, 1))
1416         return NULL_TREE;
1417       next_ref = oprnd0;
1418       break;
1419
1420     case ADDR_EXPR:
1421       oprnd0 = TREE_OPERAND (expr, 0);
1422       next_ref = oprnd0;
1423       break;
1424
1425     case INDIRECT_REF:
1426       oprnd0 = TREE_OPERAND (expr, 0);
1427       next_ref = oprnd0;
1428       break;
1429
1430     case ARRAY_REF:
1431       if (DR_REF (dr) != expr)
1432         /* Build array data_reference struct if the existing DR_REF
1433            doesn't match EXPR. This happens, for example, when the
1434            EXPR is *T and T is initialized to &arr[indx]. The DR struct
1435            contains information on the access of T, not of arr. In order
1436            to continue  the analysis, we create a new DR struct that
1437            describes the access of arr.
1438         */
1439         array_dr = analyze_array (DR_STMT (dr), expr, DR_IS_READ (dr));
1440       else
1441         array_dr = dr;
1442
1443       next_ref = vect_compute_array_ref_alignment (array_dr, loop_vinfo,
1444                                                    vectype, &this_offset);
1445       if (!next_ref)
1446         return NULL_TREE;
1447
1448       if (vectype &&
1449           TYPE_ALIGN (TREE_TYPE (TREE_TYPE (next_ref))) >= TYPE_ALIGN (vectype))
1450         {
1451           *offset = this_offset;
1452           *base_aligned_p = true;
1453           return next_ref;
1454         }
1455       break;
1456
1457     case PLUS_EXPR:
1458     case MINUS_EXPR:
1459       /* In case we have a PLUS_EXPR of the form
1460          (oprnd0 + oprnd1), we assume that only oprnd0 determines the base.
1461          This is verified in  vect_get_symbl_and_dr.  */
1462       oprnd0 = TREE_OPERAND (expr, 0);
1463       oprnd1 = TREE_OPERAND (expr, 1);
1464
1465       base = vect_get_base_and_bit_offset
1466         (dr, oprnd1, vectype, loop_vinfo, &this_offset, base_aligned_p);
1467       if (vectype && !base)
1468         return NULL_TREE;
1469
1470       next_ref = oprnd0;
1471       break;
1472
1473     default:
1474       return NULL_TREE;
1475     }
1476
1477   base = vect_get_base_and_bit_offset (dr, next_ref, vectype,
1478                                        loop_vinfo, offset, base_aligned_p);
1479
1480   if (vectype && base)
1481     {
1482       *offset = int_const_binop (PLUS_EXPR, *offset, this_offset, 1);
1483       if (!host_integerp (*offset, 1) || TREE_OVERFLOW (*offset))
1484         return NULL_TREE;
1485
1486       if (vect_debug_details (NULL))
1487         {
1488           print_generic_expr (dump_file, expr, TDF_SLIM);
1489           fprintf (dump_file, " --> total offset for ref: ");
1490           print_generic_expr (dump_file, *offset, TDF_SLIM);
1491         }
1492     }
1493   return base;
1494 }
1495
1496
1497 /* Function vect_force_dr_alignment_p.
1498
1499    Returns whether the alignment of a DECL can be forced to be aligned
1500    on ALIGNMENT bit boundary.  */
1501
1502 static bool
1503 vect_can_force_dr_alignment_p (tree decl, unsigned int alignment)
1504 {
1505   if (TREE_CODE (decl) != VAR_DECL)
1506     return false;
1507
1508   if (DECL_EXTERNAL (decl))
1509     return false;
1510
1511   if (TREE_STATIC (decl))
1512     return (alignment <= MAX_OFILE_ALIGNMENT);
1513   else
1514     /* This is not 100% correct.  The absolute correct stack alignment
1515        is STACK_BOUNDARY.  We're supposed to hope, but not assume, that
1516        PREFERRED_STACK_BOUNDARY is honored by all translation units.
1517        However, until someone implements forced stack alignment, SSE
1518        isn't really usable without this.  */
1519     return (alignment <= PREFERRED_STACK_BOUNDARY);
1520 }
1521
1522
1523 /* Function vect_get_new_vect_var.
1524
1525    Returns a name for a new variable. The current naming scheme appends the
1526    prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
1527    the name of vectorizer generated variables, and appends that to NAME if
1528    provided.  */
1529
1530 static tree
1531 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
1532 {
1533   const char *prefix;
1534   int prefix_len;
1535   tree new_vect_var;
1536
1537   if (var_kind == vect_simple_var)
1538     prefix = "vect_";
1539   else
1540     prefix = "vect_p";
1541
1542   prefix_len = strlen (prefix);
1543
1544   if (name)
1545     new_vect_var = create_tmp_var (type, concat (prefix, name, NULL));
1546   else
1547     new_vect_var = create_tmp_var (type, prefix);
1548
1549   return new_vect_var;
1550 }
1551
1552
1553 /* Function vect_create_index_for_vector_ref.
1554
1555    Create (and return) an index variable, along with it's update chain in the
1556    loop. This variable will be used to access a memory location in a vector
1557    operation.
1558
1559    Input:
1560    LOOP: The loop being vectorized.
1561    BSI: The block_stmt_iterator where STMT is. Any new stmts created by this
1562         function can be added here, or in the loop pre-header.
1563
1564    Output:
1565    Return an index that will be used to index a vector array.  It is expected
1566    that a pointer to the first vector will be used as the base address for the
1567    indexed reference.
1568
1569    FORNOW: we are not trying to be efficient, just creating a new index each
1570    time from scratch.  At this time all vector references could use the same
1571    index.
1572
1573    TODO: create only one index to be used by all vector references.  Record
1574    the index in the LOOP_VINFO the first time this procedure is called and
1575    return it on subsequent calls.  The increment of this index must be placed
1576    just before the conditional expression that ends the single block loop.  */
1577
1578 static tree
1579 vect_create_index_for_vector_ref (struct loop *loop, block_stmt_iterator *bsi)
1580 {
1581   tree init, step;
1582   tree indx_before_incr, indx_after_incr;
1583
1584   /* It is assumed that the base pointer used for vectorized access contains
1585      the address of the first vector.  Therefore the index used for vectorized
1586      access must be initialized to zero and incremented by 1.  */
1587
1588   init = integer_zero_node;
1589   step = integer_one_node;
1590
1591   /* Assuming that bsi_insert is used with BSI_NEW_STMT  */
1592   create_iv (init, step, NULL_TREE, loop, bsi, false,
1593         &indx_before_incr, &indx_after_incr);
1594
1595   return indx_before_incr;
1596 }
1597
1598
1599 /* Function vect_create_addr_base_for_vector_ref.
1600
1601    Create an expression that computes the address of the first memory location
1602    that will be accessed for a data reference.
1603
1604    Input:
1605    STMT: The statement containing the data reference.
1606    NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
1607    OFFSET: Optional. If supplied, it is be added to the initial address.
1608
1609    Output:
1610    1. Return an SSA_NAME whose value is the address of the memory location of
1611       the first vector of the data reference.
1612    2. If new_stmt_list is not NULL_TREE after return then the caller must insert
1613       these statement(s) which define the returned SSA_NAME.
1614
1615    FORNOW: We are only handling array accesses with step 1.  */
1616
1617 static tree
1618 vect_create_addr_base_for_vector_ref (tree stmt,
1619                                       tree *new_stmt_list,
1620                                       tree offset)
1621 {
1622   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1623   struct loop *loop = STMT_VINFO_LOOP (stmt_info);
1624   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1625   tree data_ref_base = unshare_expr (STMT_VINFO_VECT_DR_BASE (stmt_info));
1626   tree base_name = unshare_expr (DR_BASE_NAME (dr));
1627   tree ref = DR_REF (dr);
1628   tree data_ref_base_type = TREE_TYPE (data_ref_base);
1629   tree scalar_type = TREE_TYPE (ref);
1630   tree scalar_ptr_type = build_pointer_type (scalar_type);
1631   tree access_fn;
1632   tree init_val, step, init_oval;
1633   bool ok;
1634   bool is_ptr_ref, is_array_ref, is_addr_expr;
1635   tree array_base;
1636   tree vec_stmt;
1637   tree new_temp;
1638   tree array_ref;
1639   tree addr_base, addr_expr;
1640   tree dest, new_stmt;
1641
1642   /* Only the access function of the last index is relevant (i_n in
1643      a[i_1][i_2]...[i_n]), the others correspond to loop invariants.  */
1644   access_fn = DR_ACCESS_FN (dr, 0);
1645   ok = vect_is_simple_iv_evolution (loop->num, access_fn, &init_oval, &step,
1646                                     true);
1647   if (!ok)
1648     init_oval = integer_zero_node;
1649
1650   is_ptr_ref = TREE_CODE (data_ref_base_type) == POINTER_TYPE
1651                && TREE_CODE (data_ref_base) == SSA_NAME;
1652   is_array_ref = TREE_CODE (data_ref_base_type) == ARRAY_TYPE;
1653   is_addr_expr = TREE_CODE (data_ref_base) == ADDR_EXPR
1654                  || TREE_CODE (data_ref_base) == PLUS_EXPR
1655                  || TREE_CODE (data_ref_base) == MINUS_EXPR;
1656   gcc_assert (is_ptr_ref || is_array_ref || is_addr_expr);
1657
1658   /** Create: &(base[init_val])
1659
1660       if data_ref_base is an ARRAY_TYPE:
1661          base = data_ref_base
1662
1663       if data_ref_base is the SSA_NAME of a POINTER_TYPE:
1664          base = *((scalar_array *) data_ref_base)
1665    **/
1666
1667   if (is_array_ref)
1668     array_base = data_ref_base;
1669   else /* is_ptr_ref  or is_addr_expr */
1670     {
1671       /* array_ptr = (scalar_array_ptr_type *) data_ref_base;  */
1672       tree scalar_array_type = build_array_type (scalar_type, 0);
1673       tree scalar_array_ptr_type = build_pointer_type (scalar_array_type);
1674       tree array_ptr = create_tmp_var (scalar_array_ptr_type, "array_ptr");
1675       add_referenced_tmp_var (array_ptr);
1676
1677       dest = create_tmp_var (TREE_TYPE (data_ref_base), "dataref");
1678       add_referenced_tmp_var (dest);
1679       data_ref_base =
1680         force_gimple_operand (data_ref_base, &new_stmt, false, dest);
1681       append_to_statement_list_force (new_stmt, new_stmt_list);
1682
1683       vec_stmt = fold_convert (scalar_array_ptr_type, data_ref_base);
1684       vec_stmt = build2 (MODIFY_EXPR, void_type_node, array_ptr, vec_stmt);
1685       new_temp = make_ssa_name (array_ptr, vec_stmt);
1686       TREE_OPERAND (vec_stmt, 0) = new_temp;
1687       append_to_statement_list_force (vec_stmt, new_stmt_list);
1688
1689       /* (*array_ptr)  */
1690       array_base = build_fold_indirect_ref (new_temp);
1691     }
1692
1693   dest = create_tmp_var (TREE_TYPE (init_oval), "newinit");
1694   add_referenced_tmp_var (dest);
1695   init_val = force_gimple_operand (init_oval, &new_stmt, false, dest);
1696   append_to_statement_list_force (new_stmt, new_stmt_list);
1697
1698   if (offset)
1699     {
1700       tree tmp = create_tmp_var (TREE_TYPE (init_val), "offset");
1701       add_referenced_tmp_var (tmp);
1702       vec_stmt = build2 (PLUS_EXPR, TREE_TYPE (init_val), init_val, offset);
1703       vec_stmt = build2 (MODIFY_EXPR, TREE_TYPE (init_val), tmp, vec_stmt);
1704       init_val = make_ssa_name (tmp, vec_stmt);
1705       TREE_OPERAND (vec_stmt, 0) = init_val;
1706       append_to_statement_list_force (vec_stmt, new_stmt_list);
1707     }
1708
1709   array_ref = build4 (ARRAY_REF, scalar_type, array_base, init_val,
1710                       NULL_TREE, NULL_TREE);
1711   addr_base = build_fold_addr_expr (array_ref);
1712
1713   /* addr_expr = addr_base */
1714   addr_expr = vect_get_new_vect_var (scalar_ptr_type, vect_pointer_var,
1715                                      get_name (base_name));
1716   add_referenced_tmp_var (addr_expr);
1717   vec_stmt = build2 (MODIFY_EXPR, void_type_node, addr_expr, addr_base);
1718   new_temp = make_ssa_name (addr_expr, vec_stmt);
1719   TREE_OPERAND (vec_stmt, 0) = new_temp;
1720   append_to_statement_list_force (vec_stmt, new_stmt_list);
1721
1722   return new_temp;
1723 }
1724
1725
1726 /* Function get_vectype_for_scalar_type.
1727
1728    Returns the vector type corresponding to SCALAR_TYPE as supported
1729    by the target.  */
1730
1731 static tree
1732 get_vectype_for_scalar_type (tree scalar_type)
1733 {
1734   enum machine_mode inner_mode = TYPE_MODE (scalar_type);
1735   int nbytes = GET_MODE_SIZE (inner_mode);
1736   int nunits;
1737   tree vectype;
1738
1739   if (nbytes == 0)
1740     return NULL_TREE;
1741
1742   /* FORNOW: Only a single vector size per target (UNITS_PER_SIMD_WORD)
1743      is expected.  */
1744   nunits = UNITS_PER_SIMD_WORD / nbytes;
1745
1746   vectype = build_vector_type (scalar_type, nunits);
1747   if (vect_debug_details (NULL))
1748     {
1749       fprintf (dump_file, "get vectype with %d units of type ", nunits);
1750       print_generic_expr (dump_file, scalar_type, TDF_SLIM);
1751     }
1752
1753   if (!vectype)
1754     return NULL_TREE;
1755
1756   if (vect_debug_details (NULL))
1757     {
1758       fprintf (dump_file, "vectype: ");
1759       print_generic_expr (dump_file, vectype, TDF_SLIM);
1760     }
1761
1762   if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
1763     {
1764       /* TODO: tree-complex.c sometimes can parallelize operations
1765          on generic vectors.  We can vectorize the loop in that case,
1766          but then we should re-run the lowering pass.  */
1767       if (vect_debug_details (NULL))
1768         fprintf (dump_file, "mode not supported by target.");
1769       return NULL_TREE;
1770     }
1771
1772   return vectype;
1773 }
1774
1775
1776 /* Function vect_align_data_ref.
1777
1778    Handle mislignment of a memory accesses.
1779
1780    FORNOW: Can't handle misaligned accesses.
1781    Make sure that the dataref is aligned.  */
1782
1783 static void
1784 vect_align_data_ref (tree stmt)
1785 {
1786   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1787   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1788
1789   /* FORNOW: can't handle misaligned accesses;
1790              all accesses expected to be aligned.  */
1791   gcc_assert (aligned_access_p (dr));
1792 }
1793
1794
1795 /* Function vect_create_data_ref_ptr.
1796
1797    Create a memory reference expression for vector access, to be used in a
1798    vector load/store stmt. The reference is based on a new pointer to vector
1799    type (vp).
1800
1801    Input:
1802    1. STMT: a stmt that references memory. Expected to be of the form
1803          MODIFY_EXPR <name, data-ref> or MODIFY_EXPR <data-ref, name>.
1804    2. BSI: block_stmt_iterator where new stmts can be added.
1805    3. OFFSET (optional): an offset to be added to the initial address accessed
1806         by the data-ref in STMT.
1807    4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
1808         pointing to the initial address.
1809
1810    Output:
1811    1. Declare a new ptr to vector_type, and have it point to the base of the
1812       data reference (initial addressed accessed by the data reference).
1813       For example, for vector of type V8HI, the following code is generated:
1814
1815       v8hi *vp;
1816       vp = (v8hi *)initial_address;
1817
1818       if OFFSET is not supplied:
1819          initial_address = &a[init];
1820       if OFFSET is supplied:
1821          initial_address = &a[init + OFFSET];
1822
1823       Return the initial_address in INITIAL_ADDRESS.
1824
1825    2. Create a data-reference in the loop based on the new vector pointer vp,
1826       and using a new index variable 'idx' as follows:
1827
1828       vp' = vp + update
1829
1830       where if ONLY_INIT is true:
1831          update = zero
1832       and otherwise
1833          update = idx + vector_type_size
1834
1835       Return the pointer vp'.
1836
1837
1838    FORNOW: handle only aligned and consecutive accesses.  */
1839
1840 static tree
1841 vect_create_data_ref_ptr (tree stmt, block_stmt_iterator *bsi, tree offset,
1842                           tree *initial_address, bool only_init)
1843 {
1844   tree base_name;
1845   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1846   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1847   struct loop *loop = STMT_VINFO_LOOP (stmt_info);
1848   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1849   tree vect_ptr_type;
1850   tree vect_ptr;
1851   tree tag;
1852   v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
1853   v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
1854   vuse_optype vuses = STMT_VUSE_OPS (stmt);
1855   int nvuses, nv_may_defs, nv_must_defs;
1856   int i;
1857   tree new_temp;
1858   tree vec_stmt;
1859   tree new_stmt_list = NULL_TREE;
1860   tree idx;
1861   edge pe = loop_preheader_edge (loop);
1862   basic_block new_bb;
1863   tree vect_ptr_init;
1864   tree vectype_size;
1865   tree ptr_update;
1866   tree data_ref_ptr;
1867
1868   base_name = unshare_expr (DR_BASE_NAME (dr));
1869   if (vect_debug_details (NULL))
1870     {
1871       tree data_ref_base = base_name;
1872       fprintf (dump_file, "create array_ref of type: ");
1873       print_generic_expr (dump_file, vectype, TDF_SLIM);
1874       if (TREE_CODE (data_ref_base) == VAR_DECL)
1875         fprintf (dump_file, "vectorizing a one dimensional array ref: ");
1876       else if (TREE_CODE (data_ref_base) == ARRAY_REF)
1877         fprintf (dump_file, "vectorizing a multidimensional array ref: ");
1878       else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
1879         fprintf (dump_file, "vectorizing a record based array ref: ");
1880       else if (TREE_CODE (data_ref_base) == SSA_NAME)
1881         fprintf (dump_file, "vectorizing a pointer ref: ");
1882       print_generic_expr (dump_file, base_name, TDF_SLIM);
1883     }
1884
1885   /** (1) Create the new vector-pointer variable:  **/
1886
1887   vect_ptr_type = build_pointer_type (vectype);
1888   vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
1889                                     get_name (base_name));
1890   add_referenced_tmp_var (vect_ptr);
1891
1892
1893   /** (2) Handle aliasing information of the new vector-pointer:  **/
1894
1895   tag = STMT_VINFO_MEMTAG (stmt_info);
1896   gcc_assert (tag);
1897   get_var_ann (vect_ptr)->type_mem_tag = tag;
1898
1899   /* Mark for renaming all aliased variables
1900      (i.e, the may-aliases of the type-mem-tag).  */
1901   nvuses = NUM_VUSES (vuses);
1902   nv_may_defs = NUM_V_MAY_DEFS (v_may_defs);
1903   nv_must_defs = NUM_V_MUST_DEFS (v_must_defs);
1904   for (i = 0; i < nvuses; i++)
1905     {
1906       tree use = VUSE_OP (vuses, i);
1907       if (TREE_CODE (use) == SSA_NAME)
1908         bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (use))->uid);
1909     }
1910   for (i = 0; i < nv_may_defs; i++)
1911     {
1912       tree def = V_MAY_DEF_RESULT (v_may_defs, i);
1913       if (TREE_CODE (def) == SSA_NAME)
1914         bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (def))->uid);
1915     }
1916   for (i = 0; i < nv_must_defs; i++)
1917     {
1918       tree def = V_MUST_DEF_RESULT (v_must_defs, i);
1919       if (TREE_CODE (def) == SSA_NAME)
1920         bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (def))->uid);
1921     }
1922
1923
1924   /** (3) Calculate the initial address the vector-pointer, and set
1925           the vector-pointer to point to it before the loop:  **/
1926
1927   /* Create: (&(base[init_val+offset]) in the loop preheader.  */
1928   new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
1929                                                    offset);
1930   pe = loop_preheader_edge (loop);
1931   new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list);
1932   gcc_assert (!new_bb);
1933   *initial_address = new_temp;
1934
1935   /* Create: p = (vectype *) initial_base  */
1936   vec_stmt = fold_convert (vect_ptr_type, new_temp);
1937   vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
1938   new_temp = make_ssa_name (vect_ptr, vec_stmt);
1939   TREE_OPERAND (vec_stmt, 0) = new_temp;
1940   new_bb = bsi_insert_on_edge_immediate (pe, vec_stmt);
1941   gcc_assert (!new_bb);
1942   vect_ptr_init = TREE_OPERAND (vec_stmt, 0);
1943
1944
1945   /** (4) Handle the updating of the vector-pointer inside the loop: **/
1946
1947   if (only_init) /* No update in loop is required.  */
1948     return vect_ptr_init;
1949
1950   idx = vect_create_index_for_vector_ref (loop, bsi);
1951
1952   /* Create: update = idx * vectype_size  */
1953   ptr_update = create_tmp_var (integer_type_node, "update");
1954   add_referenced_tmp_var (ptr_update);
1955   vectype_size = build_int_cst (integer_type_node,
1956                                 GET_MODE_SIZE (TYPE_MODE (vectype)));
1957   vec_stmt = build2 (MULT_EXPR, integer_type_node, idx, vectype_size);
1958   vec_stmt = build2 (MODIFY_EXPR, void_type_node, ptr_update, vec_stmt);
1959   new_temp = make_ssa_name (ptr_update, vec_stmt);
1960   TREE_OPERAND (vec_stmt, 0) = new_temp;
1961   bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
1962
1963   /* Create: data_ref_ptr = vect_ptr_init + update  */
1964   vec_stmt = build2 (PLUS_EXPR, vect_ptr_type, vect_ptr_init, new_temp);
1965   vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
1966   new_temp = make_ssa_name (vect_ptr, vec_stmt);
1967   TREE_OPERAND (vec_stmt, 0) = new_temp;
1968   bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
1969   data_ref_ptr = TREE_OPERAND (vec_stmt, 0);
1970
1971   return data_ref_ptr;
1972 }
1973
1974
1975 /* Function vect_create_destination_var.
1976
1977    Create a new temporary of type VECTYPE.  */
1978
1979 static tree
1980 vect_create_destination_var (tree scalar_dest, tree vectype)
1981 {
1982   tree vec_dest;
1983   const char *new_name;
1984
1985   gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
1986
1987   new_name = get_name (scalar_dest);
1988   if (!new_name)
1989     new_name = "var_";
1990   vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, new_name);
1991   add_referenced_tmp_var (vec_dest);
1992
1993   return vec_dest;
1994 }
1995
1996
1997 /* Function vect_init_vector.
1998
1999    Insert a new stmt (INIT_STMT) that initializes a new vector variable with
2000    the vector elements of VECTOR_VAR. Return the DEF of INIT_STMT. It will be
2001    used in the vectorization of STMT.  */
2002
2003 static tree
2004 vect_init_vector (tree stmt, tree vector_var)
2005 {
2006   stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2007   struct loop *loop = STMT_VINFO_LOOP (stmt_vinfo);
2008   tree new_var;
2009   tree init_stmt;
2010   tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2011   tree vec_oprnd;
2012   edge pe;
2013   tree new_temp;
2014   basic_block new_bb;
2015
2016   new_var = vect_get_new_vect_var (vectype, vect_simple_var, "cst_");
2017   add_referenced_tmp_var (new_var);
2018
2019   init_stmt = build2 (MODIFY_EXPR, vectype, new_var, vector_var);
2020   new_temp = make_ssa_name (new_var, init_stmt);
2021   TREE_OPERAND (init_stmt, 0) = new_temp;
2022
2023   pe = loop_preheader_edge (loop);
2024   new_bb = bsi_insert_on_edge_immediate (pe, init_stmt);
2025   gcc_assert (!new_bb);
2026
2027   if (vect_debug_details (NULL))
2028     {
2029       fprintf (dump_file, "created new init_stmt: ");
2030       print_generic_expr (dump_file, init_stmt, TDF_SLIM);
2031     }
2032
2033   vec_oprnd = TREE_OPERAND (init_stmt, 0);
2034   return vec_oprnd;
2035 }
2036
2037
2038 /* Function vect_get_vec_def_for_operand.
2039
2040    OP is an operand in STMT. This function returns a (vector) def that will be
2041    used in the vectorized stmt for STMT.
2042
2043    In the case that OP is an SSA_NAME which is defined in the loop, then
2044    STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
2045
2046    In case OP is an invariant or constant, a new stmt that creates a vector def
2047    needs to be introduced.  */
2048
2049 static tree
2050 vect_get_vec_def_for_operand (tree op, tree stmt)
2051 {
2052   tree vec_oprnd;
2053   tree vec_stmt;
2054   tree def_stmt;
2055   stmt_vec_info def_stmt_info = NULL;
2056   stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2057   tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2058   int nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
2059   struct loop *loop = STMT_VINFO_LOOP (stmt_vinfo);
2060   basic_block bb;
2061   tree vec_inv;
2062   tree t = NULL_TREE;
2063   tree def;
2064   int i;
2065
2066   if (vect_debug_details (NULL))
2067     {
2068       fprintf (dump_file, "vect_get_vec_def_for_operand: ");
2069       print_generic_expr (dump_file, op, TDF_SLIM);
2070     }
2071
2072   /** ===> Case 1: operand is a constant.  **/
2073
2074   if (TREE_CODE (op) == INTEGER_CST || TREE_CODE (op) == REAL_CST)
2075     {
2076       /* Create 'vect_cst_ = {cst,cst,...,cst}'  */
2077
2078       tree vec_cst;
2079
2080       /* Build a tree with vector elements.  */
2081       if (vect_debug_details (NULL))
2082         fprintf (dump_file, "Create vector_cst. nunits = %d", nunits);
2083
2084       for (i = nunits - 1; i >= 0; --i)
2085         {
2086           t = tree_cons (NULL_TREE, op, t);
2087         }
2088       vec_cst = build_vector (vectype, t);
2089       return vect_init_vector (stmt, vec_cst);
2090     }
2091
2092   gcc_assert (TREE_CODE (op) == SSA_NAME);
2093
2094   /** ===> Case 2: operand is an SSA_NAME - find the stmt that defines it.  **/
2095
2096   def_stmt = SSA_NAME_DEF_STMT (op);
2097   def_stmt_info = vinfo_for_stmt (def_stmt);
2098
2099   if (vect_debug_details (NULL))
2100     {
2101       fprintf (dump_file, "vect_get_vec_def_for_operand: def_stmt: ");
2102       print_generic_expr (dump_file, def_stmt, TDF_SLIM);
2103     }
2104
2105
2106   /** ==> Case 2.1: operand is defined inside the loop.  **/
2107
2108   if (def_stmt_info)
2109     {
2110       /* Get the def from the vectorized stmt.  */
2111
2112       vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
2113       gcc_assert (vec_stmt);
2114       vec_oprnd = TREE_OPERAND (vec_stmt, 0);
2115       return vec_oprnd;
2116     }
2117
2118
2119   /** ==> Case 2.2: operand is defined by the loop-header phi-node -
2120                     it is a reduction/induction.  **/
2121
2122   bb = bb_for_stmt (def_stmt);
2123   if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb))
2124     {
2125       if (vect_debug_details (NULL))
2126         fprintf (dump_file, "reduction/induction - unsupported.");
2127       internal_error ("no support for reduction/induction"); /* FORNOW */
2128     }
2129
2130
2131   /** ==> Case 2.3: operand is defined outside the loop -
2132                     it is a loop invariant.  */
2133
2134   switch (TREE_CODE (def_stmt))
2135     {
2136     case PHI_NODE:
2137       def = PHI_RESULT (def_stmt);
2138       break;
2139     case MODIFY_EXPR:
2140       def = TREE_OPERAND (def_stmt, 0);
2141       break;
2142     case NOP_EXPR:
2143       def = TREE_OPERAND (def_stmt, 0);
2144       gcc_assert (IS_EMPTY_STMT (def_stmt));
2145       def = op;
2146       break;
2147     default:
2148       if (vect_debug_details (NULL))
2149         {
2150           fprintf (dump_file, "unsupported defining stmt: ");
2151           print_generic_expr (dump_file, def_stmt, TDF_SLIM);
2152         }
2153       internal_error ("unsupported defining stmt");
2154     }
2155
2156   /* Build a tree with vector elements. Create 'vec_inv = {inv,inv,..,inv}'  */
2157
2158   if (vect_debug_details (NULL))
2159     fprintf (dump_file, "Create vector_inv.");
2160
2161   for (i = nunits - 1; i >= 0; --i)
2162     {
2163       t = tree_cons (NULL_TREE, def, t);
2164     }
2165
2166   vec_inv = build_constructor (vectype, t);
2167   return vect_init_vector (stmt, vec_inv);
2168 }
2169
2170
2171 /* Function vect_finish_stmt_generation.
2172
2173    Insert a new stmt.  */
2174
2175 static void
2176 vect_finish_stmt_generation (tree stmt, tree vec_stmt, block_stmt_iterator *bsi)
2177 {
2178   bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
2179
2180   if (vect_debug_details (NULL))
2181     {
2182       fprintf (dump_file, "add new stmt: ");
2183       print_generic_expr (dump_file, vec_stmt, TDF_SLIM);
2184     }
2185
2186   /* Make sure bsi points to the stmt that is being vectorized.  */
2187
2188   /* Assumption: any stmts created for the vectorization of stmt S were
2189      inserted before S. BSI is expected to point to S or some new stmt before S.
2190    */
2191
2192   while (stmt != bsi_stmt (*bsi) && !bsi_end_p (*bsi))
2193     bsi_next (bsi);
2194   gcc_assert (stmt == bsi_stmt (*bsi));
2195 }
2196
2197
2198 /* Function vectorizable_assignment.
2199
2200    Check if STMT performs an assignment (copy) that can be vectorized.
2201    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2202    stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2203    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
2204
2205 static bool
2206 vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2207 {
2208   tree vec_dest;
2209   tree scalar_dest;
2210   tree op;
2211   tree vec_oprnd;
2212   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2213   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2214   struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2215   tree new_temp;
2216
2217   /* Is vectorizable assignment?  */
2218
2219   if (TREE_CODE (stmt) != MODIFY_EXPR)
2220     return false;
2221
2222   scalar_dest = TREE_OPERAND (stmt, 0);
2223   if (TREE_CODE (scalar_dest) != SSA_NAME)
2224     return false;
2225
2226   op = TREE_OPERAND (stmt, 1);
2227   if (!vect_is_simple_use (op, loop, NULL))
2228     {
2229       if (vect_debug_details (NULL))
2230         fprintf (dump_file, "use not simple.");
2231       return false;
2232     }
2233
2234   if (!vec_stmt) /* transformation not required.  */
2235     {
2236       STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
2237       return true;
2238     }
2239
2240   /** Trasform.  **/
2241   if (vect_debug_details (NULL))
2242     fprintf (dump_file, "transform assignment.");
2243
2244   /* Handle def.  */
2245   vec_dest = vect_create_destination_var (scalar_dest, vectype);
2246
2247   /* Handle use.  */
2248   op = TREE_OPERAND (stmt, 1);
2249   vec_oprnd = vect_get_vec_def_for_operand (op, stmt);
2250
2251   /* Arguments are ready. create the new vector stmt.  */
2252   *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_oprnd);
2253   new_temp = make_ssa_name (vec_dest, *vec_stmt);
2254   TREE_OPERAND (*vec_stmt, 0) = new_temp;
2255   vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2256
2257   return true;
2258 }
2259
2260
2261 /* Function vectorizable_operation.
2262
2263    Check if STMT performs a binary or unary operation that can be vectorized.
2264    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2265    stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2266    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
2267
2268 static bool
2269 vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2270 {
2271   tree vec_dest;
2272   tree scalar_dest;
2273   tree operation;
2274   tree op0, op1 = NULL;
2275   tree vec_oprnd0, vec_oprnd1=NULL;
2276   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2277   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2278   struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2279   int i;
2280   enum tree_code code;
2281   enum machine_mode vec_mode;
2282   tree new_temp;
2283   int op_type;
2284   tree op;
2285   optab optab;
2286
2287   /* Is STMT a vectorizable binary/unary operation?   */
2288   if (TREE_CODE (stmt) != MODIFY_EXPR)
2289     return false;
2290
2291   if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
2292     return false;
2293
2294   operation = TREE_OPERAND (stmt, 1);
2295   code = TREE_CODE (operation);
2296   optab = optab_for_tree_code (code, vectype);
2297
2298   /* Support only unary or binary operations.  */
2299   op_type = TREE_CODE_LENGTH (code);
2300   if (op_type != unary_op && op_type != binary_op)
2301     {
2302       if (vect_debug_details (NULL))
2303         fprintf (dump_file, "num. args = %d (not unary/binary op).", op_type);
2304       return false;
2305     }
2306
2307   for (i = 0; i < op_type; i++)
2308     {
2309       op = TREE_OPERAND (operation, i);
2310       if (!vect_is_simple_use (op, loop, NULL))
2311         {
2312           if (vect_debug_details (NULL))
2313             fprintf (dump_file, "use not simple.");
2314           return false;
2315         }
2316     }
2317
2318   /* Supportable by target?  */
2319   if (!optab)
2320     {
2321       if (vect_debug_details (NULL))
2322         fprintf (dump_file, "no optab.");
2323       return false;
2324     }
2325   vec_mode = TYPE_MODE (vectype);
2326   if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
2327     {
2328       if (vect_debug_details (NULL))
2329         fprintf (dump_file, "op not supported by target.");
2330       return false;
2331     }
2332
2333   if (!vec_stmt) /* transformation not required.  */
2334     {
2335       STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
2336       return true;
2337     }
2338
2339   /** Transform.  **/
2340
2341   if (vect_debug_details (NULL))
2342     fprintf (dump_file, "transform binary/unary operation.");
2343
2344   /* Handle def.  */
2345   scalar_dest = TREE_OPERAND (stmt, 0);
2346   vec_dest = vect_create_destination_var (scalar_dest, vectype);
2347
2348   /* Handle uses.  */
2349   op0 = TREE_OPERAND (operation, 0);
2350   vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
2351
2352   if (op_type == binary_op)
2353     {
2354       op1 = TREE_OPERAND (operation, 1);
2355       vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
2356     }
2357
2358   /* Arguments are ready. create the new vector stmt.  */
2359
2360   if (op_type == binary_op)
2361     *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
2362                 build2 (code, vectype, vec_oprnd0, vec_oprnd1));
2363   else
2364     *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
2365                 build1 (code, vectype, vec_oprnd0));
2366   new_temp = make_ssa_name (vec_dest, *vec_stmt);
2367   TREE_OPERAND (*vec_stmt, 0) = new_temp;
2368   vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2369
2370   return true;
2371 }
2372
2373
2374 /* Function vectorizable_store.
2375
2376    Check if STMT defines a non scalar data-ref (array/pointer/structure) that
2377    can be vectorized.
2378    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2379    stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2380    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
2381
2382 static bool
2383 vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2384 {
2385   tree scalar_dest;
2386   tree data_ref;
2387   tree op;
2388   tree vec_oprnd1;
2389   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2390   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2391   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2392   struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2393   enum machine_mode vec_mode;
2394   tree dummy;
2395   enum dr_alignment_support alignment_support_cheme;
2396
2397   /* Is vectorizable store? */
2398
2399   if (TREE_CODE (stmt) != MODIFY_EXPR)
2400     return false;
2401
2402   scalar_dest = TREE_OPERAND (stmt, 0);
2403   if (TREE_CODE (scalar_dest) != ARRAY_REF
2404       && TREE_CODE (scalar_dest) != INDIRECT_REF)
2405     return false;
2406
2407   op = TREE_OPERAND (stmt, 1);
2408   if (!vect_is_simple_use (op, loop, NULL))
2409     {
2410       if (vect_debug_details (NULL))
2411         fprintf (dump_file, "use not simple.");
2412       return false;
2413     }
2414
2415   vec_mode = TYPE_MODE (vectype);
2416   /* FORNOW. In some cases can vectorize even if data-type not supported
2417      (e.g. - array initialization with 0).  */
2418   if (mov_optab->handlers[(int)vec_mode].insn_code == CODE_FOR_nothing)
2419     return false;
2420
2421   if (!STMT_VINFO_DATA_REF (stmt_info))
2422     return false;
2423
2424
2425   if (!vec_stmt) /* transformation not required.  */
2426     {
2427       STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
2428       return true;
2429     }
2430
2431   /** Trasform.  **/
2432
2433   if (vect_debug_details (NULL))
2434     fprintf (dump_file, "transform store");
2435
2436   alignment_support_cheme = vect_supportable_dr_alignment (dr);
2437   gcc_assert (alignment_support_cheme);
2438   gcc_assert (alignment_support_cheme = dr_aligned);  /* FORNOW */
2439
2440   /* Handle use - get the vectorized def from the defining stmt.  */
2441   vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt);
2442
2443   /* Handle def.  */
2444   /* FORNOW: make sure the data reference is aligned.  */
2445   vect_align_data_ref (stmt);
2446   data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
2447   data_ref = build_fold_indirect_ref (data_ref);
2448
2449   /* Arguments are ready. create the new vector stmt.  */
2450   *vec_stmt = build2 (MODIFY_EXPR, vectype, data_ref, vec_oprnd1);
2451   vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2452
2453   return true;
2454 }
2455
2456
2457 /* vectorizable_load.
2458
2459    Check if STMT reads a non scalar data-ref (array/pointer/structure) that
2460    can be vectorized.
2461    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2462    stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2463    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
2464
2465 static bool
2466 vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2467 {
2468   tree scalar_dest;
2469   tree vec_dest = NULL;
2470   tree data_ref = NULL;
2471   tree op;
2472   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2473   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2474   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2475   tree new_temp;
2476   int mode;
2477   tree init_addr;
2478   tree new_stmt;
2479   tree dummy;
2480   basic_block new_bb;
2481   struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2482   edge pe = loop_preheader_edge (loop);
2483   enum dr_alignment_support alignment_support_cheme;
2484
2485   /* Is vectorizable load? */
2486
2487   if (TREE_CODE (stmt) != MODIFY_EXPR)
2488     return false;
2489
2490   scalar_dest = TREE_OPERAND (stmt, 0);
2491   if (TREE_CODE (scalar_dest) != SSA_NAME)
2492     return false;
2493
2494   op = TREE_OPERAND (stmt, 1);
2495   if (TREE_CODE (op) != ARRAY_REF && TREE_CODE (op) != INDIRECT_REF)
2496     return false;
2497
2498   if (!STMT_VINFO_DATA_REF (stmt_info))
2499     return false;
2500
2501   mode = (int) TYPE_MODE (vectype);
2502
2503   /* FORNOW. In some cases can vectorize even if data-type not supported
2504     (e.g. - data copies).  */
2505   if (mov_optab->handlers[mode].insn_code == CODE_FOR_nothing)
2506     {
2507       if (vect_debug_details (loop))
2508         fprintf (dump_file, "Aligned load, but unsupported type.");
2509       return false;
2510     }
2511
2512   if (!vec_stmt) /* transformation not required.  */
2513     {
2514       STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
2515       return true;
2516     }
2517
2518   /** Trasform.  **/
2519
2520   if (vect_debug_details (NULL))
2521     fprintf (dump_file, "transform load.");
2522
2523   alignment_support_cheme = vect_supportable_dr_alignment (dr);
2524   gcc_assert (alignment_support_cheme);
2525
2526   if (alignment_support_cheme == dr_aligned
2527       || alignment_support_cheme == dr_unaligned_supported)
2528     {
2529       /* Create:
2530          p = initial_addr;
2531          indx = 0;
2532          loop {
2533            vec_dest = *(p);
2534            indx = indx + 1;
2535          }
2536       */
2537
2538       vec_dest = vect_create_destination_var (scalar_dest, vectype);
2539       data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
2540       if (aligned_access_p (dr))
2541         data_ref = build_fold_indirect_ref (data_ref);
2542       else
2543         {
2544           int mis = DR_MISALIGNMENT (dr);
2545           tree tmis = (mis == -1 ?
2546                        integer_zero_node :
2547                        build_int_cst (integer_type_node, mis));
2548           tmis = int_const_binop (MULT_EXPR, tmis,
2549                         build_int_cst (integer_type_node, BITS_PER_UNIT), 1);
2550           data_ref = build2 (MISALIGNED_INDIRECT_REF, vectype, data_ref, tmis);
2551         }
2552       new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
2553       new_temp = make_ssa_name (vec_dest, new_stmt);
2554       TREE_OPERAND (new_stmt, 0) = new_temp;
2555       vect_finish_stmt_generation (stmt, new_stmt, bsi);
2556     }
2557   else if (alignment_support_cheme == dr_unaligned_software_pipeline)
2558     {
2559       /* Create:
2560          p1 = initial_addr;
2561          msq_init = *(floor(p1))
2562          p2 = initial_addr + VS - 1;
2563          magic = have_builtin ? builtin_result : initial_address;
2564          indx = 0;
2565          loop {
2566            p2' = p2 + indx * vectype_size
2567            lsq = *(floor(p2'))
2568            vec_dest = realign_load (msq, lsq, magic)
2569            indx = indx + 1;
2570            msq = lsq;
2571          }
2572       */
2573
2574       tree offset;
2575       tree magic;
2576       tree phi_stmt;
2577       tree msq_init;
2578       tree msq, lsq;
2579       tree dataref_ptr;
2580       tree params;
2581
2582       /* <1> Create msq_init = *(floor(p1)) in the loop preheader  */
2583       vec_dest = vect_create_destination_var (scalar_dest, vectype);
2584       data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE,
2585                                            &init_addr, true);
2586       data_ref = build1 (ALIGN_INDIRECT_REF, vectype, data_ref);
2587       new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
2588       new_temp = make_ssa_name (vec_dest, new_stmt);
2589       TREE_OPERAND (new_stmt, 0) = new_temp;
2590       new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
2591       gcc_assert (!new_bb);
2592       msq_init = TREE_OPERAND (new_stmt, 0);
2593
2594
2595       /* <2> Create lsq = *(floor(p2')) in the loop  */
2596       offset = build_int_cst (integer_type_node,
2597                               GET_MODE_NUNITS (TYPE_MODE (vectype)));
2598       offset = int_const_binop (MINUS_EXPR, offset, integer_one_node, 1);
2599       vec_dest = vect_create_destination_var (scalar_dest, vectype);
2600       dataref_ptr = vect_create_data_ref_ptr (stmt, bsi, offset, &dummy, false);
2601       data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
2602       new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
2603       new_temp = make_ssa_name (vec_dest, new_stmt);
2604       TREE_OPERAND (new_stmt, 0) = new_temp;
2605       vect_finish_stmt_generation (stmt, new_stmt, bsi);
2606       lsq = TREE_OPERAND (new_stmt, 0);
2607
2608
2609       /* <3> */
2610       if (targetm.vectorize.builtin_mask_for_load)
2611         {
2612           /* Create permutation mask, if required, in loop preheader.  */
2613           tree builtin_decl;
2614           params = build_tree_list (NULL_TREE, init_addr);
2615           vec_dest = vect_create_destination_var (scalar_dest, vectype);
2616           builtin_decl = targetm.vectorize.builtin_mask_for_load ();
2617           new_stmt = build_function_call_expr (builtin_decl, params);
2618           new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
2619           new_temp = make_ssa_name (vec_dest, new_stmt);
2620           TREE_OPERAND (new_stmt, 0) = new_temp;
2621           new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
2622           gcc_assert (!new_bb);
2623           magic = TREE_OPERAND (new_stmt, 0);
2624         }
2625       else
2626         {
2627           /* Use current address instead of init_addr for reduced reg pressure.
2628            */
2629           magic = dataref_ptr;
2630         }
2631
2632
2633       /* <4> Create msq = phi <msq_init, lsq> in loop  */
2634       vec_dest = vect_create_destination_var (scalar_dest, vectype);
2635       msq = make_ssa_name (vec_dest, NULL_TREE);
2636       phi_stmt = create_phi_node (msq, loop->header); /* CHECKME */
2637       SSA_NAME_DEF_STMT (msq) = phi_stmt;
2638       add_phi_arg (&phi_stmt, msq_init, loop_preheader_edge (loop));
2639       add_phi_arg (&phi_stmt, lsq, loop_latch_edge (loop));
2640
2641
2642       /* <5> Create <vec_dest = realign_load (msq, lsq, magic)> in loop  */
2643       vec_dest = vect_create_destination_var (scalar_dest, vectype);
2644       new_stmt = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq, magic);
2645       new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
2646       new_temp = make_ssa_name (vec_dest, new_stmt);
2647       TREE_OPERAND (new_stmt, 0) = new_temp;
2648       vect_finish_stmt_generation (stmt, new_stmt, bsi);
2649     }
2650   else
2651     gcc_unreachable ();
2652
2653   *vec_stmt = new_stmt;
2654   return true;
2655 }
2656
2657
2658 /* Function vect_supportable_dr_alignment
2659
2660    Return whether the data reference DR is supported with respect to its
2661    alignment.  */
2662
2663 static enum dr_alignment_support
2664 vect_supportable_dr_alignment (struct data_reference *dr)
2665 {
2666   tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr)));
2667   enum machine_mode mode = (int) TYPE_MODE (vectype);
2668
2669   if (aligned_access_p (dr))
2670     return dr_aligned;
2671
2672   /* Possibly unaligned access.  */
2673
2674   if (DR_IS_READ (dr))
2675     {
2676       if (vec_realign_load_optab->handlers[mode].insn_code != CODE_FOR_nothing
2677           && (!targetm.vectorize.builtin_mask_for_load
2678               || targetm.vectorize.builtin_mask_for_load ()))
2679         return dr_unaligned_software_pipeline;
2680
2681       if (targetm.vectorize.misaligned_mem_ok (mode))
2682         /* Can't software pipeline the loads.  */
2683         return dr_unaligned_supported;
2684     }
2685
2686   /* Unsupported.  */
2687   return dr_unaligned_unsupported;
2688 }
2689
2690
2691 /* Function vect_transform_stmt.
2692
2693    Create a vectorized stmt to replace STMT, and insert it at BSI.  */
2694
2695 static bool
2696 vect_transform_stmt (tree stmt, block_stmt_iterator *bsi)
2697 {
2698   bool is_store = false;
2699   tree vec_stmt = NULL_TREE;
2700   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2701   bool done;
2702
2703   switch (STMT_VINFO_TYPE (stmt_info))
2704     {
2705     case op_vec_info_type:
2706       done = vectorizable_operation (stmt, bsi, &vec_stmt);
2707       gcc_assert (done);
2708       break;
2709
2710     case assignment_vec_info_type:
2711       done = vectorizable_assignment (stmt, bsi, &vec_stmt);
2712       gcc_assert (done);
2713       break;
2714
2715     case load_vec_info_type:
2716       done = vectorizable_load (stmt, bsi, &vec_stmt);
2717       gcc_assert (done);
2718       break;
2719
2720     case store_vec_info_type:
2721       done = vectorizable_store (stmt, bsi, &vec_stmt);
2722       gcc_assert (done);
2723       is_store = true;
2724       break;
2725     default:
2726       if (vect_debug_details (NULL))
2727         fprintf (dump_file, "stmt not supported.");
2728       gcc_unreachable ();
2729     }
2730
2731   STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
2732
2733   return is_store;
2734 }
2735
2736
2737 /* This function builds ni_name = number of iterations loop executes
2738    on the loop preheader.  */
2739
2740 static tree
2741 vect_build_loop_niters (loop_vec_info loop_vinfo)
2742 {
2743   tree ni_name, stmt, var;
2744   edge pe;
2745   basic_block new_bb = NULL;
2746   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2747   tree ni = unshare_expr (LOOP_VINFO_NITERS(loop_vinfo));
2748
2749   var = create_tmp_var (TREE_TYPE (ni), "niters");
2750   add_referenced_tmp_var (var);
2751   if (TREE_CODE (ni) == INTEGER_CST)
2752     {
2753       /* This case is generated when treating a known loop bound
2754          indivisible by VF. Here we cannot use force_gimple_operand.  */
2755       stmt = build (MODIFY_EXPR, void_type_node, var, ni);
2756       ni_name = make_ssa_name (var, stmt);
2757       TREE_OPERAND (stmt, 0) = ni_name;
2758     }
2759   else
2760     ni_name = force_gimple_operand (ni, &stmt, false, var);
2761
2762   pe = loop_preheader_edge (loop);
2763   if (stmt)
2764     new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2765   if (new_bb)
2766     add_bb_to_loop (new_bb, EDGE_PRED (new_bb, 0)->src->loop_father);
2767
2768   return ni_name;
2769 }
2770
2771
2772 /* This function generates the following statements:
2773
2774  ni_name = number of iterations loop executes
2775  ratio = ni_name / vf
2776  ratio_mult_vf_name = ratio * vf
2777
2778  and places them at the loop preheader edge.  */
2779
2780 static void
2781 vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo, tree *ni_name_p,
2782                                  tree *ratio_mult_vf_name_p, tree *ratio_p)
2783 {
2784
2785   edge pe;
2786   basic_block new_bb;
2787   tree stmt, ni_name;
2788   tree ratio;
2789   tree ratio_mult_vf_name, ratio_mult_vf;
2790   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2791   tree ni = LOOP_VINFO_NITERS(loop_vinfo);
2792
2793   int vf, i;
2794
2795   /* Generate temporary variable that contains
2796      number of iterations loop executes.  */
2797
2798   ni_name = vect_build_loop_niters (loop_vinfo);
2799
2800   /* ratio = ni / vf.
2801      vf is power of 2; then if ratio =  = n >> log2 (vf).  */
2802   vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2803   ratio = vect_build_symbol_bound (ni_name, vf, loop);
2804
2805   /* Update initial conditions of loop copy.  */
2806
2807   /* ratio_mult_vf = ratio * vf;
2808      then if ratio_mult_vf = ratio << log2 (vf).  */
2809
2810   i = exact_log2 (vf);
2811   ratio_mult_vf = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
2812   add_referenced_tmp_var (ratio_mult_vf);
2813
2814   ratio_mult_vf_name = make_ssa_name (ratio_mult_vf, NULL_TREE);
2815
2816   stmt = build2 (MODIFY_EXPR, void_type_node, ratio_mult_vf_name,
2817                 build2 (LSHIFT_EXPR, TREE_TYPE (ratio),
2818                        ratio, build_int_cst (unsigned_type_node,
2819                                              i)));
2820
2821   SSA_NAME_DEF_STMT (ratio_mult_vf_name) = stmt;
2822
2823   pe = loop_preheader_edge (loop);
2824   new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2825   if (new_bb)
2826     add_bb_to_loop (new_bb, EDGE_PRED (new_bb, 0)->src->loop_father);
2827
2828   *ni_name_p = ni_name;
2829   *ratio_mult_vf_name_p = ratio_mult_vf_name;
2830   *ratio_p = ratio;
2831
2832   return;
2833 }
2834
2835
2836 /* This function generates stmt
2837
2838    tmp = n / vf;
2839
2840    and attaches it to preheader of LOOP.  */
2841
2842 static tree
2843 vect_build_symbol_bound (tree n, int vf, struct loop * loop)
2844 {
2845   tree var, stmt, var_name;
2846   edge pe;
2847   basic_block new_bb;
2848   int i;
2849
2850   /* create temporary variable */
2851   var = create_tmp_var (TREE_TYPE (n), "bnd");
2852   add_referenced_tmp_var (var);
2853
2854   var_name = make_ssa_name (var, NULL_TREE);
2855
2856   /* vf is power of 2; then n/vf = n >> log2 (vf).  */
2857
2858   i = exact_log2 (vf);
2859   stmt = build2 (MODIFY_EXPR, void_type_node, var_name,
2860                 build2 (RSHIFT_EXPR, TREE_TYPE (n),
2861                        n, build_int_cst (unsigned_type_node,i)));
2862
2863   SSA_NAME_DEF_STMT (var_name) = stmt;
2864
2865   pe = loop_preheader_edge (loop);
2866   new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2867   if (new_bb)
2868     add_bb_to_loop (new_bb, EDGE_PRED (new_bb, 0)->src->loop_father);
2869   else
2870     if (vect_debug_details (NULL))
2871       fprintf (dump_file, "New bb on preheader edge was not generated.");
2872
2873   return var_name;
2874 }
2875
2876
2877 /* Function vect_transform_loop_bound.
2878
2879    Create a new exit condition for the loop.  */
2880
2881 static void
2882 vect_transform_loop_bound (loop_vec_info loop_vinfo, tree niters)
2883 {
2884   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2885   tree orig_cond_expr;
2886   HOST_WIDE_INT old_N = 0;
2887   int vf;
2888   tree new_loop_bound;
2889   bool symbol_niters;
2890   tree lb_type;
2891
2892   symbol_niters = !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo);
2893
2894   if (!symbol_niters)
2895     old_N = LOOP_VINFO_INT_NITERS (loop_vinfo);
2896
2897   vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2898
2899   orig_cond_expr = LOOP_VINFO_EXIT_COND (loop_vinfo);
2900 #ifdef ENABLE_CHECKING
2901   gcc_assert (orig_cond_expr);
2902 #endif
2903
2904   /* new loop exit test:  */
2905   lb_type = TREE_TYPE (TREE_OPERAND (COND_EXPR_COND (orig_cond_expr), 1));
2906   if (!symbol_niters)
2907     new_loop_bound =
2908         fold_convert (lb_type, build_int_cst (unsigned_type_node, old_N/vf));
2909   else
2910     new_loop_bound = niters;
2911
2912   slpeel_make_loop_iterate_ntimes (loop, new_loop_bound);
2913 }
2914
2915
2916 /*   Function vect_update_ivs_after_vectorizer.
2917
2918      "Advance" the induction variables of LOOP to the value they should take
2919      after the execution of LOOP.  This is currently necessary because the
2920      vectorizer does not handle induction variables that are used after the
2921      loop.  Such a situation occurs when the last iterations of LOOP are
2922      peeled, because:
2923      1. We introduced new uses after LOOP for IVs that were not originally used
2924         after LOOP: the IVs of LOOP are now used by an epilog loop.
2925      2. LOOP is going to be vectorized; this means that it will iterate N/VF
2926         times, whereas the loop IVs should be bumped N times.
2927
2928      Input:
2929      - LOOP - a loop that is going to be vectorized. The last few iterations
2930               of LOOP were peeled.
2931      - NITERS - the number of iterations that LOOP executes (before it is
2932                 vectorized). i.e, the number of times the ivs should be bumped.
2933      - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
2934                   coming out from LOOP on which there are uses of the LOOP ivs
2935                   (this is the path from LOOP->exit to epilog_loop->preheader).
2936
2937                   The new definitions of the ivs are placed in LOOP->exit.
2938                   The phi args associated with the edge UPDATE_E in the bb
2939                   UPDATE_E->dest are updated accordingly.
2940
2941      Assumption 1: Like the rest of the vectorizer, this function assumes
2942      a single loop exit that has a single predecessor.
2943
2944      Assumption 2: The phi nodes in the LOOP header and in update_bb are
2945      organized in the same order.
2946
2947      Assumption 3: The access function of the ivs is simple enough (see
2948      vect_can_advance_ivs_p).  This assumption will be relaxed in the future.
2949
2950      Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
2951      coming out of LOOP on which the ivs of LOOP are used (this is the path
2952      that leads to the epilog loop; other paths skip the epilog loop).  This
2953      path starts with the edge UPDATE_E, and its destination (denoted update_bb)
2954      needs to have its phis updated.
2955  */
2956
2957 static void
2958 vect_update_ivs_after_vectorizer (struct loop *loop, tree niters, edge update_e)
2959 {
2960   basic_block exit_bb = loop->exit_edges[0]->dest;
2961   tree phi, phi1;
2962   basic_block update_bb = update_e->dest;
2963
2964   /* gcc_assert (vect_can_advance_ivs_p (loop)); */
2965
2966   /* Make sure there exists a single-predecessor exit bb:  */
2967   gcc_assert (EDGE_COUNT (exit_bb->preds) == 1);
2968
2969   for (phi = phi_nodes (loop->header), phi1 = phi_nodes (update_bb);
2970        phi && phi1;
2971        phi = PHI_CHAIN (phi), phi1 = PHI_CHAIN (phi1))
2972     {
2973       tree access_fn = NULL;
2974       tree evolution_part;
2975       tree init_expr;
2976       tree step_expr;
2977       tree var, stmt, ni, ni_name;
2978       block_stmt_iterator last_bsi;
2979
2980       /* Skip virtual phi's.  */
2981       if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
2982         {
2983           if (vect_debug_details (NULL))
2984             fprintf (dump_file, "virtual phi. skip.");
2985           continue;
2986         }
2987
2988       access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi));
2989       gcc_assert (access_fn);
2990       evolution_part =
2991          unshare_expr (evolution_part_in_loop_num (access_fn, loop->num));
2992       gcc_assert (evolution_part != NULL_TREE);
2993
2994       /* FORNOW: We do not support IVs whose evolution function is a polynomial
2995          of degree >= 2 or exponential.  */
2996       gcc_assert (!tree_is_chrec (evolution_part));
2997
2998       step_expr = evolution_part;
2999       init_expr = unshare_expr (initial_condition (access_fn));
3000
3001       ni = build2 (PLUS_EXPR, TREE_TYPE (init_expr),
3002                   build2 (MULT_EXPR, TREE_TYPE (niters),
3003                        niters, step_expr), init_expr);
3004
3005       var = create_tmp_var (TREE_TYPE (init_expr), "tmp");
3006       add_referenced_tmp_var (var);
3007
3008       ni_name = force_gimple_operand (ni, &stmt, false, var);
3009
3010       /* Insert stmt into exit_bb.  */
3011       last_bsi = bsi_last (exit_bb);
3012       if (stmt)
3013         bsi_insert_before (&last_bsi, stmt, BSI_SAME_STMT);
3014
3015       /* Fix phi expressions in the successor bb.  */
3016       gcc_assert (PHI_ARG_DEF_FROM_EDGE (phi1, update_e) ==
3017                   PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0)));
3018       SET_PHI_ARG_DEF (phi1, phi_arg_from_edge (phi1, update_e), ni_name);
3019     }
3020 }
3021
3022
3023 /* Function vect_do_peeling_for_loop_bound
3024
3025    Peel the last iterations of the loop represented by LOOP_VINFO.
3026    The peeled iterations form a new epilog loop.  Given that the loop now
3027    iterates NITERS times, the new epilog loop iterates
3028    NITERS % VECTORIZATION_FACTOR times.
3029
3030    The original loop will later be made to iterate
3031    NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO).  */
3032
3033 static void
3034 vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio,
3035                                 struct loops *loops)
3036 {
3037
3038   tree ni_name, ratio_mult_vf_name;
3039   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3040   struct loop *new_loop;
3041   edge update_e;
3042 #ifdef ENABLE_CHECKING
3043   int loop_num;
3044 #endif
3045
3046   if (vect_debug_details (NULL))
3047     fprintf (dump_file, "\n<<vect_transtorm_for_unknown_loop_bound>>\n");
3048
3049   /* Generate the following variables on the preheader of original loop:
3050
3051      ni_name = number of iteration the original loop executes
3052      ratio = ni_name / vf
3053      ratio_mult_vf_name = ratio * vf  */
3054   vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
3055                                    &ratio_mult_vf_name, ratio);
3056
3057   /* Update loop info.  */
3058   loop->pre_header = loop_preheader_edge (loop)->src;
3059   loop->pre_header_edges[0] = loop_preheader_edge (loop);
3060
3061 #ifdef ENABLE_CHECKING
3062   loop_num  = loop->num;
3063 #endif
3064   new_loop = slpeel_tree_peel_loop_to_edge (loop, loops, loop->exit_edges[0],
3065                                             ratio_mult_vf_name, ni_name, false);
3066 #ifdef ENABLE_CHECKING
3067   gcc_assert (new_loop);
3068   gcc_assert (loop_num == loop->num);
3069   slpeel_verify_cfg_after_peeling (loop, new_loop);
3070 #endif
3071
3072   /* A guard that controls whether the new_loop is to be executed or skipped
3073      is placed in LOOP->exit.  LOOP->exit therefore has two successors - one
3074      is the preheader of NEW_LOOP, where the IVs from LOOP are used.  The other
3075      is a bb after NEW_LOOP, where these IVs are not used.  Find the edge that
3076      is on the path where the LOOP IVs are used and need to be updated.  */
3077
3078   if (EDGE_PRED (new_loop->pre_header, 0)->src == loop->exit_edges[0]->dest)
3079     update_e = EDGE_PRED (new_loop->pre_header, 0);
3080   else
3081     update_e = EDGE_PRED (new_loop->pre_header, 1);
3082
3083   /* Update IVs of original loop as if they were advanced
3084      by ratio_mult_vf_name steps.  */
3085   vect_update_ivs_after_vectorizer (loop, ratio_mult_vf_name, update_e);
3086
3087   /* After peeling we have to reset scalar evolution analyzer.  */
3088   scev_reset ();
3089
3090   return;
3091 }
3092
3093
3094 /* Function vect_gen_niters_for_prolog_loop
3095
3096    Set the number of iterations for the loop represented by LOOP_VINFO
3097    to the minimum between NITERS (the original iteration count of the loop)
3098    and the misalignment of DR - the first data reference recorded in
3099    LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO).  As a result, after the execution of
3100    this loop, the data reference DR will refer to an aligned location.  */
3101
3102 static tree
3103 vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree niters)
3104 {
3105   struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
3106   int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3107   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3108   tree var, stmt;
3109   tree iters, iters_name;
3110   edge pe;
3111   basic_block new_bb;
3112   tree dr_stmt = DR_STMT (dr);
3113   stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
3114   tree start_addr, byte_miss_align, elem_miss_align;
3115   int vec_type_align =
3116     GET_MODE_ALIGNMENT (TYPE_MODE (STMT_VINFO_VECTYPE (stmt_info)))
3117                                                         / BITS_PER_UNIT;
3118   tree tmp1, tmp2;
3119   tree new_stmt_list = NULL_TREE;
3120
3121   start_addr = vect_create_addr_base_for_vector_ref (dr_stmt,
3122                                                      &new_stmt_list, NULL_TREE);
3123
3124   pe = loop_preheader_edge (loop);
3125   new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list);
3126   if (new_bb)
3127     add_bb_to_loop (new_bb, EDGE_PRED (new_bb, 0)->src->loop_father);
3128
3129   byte_miss_align =
3130         build (BIT_AND_EXPR, integer_type_node, start_addr,
3131                   build (MINUS_EXPR, integer_type_node,
3132                          build_int_cst (unsigned_type_node,
3133                                         vec_type_align), integer_one_node));
3134   tmp1 = build_int_cst (unsigned_type_node, vec_type_align/vf);
3135   elem_miss_align = build (FLOOR_DIV_EXPR, integer_type_node,
3136                            byte_miss_align, tmp1);
3137
3138   tmp2 =
3139         build (BIT_AND_EXPR, integer_type_node,
3140           build (MINUS_EXPR, integer_type_node,
3141                 build_int_cst (unsigned_type_node, vf), elem_miss_align),
3142           build (MINUS_EXPR, integer_type_node,
3143                 build_int_cst (unsigned_type_node, vf), integer_one_node));
3144
3145   iters = build2 (MIN_EXPR, TREE_TYPE (tmp2), tmp2, niters);
3146   var = create_tmp_var (TREE_TYPE (iters), "iters");
3147   add_referenced_tmp_var (var);
3148   iters_name = force_gimple_operand (iters, &stmt, false, var);
3149
3150   /* Insert stmt on loop preheader edge.  */
3151   pe = loop_preheader_edge (loop);
3152   if (stmt)
3153     new_bb = bsi_insert_on_edge_immediate (pe, stmt);
3154   if (new_bb)
3155     add_bb_to_loop (new_bb, EDGE_PRED (new_bb, 0)->src->loop_father);
3156
3157   return iters_name;
3158 }
3159
3160
3161 /* Function vect_update_inits_of_dr
3162
3163    NITERS iterations were peeled from LOOP.  DR represents a data reference
3164    in LOOP.  This function updates the information recorded in DR to
3165    account for the fact that the first NITERS iterations had already been
3166    executed.  Specifically, it updates the initial_condition of the
3167    access_function of DR.  */
3168
3169 static void
3170 vect_update_inits_of_dr (struct data_reference *dr, struct loop *loop,
3171                          tree niters)
3172 {
3173   tree access_fn = DR_ACCESS_FN (dr, 0);
3174   tree init, init_new, step;
3175
3176   step = evolution_part_in_loop_num (access_fn, loop->num);
3177   init = initial_condition (access_fn);
3178
3179   init_new = build (PLUS_EXPR, TREE_TYPE (init),
3180                   build (MULT_EXPR, TREE_TYPE (niters),
3181                          niters, step), init);
3182   DR_ACCESS_FN (dr, 0) = chrec_replace_initial_condition (access_fn, init_new);
3183
3184   return;
3185 }
3186
3187
3188 /* Function vect_update_inits_of_drs
3189
3190    NITERS iterations were peeled from the loop represented by LOOP_VINFO.
3191    This function updates the information recorded for the data references in
3192    the loop to account for the fact that the first NITERS iterations had
3193    already been executed.  Specifically, it updates the initial_condition of the
3194    access_function of all the data_references in the loop.  */
3195
3196 static void
3197 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
3198 {
3199   unsigned int i;
3200   varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
3201   varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
3202   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3203
3204   if (dump_file && (dump_flags & TDF_DETAILS))
3205     fprintf (dump_file, "\n<<vect_update_inits_of_dr>>\n");
3206
3207   for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
3208     {
3209       struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
3210       vect_update_inits_of_dr (dr, loop, niters);
3211     }
3212
3213   for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
3214     {
3215       struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
3216       vect_update_inits_of_dr (dr, loop, niters);
3217     }
3218 }
3219
3220
3221 /* Function vect_do_peeling_for_alignment
3222
3223    Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
3224    'niters' is set to the misalignment of one of the data references in the
3225    loop, thereby forcing it to refer to an aligned location at the beginning
3226    of the execution of this loop.  The data reference for which we are
3227    peeling is recorded in LOOP_VINFO_UNALIGNED_DR.  */
3228
3229 static void
3230 vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, struct loops *loops)
3231 {
3232   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3233   tree niters_of_prolog_loop, ni_name;
3234   tree n_iters;
3235   struct loop *new_loop;
3236
3237   if (vect_debug_details (NULL))
3238     fprintf (dump_file, "\n<<vect_do_peeling_for_alignment>>\n");
3239
3240   ni_name = vect_build_loop_niters (loop_vinfo);
3241   niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name);
3242
3243   /* Peel the prolog loop and iterate it niters_of_prolog_loop.  */
3244   new_loop =
3245         slpeel_tree_peel_loop_to_edge (loop, loops, loop_preheader_edge (loop),
3246                                        niters_of_prolog_loop, ni_name, true);
3247 #ifdef ENABLE_CHECKING
3248   gcc_assert (new_loop);
3249   slpeel_verify_cfg_after_peeling (new_loop, loop);
3250 #endif
3251
3252   /* Update number of times loop executes.  */
3253   n_iters = LOOP_VINFO_NITERS (loop_vinfo);
3254   LOOP_VINFO_NITERS (loop_vinfo) =
3255     build (MINUS_EXPR, integer_type_node, n_iters, niters_of_prolog_loop);
3256
3257   /* Update the init conditions of the access functions of all data refs.  */
3258   vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop);
3259
3260   /* After peeling we have to reset scalar evolution analyzer.  */
3261   scev_reset ();
3262
3263   return;
3264 }
3265
3266
3267 /* Function vect_transform_loop.
3268
3269    The analysis phase has determined that the loop is vectorizable.
3270    Vectorize the loop - created vectorized stmts to replace the scalar
3271    stmts in the loop, and update the loop exit condition.  */
3272
3273 static void
3274 vect_transform_loop (loop_vec_info loop_vinfo,
3275                      struct loops *loops ATTRIBUTE_UNUSED)
3276 {
3277   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3278   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
3279   int nbbs = loop->num_nodes;
3280   block_stmt_iterator si;
3281   int i;
3282   tree ratio = NULL;
3283   int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3284
3285   if (vect_debug_details (NULL))
3286     fprintf (dump_file, "\n<<vec_transform_loop>>\n");
3287
3288
3289   /* Peel the loop if there are data refs with unknown alignment.
3290      Only one data ref with unknown store is allowed.  */
3291
3292   if (LOOP_DO_PEELING_FOR_ALIGNMENT (loop_vinfo))
3293     vect_do_peeling_for_alignment (loop_vinfo, loops);
3294
3295   /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
3296      compile time constant), or it is a constant that doesn't divide by the
3297      vectorization factor, then an epilog loop needs to be created.
3298      We therefore duplicate the loop: the original loop will be vectorized,
3299      and will compute the first (n/VF) iterations. The second copy of the loop
3300      will remain scalar and will compute the remaining (n%VF) iterations.
3301      (VF is the vectorization factor).  */
3302
3303   if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3304       || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3305           && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0))
3306     vect_do_peeling_for_loop_bound (loop_vinfo, &ratio, loops);
3307
3308   /* 1) Make sure the loop header has exactly two entries
3309      2) Make sure we have a preheader basic block.  */
3310
3311   gcc_assert (EDGE_COUNT (loop->header->preds) == 2);
3312
3313   loop_split_edge_with (loop_preheader_edge (loop), NULL);
3314
3315
3316   /* FORNOW: the vectorizer supports only loops which body consist
3317      of one basic block (header + empty latch). When the vectorizer will
3318      support more involved loop forms, the order by which the BBs are
3319      traversed need to be reconsidered.  */
3320
3321   for (i = 0; i < nbbs; i++)
3322     {
3323       basic_block bb = bbs[i];
3324
3325       for (si = bsi_start (bb); !bsi_end_p (si);)
3326         {
3327           tree stmt = bsi_stmt (si);
3328           stmt_vec_info stmt_info;
3329           bool is_store;
3330
3331           if (vect_debug_details (NULL))
3332             {
3333               fprintf (dump_file, "------>vectorizing statement: ");
3334               print_generic_expr (dump_file, stmt, TDF_SLIM);
3335             }
3336           stmt_info = vinfo_for_stmt (stmt);
3337           gcc_assert (stmt_info);
3338           if (!STMT_VINFO_RELEVANT_P (stmt_info))
3339             {
3340               bsi_next (&si);
3341               continue;
3342             }
3343 #ifdef ENABLE_CHECKING
3344           /* FORNOW: Verify that all stmts operate on the same number of
3345                      units and no inner unrolling is necessary.  */
3346           gcc_assert
3347                 (GET_MODE_NUNITS (TYPE_MODE (STMT_VINFO_VECTYPE (stmt_info)))
3348                  == vectorization_factor);
3349 #endif
3350           /* -------- vectorize statement ------------ */
3351           if (vect_debug_details (NULL))
3352             fprintf (dump_file, "transform statement.");
3353
3354           is_store = vect_transform_stmt (stmt, &si);
3355           if (is_store)
3356             {
3357               /* free the attached stmt_vec_info and remove the stmt.  */
3358               stmt_ann_t ann = stmt_ann (stmt);
3359               free (stmt_info);
3360               set_stmt_info (ann, NULL);
3361               bsi_remove (&si);
3362               continue;
3363             }
3364
3365           bsi_next (&si);
3366         }                       /* stmts in BB */
3367     }                           /* BBs in loop */
3368
3369   vect_transform_loop_bound (loop_vinfo, ratio);
3370
3371   if (vect_debug_details (loop))
3372     fprintf (dump_file,"Success! loop vectorized.");
3373   if (vect_debug_stats (loop))
3374     fprintf (dump_file, "LOOP VECTORIZED.");
3375 }
3376
3377
3378 /* Function vect_is_simple_use.
3379
3380    Input:
3381    LOOP - the loop that is being vectorized.
3382    OPERAND - operand of a stmt in LOOP.
3383    DEF - the defining stmt in case OPERAND is an SSA_NAME.
3384
3385    Returns whether a stmt with OPERAND can be vectorized.
3386    Supportable operands are constants, loop invariants, and operands that are
3387    defined by the current iteration of the loop. Unsupportable operands are
3388    those that are defined by a previous iteration of the loop (as is the case
3389    in reduction/induction computations).  */
3390
3391 static bool
3392 vect_is_simple_use (tree operand, struct loop *loop, tree *def)
3393 {
3394   tree def_stmt;
3395   basic_block bb;
3396
3397   if (def)
3398     *def = NULL_TREE;
3399
3400   if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
3401     return true;
3402
3403   if (TREE_CODE (operand) != SSA_NAME)
3404     return false;
3405
3406   def_stmt = SSA_NAME_DEF_STMT (operand);
3407   if (def_stmt == NULL_TREE )
3408     {
3409       if (vect_debug_details (NULL))
3410         fprintf (dump_file, "no def_stmt.");
3411       return false;
3412     }
3413
3414   /* empty stmt is expected only in case of a function argument.
3415      (Otherwise - we expect a phi_node or a modify_expr).  */
3416   if (IS_EMPTY_STMT (def_stmt))
3417     {
3418       tree arg = TREE_OPERAND (def_stmt, 0);
3419       if (TREE_CODE (arg) == INTEGER_CST || TREE_CODE (arg) == REAL_CST)
3420         return true;
3421       if (vect_debug_details (NULL))
3422         {
3423           fprintf (dump_file, "Unexpected empty stmt: ");
3424           print_generic_expr (dump_file, def_stmt, TDF_SLIM);
3425         }
3426       return false;
3427     }
3428
3429   /* phi_node inside the loop indicates an induction/reduction pattern.
3430      This is not supported yet.  */
3431   bb = bb_for_stmt (def_stmt);
3432   if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb))
3433     {
3434       if (vect_debug_details (NULL))
3435         fprintf (dump_file, "reduction/induction - unsupported.");
3436       return false; /* FORNOW: not supported yet.  */
3437     }
3438
3439   /* Expecting a modify_expr or a phi_node.  */
3440   if (TREE_CODE (def_stmt) == MODIFY_EXPR
3441       || TREE_CODE (def_stmt) == PHI_NODE)
3442     {
3443       if (def)
3444         *def = def_stmt;
3445       return true;
3446     }
3447
3448   return false;
3449 }
3450
3451
3452 /* Function vect_analyze_operations.
3453
3454    Scan the loop stmts and make sure they are all vectorizable.  */
3455
3456 static bool
3457 vect_analyze_operations (loop_vec_info loop_vinfo)
3458 {
3459   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3460   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
3461   int nbbs = loop->num_nodes;
3462   block_stmt_iterator si;
3463   int vectorization_factor = 0;
3464   int i;
3465   bool ok;
3466   tree scalar_type;
3467
3468   if (vect_debug_details (NULL))
3469     fprintf (dump_file, "\n<<vect_analyze_operations>>\n");
3470
3471   for (i = 0; i < nbbs; i++)
3472     {
3473       basic_block bb = bbs[i];
3474
3475       for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
3476         {
3477           tree stmt = bsi_stmt (si);
3478           int nunits;
3479           stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3480           tree vectype;
3481
3482           if (vect_debug_details (NULL))
3483             {
3484               fprintf (dump_file, "==> examining statement: ");
3485               print_generic_expr (dump_file, stmt, TDF_SLIM);
3486             }
3487
3488           gcc_assert (stmt_info);
3489
3490           /* skip stmts which do not need to be vectorized.
3491              this is expected to include:
3492              - the COND_EXPR which is the loop exit condition
3493              - any LABEL_EXPRs in the loop
3494              - computations that are used only for array indexing or loop
3495              control  */
3496
3497           if (!STMT_VINFO_RELEVANT_P (stmt_info))
3498             {
3499               if (vect_debug_details (NULL))
3500                 fprintf (dump_file, "irrelevant.");
3501               continue;
3502             }
3503
3504           if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
3505             {
3506               if (vect_debug_stats (loop) || vect_debug_details (loop))
3507                 {
3508                   fprintf (dump_file, "not vectorized: vector stmt in loop:");
3509                   print_generic_expr (dump_file, stmt, TDF_SLIM);
3510                 }
3511               return false;
3512             }
3513
3514           if (STMT_VINFO_DATA_REF (stmt_info))
3515             scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info)));
3516           else if (TREE_CODE (stmt) == MODIFY_EXPR)
3517             scalar_type = TREE_TYPE (TREE_OPERAND (stmt, 0));
3518           else
3519             scalar_type = TREE_TYPE (stmt);
3520
3521           if (vect_debug_details (NULL))
3522             {
3523               fprintf (dump_file, "get vectype for scalar type:  ");
3524               print_generic_expr (dump_file, scalar_type, TDF_SLIM);
3525             }
3526
3527           vectype = get_vectype_for_scalar_type (scalar_type);
3528           if (!vectype)
3529             {
3530               if (vect_debug_stats (loop) || vect_debug_details (loop))
3531                 {
3532                   fprintf (dump_file, "not vectorized: unsupported data-type ");
3533                   print_generic_expr (dump_file, scalar_type, TDF_SLIM);
3534                 }
3535               return false;
3536             }
3537
3538           if (vect_debug_details (NULL))
3539             {
3540               fprintf (dump_file, "vectype: ");
3541               print_generic_expr (dump_file, vectype, TDF_SLIM);
3542             }
3543           STMT_VINFO_VECTYPE (stmt_info) = vectype;
3544
3545           ok = (vectorizable_operation (stmt, NULL, NULL)
3546                 || vectorizable_assignment (stmt, NULL, NULL)
3547                 || vectorizable_load (stmt, NULL, NULL)
3548                 || vectorizable_store (stmt, NULL, NULL));
3549
3550           if (!ok)
3551             {
3552               if (vect_debug_stats (loop) || vect_debug_details (loop))
3553                 {
3554                   fprintf (dump_file, "not vectorized: stmt not supported: ");
3555                   print_generic_expr (dump_file, stmt, TDF_SLIM);
3556                 }
3557               return false;
3558             }
3559
3560           nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
3561           if (vect_debug_details (NULL))
3562             fprintf (dump_file, "nunits = %d", nunits);
3563
3564           if (vectorization_factor)
3565             {
3566               /* FORNOW: don't allow mixed units.
3567                  This restriction will be relaxed in the future.  */
3568               if (nunits != vectorization_factor)
3569                 {
3570                   if (vect_debug_stats (loop) || vect_debug_details (loop))
3571                     fprintf (dump_file, "not vectorized: mixed data-types");
3572                   return false;
3573                 }
3574             }
3575           else
3576             vectorization_factor = nunits;
3577
3578 #ifdef ENABLE_CHECKING
3579           gcc_assert (GET_MODE_SIZE (TYPE_MODE (scalar_type))
3580                         * vectorization_factor == UNITS_PER_SIMD_WORD);
3581 #endif
3582         }
3583     }
3584
3585   /* TODO: Analyze cost. Decide if worth while to vectorize.  */
3586
3587   if (vectorization_factor <= 1)
3588     {
3589       if (vect_debug_stats (loop) || vect_debug_details (loop))
3590         fprintf (dump_file, "not vectorized: unsupported data-type");
3591       return false;
3592     }
3593   LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
3594
3595   if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) && vect_debug_details (NULL))
3596     fprintf (dump_file,
3597         "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
3598         vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));
3599
3600   if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3601       || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0)
3602     {
3603       if (vect_debug_stats (loop) || vect_debug_details (loop))
3604         fprintf (dump_file, "epilog loop required.");
3605       if (!vect_can_advance_ivs_p (loop))
3606         {
3607           if (vect_debug_stats (loop) || vect_debug_details (loop))
3608             fprintf (dump_file, "not vectorized: can't create epilog loop 1.");
3609           return false;
3610         }
3611       if (!slpeel_can_duplicate_loop_p (loop, loop->exit_edges[0]))
3612         {
3613           if (vect_debug_stats (loop) || vect_debug_details (loop))
3614             fprintf (dump_file, "not vectorized: can't create epilog loop 2.");
3615           return false;
3616         }
3617     }
3618
3619   return true;
3620 }
3621
3622
3623 /* Function exist_non_indexing_operands_for_use_p
3624
3625    USE is one of the uses attached to STMT. Check if USE is
3626    used in STMT for anything other than indexing an array.  */
3627
3628 static bool
3629 exist_non_indexing_operands_for_use_p (tree use, tree stmt)
3630 {
3631   tree operand;
3632   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3633
3634   /* USE corresponds to some operand in STMT. If there is no data
3635      reference in STMT, then any operand that corresponds to USE
3636      is not indexing an array.  */
3637   if (!STMT_VINFO_DATA_REF (stmt_info))
3638     return true;
3639
3640   /* STMT has a data_ref. FORNOW this means that its of one of
3641      the following forms:
3642      -1- ARRAY_REF = var
3643      -2- var = ARRAY_REF
3644      (This should have been verified in analyze_data_refs).
3645
3646      'var' in the second case corresponds to a def, not a use,
3647      so USE cannot correspond to any operands that are not used
3648      for array indexing.
3649
3650      Therefore, all we need to check is if STMT falls into the
3651      first case, and whether var corresponds to USE.  */
3652
3653   if (TREE_CODE (TREE_OPERAND (stmt, 0)) == SSA_NAME)
3654     return false;
3655
3656   operand = TREE_OPERAND (stmt, 1);
3657
3658   if (TREE_CODE (operand) != SSA_NAME)
3659     return false;
3660
3661   if (operand == use)
3662     return true;
3663
3664   return false;
3665 }
3666
3667
3668 /* Function vect_is_simple_iv_evolution.
3669
3670    FORNOW: A simple evolution of an induction variables in the loop is
3671    considered a polynomial evolution with constant step.  */
3672
3673 static bool
3674 vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
3675                                 tree * step, bool strict)
3676 {
3677   tree init_expr;
3678   tree step_expr;
3679
3680   tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
3681
3682   /* When there is no evolution in this loop, the evolution function
3683      is not "simple".  */
3684   if (evolution_part == NULL_TREE)
3685     return false;
3686
3687   /* When the evolution is a polynomial of degree >= 2
3688      the evolution function is not "simple".  */
3689   if (tree_is_chrec (evolution_part))
3690     return false;
3691
3692   step_expr = evolution_part;
3693   init_expr = unshare_expr (initial_condition (access_fn));
3694
3695   if (vect_debug_details (NULL))
3696     {
3697       fprintf (dump_file, "step: ");
3698       print_generic_expr (dump_file, step_expr, TDF_SLIM);
3699       fprintf (dump_file, ",  init: ");
3700       print_generic_expr (dump_file, init_expr, TDF_SLIM);
3701     }
3702
3703   *init = init_expr;
3704   *step = step_expr;
3705
3706   if (TREE_CODE (step_expr) != INTEGER_CST)
3707     {
3708       if (vect_debug_details (NULL))
3709         fprintf (dump_file, "step unknown.");
3710       return false;
3711     }
3712
3713   if (strict)
3714     if (!integer_onep (step_expr))
3715       {
3716         if (vect_debug_details (NULL))
3717           print_generic_expr (dump_file, step_expr, TDF_SLIM);
3718         return false;
3719       }
3720
3721   return true;
3722 }
3723
3724
3725 /* Function vect_analyze_scalar_cycles.
3726
3727    Examine the cross iteration def-use cycles of scalar variables, by
3728    analyzing the loop (scalar) PHIs; verify that the cross iteration def-use
3729    cycles that they represent do not impede vectorization.
3730
3731    FORNOW: Reduction as in the following loop, is not supported yet:
3732               loop1:
3733               for (i=0; i<N; i++)
3734                  sum += a[i];
3735            The cross-iteration cycle corresponding to variable 'sum' will be
3736            considered too complicated and will impede vectorization.
3737
3738    FORNOW: Induction as in the following loop, is not supported yet:
3739               loop2:
3740               for (i=0; i<N; i++)
3741                  a[i] = i;
3742
3743            However, the following loop *is* vectorizable:
3744               loop3:
3745               for (i=0; i<N; i++)
3746                  a[i] = b[i];
3747
3748            In both loops there exists a def-use cycle for the variable i:
3749               loop: i_2 = PHI (i_0, i_1)
3750                     a[i_2] = ...;
3751                     i_1 = i_2 + 1;
3752                     GOTO loop;
3753
3754            The evolution of the above cycle is considered simple enough,
3755            however, we also check that the cycle does not need to be
3756            vectorized, i.e - we check that the variable that this cycle
3757            defines is only used for array indexing or in stmts that do not
3758            need to be vectorized. This is not the case in loop2, but it
3759            *is* the case in loop3.  */
3760
3761 static bool
3762 vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
3763 {
3764   tree phi;
3765   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3766   basic_block bb = loop->header;
3767   tree dummy;
3768
3769   if (vect_debug_details (NULL))
3770     fprintf (dump_file, "\n<<vect_analyze_scalar_cycles>>\n");
3771
3772   for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
3773     {
3774       tree access_fn = NULL;
3775
3776       if (vect_debug_details (NULL))
3777         {
3778           fprintf (dump_file, "Analyze phi: ");
3779           print_generic_expr (dump_file, phi, TDF_SLIM);
3780         }
3781
3782       /* Skip virtual phi's. The data dependences that are associated with
3783          virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */
3784
3785       if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
3786         {
3787           if (vect_debug_details (NULL))
3788             fprintf (dump_file, "virtual phi. skip.");
3789           continue;
3790         }
3791
3792       /* Analyze the evolution function.  */
3793
3794       /* FORNOW: The only scalar cross-iteration cycles that we allow are
3795          those of loop induction variables; This property is verified here.
3796
3797          Furthermore, if that induction variable is used in an operation
3798          that needs to be vectorized (i.e, is not solely used to index
3799          arrays and check the exit condition) - we do not support its
3800          vectorization yet. This property is verified in vect_is_simple_use,
3801          during vect_analyze_operations.  */
3802
3803       access_fn = /* instantiate_parameters
3804                      (loop,*/
3805          analyze_scalar_evolution (loop, PHI_RESULT (phi));
3806
3807       if (!access_fn)
3808         {
3809           if (vect_debug_stats (loop) || vect_debug_details (loop))
3810             fprintf (dump_file, "not vectorized: unsupported scalar cycle.");
3811           return false;
3812         }
3813
3814       if (vect_debug_details (NULL))
3815         {
3816            fprintf (dump_file, "Access function of PHI: ");
3817            print_generic_expr (dump_file, access_fn, TDF_SLIM);
3818         }
3819
3820       if (!vect_is_simple_iv_evolution (loop->num, access_fn, &dummy,
3821                                         &dummy, false))
3822         {
3823           if (vect_debug_stats (loop) || vect_debug_details (loop))
3824             fprintf (dump_file, "not vectorized: unsupported scalar cycle.");
3825           return false;
3826         }
3827     }
3828
3829   return true;
3830 }
3831
3832
3833 /* Function vect_analyze_data_ref_dependence.
3834
3835    Return TRUE if there (might) exist a dependence between a memory-reference
3836    DRA and a memory-reference DRB.  */
3837
3838 static bool
3839 vect_analyze_data_ref_dependence (struct data_reference *dra,
3840                                   struct data_reference *drb,
3841                                   struct loop *loop)
3842 {
3843   bool differ_p;
3844   struct data_dependence_relation *ddr;
3845
3846   if (!array_base_name_differ_p (dra, drb, &differ_p))
3847     {
3848       if (vect_debug_stats (loop) || vect_debug_details (loop))
3849         {
3850           fprintf (dump_file,
3851                 "not vectorized: can't determine dependence between: ");
3852           print_generic_expr (dump_file, DR_REF (dra), TDF_SLIM);
3853           fprintf (dump_file, " and ");
3854           print_generic_expr (dump_file, DR_REF (drb), TDF_SLIM);
3855         }
3856       return true;
3857     }
3858
3859   if (differ_p)
3860     return false;
3861
3862   ddr = initialize_data_dependence_relation (dra, drb);
3863   compute_affine_dependence (ddr);
3864
3865   if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
3866     return false;
3867
3868   if (vect_debug_stats (loop) || vect_debug_details (loop))
3869     {
3870       fprintf (dump_file,
3871         "not vectorized: possible dependence between data-refs ");
3872       print_generic_expr (dump_file, DR_REF (dra), TDF_SLIM);
3873       fprintf (dump_file, " and ");
3874       print_generic_expr (dump_file, DR_REF (drb), TDF_SLIM);
3875     }
3876
3877   return true;
3878 }
3879
3880
3881 /* Function vect_analyze_data_ref_dependences.
3882
3883    Examine all the data references in the loop, and make sure there do not
3884    exist any data dependences between them.
3885
3886    TODO: dependences which distance is greater than the vectorization factor
3887          can be ignored.  */
3888
3889 static bool
3890 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
3891 {
3892   unsigned int i, j;
3893   varray_type loop_write_refs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
3894   varray_type loop_read_refs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
3895   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3896
3897   /* Examine store-store (output) dependences.  */
3898
3899   if (vect_debug_details (NULL))
3900     fprintf (dump_file, "\n<<vect_analyze_dependences>>\n");
3901
3902   if (vect_debug_details (NULL))
3903     fprintf (dump_file, "compare all store-store pairs.");
3904
3905   for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_refs); i++)
3906     {
3907       for (j = i + 1; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
3908         {
3909           struct data_reference *dra =
3910             VARRAY_GENERIC_PTR (loop_write_refs, i);
3911           struct data_reference *drb =
3912             VARRAY_GENERIC_PTR (loop_write_refs, j);
3913           if (vect_analyze_data_ref_dependence (dra, drb, loop))
3914             return false;
3915         }
3916     }
3917
3918   /* Examine load-store (true/anti) dependences.  */
3919
3920   if (vect_debug_details (NULL))
3921     fprintf (dump_file, "compare all load-store pairs.");
3922
3923   for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_refs); i++)
3924     {
3925       for (j = 0; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
3926         {
3927           struct data_reference *dra = VARRAY_GENERIC_PTR (loop_read_refs, i);
3928           struct data_reference *drb =
3929             VARRAY_GENERIC_PTR (loop_write_refs, j);
3930           if (vect_analyze_data_ref_dependence (dra, drb, loop))
3931             return false;
3932         }
3933     }
3934
3935   return true;
3936 }
3937
3938
3939 /* Function vect_get_first_index.
3940
3941    REF is a data reference.
3942    If it is an ARRAY_REF: if its lower bound is simple enough,
3943    put it in ARRAY_FIRST_INDEX and return TRUE; otherwise - return FALSE.
3944    If it is not an ARRAY_REF: REF has no "first index";
3945    ARRAY_FIRST_INDEX in zero, and the function returns TRUE.  */
3946
3947 static bool
3948 vect_get_first_index (tree ref, tree *array_first_index)
3949 {
3950   tree array_start;
3951
3952   if (TREE_CODE (ref) != ARRAY_REF)
3953     *array_first_index = size_zero_node;
3954   else
3955     {
3956       array_start = array_ref_low_bound (ref);
3957       if (!host_integerp (array_start,0))
3958         {
3959           if (vect_debug_details (NULL))
3960             {
3961               fprintf (dump_file, "array min val not simple integer cst.");
3962               print_generic_expr (dump_file, array_start, TDF_DETAILS);
3963             }
3964           return false;
3965         }
3966       *array_first_index = array_start;
3967     }
3968
3969   return true;
3970 }
3971
3972
3973 /* Function vect_compute_array_base_alignment.
3974    A utility function of vect_compute_array_ref_alignment.
3975
3976    Compute the misalignment of ARRAY in bits.
3977
3978    Input:
3979    ARRAY - an array_ref (possibly multidimensional) of type ARRAY_TYPE.
3980    VECTYPE - we are interested in the misalignment modulo the size of vectype.
3981              if NULL: don't compute misalignment, just return the base of ARRAY.
3982    PREV_DIMENSIONS - initialized to one.
3983    MISALIGNMENT - the computed misalignment in bits.
3984
3985    Output:
3986    If VECTYPE is not NULL:
3987      Return NULL_TREE if the misalignment cannot be computed. Otherwise, return
3988      the base of the array, and put the computed misalignment in MISALIGNMENT.
3989    If VECTYPE is NULL:
3990      Return the base of the array.
3991
3992    For a[idx_N]...[idx_2][idx_1][idx_0], the address of
3993    a[idx_N]...[idx_2][idx_1] is
3994    {&a + idx_1 * dim_0 + idx_2 * dim_0 * dim_1 + ...
3995     ... + idx_N * dim_0 * ... * dim_N-1}.
3996    (The misalignment of &a is not checked here).
3997    Note, that every term contains dim_0, therefore, if dim_0 is a
3998    multiple of NUNITS, the whole sum is a multiple of NUNITS.
3999    Otherwise, if idx_1 is constant, and dim_1 is a multiple of
4000    NUINTS, we can say that the misalignment of the sum is equal to
4001    the misalignment of {idx_1 * dim_0}.  If idx_1 is not constant,
4002    we can't determine this array misalignment, and we return
4003    false.
4004    We proceed recursively in this manner, accumulating total misalignment
4005    and the multiplication of previous dimensions for correct misalignment
4006    calculation.  */
4007
4008 static tree
4009 vect_compute_array_base_alignment (tree array,
4010                                    tree vectype,
4011                                    tree *prev_dimensions,
4012                                    tree *misalignment)
4013 {
4014   tree index;
4015   tree domain;
4016   tree dimension_size;
4017   tree mis;
4018   tree bits_per_vectype;
4019   tree bits_per_vectype_unit;
4020
4021   /* The 'stop condition' of the recursion.  */
4022   if (TREE_CODE (array) != ARRAY_REF)
4023     return array;
4024
4025   if (!vectype)
4026     /* Just get the base decl.  */
4027     return vect_compute_array_base_alignment
4028                 (TREE_OPERAND (array, 0), NULL, NULL, NULL);
4029
4030   if (!host_integerp (*misalignment, 1) || TREE_OVERFLOW (*misalignment) ||
4031       !host_integerp (*prev_dimensions, 1) || TREE_OVERFLOW (*prev_dimensions))
4032     return NULL_TREE;
4033
4034   domain = TYPE_DOMAIN (TREE_TYPE (array));
4035   dimension_size =
4036         int_const_binop (PLUS_EXPR,
4037                 int_const_binop (MINUS_EXPR, TYPE_MAX_VALUE (domain),
4038                                              TYPE_MIN_VALUE (domain), 1),
4039                 size_one_node, 1);
4040
4041   /* Check if the dimension size is a multiple of NUNITS, the remaining sum
4042      is a multiple of NUNITS:
4043
4044      dimension_size % GET_MODE_NUNITS (TYPE_MODE (vectype)) == 0 ?
4045    */
4046   mis = int_const_binop (TRUNC_MOD_EXPR, dimension_size,
4047          build_int_cst (NULL_TREE, GET_MODE_NUNITS (TYPE_MODE (vectype))), 1);
4048   if (integer_zerop (mis))
4049     /* This array is aligned. Continue just in order to get the base decl.  */
4050     return vect_compute_array_base_alignment
4051                 (TREE_OPERAND (array, 0), NULL, NULL, NULL);
4052
4053   index = TREE_OPERAND (array, 1);
4054   if (!host_integerp (index, 1))
4055     /* The current index is not constant.  */
4056     return NULL_TREE;
4057
4058   index = int_const_binop (MINUS_EXPR, index, TYPE_MIN_VALUE (domain), 0);
4059
4060   bits_per_vectype = fold_convert (unsigned_type_node,
4061     build_int_cst (NULL_TREE, BITS_PER_UNIT *
4062                  GET_MODE_SIZE (TYPE_MODE (vectype))));
4063   bits_per_vectype_unit =  fold_convert (unsigned_type_node,
4064     build_int_cst (NULL_TREE, BITS_PER_UNIT *
4065                  GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (vectype)))));
4066
4067   /* Add {idx_i * dim_i-1 * ... * dim_0 } to the misalignment computed
4068      earlier:
4069
4070      *misalignment =
4071        (*misalignment + index_val * dimension_size * *prev_dimensions)
4072                                                         % vectype_nunits;
4073    */
4074
4075   mis = int_const_binop (MULT_EXPR, index, dimension_size, 1);
4076   mis = int_const_binop (MULT_EXPR, mis, *prev_dimensions, 1);
4077   mis = int_const_binop (MULT_EXPR, mis, bits_per_vectype_unit, 1);
4078   mis = int_const_binop (PLUS_EXPR, *misalignment, mis, 1);
4079   *misalignment = int_const_binop (TRUNC_MOD_EXPR, mis, bits_per_vectype, 1);
4080
4081
4082   *prev_dimensions = int_const_binop (MULT_EXPR,
4083                                 *prev_dimensions, dimension_size, 1);
4084
4085   return vect_compute_array_base_alignment (TREE_OPERAND (array, 0), vectype,
4086                                             prev_dimensions,
4087                                             misalignment);
4088 }
4089
4090
4091 /* Function vect_compute_data_ref_alignment
4092
4093    Compute the misalignment of the data reference DR.
4094
4095    Output:
4096    1. If during the misalignment computation it is found that the data reference
4097       cannot be vectorized then false is returned.
4098    2. DR_MISALIGNMENT (DR) is defined.
4099
4100    FOR NOW: No analysis is actually performed. Misalignment is calculated
4101    only for trivial cases. TODO.  */
4102
4103 static bool
4104 vect_compute_data_ref_alignment (struct data_reference *dr,
4105                                  loop_vec_info loop_vinfo)
4106 {
4107   tree stmt = DR_STMT (dr);
4108   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4109   tree ref = DR_REF (dr);
4110   tree vectype;
4111   tree scalar_type;
4112   tree offset = size_zero_node;
4113   tree base, bit_offset, alignment;
4114   tree unit_bits = fold_convert (unsigned_type_node,
4115                                  build_int_cst (NULL_TREE, BITS_PER_UNIT));
4116   tree dr_base;
4117   bool base_aligned_p;
4118
4119   if (vect_debug_details (NULL))
4120     fprintf (dump_file, "vect_compute_data_ref_alignment:");
4121
4122   /* Initialize misalignment to unknown.  */
4123   DR_MISALIGNMENT (dr) = -1;
4124
4125   scalar_type = TREE_TYPE (ref);
4126   vectype = get_vectype_for_scalar_type (scalar_type);
4127   if (!vectype)
4128     {
4129       if (vect_debug_details (NULL))
4130         {
4131           fprintf (dump_file, "no vectype for stmt: ");
4132           print_generic_expr (dump_file, stmt, TDF_SLIM);
4133           fprintf (dump_file, " scalar_type: ");
4134           print_generic_expr (dump_file, scalar_type, TDF_DETAILS);
4135         }
4136       /* It is not possible to vectorize this data reference.  */
4137       return false;
4138     }
4139   STMT_VINFO_VECTYPE (stmt_info) = vectype;
4140   gcc_assert (TREE_CODE (ref) == ARRAY_REF || TREE_CODE (ref) == INDIRECT_REF);
4141
4142   if (TREE_CODE (ref) == ARRAY_REF)
4143     dr_base = ref;
4144   else
4145     dr_base = STMT_VINFO_VECT_DR_BASE (stmt_info);
4146
4147   base = vect_get_base_and_bit_offset (dr, dr_base, vectype,
4148                           loop_vinfo, &bit_offset, &base_aligned_p);
4149   if (!base)
4150     {
4151       if (vect_debug_details (NULL))
4152         {
4153           fprintf (dump_file, "Unknown alignment for access: ");
4154           print_generic_expr (dump_file,
4155                               STMT_VINFO_VECT_DR_BASE (stmt_info), TDF_SLIM);
4156         }
4157       return true;
4158     }
4159
4160   if (!base_aligned_p)
4161     {
4162       if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype)))
4163         {
4164           if (vect_debug_details (NULL))
4165             {
4166               fprintf (dump_file, "can't force alignment of ref: ");
4167               print_generic_expr (dump_file, ref, TDF_SLIM);
4168             }
4169           return true;
4170         }
4171
4172       /* Force the alignment of the decl.
4173          NOTE: This is the only change to the code we make during
4174          the analysis phase, before deciding to vectorize the loop.  */
4175       if (vect_debug_details (NULL))
4176         fprintf (dump_file, "force alignment");
4177       DECL_ALIGN (base) = TYPE_ALIGN (vectype);
4178       DECL_USER_ALIGN (base) = TYPE_ALIGN (vectype);
4179     }
4180
4181   /* At this point we assume that the base is aligned, and the offset from it
4182      (including index, if relevant) has been computed and is in BIT_OFFSET.  */
4183   gcc_assert (base_aligned_p
4184               || (TREE_CODE (base) == VAR_DECL
4185                   && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
4186
4187   /* Convert into bytes.  */
4188   offset = int_const_binop (TRUNC_DIV_EXPR, bit_offset, unit_bits, 1);
4189   /* Check that there is no remainder in bits.  */
4190   bit_offset = int_const_binop (TRUNC_MOD_EXPR, bit_offset, unit_bits, 1);
4191   if (!integer_zerop (bit_offset))
4192     {
4193       if (vect_debug_details (NULL))
4194         {
4195           fprintf (dump_file, "bit offset alignment: ");
4196           print_generic_expr (dump_file, bit_offset, TDF_SLIM);
4197         }
4198       return false;
4199     }
4200
4201   /* Alignment required, in bytes:  */
4202   alignment = fold_convert (unsigned_type_node,
4203             build_int_cst (NULL_TREE, TYPE_ALIGN (vectype)/BITS_PER_UNIT));
4204
4205   /* Modulo alignment.  */
4206   offset = int_const_binop (TRUNC_MOD_EXPR, offset, alignment, 0);
4207   if (!host_integerp (offset, 1) || TREE_OVERFLOW (offset))
4208     {
4209       if (vect_debug_details (NULL))
4210         fprintf (dump_file, "unexpected misalign value");
4211       return false;
4212     }
4213
4214   DR_MISALIGNMENT (dr) = tree_low_cst (offset, 1);
4215
4216   if (vect_debug_details (NULL))
4217     fprintf (dump_file, "misalign = %d", DR_MISALIGNMENT (dr));
4218
4219   return true;
4220 }
4221
4222
4223 /* Function vect_compute_array_ref_alignment
4224
4225    Compute the alignment of an array-ref.
4226    The alignment we compute here is relative to
4227    TYPE_ALIGN(VECTYPE) boundary.
4228
4229    Output:
4230    OFFSET - the alignment in bits
4231    Return value - the base of the array-ref. E.g,
4232                   if the array-ref is a.b[k].c[i][j] the returned
4233                   base is a.b[k].c
4234 */
4235
4236 static tree
4237 vect_compute_array_ref_alignment (struct data_reference *dr,
4238                                   loop_vec_info loop_vinfo,
4239                                   tree vectype,
4240                                   tree *offset)
4241 {
4242   tree array_first_index = size_zero_node;
4243   tree init;
4244   tree ref = DR_REF (dr);
4245   tree scalar_type = TREE_TYPE (ref);
4246   tree oprnd0 = TREE_OPERAND (ref, 0);
4247   tree dims = size_one_node;
4248   tree misalign = size_zero_node;
4249   tree next_ref, this_offset = size_zero_node;
4250   tree nunits;
4251   tree nbits;
4252
4253   if (TREE_CODE (TREE_TYPE (ref)) == ARRAY_TYPE)
4254     /* The reference is an array without its last index.  */
4255     next_ref = vect_compute_array_base_alignment (ref, vectype, &dims,
4256                                                   &misalign);
4257   else
4258     next_ref = vect_compute_array_base_alignment (oprnd0, vectype, &dims,
4259                                                   &misalign);
4260   if (!vectype)
4261     /* Alignment is not requested. Just return the base.  */
4262     return next_ref;
4263
4264   /* Compute alignment.  */
4265   if (!host_integerp (misalign, 1) || TREE_OVERFLOW (misalign) || !next_ref)
4266     return NULL_TREE;
4267   this_offset = misalign;
4268
4269   /* Check the first index accessed.  */
4270   if (!vect_get_first_index (ref, &array_first_index))
4271     {
4272       if (vect_debug_details (NULL))
4273         fprintf (dump_file, "no first_index for array.");
4274       return NULL_TREE;
4275     }
4276
4277   /* Check the index of the array_ref.  */
4278   init = initial_condition_in_loop_num (DR_ACCESS_FN (dr, 0),
4279                                         LOOP_VINFO_LOOP (loop_vinfo)->num);
4280
4281   /* FORNOW: In order to simplify the handling of alignment, we make sure
4282      that the first location at which the array is accessed ('init') is on an
4283      'NUNITS' boundary, since we are assuming here that 'array base' is aligned.
4284      This is too conservative, since we require that
4285      both {'array_base' is a multiple of NUNITS} && {'init' is a multiple of
4286      NUNITS}, instead of just {('array_base' + 'init') is a multiple of NUNITS}.
4287      This should be relaxed in the future.  */
4288
4289   if (!init || !host_integerp (init, 0))
4290     {
4291       if (vect_debug_details (NULL))
4292         fprintf (dump_file, "non constant init. ");
4293       return NULL_TREE;
4294     }
4295
4296   /* bytes per scalar element: */
4297   nunits = fold_convert (unsigned_type_node,
4298         build_int_cst (NULL_TREE, GET_MODE_SIZE (TYPE_MODE (scalar_type))));
4299   nbits = int_const_binop (MULT_EXPR, nunits,
4300                            build_int_cst (NULL_TREE, BITS_PER_UNIT), 1);
4301
4302   /* misalign = offset + (init-array_first_index)*nunits*bits_in_byte */
4303   misalign = int_const_binop (MINUS_EXPR, init, array_first_index, 0);
4304   misalign = int_const_binop (MULT_EXPR, misalign, nbits, 0);
4305   misalign = int_const_binop (PLUS_EXPR, misalign, this_offset, 0);
4306
4307   /* TODO: allow negative misalign values.  */
4308   if (!host_integerp (misalign, 1) || TREE_OVERFLOW (misalign))
4309     {
4310       if (vect_debug_details (NULL))
4311         fprintf (dump_file, "unexpected misalign value");
4312       return NULL_TREE;
4313     }
4314   *offset = misalign;
4315   return next_ref;
4316 }
4317
4318
4319 /* Function vect_compute_data_refs_alignment
4320
4321    Compute the misalignment of data references in the loop.
4322    This pass may take place at function granularity instead of at loop
4323    granularity.
4324
4325    FOR NOW: No analysis is actually performed. Misalignment is calculated
4326    only for trivial cases. TODO.  */
4327
4328 static bool
4329 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
4330 {
4331   varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4332   varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4333   unsigned int i;
4334
4335   for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4336     {
4337       struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4338       if (!vect_compute_data_ref_alignment (dr, loop_vinfo))
4339         return false;
4340     }
4341
4342   for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4343     {
4344       struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4345       if (!vect_compute_data_ref_alignment (dr, loop_vinfo))
4346         return false;
4347     }
4348
4349   return true;
4350 }
4351
4352
4353 /* Function vect_enhance_data_refs_alignment
4354
4355    This pass will use loop versioning and loop peeling in order to enhance
4356    the alignment of data references in the loop.
4357
4358    FOR NOW: we assume that whatever versioning/peeling takes place, only the
4359    original loop is to be vectorized; Any other loops that are created by
4360    the transformations performed in this pass - are not supposed to be
4361    vectorized. This restriction will be relaxed.  */
4362
4363 static void
4364 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
4365 {
4366   varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4367   varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4368   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4369   unsigned int i;
4370
4371   /*
4372      This pass will require a cost model to guide it whether to apply peeling
4373      or versioning or a combination of the two. For example, the scheme that
4374      intel uses when given a loop with several memory accesses, is as follows:
4375      choose one memory access ('p') which alignment you want to force by doing
4376      peeling. Then, either (1) generate a loop in which 'p' is aligned and all
4377      other accesses are not necessarily aligned, or (2) use loop versioning to
4378      generate one loop in which all accesses are aligned, and another loop in
4379      which only 'p' is necessarily aligned.
4380
4381      ("Automatic Intra-Register Vectorization for the Intel Architecture",
4382       Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
4383       Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
4384
4385      Devising a cost model is the most critical aspect of this work. It will
4386      guide us on which access to peel for, whether to use loop versioning, how
4387      many versions to create, etc. The cost model will probably consist of
4388      generic considerations as well as target specific considerations (on
4389      powerpc for example, misaligned stores are more painful than misaligned
4390      loads).
4391
4392      Here is the general steps involved in alignment enhancements:
4393
4394      -- original loop, before alignment analysis:
4395         for (i=0; i<N; i++){
4396           x = q[i];                     # DR_MISALIGNMENT(q) = unknown
4397           p[i] = y;                     # DR_MISALIGNMENT(p) = unknown
4398         }
4399
4400      -- After vect_compute_data_refs_alignment:
4401         for (i=0; i<N; i++){
4402           x = q[i];                     # DR_MISALIGNMENT(q) = 3
4403           p[i] = y;                     # DR_MISALIGNMENT(p) = unknown
4404         }
4405
4406      -- Possibility 1: we do loop versioning:
4407      if (p is aligned) {
4408         for (i=0; i<N; i++){    # loop 1A
4409           x = q[i];                     # DR_MISALIGNMENT(q) = 3
4410           p[i] = y;                     # DR_MISALIGNMENT(p) = 0
4411         }
4412      }
4413      else {
4414         for (i=0; i<N; i++){    # loop 1B
4415           x = q[i];                     # DR_MISALIGNMENT(q) = 3
4416           p[i] = y;                     # DR_MISALIGNMENT(p) = unaligned
4417         }
4418      }
4419
4420      -- Possibility 2: we do loop peeling:
4421      for (i = 0; i < 3; i++){   # (scalar loop, not to be vectorized).
4422         x = q[i];
4423         p[i] = y;
4424      }
4425      for (i = 3; i < N; i++){   # loop 2A
4426         x = q[i];                       # DR_MISALIGNMENT(q) = 0
4427         p[i] = y;                       # DR_MISALIGNMENT(p) = unknown
4428      }
4429
4430      -- Possibility 3: combination of loop peeling and versioning:
4431      for (i = 0; i < 3; i++){   # (scalar loop, not to be vectorized).
4432         x = q[i];
4433         p[i] = y;
4434      }
4435      if (p is aligned) {
4436         for (i = 3; i<N; i++){  # loop 3A
4437           x = q[i];                     # DR_MISALIGNMENT(q) = 0
4438           p[i] = y;                     # DR_MISALIGNMENT(p) = 0
4439         }
4440      }
4441      else {
4442         for (i = 3; i<N; i++){  # loop 3B
4443           x = q[i];                     # DR_MISALIGNMENT(q) = 0
4444           p[i] = y;                     # DR_MISALIGNMENT(p) = unaligned
4445         }
4446      }
4447
4448      These loops are later passed to loop_transform to be vectorized. The
4449      vectorizer will use the alignment information to guide the transformation
4450      (whether to generate regular loads/stores, or with special handling for
4451      misalignment).
4452    */
4453
4454   /* (1) Peeling to force alignment.  */
4455
4456   /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
4457      Considerations:
4458      + How many accesses will become aligned due to the peeling
4459      - How many accesses will become unaligned due to the peeling,
4460        and the cost of misaligned accesses.
4461      - The cost of peeling (the extra runtime checks, the increase
4462        in code size).
4463
4464      The scheme we use FORNOW: peel to force the alignment of the first
4465      misaligned store in the loop.
4466      Rationale: misaligned stores are not yet supported.
4467
4468      TODO: Use a better cost model.  */
4469
4470   for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4471     {
4472       struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4473       if (!aligned_access_p (dr))
4474         {
4475           LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr;
4476           LOOP_DO_PEELING_FOR_ALIGNMENT (loop_vinfo) = true;
4477           break;
4478         }
4479     }
4480
4481   if (!LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
4482     {
4483       if (vect_debug_details (loop))
4484         fprintf (dump_file, "Peeling for alignment will not be applied.");
4485       return;
4486     }
4487   else
4488     if (vect_debug_details (loop))
4489       fprintf (dump_file, "Peeling for alignment will be applied.");
4490
4491
4492   /* (1.2) Update the alignment info according to the peeling factor.
4493            If the misalignment of the DR we peel for is M, then the
4494            peeling factor is VF - M, and the misalignment of each access DR_i
4495            in the loop is DR_MISALIGNMENT (DR_i) + VF - M.
4496            If the misalignment of the DR we peel for is unknown, then the
4497            misalignment of each access DR_i in the loop is also unknown.
4498
4499            FORNOW: set the misalignment of the accesses to unknown even
4500                    if the peeling factor is known at compile time.
4501
4502            TODO: - if the peeling factor is known at compile time, use that
4503                    when updating the misalignment info of the loop DRs.
4504                  - consider accesses that are known to have the same
4505                    alignment, even if that alignment is unknown.  */
4506
4507   for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4508     {
4509       struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4510       if (dr == LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
4511         DR_MISALIGNMENT (dr) = 0;
4512       else
4513         DR_MISALIGNMENT (dr) = -1;
4514     }
4515   for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4516     {
4517       struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4518       if (dr == LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
4519         DR_MISALIGNMENT (dr) = 0;
4520       else
4521         DR_MISALIGNMENT (dr) = -1;
4522     }
4523 }
4524
4525
4526 /* Function vect_analyze_data_refs_alignment
4527
4528    Analyze the alignment of the data-references in the loop.
4529    FOR NOW: Until support for misliagned accesses is in place, only if all
4530    accesses are aligned can the loop be vectorized. This restriction will be
4531    relaxed.  */
4532
4533 static bool
4534 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
4535 {
4536   varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4537   varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4538   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4539   enum dr_alignment_support supportable_dr_alignment;
4540   unsigned int i;
4541
4542   if (vect_debug_details (NULL))
4543     fprintf (dump_file, "\n<<vect_analyze_data_refs_alignment>>\n");
4544
4545
4546   /* This pass may take place at function granularity instead of at loop
4547      granularity.  */
4548
4549   if (!vect_compute_data_refs_alignment (loop_vinfo))
4550     {
4551       if (vect_debug_details (loop) || vect_debug_stats (loop))
4552         fprintf (dump_file,
4553                  "not vectorized: can't calculate alignment for data ref.");
4554       return false;
4555     }
4556
4557
4558   /* This pass will decide on using loop versioning and/or loop peeling in
4559      order to enhance the alignment of data references in the loop.  */
4560
4561   vect_enhance_data_refs_alignment (loop_vinfo);
4562
4563
4564   /* Finally, check that all the data references in the loop can be
4565      handled with respect to their alignment.  */
4566
4567   for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4568     {
4569       struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4570       supportable_dr_alignment = vect_supportable_dr_alignment (dr);
4571       if (!supportable_dr_alignment)
4572         {
4573           if (vect_debug_details (loop) || vect_debug_stats (loop))
4574             fprintf (dump_file, "not vectorized: unsupported unaligned load.");
4575           return false;
4576         }
4577     }
4578   for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4579     {
4580       struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4581       supportable_dr_alignment = vect_supportable_dr_alignment (dr);
4582       if (!supportable_dr_alignment)
4583         {
4584           if (vect_debug_details (loop) || vect_debug_stats (loop))
4585             fprintf (dump_file, "not vectorized: unsupported unaligned store.");
4586           return false;
4587         }
4588     }
4589
4590   return true;
4591 }
4592
4593
4594 /* Function vect_analyze_data_ref_access.
4595
4596    Analyze the access pattern of the data-reference DR. For now, a data access
4597    has to consecutive and aligned to be considered vectorizable.  */
4598
4599 static bool
4600 vect_analyze_data_ref_access (struct data_reference *dr)
4601 {
4602   varray_type access_fns = DR_ACCESS_FNS (dr);
4603   tree access_fn;
4604   tree init, step;
4605   unsigned int dimensions, i;
4606
4607   /* Check that in case of multidimensional array ref A[i1][i2]..[iN],
4608      i1, i2, ..., iN-1 are loop invariant (to make sure that the memory
4609      access is contiguous).  */
4610   dimensions = VARRAY_ACTIVE_SIZE (access_fns);
4611
4612   for (i = 1; i < dimensions; i++) /* Not including the last dimension.  */
4613     {
4614       access_fn = DR_ACCESS_FN (dr, i);
4615
4616       if (evolution_part_in_loop_num (access_fn,
4617                                       loop_containing_stmt (DR_STMT (dr))->num))
4618         {
4619           /* Evolution part is not NULL in this loop (it is neither constant
4620              nor invariant).  */
4621           if (vect_debug_details (NULL))
4622             {
4623               fprintf (dump_file,
4624                        "not vectorized: complicated multidim. array access.");
4625               print_generic_expr (dump_file, access_fn, TDF_SLIM);
4626             }
4627           return false;
4628         }
4629     }
4630
4631   access_fn = DR_ACCESS_FN (dr, 0); /*  The last dimension access function.  */
4632   if (!evolution_function_is_constant_p (access_fn)
4633       && !vect_is_simple_iv_evolution (loop_containing_stmt (DR_STMT (dr))->num,
4634                                        access_fn, &init, &step, true))
4635     {
4636       if (vect_debug_details (NULL))
4637         {
4638           fprintf (dump_file, "not vectorized: complicated access function.");
4639           print_generic_expr (dump_file, access_fn, TDF_SLIM);
4640         }
4641       return false;
4642     }
4643
4644   return true;
4645 }
4646
4647
4648 /* Function vect_analyze_data_ref_accesses.
4649
4650    Analyze the access pattern of all the data references in the loop.
4651
4652    FORNOW: the only access pattern that is considered vectorizable is a
4653            simple step 1 (consecutive) access.
4654
4655    FORNOW: handle only arrays and pointer accesses.  */
4656
4657 static bool
4658 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
4659 {
4660   unsigned int i;
4661   varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4662   varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4663
4664   if (vect_debug_details (NULL))
4665     fprintf (dump_file, "\n<<vect_analyze_data_ref_accesses>>\n");
4666
4667   for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4668     {
4669       struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4670       bool ok = vect_analyze_data_ref_access (dr);
4671       if (!ok)
4672         {
4673           if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo))
4674               || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo)))
4675             fprintf (dump_file, "not vectorized: complicated access pattern.");
4676           return false;
4677         }
4678     }
4679
4680   for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4681     {
4682       struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4683       bool ok = vect_analyze_data_ref_access (dr);
4684       if (!ok)
4685         {
4686           if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo))
4687               || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo)))
4688             fprintf (dump_file, "not vectorized: complicated access pattern.");
4689           return false;
4690         }
4691     }
4692
4693   return true;
4694 }
4695
4696
4697 /* Function vect_analyze_pointer_ref_access.
4698
4699    Input:
4700    STMT - a stmt that contains a data-ref
4701    MEMREF - a data-ref in STMT, which is an INDIRECT_REF.
4702
4703    If the data-ref access is vectorizable, return a data_reference structure
4704    that represents it (DR). Otherwise - return NULL.  */
4705
4706 static struct data_reference *
4707 vect_analyze_pointer_ref_access (tree memref, tree stmt, bool is_read)
4708 {
4709   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4710   struct loop *loop = STMT_VINFO_LOOP (stmt_info);
4711   tree access_fn = analyze_scalar_evolution (loop, TREE_OPERAND (memref, 0));
4712   tree init, step;
4713   int step_val;
4714   tree reftype, innertype;
4715   enum machine_mode innermode;
4716   tree indx_access_fn;
4717   int loopnum = loop->num;
4718   struct data_reference *dr;
4719
4720   if (!access_fn)
4721     {
4722       if (vect_debug_stats (loop) || vect_debug_details (loop))
4723         fprintf (dump_file, "not vectorized: complicated pointer access.");
4724       return NULL;
4725     }
4726
4727   if (vect_debug_details (NULL))
4728     {
4729       fprintf (dump_file, "Access function of ptr: ");
4730       print_generic_expr (dump_file, access_fn, TDF_SLIM);
4731     }
4732
4733   if (!vect_is_simple_iv_evolution (loopnum, access_fn, &init, &step, false))
4734     {
4735       if (vect_debug_stats (loop) || vect_debug_details (loop))
4736         fprintf (dump_file, "not vectorized: pointer access is not simple.");
4737       return NULL;
4738     }
4739
4740   STRIP_NOPS (init);
4741
4742   if (!host_integerp (step,0))
4743     {
4744       if (vect_debug_stats (loop) || vect_debug_details (loop))
4745         fprintf (dump_file,
4746                 "not vectorized: non constant step for pointer access.");
4747       return NULL;
4748     }
4749
4750   step_val = TREE_INT_CST_LOW (step);
4751
4752   reftype = TREE_TYPE (TREE_OPERAND (memref, 0));
4753   if (TREE_CODE (reftype) != POINTER_TYPE)
4754     {
4755       if (vect_debug_stats (loop) || vect_debug_details (loop))
4756         fprintf (dump_file, "not vectorized: unexpected pointer access form.");
4757       return NULL;
4758     }
4759
4760   reftype = TREE_TYPE (init);
4761   if (TREE_CODE (reftype) != POINTER_TYPE)
4762     {
4763       if (vect_debug_stats (loop) || vect_debug_details (loop))
4764         fprintf (dump_file, "not vectorized: unexpected pointer access form.");
4765       return NULL;
4766     }
4767
4768   innertype = TREE_TYPE (reftype);
4769   innermode = TYPE_MODE (innertype);
4770   if (GET_MODE_SIZE (innermode) != step_val)
4771     {
4772       /* FORNOW: support only consecutive access */
4773       if (vect_debug_stats (loop) || vect_debug_details (loop))
4774         fprintf (dump_file, "not vectorized: non consecutive access.");
4775       return NULL;
4776     }
4777
4778   indx_access_fn =
4779         build_polynomial_chrec (loopnum, integer_zero_node, integer_one_node);
4780   if (vect_debug_details (NULL))
4781     {
4782       fprintf (dump_file, "Access function of ptr indx: ");
4783       print_generic_expr (dump_file, indx_access_fn, TDF_SLIM);
4784     }
4785   dr = init_data_ref (stmt, memref, init, indx_access_fn, is_read);
4786   return dr;
4787 }
4788
4789
4790 /* Function vect_get_symbl_and_dr.
4791
4792    The function returns SYMBL - the relevant variable for
4793    memory tag (for aliasing purposes).
4794    Also data reference structure DR is created.
4795
4796    Input:
4797    MEMREF - data reference in STMT
4798    IS_READ - TRUE if STMT reads from MEMREF, FALSE if writes to MEMREF
4799
4800    Output:
4801    DR - data_reference struct for MEMREF
4802    return value - the relevant variable for memory tag (for aliasing purposes).
4803
4804 */
4805
4806 static tree
4807 vect_get_symbl_and_dr (tree memref, tree stmt, bool is_read,
4808                        loop_vec_info loop_vinfo, struct data_reference **dr)
4809 {
4810   tree symbl, oprnd0, oprnd1;
4811   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4812   tree offset;
4813   tree array_base, base;
4814   struct data_reference *new_dr;
4815   bool base_aligned_p;
4816
4817   *dr = NULL;
4818   switch (TREE_CODE (memref))
4819     {
4820     case INDIRECT_REF:
4821       new_dr = vect_analyze_pointer_ref_access (memref, stmt, is_read);
4822       if (! new_dr)
4823         return NULL_TREE;
4824       *dr = new_dr;
4825       symbl = DR_BASE_NAME (new_dr);
4826       STMT_VINFO_VECT_DR_BASE (stmt_info) = symbl;
4827
4828       switch (TREE_CODE (symbl))
4829         {
4830         case PLUS_EXPR:
4831         case MINUS_EXPR:
4832           oprnd0 = TREE_OPERAND (symbl, 0);
4833           oprnd1 = TREE_OPERAND (symbl, 1);
4834
4835           STRIP_NOPS(oprnd1);
4836           /* Only {address_base + offset} expressions are supported,
4837              where address_base can be POINTER_TYPE or ARRAY_TYPE and
4838              offset can be anything but POINTER_TYPE or ARRAY_TYPE.
4839              TODO: swap operands if {offset + address_base}.  */
4840           if ((TREE_CODE (TREE_TYPE (oprnd1)) == POINTER_TYPE
4841                && TREE_CODE (oprnd1) != INTEGER_CST)
4842               || TREE_CODE (TREE_TYPE (oprnd1)) == ARRAY_TYPE)
4843             return NULL_TREE;
4844
4845           if (TREE_CODE (TREE_TYPE (oprnd0)) == POINTER_TYPE)
4846             symbl = oprnd0;
4847           else
4848             symbl = vect_get_symbl_and_dr (oprnd0, stmt, is_read,
4849                                            loop_vinfo, &new_dr);
4850
4851         case SSA_NAME:
4852         case ADDR_EXPR:
4853           /* symbl remains unchanged.  */
4854           break;
4855
4856         default:
4857           if (vect_debug_details (NULL))
4858             {
4859               fprintf (dump_file, "unhandled data ref: ");
4860               print_generic_expr (dump_file, memref, TDF_SLIM);
4861               fprintf (dump_file, " (symbl ");
4862               print_generic_expr (dump_file, symbl, TDF_SLIM);
4863               fprintf (dump_file, ") in stmt  ");
4864               print_generic_expr (dump_file, stmt, TDF_SLIM);
4865             }
4866           return NULL_TREE;
4867         }
4868       break;
4869
4870     case ARRAY_REF:
4871       offset = size_zero_node;
4872
4873       /* Store the array base in the stmt info.
4874          For one dimensional array ref a[i], the base is a,
4875          for multidimensional a[i1][i2]..[iN], the base is
4876          a[i1][i2]..[iN-1].  */
4877       array_base = TREE_OPERAND (memref, 0);
4878       STMT_VINFO_VECT_DR_BASE (stmt_info) = array_base;
4879
4880       new_dr = analyze_array (stmt, memref, is_read);
4881       *dr = new_dr;
4882
4883       /* Find the relevant symbol for aliasing purposes.  */
4884       base = DR_BASE_NAME (new_dr);
4885       switch (TREE_CODE (base))
4886         {
4887         case VAR_DECL:
4888           symbl = base;
4889           break;
4890
4891         case INDIRECT_REF:
4892           symbl = TREE_OPERAND (base, 0);
4893           break;
4894
4895         case COMPONENT_REF:
4896           /* Could have recorded more accurate information -
4897              i.e, the actual FIELD_DECL that is being referenced -
4898              but later passes expect VAR_DECL as the nmt.  */
4899           symbl = vect_get_base_and_bit_offset (new_dr, base, NULL_TREE,
4900                                         loop_vinfo, &offset, &base_aligned_p);
4901           if (symbl)
4902             break;
4903           /* fall through */
4904         default:
4905           if (vect_debug_details (NULL))
4906             {
4907               fprintf (dump_file, "unhandled struct/class field access ");
4908               print_generic_expr (dump_file, stmt, TDF_SLIM);
4909             }
4910           return NULL_TREE;
4911         }
4912       break;
4913
4914     default:
4915       if (vect_debug_details (NULL))
4916         {
4917           fprintf (dump_file, "unhandled data ref: ");
4918           print_generic_expr (dump_file, memref, TDF_SLIM);
4919           fprintf (dump_file, " in stmt  ");
4920           print_generic_expr (dump_file, stmt, TDF_SLIM);
4921         }
4922       return NULL_TREE;
4923     }
4924   return symbl;
4925 }
4926
4927
4928 /* Function vect_analyze_data_refs.
4929
4930    Find all the data references in the loop.
4931
4932    FORNOW: Handle aligned INDIRECT_REFs and ARRAY_REFs
4933            which base is really an array (not a pointer) and which alignment
4934            can be forced. This restriction will be relaxed.  */
4935
4936 static bool
4937 vect_analyze_data_refs (loop_vec_info loop_vinfo)
4938 {
4939   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4940   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
4941   int nbbs = loop->num_nodes;
4942   block_stmt_iterator si;
4943   int j;
4944   struct data_reference *dr;
4945   tree tag;
4946   tree address_base;
4947   bool base_aligned_p;
4948   tree offset;
4949
4950   if (vect_debug_details (NULL))
4951     fprintf (dump_file, "\n<<vect_analyze_data_refs>>\n");
4952
4953   for (j = 0; j < nbbs; j++)
4954     {
4955       basic_block bb = bbs[j];
4956       for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
4957         {
4958           bool is_read = false;
4959           tree stmt = bsi_stmt (si);
4960           stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4961           v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
4962           v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
4963           vuse_optype vuses = STMT_VUSE_OPS (stmt);
4964           varray_type *datarefs = NULL;
4965           int nvuses, nv_may_defs, nv_must_defs;
4966           tree memref = NULL;
4967           tree symbl;
4968
4969           /* Assumption: there exists a data-ref in stmt, if and only if
4970              it has vuses/vdefs.  */
4971
4972           if (!vuses && !v_may_defs && !v_must_defs)
4973             continue;
4974
4975           nvuses = NUM_VUSES (vuses);
4976           nv_may_defs = NUM_V_MAY_DEFS (v_may_defs);
4977           nv_must_defs = NUM_V_MUST_DEFS (v_must_defs);
4978
4979           if (nvuses && (nv_may_defs || nv_must_defs))
4980             {
4981               if (vect_debug_details (NULL))
4982                 {
4983                   fprintf (dump_file, "unexpected vdefs and vuses in stmt: ");
4984                   print_generic_expr (dump_file, stmt, TDF_SLIM);
4985                 }
4986               return false;
4987             }
4988
4989           if (TREE_CODE (stmt) != MODIFY_EXPR)
4990             {
4991               if (vect_debug_details (NULL))
4992                 {
4993                   fprintf (dump_file, "unexpected vops in stmt: ");
4994                   print_generic_expr (dump_file, stmt, TDF_SLIM);
4995                 }
4996               return false;
4997             }
4998
4999           if (vuses)
5000             {
5001               memref = TREE_OPERAND (stmt, 1);
5002               datarefs = &(LOOP_VINFO_DATAREF_READS (loop_vinfo));
5003               is_read = true;
5004             }
5005           else /* vdefs */
5006             {
5007               memref = TREE_OPERAND (stmt, 0);
5008               datarefs = &(LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
5009               is_read = false;
5010             }
5011
5012           /* Analyze MEMREF. If it is of a supported form, build data_reference
5013              struct for it (DR) and find the relevant symbol for aliasing
5014              purposes.  */
5015           symbl = vect_get_symbl_and_dr (memref, stmt, is_read, loop_vinfo,
5016                                          &dr);
5017           if (!symbl)
5018             {
5019               if (vect_debug_stats (loop) || vect_debug_details (loop))
5020                 {
5021                   fprintf (dump_file, "not vectorized: unhandled data ref: ");
5022                   print_generic_expr (dump_file, stmt, TDF_SLIM);
5023                 }
5024               return false;
5025             }
5026
5027           /* Find and record the memtag assigned to this data-ref.  */
5028            switch (TREE_CODE (symbl))
5029             {
5030             case VAR_DECL:
5031               STMT_VINFO_MEMTAG (stmt_info) = symbl;
5032               break;
5033
5034             case SSA_NAME:
5035               symbl = SSA_NAME_VAR (symbl);
5036               tag = get_var_ann (symbl)->type_mem_tag;
5037               if (!tag)
5038                 {
5039                   tree ptr = TREE_OPERAND (memref, 0);
5040                   if (TREE_CODE (ptr) == SSA_NAME)
5041                     tag = get_var_ann (SSA_NAME_VAR (ptr))->type_mem_tag;
5042                 }
5043               if (!tag)
5044                 {
5045                   if (vect_debug_stats (loop) || vect_debug_details (loop))
5046                     fprintf (dump_file, "not vectorized: no memtag for ref.");
5047                   return false;
5048                 }
5049               STMT_VINFO_MEMTAG (stmt_info) = tag;
5050               break;
5051
5052             case ADDR_EXPR:
5053               address_base = TREE_OPERAND (symbl, 0);
5054
5055               switch (TREE_CODE (address_base))
5056                 {
5057                 case ARRAY_REF:
5058                   dr = analyze_array (stmt, TREE_OPERAND (symbl, 0),
5059                                       DR_IS_READ(dr));
5060                   STMT_VINFO_MEMTAG (stmt_info) =
5061                      vect_get_base_and_bit_offset (dr, DR_BASE_NAME (dr), NULL_TREE,
5062                                                    loop_vinfo, &offset,
5063                                                    &base_aligned_p);
5064                   break;
5065
5066                 case VAR_DECL:
5067                   STMT_VINFO_MEMTAG (stmt_info) = address_base;
5068                   break;
5069
5070                 default:
5071                   if (vect_debug_stats (loop) || vect_debug_details (loop))
5072                     {
5073                       fprintf (dump_file,
5074                                "not vectorized: unhandled address expr: ");
5075                       print_generic_expr (dump_file, stmt, TDF_SLIM);
5076                     }
5077                   return false;
5078                 }
5079               break;
5080
5081             default:
5082               if (vect_debug_stats (loop) || vect_debug_details (loop))
5083                 {
5084                   fprintf (dump_file, "not vectorized: unsupported data-ref: ");
5085                   print_generic_expr (dump_file, memref, TDF_SLIM);
5086                 }
5087               return false;
5088             }
5089
5090           VARRAY_PUSH_GENERIC_PTR (*datarefs, dr);
5091           STMT_VINFO_DATA_REF (stmt_info) = dr;
5092         }
5093     }
5094
5095   return true;
5096 }
5097
5098
5099 /* Utility functions used by vect_mark_stmts_to_be_vectorized.  */
5100
5101 /* Function vect_mark_relevant.
5102
5103    Mark STMT as "relevant for vectorization" and add it to WORKLIST.  */
5104
5105 static void
5106 vect_mark_relevant (varray_type worklist, tree stmt)
5107 {
5108   stmt_vec_info stmt_info;
5109
5110   if (vect_debug_details (NULL))
5111     fprintf (dump_file, "mark relevant.");
5112
5113   if (TREE_CODE (stmt) == PHI_NODE)
5114     {
5115       VARRAY_PUSH_TREE (worklist, stmt);
5116       return;
5117     }
5118
5119   stmt_info = vinfo_for_stmt (stmt);
5120
5121   if (!stmt_info)
5122     {
5123       if (vect_debug_details (NULL))
5124         {
5125           fprintf (dump_file, "mark relevant: no stmt info!!.");
5126           print_generic_expr (dump_file, stmt, TDF_SLIM);
5127         }
5128       return;
5129     }
5130
5131   if (STMT_VINFO_RELEVANT_P (stmt_info))
5132     {
5133       if (vect_debug_details (NULL))
5134         fprintf (dump_file, "already marked relevant.");
5135       return;
5136     }
5137
5138   STMT_VINFO_RELEVANT_P (stmt_info) = 1;
5139   VARRAY_PUSH_TREE (worklist, stmt);
5140 }
5141
5142
5143 /* Function vect_stmt_relevant_p.
5144
5145    Return true if STMT in loop that is represented by LOOP_VINFO is
5146    "relevant for vectorization".
5147
5148    A stmt is considered "relevant for vectorization" if:
5149    - it has uses outside the loop.
5150    - it has vdefs (it alters memory).
5151    - control stmts in the loop (except for the exit condition).
5152
5153    CHECKME: what other side effects would the vectorizer allow?  */
5154
5155 static bool
5156 vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo)
5157 {
5158   v_may_def_optype v_may_defs;
5159   v_must_def_optype v_must_defs;
5160   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5161   int i;
5162   dataflow_t df;
5163   int num_uses;
5164
5165   /* cond stmt other than loop exit cond.  */
5166   if (is_ctrl_stmt (stmt) && (stmt != LOOP_VINFO_EXIT_COND (loop_vinfo)))
5167     return true;
5168
5169   /* changing memory.  */
5170   v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
5171   v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
5172   if (v_may_defs || v_must_defs)
5173     {
5174       if (vect_debug_details (NULL))
5175         fprintf (dump_file, "vec_stmt_relevant_p: stmt has vdefs.");
5176       return true;
5177     }
5178
5179   /* uses outside the loop.  */
5180   df = get_immediate_uses (stmt);
5181   num_uses = num_immediate_uses (df);
5182   for (i = 0; i < num_uses; i++)
5183     {
5184       tree use = immediate_use (df, i);
5185       basic_block bb = bb_for_stmt (use);
5186       if (!flow_bb_inside_loop_p (loop, bb))
5187         {
5188           if (vect_debug_details (NULL))
5189             fprintf (dump_file, "vec_stmt_relevant_p: used out of loop.");
5190           return true;
5191         }
5192     }
5193
5194   return false;
5195 }
5196
5197
5198 /* Function vect_mark_stmts_to_be_vectorized.
5199
5200    Not all stmts in the loop need to be vectorized. For example:
5201
5202      for i...
5203        for j...
5204    1.    T0 = i + j
5205    2.    T1 = a[T0]
5206
5207    3.    j = j + 1
5208
5209    Stmt 1 and 3 do not need to be vectorized, because loop control and
5210    addressing of vectorized data-refs are handled differently.
5211
5212    This pass detects such stmts.  */
5213
5214 static bool
5215 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
5216 {
5217   varray_type worklist;
5218   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5219   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
5220   unsigned int nbbs = loop->num_nodes;
5221   block_stmt_iterator si;
5222   tree stmt;
5223   stmt_ann_t ann;
5224   unsigned int i;
5225   int j;
5226   use_optype use_ops;
5227   stmt_vec_info stmt_info;
5228
5229   if (vect_debug_details (NULL))
5230     fprintf (dump_file, "\n<<vect_mark_stmts_to_be_vectorized>>\n");
5231
5232   VARRAY_TREE_INIT (worklist, 64, "work list");
5233
5234   /* 1. Init worklist.  */
5235
5236   for (i = 0; i < nbbs; i++)
5237     {
5238       basic_block bb = bbs[i];
5239       for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
5240         {
5241           stmt = bsi_stmt (si);
5242
5243           if (vect_debug_details (NULL))
5244             {
5245               fprintf (dump_file, "init: stmt relevant? ");
5246               print_generic_expr (dump_file, stmt, TDF_SLIM);
5247             }
5248
5249           stmt_info = vinfo_for_stmt (stmt);
5250           STMT_VINFO_RELEVANT_P (stmt_info) = 0;
5251
5252           if (vect_stmt_relevant_p (stmt, loop_vinfo))
5253             vect_mark_relevant (worklist, stmt);
5254         }
5255     }
5256
5257
5258   /* 2. Process_worklist */
5259
5260   while (VARRAY_ACTIVE_SIZE (worklist) > 0)
5261     {
5262       stmt = VARRAY_TOP_TREE (worklist);
5263       VARRAY_POP (worklist);
5264
5265       if (vect_debug_details (NULL))
5266         {
5267           fprintf (dump_file, "worklist: examine stmt: ");
5268           print_generic_expr (dump_file, stmt, TDF_SLIM);
5269         }
5270
5271       /* Examine the USES in this statement. Mark all the statements which
5272          feed this statement's uses as "relevant", unless the USE is used as
5273          an array index.  */
5274
5275       if (TREE_CODE (stmt) == PHI_NODE)
5276         {
5277           /* follow the def-use chain inside the loop.  */
5278           for (j = 0; j < PHI_NUM_ARGS (stmt); j++)
5279             {
5280               tree arg = PHI_ARG_DEF (stmt, j);
5281               tree def_stmt = NULL_TREE;
5282               basic_block bb;
5283               if (!vect_is_simple_use (arg, loop, &def_stmt))
5284                 {
5285                   if (vect_debug_details (NULL))
5286                     fprintf (dump_file, "worklist: unsupported use.");
5287                   varray_clear (worklist);
5288                   return false;
5289                 }
5290               if (!def_stmt)
5291                 continue;
5292
5293               if (vect_debug_details (NULL))
5294                 {
5295                   fprintf (dump_file, "worklist: def_stmt: ");
5296                   print_generic_expr (dump_file, def_stmt, TDF_SLIM);
5297                 }
5298
5299               bb = bb_for_stmt (def_stmt);
5300               if (flow_bb_inside_loop_p (loop, bb))
5301                 vect_mark_relevant (worklist, def_stmt);
5302             }
5303         }
5304
5305       ann = stmt_ann (stmt);
5306       use_ops = USE_OPS (ann);
5307
5308       for (i = 0; i < NUM_USES (use_ops); i++)
5309         {
5310           tree use = USE_OP (use_ops, i);
5311
5312           /* We are only interested in uses that need to be vectorized. Uses
5313              that are used for address computation are not considered relevant.
5314            */
5315           if (exist_non_indexing_operands_for_use_p (use, stmt))
5316             {
5317               tree def_stmt = NULL_TREE;
5318               basic_block bb;
5319               if (!vect_is_simple_use (use, loop, &def_stmt))
5320                 {
5321                   if (vect_debug_details (NULL))
5322                     fprintf (dump_file, "worklist: unsupported use.");
5323                   varray_clear (worklist);
5324                   return false;
5325                 }
5326
5327               if (!def_stmt)
5328                 continue;
5329
5330               if (vect_debug_details (NULL))
5331                 {
5332                   fprintf (dump_file, "worklist: examine use %d: ", i);
5333                   print_generic_expr (dump_file, use, TDF_SLIM);
5334                 }
5335
5336               bb = bb_for_stmt (def_stmt);
5337               if (flow_bb_inside_loop_p (loop, bb))
5338                 vect_mark_relevant (worklist, def_stmt);
5339             }
5340         }
5341     }                           /* while worklist */
5342
5343   varray_clear (worklist);
5344   return true;
5345 }
5346
5347
5348 /* Function vect_can_advance_ivs_p
5349
5350    In case the number of iterations that LOOP iterates in unknown at compile
5351    time, an epilog loop will be generated, and the loop induction variables
5352    (IVs) will be "advanced" to the value they are supposed to take just before
5353    the epilog loop.  Here we check that the access function of the loop IVs
5354    and the expression that represents the loop bound are simple enough.
5355    These restrictions will be relaxed in the future.  */
5356
5357 static bool
5358 vect_can_advance_ivs_p (struct loop *loop)
5359 {
5360   basic_block bb = loop->header;
5361   tree phi;
5362
5363   /* Analyze phi functions of the loop header.  */
5364
5365   for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
5366     {
5367       tree access_fn = NULL;
5368       tree evolution_part;
5369
5370       if (vect_debug_details (NULL))
5371         {
5372           fprintf (dump_file, "Analyze phi: ");
5373           print_generic_expr (dump_file, phi, TDF_SLIM);
5374         }
5375
5376       /* Skip virtual phi's. The data dependences that are associated with
5377          virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */
5378
5379       if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
5380         {
5381           if (vect_debug_details (NULL))
5382             fprintf (dump_file, "virtual phi. skip.");
5383           continue;
5384         }
5385
5386       /* Analyze the evolution function.  */
5387
5388       access_fn = instantiate_parameters
5389         (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
5390
5391       if (!access_fn)
5392         {
5393           if (vect_debug_details (NULL))
5394             fprintf (dump_file, "No Access function.");
5395           return false;
5396         }
5397
5398       if (vect_debug_details (NULL))
5399         {
5400           fprintf (dump_file, "Access function of PHI: ");
5401           print_generic_expr (dump_file, access_fn, TDF_SLIM);
5402         }
5403
5404       evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
5405
5406       if (evolution_part == NULL_TREE)
5407         return false;
5408
5409       /* FORNOW: We do not transform initial conditions of IVs
5410          which evolution functions are a polynomial of degree >= 2.  */
5411
5412       if (tree_is_chrec (evolution_part))
5413         return false;
5414     }
5415
5416   return true;
5417 }
5418
5419
5420 /* Function vect_get_loop_niters.
5421
5422    Determine how many iterations the loop is executed.
5423    If an expression that represents the number of iterations
5424    can be constructed, place it in NUMBER_OF_ITERATIONS.
5425    Return the loop exit condition.  */
5426
5427 static tree
5428 vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
5429 {
5430   tree niters;
5431
5432   if (vect_debug_details (NULL))
5433     fprintf (dump_file, "\n<<get_loop_niters>>\n");
5434
5435   niters = number_of_iterations_in_loop (loop);
5436
5437   if (niters != NULL_TREE
5438       && niters != chrec_dont_know)
5439     {
5440       *number_of_iterations = niters;
5441
5442       if (vect_debug_details (NULL))
5443         {
5444           fprintf (dump_file, "==> get_loop_niters:" );
5445           print_generic_expr (dump_file, *number_of_iterations, TDF_SLIM);
5446         }
5447     }
5448
5449   return get_loop_exit_condition (loop);
5450 }
5451
5452
5453 /* Function vect_analyze_loop_form.
5454
5455    Verify the following restrictions (some may be relaxed in the future):
5456    - it's an inner-most loop
5457    - number of BBs = 2 (which are the loop header and the latch)
5458    - the loop has a pre-header
5459    - the loop has a single entry and exit
5460    - the loop exit condition is simple enough, and the number of iterations
5461      can be analyzed (a countable loop).  */
5462
5463 static loop_vec_info
5464 vect_analyze_loop_form (struct loop *loop)
5465 {
5466   loop_vec_info loop_vinfo;
5467   tree loop_cond;
5468   tree number_of_iterations = NULL;
5469   bool rescan = false;
5470
5471   if (vect_debug_details (loop))
5472     fprintf (dump_file, "\n<<vect_analyze_loop_form>>\n");
5473
5474   if (loop->inner
5475       || !loop->single_exit
5476       || loop->num_nodes != 2
5477       || EDGE_COUNT (loop->header->preds) != 2
5478       || loop->num_entries != 1)
5479     {
5480       if (vect_debug_stats (loop) || vect_debug_details (loop))
5481         {
5482           fprintf (dump_file, "not vectorized: bad loop form. ");
5483           if (loop->inner)
5484             fprintf (dump_file, "nested loop.");
5485           else if (!loop->single_exit)
5486             fprintf (dump_file, "multiple exits.");
5487           else if (loop->num_nodes != 2)
5488             fprintf (dump_file, "too many BBs in loop.");
5489           else if (EDGE_COUNT (loop->header->preds) != 2)
5490             fprintf (dump_file, "too many incoming edges.");
5491           else if (loop->num_entries != 1)
5492             fprintf (dump_file, "too many entries.");
5493         }
5494
5495       return NULL;
5496     }
5497
5498   /* We assume that the loop exit condition is at the end of the loop. i.e,
5499      that the loop is represented as a do-while (with a proper if-guard
5500      before the loop if needed), where the loop header contains all the
5501      executable statements, and the latch is empty.  */
5502   if (!empty_block_p (loop->latch))
5503     {
5504       if (vect_debug_stats (loop) || vect_debug_details (loop))
5505         fprintf (dump_file, "not vectorized: unexpectd loop form.");
5506       return NULL;
5507     }
5508
5509   /* Make sure we have a preheader basic block.  */
5510   if (!loop->pre_header)
5511     {
5512       rescan = true;
5513       loop_split_edge_with (loop_preheader_edge (loop), NULL);
5514     }
5515
5516   /* Make sure there exists a single-predecessor exit bb:  */
5517   if (EDGE_COUNT (loop->exit_edges[0]->dest->preds) != 1)
5518     {
5519       rescan = true;
5520       loop_split_edge_with (loop->exit_edges[0], NULL);
5521     }
5522
5523   if (rescan)
5524     {
5525       flow_loop_scan (loop, LOOP_ALL);
5526       /* Flow loop scan does not update loop->single_exit field.  */
5527       loop->single_exit = loop->exit_edges[0];
5528     }
5529
5530   if (empty_block_p (loop->header))
5531     {
5532       if (vect_debug_stats (loop) || vect_debug_details (loop))
5533         fprintf (dump_file, "not vectorized: empty loop.");
5534       return NULL;
5535     }
5536
5537   loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
5538   if (!loop_cond)
5539     {
5540       if (vect_debug_stats (loop) || vect_debug_details (loop))
5541         fprintf (dump_file, "not vectorized: complicated exit condition.");
5542       return NULL;
5543     }
5544
5545   if (!number_of_iterations)
5546     {
5547       if (vect_debug_stats (loop) || vect_debug_details (loop))
5548         fprintf (dump_file,
5549                  "not vectorized: number of iterations cannot be computed.");
5550       return NULL;
5551     }
5552
5553   if (chrec_contains_undetermined (number_of_iterations))
5554     {
5555       if (vect_debug_details (NULL))
5556         fprintf (dump_file, "Infinite number of iterations.");
5557       return false;
5558     }
5559
5560   loop_vinfo = new_loop_vec_info (loop);
5561   LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
5562
5563   if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
5564     {
5565       if (vect_debug_details (loop))
5566         {
5567           fprintf (dump_file, "loop bound unknown.\n");
5568           fprintf (dump_file, "Symbolic number of iterations is ");
5569           print_generic_expr (dump_file, number_of_iterations, TDF_DETAILS);
5570         }
5571     }
5572   else
5573   if (LOOP_VINFO_INT_NITERS (loop_vinfo) == 0)
5574     {
5575       if (vect_debug_stats (loop) || vect_debug_details (loop))
5576         fprintf (dump_file, "not vectorized: number of iterations = 0.");
5577       return NULL;
5578     }
5579
5580   LOOP_VINFO_EXIT_COND (loop_vinfo) = loop_cond;
5581
5582   return loop_vinfo;
5583 }
5584
5585
5586 /* Function vect_analyze_loop.
5587
5588    Apply a set of analyses on LOOP, and create a loop_vec_info struct
5589    for it. The different analyses will record information in the
5590    loop_vec_info struct.  */
5591
5592 static loop_vec_info
5593 vect_analyze_loop (struct loop *loop)
5594 {
5595   bool ok;
5596   loop_vec_info loop_vinfo;
5597
5598   if (vect_debug_details (NULL))
5599     fprintf (dump_file, "\n<<<<<<< analyze_loop_nest >>>>>>>\n");
5600
5601   /* Check the CFG characteristics of the loop (nesting, entry/exit, etc.  */
5602
5603   loop_vinfo = vect_analyze_loop_form (loop);
5604   if (!loop_vinfo)
5605     {
5606       if (vect_debug_details (loop))
5607         fprintf (dump_file, "bad loop form.");
5608       return NULL;
5609     }
5610
5611   /* Find all data references in the loop (which correspond to vdefs/vuses)
5612      and analyze their evolution in the loop.
5613
5614      FORNOW: Handle only simple, array references, which
5615      alignment can be forced, and aligned pointer-references.  */
5616
5617   ok = vect_analyze_data_refs (loop_vinfo);
5618   if (!ok)
5619     {
5620       if (vect_debug_details (loop))
5621         fprintf (dump_file, "bad data references.");
5622       destroy_loop_vec_info (loop_vinfo);
5623       return NULL;
5624     }
5625
5626   /* Data-flow analysis to detect stmts that do not need to be vectorized.  */
5627
5628   ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
5629   if (!ok)
5630     {
5631       if (vect_debug_details (loop))
5632         fprintf (dump_file, "unexpected pattern.");
5633       if (vect_debug_details (loop))
5634         fprintf (dump_file, "not vectorized: unexpected pattern.");
5635       destroy_loop_vec_info (loop_vinfo);
5636       return NULL;
5637     }
5638
5639   /* Check that all cross-iteration scalar data-flow cycles are OK.
5640      Cross-iteration cycles caused by virtual phis are analyzed separately.  */
5641
5642   ok = vect_analyze_scalar_cycles (loop_vinfo);
5643   if (!ok)
5644     {
5645       if (vect_debug_details (loop))
5646         fprintf (dump_file, "bad scalar cycle.");
5647       destroy_loop_vec_info (loop_vinfo);
5648       return NULL;
5649     }
5650
5651   /* Analyze data dependences between the data-refs in the loop.
5652      FORNOW: fail at the first data dependence that we encounter.  */
5653
5654   ok = vect_analyze_data_ref_dependences (loop_vinfo);
5655   if (!ok)
5656     {
5657       if (vect_debug_details (loop))
5658         fprintf (dump_file, "bad data dependence.");
5659       destroy_loop_vec_info (loop_vinfo);
5660       return NULL;
5661     }
5662
5663   /* Analyze the access patterns of the data-refs in the loop (consecutive,
5664      complex, etc.). FORNOW: Only handle consecutive access pattern.  */
5665
5666   ok = vect_analyze_data_ref_accesses (loop_vinfo);
5667   if (!ok)
5668     {
5669       if (vect_debug_details (loop))
5670         fprintf (dump_file, "bad data access.");
5671       destroy_loop_vec_info (loop_vinfo);
5672       return NULL;
5673     }
5674
5675   /* Analyze the alignment of the data-refs in the loop.
5676      FORNOW: Only aligned accesses are handled.  */
5677
5678   ok = vect_analyze_data_refs_alignment (loop_vinfo);
5679   if (!ok)
5680     {
5681       if (vect_debug_details (loop))
5682         fprintf (dump_file, "bad data alignment.");
5683       destroy_loop_vec_info (loop_vinfo);
5684       return NULL;
5685     }
5686
5687   /* Scan all the operations in the loop and make sure they are
5688      vectorizable.  */
5689
5690   ok = vect_analyze_operations (loop_vinfo);
5691   if (!ok)
5692     {
5693       if (vect_debug_details (loop))
5694         fprintf (dump_file, "bad operation or unsupported loop bound.");
5695       destroy_loop_vec_info (loop_vinfo);
5696       return NULL;
5697     }
5698
5699   LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
5700
5701   return loop_vinfo;
5702 }
5703
5704
5705 /* Function need_imm_uses_for.
5706
5707    Return whether we ought to include information for 'var'
5708    when calculating immediate uses.  For this pass we only want use
5709    information for non-virtual variables.  */
5710
5711 static bool
5712 need_imm_uses_for (tree var)
5713 {
5714   return is_gimple_reg (var);
5715 }
5716
5717
5718 /* Function vectorize_loops.
5719
5720    Entry Point to loop vectorization phase.  */
5721
5722 void
5723 vectorize_loops (struct loops *loops)
5724 {
5725   unsigned int i, loops_num;
5726   unsigned int num_vectorized_loops = 0;
5727
5728   /* Does the target support SIMD?  */
5729   /* FORNOW: until more sophisticated machine modelling is in place.  */
5730   if (!UNITS_PER_SIMD_WORD)
5731     {
5732       if (vect_debug_details (NULL))
5733         fprintf (dump_file, "vectorizer: target vector size is not defined.");
5734       return;
5735     }
5736
5737 #ifdef ENABLE_CHECKING
5738   verify_loop_closed_ssa ();
5739 #endif
5740
5741   compute_immediate_uses (TDFA_USE_OPS, need_imm_uses_for);
5742
5743   /*  ----------- Analyze loops. -----------  */
5744
5745   /* If some loop was duplicated, it gets bigger number
5746      than all previously defined loops. This fact allows us to run
5747      only over initial loops skipping newly generated ones.  */
5748   loops_num = loops->num;
5749   for (i = 1; i < loops_num; i++)
5750     {
5751       loop_vec_info loop_vinfo;
5752       struct loop *loop = loops->parray[i];
5753
5754       if (!loop)
5755         continue;
5756
5757       loop_vinfo = vect_analyze_loop (loop);
5758       loop->aux = loop_vinfo;
5759
5760       if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
5761         continue;
5762
5763       vect_transform_loop (loop_vinfo, loops);
5764       num_vectorized_loops++;
5765     }
5766
5767   if (vect_debug_stats (NULL) || vect_debug_details (NULL))
5768     fprintf (dump_file, "\nvectorized %u loops in function.\n",
5769              num_vectorized_loops);
5770
5771   /*  ----------- Finalize. -----------  */
5772
5773   free_df ();
5774   for (i = 1; i < loops_num; i++)
5775     {
5776       struct loop *loop = loops->parray[i];
5777       loop_vec_info loop_vinfo;
5778
5779       if (!loop)
5780         continue;
5781       loop_vinfo = loop->aux;
5782       destroy_loop_vec_info (loop_vinfo);
5783       loop->aux = NULL;
5784     }
5785
5786   rewrite_into_ssa (false);
5787   rewrite_into_loop_closed_ssa (); /* FORNOW */
5788   bitmap_clear (vars_to_rename);
5789 }